From 60917ad715a4157859c551b30ef57676764606b3 Mon Sep 17 00:00:00 2001
From: Louis
Date: Mon, 15 Apr 2013 22:37:13 +0200
Subject: [PATCH] Remplacement de la table de conversion \IeC => ASCII par un traitement par PlasTeX
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Le code est plus propre, et tous les caractères sont gérés.
---
 index.py | 43 +++++++------------------------------------
 1 file changed, 7 insertions(+), 36 deletions(-)

diff --git a/index.py b/index.py
index 4bd529b4..41d09d15 100644
--- a/index.py
+++ b/index.py
@@ -9,6 +9,8 @@
 # src is the .sxd file generated by latex
 #
 
+from plasTeX.TeX import TeX
+from unidecode import unidecode
 import sys
 import re
 import locale
@@ -17,47 +19,16 @@ import warnings
 # Pattern set to ignore latex command in title prefix
 keywordPattern = re.compile(r"^%(\w+)\s?(.*)$")
 firstLetterPattern = re.compile(r"^(?:\{?\\\w+\}?)*[^\w]*(\w)")
-iecPattern = re.compile(r"\IeC {\\(.*?)}")
-replacePattern = {
-    '`A': 'À',
-    '`a': 'à',
-    '^a': 'â',
-    "'a": 'á',
-    "~a": 'ã',
-    'oe': 'œ',
-    "'e" : 'é',
-    "`e" : 'è',
-    "^e" : 'ê',
-    '"e' : 'ë',
-    "'E" : 'É',
-    "`E" : 'È',
-    "'o" : 'ó',
-    "^o" : 'ô',
-    r'"\i' : 'i',
-    r'^\i' : 'i',
-    '"u' : 'ü',
-    '`u' : 'ù',
-    '`u' : 'ù',
-    '~n' : 'ñ',
-    "c C" : 'Ç',
-    "c c" : 'ç',
-    "textquoteright" : "'",
-}
 
 def sortkey(value):
     '''
     From a title, return something usable for sorting. It handles locale (but
-    don't forget to call locale.setlocale(locale.LC_ALL, '')). It also try to
-    handle the sort with crappy latex escape sequences. Some chars may not be
-    handled by this function, so add them to *replacePattern* dictionnary.
+    don't forget to call locale.setlocale(locale.LC_ALL, '')). It also handles
+    the sort with latex escape sequences.
     '''
-    def repl(match):
-        try:
-            return replacePattern[match.group(1).strip()]
-        except KeyError:
-            warnings.warn("Error, no match to replace %s in %s. You should add it in the coresponding table in title_sort.py" % (match.group(0), match.group(1)))
-
-    return locale.strxfrm(iecPattern.sub(repl, value).replace(' ', 'A'))
+    tex = TeX()
+    tex.input(value)
+    return locale.strxfrm(unidecode(tex.parse().textContent.replace(' ', 'A')))
 
 def processSXDEntry(tab):
     return (tab[0], tab[1], tab[2])