Browse Source

Remplacement de la table de conversion \IeC => ASCII par un traitement par PlasTeX

Le code est plus propre, et tous les caractères sont gérés.
remotes/origin/next
Louis 12 years ago
parent
commit
60917ad715
  1. 43
      index.py

43
index.py

@@ -9,6 +9,8 @@
# src is the .sxd file generated by latex # src is the .sxd file generated by latex
# #
from plasTeX.TeX import TeX
from unidecode import unidecode
import sys import sys
import re import re
import locale import locale
@@ -17,47 +19,16 @@ import warnings
# Pattern set to ignore latex command in title prefix # Pattern set to ignore latex command in title prefix
keywordPattern = re.compile(r"^%(\w+)\s?(.*)$") keywordPattern = re.compile(r"^%(\w+)\s?(.*)$")
firstLetterPattern = re.compile(r"^(?:\{?\\\w+\}?)*[^\w]*(\w)") firstLetterPattern = re.compile(r"^(?:\{?\\\w+\}?)*[^\w]*(\w)")
iecPattern = re.compile(r"\IeC {\\(.*?)}")
replacePattern = {
'`A': 'À',
'`a': 'à',
'^a': 'â',
"'a": 'á',
"~a": 'ã',
'oe': 'œ',
"'e" : 'é',
"`e" : 'è',
"^e" : 'ê',
'"e' : 'ë',
"'E" : 'É',
"`E" : 'È',
"'o" : 'ó',
"^o" : 'ô',
r'"\i' : 'i',
r'^\i' : 'i',
'"u' : 'ü',
'`u' : 'ù',
'`u' : 'ù',
'~n' : 'ñ',
"c C" : 'Ç',
"c c" : 'ç',
"textquoteright" : "'",
}
def sortkey(value): def sortkey(value):
''' '''
From a title, return something usable for sorting. It handles locale (but From a title, return something usable for sorting. It handles locale (but
don't forget to call locale.setlocale(locale.LC_ALL, '')). It also try to don't forget to call locale.setlocale(locale.LC_ALL, '')). It also handles
handle the sort with crappy latex escape sequences. Some chars may not be the sort with latex escape sequences.
handled by this function, so add them to *replacePattern* dictionnary.
''' '''
def repl(match): tex = TeX()
try: tex.input(value)
return replacePattern[match.group(1).strip()] return locale.strxfrm(unidecode(tex.parse().textContent.replace(' ', 'A')))
except KeyError:
warnings.warn("Error, no match to replace %s in %s. You should add it in the coresponding table in title_sort.py" % (match.group(0), match.group(1)))
return locale.strxfrm(iecPattern.sub(repl, value).replace(' ', 'A'))
def processSXDEntry(tab): def processSXDEntry(tab):
return (tab[0], tab[1], tab[2]) return (tab[0], tab[1], tab[2])

Loading…
Cancel
Save