|
|
@ -9,6 +9,8 @@ |
|
|
|
# src is the .sxd file generated by latex |
|
|
|
# |
|
|
|
|
|
|
|
from plasTeX.TeX import TeX |
|
|
|
from unidecode import unidecode |
|
|
|
import sys |
|
|
|
import re |
|
|
|
import locale |
|
|
@ -17,47 +19,16 @@ import warnings |
|
|
|
# Patterns used to read titles and to skip LaTeX commands in a title prefix.

# "%keyword rest": group 1 is the keyword, group 2 the remaining text.
keywordPattern = re.compile(r"^%(\w+)\s?(.*)$")

# Skips any number of leading (optionally braced) LaTeX commands, then any
# non-word characters, and captures the first word character that follows.
firstLetterPattern = re.compile(r"^(?:\{?\\\w+\}?)*[^\w]*(\w)")

# Matches a literal "\IeC {\<escape>}" and captures <escape>.
# The leading backslash and the braces are escaped explicitly: an unescaped
# "\I" is an unknown regex escape, which the re module rejects at compile time.
iecPattern = re.compile(r"\\IeC \{\\(.*?)\}")

# Maps the escape captured by iecPattern (after stripping whitespace) to the
# character it stands for.
replacePattern = {
    '`A': 'À',
    '`a': 'à',
    '^a': 'â',
    "'a": 'á',
    "~a": 'ã',
    'oe': 'œ',
    "'e": 'é',
    "`e": 'è',
    "^e": 'ê',
    '"e': 'ë',
    "'E": 'É',
    "`E": 'È',
    "'o": 'ó',
    "^o": 'ô',
    # NOTE(review): the two dotless-i escapes map to a plain 'i' rather than
    # 'ï' / 'î', unlike the other entries -- confirm this is intentional.
    r'"\i': 'i',
    r'^\i': 'i',
    '"u': 'ü',
    '`u': 'ù',
    '~n': 'ñ',
    "c C": 'Ç',
    "c c": 'ç',
    "textquoteright": "'",
}
|
|
|
|
|
|
|
def sortkey(value):
    '''
    From a title, return something usable for sorting.

    The result comes from locale.strxfrm, so it follows the current locale
    (don't forget to call locale.setlocale(locale.LC_ALL, '') beforehand).
    LaTeX escape sequences matched by *iecPattern* are first replaced by the
    character listed for them in the *replacePattern* dictionary. A sequence
    missing from that dictionary triggers a warning and is removed from the
    key; add it to *replacePattern* to have it handled.

    value -- the title, as a string.
    '''
    def repl(match):
        # Substitution callback: look up the captured escape, ignoring any
        # surrounding whitespace inside the braces.
        try:
            return replacePattern[match.group(1).strip()]
        except KeyError:
            warnings.warn("Error, no match to replace %s in %s. You should add it in the coresponding table in title_sort.py" % (match.group(0), match.group(1)))
            # Unknown escape: drop it from the key. This is spelled out
            # instead of relying on re.sub treating a None result as empty.
            return ''

    # Spaces are turned into 'A' before the locale transform.
    # NOTE(review): presumably so that word breaks take part in the
    # ordering -- confirm the intent.
    return locale.strxfrm(iecPattern.sub(repl, value).replace(' ', 'A'))
|
|
|
|
|
|
|
def processSXDEntry(tab):
    '''
    Return the first three fields of *tab* as a tuple.

    Only indexes 0, 1 and 2 are read; any further items are ignored. An
    error raised while indexing *tab* is not caught here.
    '''
    first, second, third = tab[0], tab[1], tab[2]
    return first, second, third
|
|
|