From cfcfe719baac6fb5379b6afb9ab858da48c62a17 Mon Sep 17 00:00:00 2001 From: Luthaf Date: Mon, 14 Apr 2014 21:45:42 +0100 Subject: [PATCH] WIP: gestion des problemes d'encodages des index --- songbook_core/build.py | 4 ++-- songbook_core/index.py | 29 +++++++++++++++++------------ songbook_core/plastex.py | 4 +++- 3 files changed, 22 insertions(+), 15 deletions(-) mode change 100644 => 100755 songbook_core/index.py diff --git a/songbook_core/build.py b/songbook_core/build.py index bd50d0a3..a3bb9adb 100644 --- a/songbook_core/build.py +++ b/songbook_core/build.py @@ -253,8 +253,8 @@ class SongbookBuilder(object): for sxd_file in sxd_files: LOGGER.debug("Processing " + sxd_file) idx = process_sxd(sxd_file) - with open(sxd_file[:-3] + "sbx", "w") as index_file: - index_file.write(idx.entries_to_str().encode('utf8')) + with codecs.open(sxd_file[:-3] + "sbx", "w", "utf-8") as index_file: + index_file.write(idx.entries_to_str()) @staticmethod def build_custom(command): diff --git a/songbook_core/index.py b/songbook_core/index.py old mode 100644 new mode 100755 index 75045c2d..504082d9 --- a/songbook_core/index.py +++ b/songbook_core/index.py @@ -11,11 +11,12 @@ from a file generated by the latex compilation of the songbook (.sxd). from unidecode import unidecode import locale import re +import codecs from songbook_core.authors import processauthors from songbook_core.plastex import simpleparse -EOL = "\n" +EOL = u"\n" # Pattern set to ignore latex command in title prefix KEYWORD_PATTERN = re.compile(r"^%(\w+)\s?(.*)$") @@ -37,11 +38,10 @@ def process_sxd(filename): Return an Index object. """ - index_file = open(filename) data = [] - for line in index_file: - data.append(line.strip()) - index_file.close() + with codecs.open(filename, 'r', 'utf-8') as index_file: + for line in index_file: + data.append(line.strip()) i = 1 idx = Index(data[0]) @@ -124,7 +124,7 @@ class Index(object): def _raw_add(self, key, number, link): """Add a song to the list. - No processing is done one data. It is added raw. See add() for a + No processing is done on data. It is added raw. See add() for a similar method with processing. """ first = self.get_first_letter(key) @@ -142,13 +142,16 @@ class Index(object): if self.indextype == "TITLE": # Removing prefixes before titles for pattern in self.prefix_patterns: - match = pattern.match(key) + match = pattern.match(key.encode('utf-8')) if match: self._raw_add( "{} ({})".format( match.group(2) + match.group(3), - match.group(1)), - number, link) + match.group(1) + ), + number, + link + ) return self._raw_add(key, number, link) @@ -162,13 +165,15 @@ class Index(object): @staticmethod def ref_to_str(ref): """Return the LaTeX code corresponding to the reference.""" - return r'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref) + return r'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref) def entry_to_str(self, key, entry): """Return the LaTeX code corresponding to the entry.""" - return unicode(r'\idxentry{{{0}}}{{{1}}}' + EOL).format( + if not isinstance(key, unicode): + key = unicode(key, "UTF-8") + return unicode(ur'\idxentry{{{0}}}{{{1}}}' + EOL).format( key, - r'\\'.join([self.ref_to_str(ref) for ref in entry]), + ur'\\'.join([self.ref_to_str(ref) for ref in entry]), ) def idxblock_to_str(self, letter, entries): diff --git a/songbook_core/plastex.py b/songbook_core/plastex.py index 3f3cefc9..84b29b96 100644 --- a/songbook_core/plastex.py +++ b/songbook_core/plastex.py @@ -38,7 +38,9 @@ def simpleparse(text): """Parse a simple LaTeX string. """ tex = TeX() - tex.input(text.decode('utf8')) + if not isinstance(text, unicode): + text = text.decode("utf-8") + tex.input(text) doc = tex.parse() return process_unbr_spaces(doc.textContent)