Merge pull request #27 from patacrep/encoding

Gestion des problèmes d'encodage des index
12 years ago · 80713d4a95
3 changed files with 22 additions and 15 deletions
--- a/songbook_core/build.py
+++ b/songbook_core/build.py
@ -253,8 +253,8 @@ class SongbookBuilder(object):
        for sxd_file in sxd_files:
            LOGGER.debug("Processing " + sxd_file)
            idx = process_sxd(sxd_file)
-            with open(sxd_file[:-3] + "sbx", "w") as index_file:
+            with codecs.open(sxd_file[:-3] + "sbx", "w", "utf-8") as index_file:
-                index_file.write(idx.entries_to_str().encode('utf8'))
+                index_file.write(idx.entries_to_str())
    @staticmethod
    def build_custom(command):
--- a/songbook_core/index.py
+++ b/songbook_core/index.py
@ -11,11 +11,12 @@ from a file generated by the latex compilation of the songbook (.sxd).
 from unidecode import unidecode
 import locale
 import re
 import codecs
 from songbook_core.authors import processauthors
 from songbook_core.plastex import simpleparse
-EOL = "\n"
+EOL = u"\n"
 # Pattern set to ignore latex command in title prefix
 KEYWORD_PATTERN = re.compile(r"^%(\w+)\s?(.*)$")
@ -37,11 +38,10 @@ def process_sxd(filename):
    Return an Index object.
    """
    index_file = open(filename)
    data = []
-    for line in index_file:
+    with codecs.open(filename, 'r', 'utf-8') as index_file:
-        data.append(line.strip())
+	for line in index_file:
-    index_file.close()
+            data.append(line.strip())
    i = 1
    idx = Index(data[0])
@ -124,7 +124,7 @@ class Index(object):
    def _raw_add(self, key, number, link):
        """Add a song to the list.
-        No processing is done one data. It is added raw. See add() for a
+        No processing is done on data. It is added raw. See add() for a
        similar method with processing.
        """
        first = self.get_first_letter(key)
@ -142,13 +142,16 @@ class Index(object):
        if self.indextype == "TITLE":
            # Removing prefixes before titles
            for pattern in self.prefix_patterns:
-                match = pattern.match(key)
+                match = pattern.match(key.encode('utf-8'))
                if match:
                    self._raw_add(
                                  "{} ({})".format(
                                        match.group(2) + match.group(3),
-                                        match.group(1)),
+                                        match.group(1)
-                                  number, link)
+						                            ),
                                    number,
 				                    link
 				                    )
                    return
            self._raw_add(key, number, link)
@ -162,13 +165,15 @@ class Index(object):
    @staticmethod
    def ref_to_str(ref):
        """Return the LaTeX code corresponding to the reference."""
-        return r'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref)
+	return r'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref)
    def entry_to_str(self, key, entry):
        """Return the LaTeX code corresponding to the entry."""
-        return unicode(r'\idxentry{{{0}}}{{{1}}}' + EOL).format(
+	if not isinstance(key, unicode):
              key = unicode(key, "UTF-8")
 	return unicode(ur'\idxentry{{{0}}}{{{1}}}' + EOL).format(
                key,
-                r'\\'.join([self.ref_to_str(ref) for ref in entry]),
+                ur'\\'.join([self.ref_to_str(ref) for ref in entry]),
                )
    def idxblock_to_str(self, letter, entries):
--- a/songbook_core/plastex.py
+++ b/songbook_core/plastex.py
@ -38,7 +38,9 @@ def simpleparse(text):
    """Parse a simple LaTeX string.
    """
    tex = TeX()
-    tex.input(text.decode('utf8'))
+    if not isinstance(text, unicode):
        text = text.decode("utf-8")
    tex.input(text)
    doc = tex.parse()
    return process_unbr_spaces(doc.textContent)