Browse Source

WIP: gestion des problèmes d'encodage des index

pull/27/head
Luthaf 11 years ago
parent
commit
cfcfe719ba
  1. 4
      songbook_core/build.py
  2. 29
      songbook_core/index.py
  3. 4
      songbook_core/plastex.py

4
songbook_core/build.py

@ -253,8 +253,8 @@ class SongbookBuilder(object):
for sxd_file in sxd_files: for sxd_file in sxd_files:
LOGGER.debug("Processing " + sxd_file) LOGGER.debug("Processing " + sxd_file)
idx = process_sxd(sxd_file) idx = process_sxd(sxd_file)
with open(sxd_file[:-3] + "sbx", "w") as index_file: with codecs.open(sxd_file[:-3] + "sbx", "w", "utf-8") as index_file:
index_file.write(idx.entries_to_str().encode('utf8')) index_file.write(idx.entries_to_str())
@staticmethod @staticmethod
def build_custom(command): def build_custom(command):

29
songbook_core/index.py

@ -11,11 +11,12 @@ from a file generated by the latex compilation of the songbook (.sxd).
from unidecode import unidecode from unidecode import unidecode
import locale import locale
import re import re
import codecs
from songbook_core.authors import processauthors from songbook_core.authors import processauthors
from songbook_core.plastex import simpleparse from songbook_core.plastex import simpleparse
EOL = "\n" EOL = u"\n"
# Pattern set to ignore latex command in title prefix # Pattern set to ignore latex command in title prefix
KEYWORD_PATTERN = re.compile(r"^%(\w+)\s?(.*)$") KEYWORD_PATTERN = re.compile(r"^%(\w+)\s?(.*)$")
@ -37,11 +38,10 @@ def process_sxd(filename):
Return an Index object. Return an Index object.
""" """
index_file = open(filename)
data = [] data = []
for line in index_file: with codecs.open(filename, 'r', 'utf-8') as index_file:
data.append(line.strip()) for line in index_file:
index_file.close() data.append(line.strip())
i = 1 i = 1
idx = Index(data[0]) idx = Index(data[0])
@ -124,7 +124,7 @@ class Index(object):
def _raw_add(self, key, number, link): def _raw_add(self, key, number, link):
"""Add a song to the list. """Add a song to the list.
No processing is done one data. It is added raw. See add() for a No processing is done on data. It is added raw. See add() for a
similar method with processing. similar method with processing.
""" """
first = self.get_first_letter(key) first = self.get_first_letter(key)
@ -142,13 +142,16 @@ class Index(object):
if self.indextype == "TITLE": if self.indextype == "TITLE":
# Removing prefixes before titles # Removing prefixes before titles
for pattern in self.prefix_patterns: for pattern in self.prefix_patterns:
match = pattern.match(key) match = pattern.match(key.encode('utf-8'))
if match: if match:
self._raw_add( self._raw_add(
"{} ({})".format( "{} ({})".format(
match.group(2) + match.group(3), match.group(2) + match.group(3),
match.group(1)), match.group(1)
number, link) ),
number,
link
)
return return
self._raw_add(key, number, link) self._raw_add(key, number, link)
@ -162,13 +165,15 @@ class Index(object):
@staticmethod @staticmethod
def ref_to_str(ref): def ref_to_str(ref):
"""Return the LaTeX code corresponding to the reference.""" """Return the LaTeX code corresponding to the reference."""
return r'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref) return r'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref)
def entry_to_str(self, key, entry): def entry_to_str(self, key, entry):
"""Return the LaTeX code corresponding to the entry.""" """Return the LaTeX code corresponding to the entry."""
return unicode(r'\idxentry{{{0}}}{{{1}}}' + EOL).format( if not isinstance(key, unicode):
key = unicode(key, "UTF-8")
return unicode(ur'\idxentry{{{0}}}{{{1}}}' + EOL).format(
key, key,
r'\\'.join([self.ref_to_str(ref) for ref in entry]), ur'\\'.join([self.ref_to_str(ref) for ref in entry]),
) )
def idxblock_to_str(self, letter, entries): def idxblock_to_str(self, letter, entries):

4
songbook_core/plastex.py

@ -38,7 +38,9 @@ def simpleparse(text):
"""Parse a simple LaTeX string. """Parse a simple LaTeX string.
""" """
tex = TeX() tex = TeX()
tex.input(text.decode('utf8')) if not isinstance(text, unicode):
text = text.decode("utf-8")
tex.input(text)
doc = tex.parse() doc = tex.parse()
return process_unbr_spaces(doc.textContent) return process_unbr_spaces(doc.textContent)

Loading…
Cancel
Save