diff --git a/patacrep/build.py b/patacrep/build.py index be9b48f4..1bc683a4 100644 --- a/patacrep/build.py +++ b/patacrep/build.py @@ -33,6 +33,7 @@ DEFAULT_CONFIG = { 'lang': 'english', 'content': [], 'titleprefixwords': [], + 'encoding': None, } @@ -91,6 +92,7 @@ class Songbook(object): config['template'], config['datadir'], config['lang'], + config['encoding'], ) config.update(renderer.get_variables()) config.update(self.config) diff --git a/patacrep/content/include.py b/patacrep/content/include.py index dcc969a9..03492544 100644 --- a/patacrep/content/include.py +++ b/patacrep/content/include.py @@ -48,15 +48,15 @@ def parse(keyword, config, argument, contentlist): filepath = load_from_datadirs(path, config) content_file = None try: - content_file = encoding.open_read(filepath, 'r') - new_content = json.load(content_file) + with encoding.open_read( + filepath, + encoding=config['encoding'] + ) as content_file: + new_content = json.load(content_file) except Exception as error: # pylint: disable=broad-except LOGGER.error(error) LOGGER.error("Error while loading file '{}'.".format(filepath)) sys.exit(1) - finally: - if content_file: - content_file.close() config["datadir"].append(os.path.abspath(os.path.dirname(filepath))) new_contentlist += process_content(new_content, config) diff --git a/patacrep/encoding.py b/patacrep/encoding.py index ca917295..a0501e4d 100644 --- a/patacrep/encoding.py +++ b/patacrep/encoding.py @@ -5,17 +5,27 @@ import codecs import chardet import logging +import contextlib LOGGER = logging.getLogger(__name__) -def open_read(filename, mode='r'): + +@contextlib.contextmanager +def open_read(filename, mode='r', encoding=None): """Open a file for reading, guessing the right encoding. Return a fileobject, reading unicode strings. + If `encoding` is set, use it as the encoding (do not guess). """ - return codecs.open( + if encoding is None: + fileencoding = chardet.detect(open(filename, 'rb').read())['encoding'] + else: + fileencoding = encoding + + with codecs.open( filename, mode=mode, - encoding=chardet.detect(open(filename, 'rb').read())['encoding'], + encoding=fileencoding, errors='replace', - ) + ) as fileobject: + yield fileobject diff --git a/patacrep/index.py b/patacrep/index.py index c715918a..351e155e 100644 --- a/patacrep/index.py +++ b/patacrep/index.py @@ -29,13 +29,9 @@ def process_sxd(filename): """ data = [] index_file = None - try: - index_file = encoding.open_read(filename, 'r') + with encoding.open_read(filename) as index_file: for line in index_file: data.append(line.strip()) - finally: - if index_file: - index_file.close() i = 1 idx = Index(data[0]) diff --git a/patacrep/latex/__init__.py b/patacrep/latex/__init__.py index fce5470f..8d0efed9 100644 --- a/patacrep/latex/__init__.py +++ b/patacrep/latex/__init__.py @@ -11,10 +11,11 @@ from patacrep.latex.syntax import tex2plain from patacrep.latex.syntax import parsesong as syntax_parsesong from patacrep import encoding -def parsesong(path): +def parsesong(path, fileencoding=None): """Return a dictonary of data read from the latex file `path`. """ - data = syntax_parsesong(encoding.open_read(path).read(), path) + with encoding.open_read(path, encoding=fileencoding) as songfile: + data = syntax_parsesong(songfile.read(), path) data['@path'] = path return data diff --git a/patacrep/songbook.py b/patacrep/songbook.py index e3bcf555..8417be91 100755 --- a/patacrep/songbook.py +++ b/patacrep/songbook.py @@ -14,7 +14,7 @@ import sys from patacrep.build import SongbookBuilder, DEFAULT_STEPS from patacrep import __version__ from patacrep import errors -from patacrep import encoding +import patacrep.encoding # Logging configuration logging.basicConfig(level=logging.INFO) @@ -100,17 +100,19 @@ def main(): basename = os.path.basename(songbook_path)[:-3] - songbook_file = None try: - songbook_file = encoding.open_read(songbook_path) - songbook = json.load(songbook_file) + with patacrep.encoding.open_read(songbook_path) as songbook_file: + songbook = json.load(songbook_file) + if 'encoding' in songbook: + with patacrep.encoding.open_read( + songbook_path, + encoding=songbook['encoding'] + ) as songbook_file: + songbook = json.load(songbook_file) except Exception as error: # pylint: disable=broad-except LOGGER.error(error) LOGGER.error("Error while loading file '{}'.".format(songbook_path)) sys.exit(1) - finally: - if songbook_file: - songbook_file.close() # Gathering datadirs datadirs = [] diff --git a/patacrep/songs/__init__.py b/patacrep/songs/__init__.py index 512a2ced..0a1072f9 100644 --- a/patacrep/songs/__init__.py +++ b/patacrep/songs/__init__.py @@ -100,6 +100,8 @@ class Song(Content): def __init__(self, datadir, subpath, config): self.fullpath = os.path.join(datadir, subpath) self.datadir = datadir + self.encoding = config["encoding"] + if datadir: # Only songs in datadirs are cached self._filehash = hashlib.md5( diff --git a/patacrep/songs/tex.py b/patacrep/songs/tex.py index b5710346..c2a51d62 100644 --- a/patacrep/songs/tex.py +++ b/patacrep/songs/tex.py @@ -13,7 +13,7 @@ class TexRenderer(Song): def parse(self): """Parse song and set metadata.""" - self.data = parsesong(self.fullpath) + self.data = parsesong(self.fullpath, self.encoding) self.titles = self.data['@titles'] self.languages = self.data['@languages'] self.authors = self.data['by'] diff --git a/patacrep/templates.py b/patacrep/templates.py index d1e9a616..be507a27 100644 --- a/patacrep/templates.py +++ b/patacrep/templates.py @@ -9,7 +9,8 @@ import os import re import json -from patacrep import encoding, errors, files +from patacrep import errors, files +import patacrep.encoding _LATEX_SUBS = ( (re.compile(r'\\'), r'\\textbackslash'), @@ -67,7 +68,7 @@ def _escape_tex(value): class TexRenderer(object): """Render a template to a LaTeX file.""" - def __init__(self, template, datadirs, lang): + def __init__(self, template, datadirs, lang, encoding=None): '''Start a new jinja2 environment for .tex creation. Arguments: @@ -75,8 +76,10 @@ class TexRenderer(object): - datadirs: list of locations of the data directory (which may contain file /templates/