diff --git a/Requirements.txt b/Requirements.txt index 1673cecb..1fd1ebb8 100644 --- a/Requirements.txt +++ b/Requirements.txt @@ -1,4 +1,3 @@ ply Jinja2==2.7.3 -chardet==2.2.1 unidecode>=0.04.16 diff --git a/patacrep/encoding.py b/patacrep/encoding.py index fd58fc2f..3f1512cf 100644 --- a/patacrep/encoding.py +++ b/patacrep/encoding.py @@ -1,7 +1,6 @@ """Dealing with encoding problems.""" import codecs -import chardet import logging import contextlib @@ -16,15 +15,31 @@ def open_read(filename, mode='r', encoding=None): If `encoding` is set, use it as the encoding (do not guess). """ if encoding is None: - with open(filename, 'rb') as file: - fileencoding = chardet.detect(file.read())['encoding'] - else: - fileencoding = encoding + encoding = detect_encoding(filename) with codecs.open( filename, mode=mode, - encoding=fileencoding, + encoding=encoding, errors='replace', ) as fileobject: yield fileobject + +def detect_encoding(filename): + """Return the most likely encoding of the file + """ + encodings = ['utf-8', 'windows-1250', 'windows-1252'] + for encoding in encodings: + try: + filehandler = codecs.open(filename, 'r', encoding=encoding) + filehandler.readlines() + filehandler.seek(0) + except UnicodeDecodeError: + pass + else: + if encoding != 'utf-8': + LOGGER.info('Opening `{}` with `{}` encoding'.format(filename, encoding)) + return encoding + finally: + filehandler.close() + raise UnicodeError('Not suitable encoding found for {}'.format(filename)) diff --git a/setup.py b/setup.py index 8d8717d6..e7102bb9 100755 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ setup( packages=find_packages(exclude=["test*"]), license="GPLv2 or any later version", install_requires=[ - "unidecode", "jinja2", "chardet", "ply", + "unidecode", "jinja2", "ply", ], setup_requires=["hgtools"], package_data={'patacrep': data_files}, diff --git a/test/test_compilation/syntax.tex.control b/test/test_compilation/syntax.tex.control index 2794e2fa..6f213ef2 100644 --- a/test/test_compilation/syntax.tex.control +++ b/test/test_compilation/syntax.tex.control @@ -92,7 +92,7 @@ guitar, \selectlanguage{english} -\beginsong{Song with Sharp in musicnote}[ +\beginsong{Song with Sharp in musicnote}[ by={ }, ]