Browse Source

Merge pull request #161 from patacrep/file_encoding

Detect file encoding with a priority list
pull/168/head
oliverpool 9 years ago
parent
commit
98de8bc040
  1. 1
      Requirements.txt
  2. 27
      patacrep/encoding.py
  3. 2
      setup.py
  4. 2
      test/test_compilation/syntax.tex.control

1
Requirements.txt

@ -1,4 +1,3 @@
ply ply
Jinja2==2.7.3 Jinja2==2.7.3
chardet==2.2.1
unidecode>=0.04.16 unidecode>=0.04.16

27
patacrep/encoding.py

@ -1,7 +1,6 @@
"""Dealing with encoding problems.""" """Dealing with encoding problems."""
import codecs import codecs
import chardet
import logging import logging
import contextlib import contextlib
@ -16,15 +15,31 @@ def open_read(filename, mode='r', encoding=None):
If `encoding` is set, use it as the encoding (do not guess). If `encoding` is set, use it as the encoding (do not guess).
""" """
if encoding is None: if encoding is None:
with open(filename, 'rb') as file: encoding = detect_encoding(filename)
fileencoding = chardet.detect(file.read())['encoding']
else:
fileencoding = encoding
with codecs.open( with codecs.open(
filename, filename,
mode=mode, mode=mode,
encoding=fileencoding, encoding=encoding,
errors='replace', errors='replace',
) as fileobject: ) as fileobject:
yield fileobject yield fileobject
def detect_encoding(filename):
    """Return the most likely encoding of the file.

    The candidate encodings are tried in priority order; the first one that
    decodes the whole file without error is returned.

    Arguments:
    - filename: path of the file to inspect.

    Returns the name of the first matching encoding, as a string.

    Raises:
    - UnicodeError: if none of the candidate encodings can decode the file.
    - OSError: if the file cannot be opened (propagated from `codecs.open`).
    """
    encodings = ['utf-8', 'windows-1250', 'windows-1252']
    for encoding in encodings:
        try:
            # The `with` block guarantees the handle is closed for every
            # attempt, and nothing is closed if opening itself failed (so
            # the original OSError is reported, not an unbound-name error).
            with codecs.open(filename, 'r', encoding=encoding) as filehandler:
                # Decode the whole content: only the absence of a decoding
                # error matters, the text itself is discarded.
                filehandler.read()
        except UnicodeDecodeError:
            # This candidate does not fit; try the next one.
            continue
        if encoding != 'utf-8':
            # UTF-8 is the expected case; only fallbacks are reported.
            # LOGGER is expected to be defined at module level in this file.
            LOGGER.info('Opening `{}` with `{}` encoding'.format(filename, encoding))
        return encoding
    raise UnicodeError('Not suitable encoding found for {}'.format(filename))

2
setup.py

@ -22,7 +22,7 @@ setup(
packages=find_packages(exclude=["test*"]), packages=find_packages(exclude=["test*"]),
license="GPLv2 or any later version", license="GPLv2 or any later version",
install_requires=[ install_requires=[
"unidecode", "jinja2", "chardet", "ply", "unidecode", "jinja2", "ply",
], ],
setup_requires=["hgtools"], setup_requires=["hgtools"],
package_data={'patacrep': data_files}, package_data={'patacrep': data_files},

2
test/test_compilation/syntax.tex.control

@ -92,7 +92,7 @@ guitar,
\selectlanguage{english} \selectlanguage{english}
\beginsong{Song with Sharp in musicnote}[ \beginsong{Song with Sharp in musicnote}[
by={ by={
}, },
] ]

Loading…
Cancel
Save