Browse Source

Merge pull request #161 from patacrep/file_encoding

Detect file encoding with a priority list
pull/168/head
oliverpool 9 years ago
parent
commit
98de8bc040
  1. 1
      Requirements.txt
  2. 27
      patacrep/encoding.py
  3. 2
      setup.py
  4. 2
      test/test_compilation/syntax.tex.control

1
Requirements.txt

@ -1,4 +1,3 @@
ply
Jinja2==2.7.3
chardet==2.2.1
unidecode>=0.04.16

27
patacrep/encoding.py

@ -1,7 +1,6 @@
"""Dealing with encoding problems."""
import codecs
import chardet
import logging
import contextlib
@ -16,15 +15,31 @@ def open_read(filename, mode='r', encoding=None):
If `encoding` is set, use it as the encoding (do not guess).
"""
if encoding is None:
with open(filename, 'rb') as file:
fileencoding = chardet.detect(file.read())['encoding']
else:
fileencoding = encoding
encoding = detect_encoding(filename)
with codecs.open(
filename,
mode=mode,
encoding=fileencoding,
encoding=encoding,
errors='replace',
) as fileobject:
yield fileobject
def detect_encoding(filename):
    """Return the most likely encoding of the file.

    The candidate encodings are tried in priority order. The first one
    that decodes the whole file without error is returned.

    Arguments:
    - filename: path of the file to inspect.

    Return: the name of the first candidate encoding that can decode the
    file (one of 'utf-8', 'windows-1250', 'windows-1252').

    Raise:
    - UnicodeError if none of the candidate encodings can decode the file;
    - any error raised while opening the file (e.g. a missing file) is
      propagated unchanged.
    """
    encodings = ['utf-8', 'windows-1250', 'windows-1252']
    for encoding in encodings:
        try:
            # The `with` block closes the handle only if it was actually
            # opened, so a failure in `codecs.open` itself (missing file,
            # permission error...) surfaces as is, instead of being masked
            # by an error on a never-assigned handle during cleanup.
            with codecs.open(filename, 'r', encoding=encoding) as filehandler:
                # Decode the whole file: a decoding error may be anywhere.
                filehandler.readlines()
        except UnicodeDecodeError:
            # This candidate does not fit; try the next one.
            continue
        if encoding != 'utf-8':
            LOGGER.info(
                'Opening `{}` with `{}` encoding'.format(filename, encoding)
            )
        return encoding
    raise UnicodeError('Not suitable encoding found for {}'.format(filename))

2
setup.py

@ -22,7 +22,7 @@ setup(
packages=find_packages(exclude=["test*"]),
license="GPLv2 or any later version",
install_requires=[
"unidecode", "jinja2", "chardet", "ply",
"unidecode", "jinja2", "ply",
],
setup_requires=["hgtools"],
package_data={'patacrep': data_files},

2
test/test_compilation/syntax.tex.control

@ -92,7 +92,7 @@ guitar,
\selectlanguage{english}
\beginsong{Song with Sharp in musicnote}[
\beginsong{Song with Sharp in musicnote}[
by={
},
]

Loading…
Cancel
Save