|
@ -16,15 +16,32 @@ def open_read(filename, mode='r', encoding=None): |
|
|
If `encoding` is set, use it as the encoding (do not guess). |
|
|
If `encoding` is set, use it as the encoding (do not guess). |
|
|
""" |
|
|
""" |
|
|
if encoding is None: |
|
|
if encoding is None: |
|
|
with open(filename, 'rb') as file: |
|
|
encoding = detect_encoding(filename) |
|
|
fileencoding = chardet.detect(file.read())['encoding'] |
|
|
|
|
|
else: |
|
|
|
|
|
fileencoding = encoding |
|
|
|
|
|
|
|
|
|
|
|
with codecs.open( |
|
|
with codecs.open( |
|
|
filename, |
|
|
filename, |
|
|
mode=mode, |
|
|
mode=mode, |
|
|
encoding=fileencoding, |
|
|
encoding=encoding, |
|
|
errors='replace', |
|
|
errors='replace', |
|
|
) as fileobject: |
|
|
) as fileobject: |
|
|
yield fileobject |
|
|
yield fileobject |
|
|
|
|
|
|
|
|
|
|
|
def detect_encoding(filename):
    """Return the most likely encoding of the file.

    Each candidate encoding is tried in order (``utf-8``, ``windows-1250``,
    ``windows-1252``); the first one that decodes the whole file without a
    ``UnicodeDecodeError`` is returned. A notice is printed when the chosen
    encoding is not ``utf-8``.

    Args:
        filename: Path of the file to inspect.

    Returns:
        The name of the first candidate encoding that decodes the file.

    Raises:
        OSError: If the file cannot be opened or read. The error is
            propagated unchanged.
        Exception: If none of the candidate encodings can decode the file.
    """
    encodings = ['utf-8', 'windows-1250', 'windows-1252']
    for e in encodings:
        try:
            # The context manager closes the handle only if it was actually
            # opened, so a failing open() surfaces its own error instead of
            # being masked by a close() call on an unbound name.
            with codecs.open(filename, 'r', encoding=e) as fh:
                # Decode the whole file; only the success or failure of the
                # decoding matters, the text itself is discarded.
                fh.read()
        except UnicodeDecodeError:
            # This candidate cannot decode the file; try the next one.
            continue

        if e != 'utf-8':
            print('Opening `%s` with `%s` encoding' % (filename, e))
        return e

    raise Exception('Not suitable encoding found for {}'.format(filename))
|
|