From a0c3e83e2f2ba7ca8df8b2255ccf0a3f134cbe82 Mon Sep 17 00:00:00 2001
From: Oliverpool <oliverpool@hotmail.fr>
Date: Sat, 7 Nov 2015 11:27:51 +0100
Subject: [PATCH] Detect file encoding with a priority list

---
 patacrep/encoding.py | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/patacrep/encoding.py b/patacrep/encoding.py
index fd58fc2f..4cecd11a 100644
--- a/patacrep/encoding.py
+++ b/patacrep/encoding.py
@@ -16,15 +16,32 @@ def open_read(filename, mode='r', encoding=None):
     If `encoding` is set, use it as the encoding (do not guess).
     """
     if encoding is None:
-        with open(filename, 'rb') as file:
-            fileencoding = chardet.detect(file.read())['encoding']
-    else:
-        fileencoding = encoding
+        encoding = detect_encoding(filename)
 
     with codecs.open(
         filename,
         mode=mode,
-        encoding=fileencoding,
+        encoding=encoding,
         errors='replace',
         ) as fileobject:
         yield fileobject
+
+def detect_encoding(filename):
+    """Return the most likely encoding of the file.
+
+    Try each candidate in priority order and return the first one that
+    decodes the whole file; raise `Exception` if none does.
+    """
+    encodings = ['utf-8', 'windows-1250', 'windows-1252']
+    for e in encodings:
+        try:
+            # `with` closes the handle, and only once it was really opened.
+            with codecs.open(filename, 'r', encoding=e) as fh:
+                fh.read()
+        except UnicodeDecodeError:
+            continue
+        if e != 'utf-8':
+            print('Opening `%s` with `%s` encoding' % (filename, e))
+        return e
+
+    raise Exception('Not suitable encoding found for {}'.format(filename))