mirror of https://github.com/patacrep/patacrep.git
Engine for LaTeX songbooks
http://www.patacrep.com
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
48 lines
1.4 KiB
48 lines
1.4 KiB
# -*- coding: utf-8 -*-
|
|
|
|
"""Dealing with encoding problems."""
|
|
|
|
import codecs
|
|
import chardet
|
|
import logging
|
|
from unidecode import unidecode as unidecode_orig
|
|
|
|
# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
|
|
|
|
def open_read(filename, mode='r'):
    """Open a file for reading, guessing the right encoding.

    Arguments:
    - filename: path of the file to open.
    - mode: mode passed to `codecs.open` (defaults to 'r').

    The encoding is guessed by `chardet` from the raw bytes of the file. If
    no encoding can be guessed (for instance when the file is empty), UTF-8
    is used. Bytes that cannot be decoded are replaced rather than raising
    an error.

    Return a fileobject, reading unicode strings.
    """
    # Read the raw bytes for detection in binary mode, and close that handle
    # immediately instead of leaking it.
    with open(filename, "rb") as rawfile:
        encoding = chardet.detect(rawfile.read())['encoding']
    return codecs.open(
        filename,
        mode=mode,
        # With a None encoding, `codecs.open` would return an undecoded
        # byte stream; fall back to UTF-8 to keep returning unicode.
        encoding=encoding or 'utf-8',
        errors='replace',
    )
|
|
|
|
def basestring2unicode(arg):
    """Return the unicode version of the argument, guessing original encoding.

    Arguments:
    - arg: the value to convert. A `unicode` object is returned unchanged.
      Any other `basestring` is decoded using the encoding guessed by
      `chardet` (UTF-8 if nothing can be guessed, e.g. for an empty string),
      replacing bytes that cannot be decoded.

    If `arg` is not a string, a warning is logged and an empty string is
    returned.
    """
    if isinstance(arg, unicode):
        return arg
    if isinstance(arg, basestring):
        # chardet reports a None encoding when it cannot guess (for instance
        # on an empty string); `decode` would raise TypeError on None, so
        # fall back to UTF-8.
        encoding = chardet.detect(arg)['encoding'] or 'utf-8'
        return arg.decode(encoding=encoding, errors='replace')
    LOGGER.warning("Cannot decode string {}. Ignored.".format(str(arg)))
    return ""
|
|
|
|
def list2unicode(arg):
    """Convert every item of a list to unicode, guessing original encodings.

    Each item of `arg` is passed to `basestring2unicode`; the results are
    returned as a new list, in the same order. Items that are not strings
    end up as empty strings (a warning is logged for each of them).
    """
    return list(map(basestring2unicode, arg))
|
|
|
|
def unidecode(arg):
    """Apply the original `unidecode` to `arg`, returning a unicode object."""
    transliterated = unidecode_orig(arg)
    return unicode(transliterated)
|
|
|