Browse Source

Merge branch 'master' into cache (mainly managing string encoding)

Conflicts:
	patacrep/content/song.py
	patacrep/songs.py
pull/51/head
Louis 11 years ago
parent
commit
e3edc2b9e7
  1. 3
      Requirements.txt
  2. 15
      patacrep/authors.py
  3. 2
      patacrep/content/__init__.py
  4. 64
      patacrep/content/include.py
  5. 4
      patacrep/content/section.py
  6. 6
      patacrep/content/song.py
  7. 2
      patacrep/content/songsection.py
  8. 5
      patacrep/content/sorted.py
  9. 2
      patacrep/content/tex.py
  10. 24
      patacrep/data/examples/example_encoding.sb
  11. 2
      patacrep/data/templates/default.tex
  12. 49
      patacrep/encoding.py
  13. 34
      patacrep/index.py
  14. 6
      patacrep/plastex.py
  15. 2
      patacrep/plastex_chord.py
  16. 14
      patacrep/plastex_songs.py
  17. 26
      patacrep/songs.py
  18. 27
      patacrep/templates.py
  19. 11
      songbook

3
Requirements.txt

@ -1,3 +1,4 @@
Jinja2==2.7.3
argparse==1.2.1
-e git+https://github.com/tiarno/plastex#egg=plasTeX
chardet==2.2.1
https://github.com/tiarno/plastex/archive/master.zip

15
patacrep/authors.py

@ -14,16 +14,6 @@ DEFAULT_AUTHWORDS = {
"sep": ["and"],
}
def to_utf8(string):
"""Convert a string (encoded in unicode or iso-8859-1 to utf-8"""
if type(string) is unicode:
return string.encode('utf-8')
elif type(string) is str:
return string.decode('iso-8859-1').encode('utf-8')
else:
LOGGER.warning("Ignoring a word I can not decode...")
return None
def compile_authwords(authwords):
"""Convert strings of authwords to compiled regular expressions.
@ -43,7 +33,6 @@ def compile_authwords(authwords):
re.compile(r"^(.*)%s +(.*)$" % word, re.LOCALE)
for word in ([" %s" % word for word in authwords['sep']] + [','])
]
authwords['ignore'] = [to_utf8(word) for word in authwords['ignore'] if to_utf8(word)]
return authwords
@ -154,7 +143,7 @@ def processauthors_ignore_authors(authors_list, ignore):
for author in authors_list:
ignored = False
for ignoreword in ignore:
if author.find(str(ignoreword)) != -1:
if author.find(ignoreword) != -1:
ignored = True
break
if not ignored:
@ -182,7 +171,7 @@ def processauthors_invert_names(authors_list):
for author in authors_list:
first, last = split_author_names(author)
if first:
dest.append(r"\indexauthor{{{first}}}{{{last}}}".format(
dest.append(ur"\indexauthor{{{first}}}{{{last}}}".format(
first=first.strip(),
last=last.strip(),
))

2
patacrep/content/__init__.py

@ -225,7 +225,7 @@ def process_content(content, config=None):
"""
contentlist = []
plugins = load_plugins(config)
keyword_re = re.compile(r'^ *(?P<keyword>\w*) *(\((?P<argument>.*)\))? *$')
keyword_re = re.compile(ur'^ *(?P<keyword>\w*) *(\((?P<argument>.*)\))? *$')
if not content:
content = [["song"]]
for elem in content:

64
patacrep/content/include.py

@ -0,0 +1,64 @@
# -*- coding: utf-8 -*-
"""Include an external list of songs
This plugin provides keyword 'include', used to include an external list of
songs in JSON format.
"""
import json
import os
import sys
import logging
from patacrep.content import process_content, ContentError
from patacrep import encoding
LOGGER = logging.getLogger(__name__)
def load_from_datadirs(path, config=None):
    """Return the location of 'path' in the first datadir that contains it.

    Arguments:
    - path: file path, relative to a datadir;
    - config: the songbook configuration dictionary. The directories listed
      in its "datadir" entry are searched in order. A missing entry, or a
      config of None, is treated as an empty list of datadirs.

    Raise a ContentError if 'path' exists in none of the datadirs.
    """
    # The signature allows config=None, so do not call .get() on it blindly.
    datadirs = config.get("datadir", []) if config else []
    for datadir in datadirs:
        filepath = os.path.join(datadir, path)
        if os.path.exists(filepath):
            return filepath
    # File not found
    raise ContentError("include", "The file '{0}' was not found in the "
                       "datadirs.".format(path))
#pylint: disable=unused-argument
def parse(keyword, config, argument, contentlist):
    """Include the content of external files.

    Arguments:
    - keyword: the string 'include';
    - config: the current songbook configuration dictionary;
    - argument: None;
    - contentlist: a list of file paths to be included.

    Each path is looked up with load_from_datadirs(), loaded as JSON, and
    handed to process_content(); the resulting lists are concatenated and
    returned. If a file cannot be read or parsed, the error is logged and
    the program exits with status 1.
    """
    new_contentlist = []
    for path in contentlist:
        filepath = load_from_datadirs(path, config)
        content_file = None
        try:
            content_file = encoding.open_read(filepath, 'r')
            new_content = json.load(content_file)
        except Exception as error: # pylint: disable=broad-except
            LOGGER.error(error)
            LOGGER.error("Error while loading file '{}'.".format(filepath))
            sys.exit(1)
        finally:
            if content_file:
                content_file.close()

        # While the included content is processed, the directory of the
        # included file is temporarily added to the datadirs. The
        # try/finally guarantees the shared configuration is restored even
        # if process_content() raises.
        config["datadir"].append(os.path.abspath(os.path.dirname(filepath)))
        try:
            new_contentlist += process_content(new_content, config)
        finally:
            config["datadir"].pop()
    return new_contentlist


# Maps the content keyword to the function parsing it.
CONTENT_PLUGINS = {'include': parse}

4
patacrep/content/section.py

@ -26,9 +26,9 @@ class Section(Content):
def render(self, __context):
if self.short is None:
return r'\{}{{{}}}'.format(self.keyword, self.name)
return ur'\{}{{{}}}'.format(self.keyword, self.name)
else:
return r'\{}[{}]{{{}}}'.format(self.keyword, self.short, self.name)
return ur'\{}[{}]{{{}}}'.format(self.keyword, self.short, self.name)
#pylint: disable=unused-argument
def parse(keyword, argument, contentlist, config):

6
patacrep/content/song.py

@ -26,15 +26,15 @@ class SongRenderer(Content, Song):
indexes = context.resolve("indexes")
if isinstance(indexes, jinja2.runtime.Undefined):
indexes = ""
return r'\begin{songs}{%s}' % indexes
return ur'\begin{songs}{%s}' % indexes
def end_block(self, __context):
"""Return the string to end a block."""
return r'\end{songs}'
return ur'\end{songs}'
def render(self, context):
"""Return the string that will render the song."""
return r'\input{{{}}}'.format(files.relpath(
return ur'\input{{{}}}'.format(files.relpath(
self.fullpath,
os.path.dirname(context['filename'])
))

2
patacrep/content/songsection.py

@ -19,7 +19,7 @@ class SongSection(Content):
def render(self, __context):
"""Render this section or chapter."""
return r'\{}{{{}}}'.format(self.keyword, self.name)
return ur'\{}{{{}}}'.format(self.keyword, self.name)
#pylint: disable=unused-argument
def parse(keyword, argument, contentlist, config):

5
patacrep/content/sorted.py

@ -11,6 +11,7 @@ import locale
import logging
from patacrep import files
from patacrep import encoding
from patacrep.content import ContentError
from patacrep.content.song import OnlySongsError, process_songs
@ -26,7 +27,7 @@ def normalize_string(string):
- lower case;
- passed through locale.strxfrm().
"""
return locale.strxfrm(string.lower().strip())
return locale.strxfrm(encoding.unidecode(string.lower().strip()))
def normalize_field(field):
"""Return a normalized field, it being a string or a list of strings."""
@ -62,7 +63,7 @@ def key_generator(sort):
files.relpath(song.fullpath),
)
)
field = ""
field = u""
songkey.append(normalize_field(field))
return songkey
return ordered_song_keys

2
patacrep/content/tex.py

@ -18,7 +18,7 @@ class LaTeX(Content):
self.filename = filename
def render(self, context):
return r'\input{{{}}}'.format(files.relpath(
return ur'\input{{{}}}'.format(files.relpath(
self.filename,
os.path.dirname(context['filename']),
))

24
patacrep/data/examples/example_encoding.sb

@ -0,0 +1,24 @@
{
"bookoptions" : [
"importantdiagramonly",
"repeatchords",
"lilypond",
"pictures"
],
"booktype" : "chorded",
"lang" : "french",
"authwords" : {
"sep" : ["and", "et", "À"],
"ignore" : ["À"],
"after" : ["À"]
},
"titleprefixwords": ["À"],
"datadir" : ".",
"content" : [["section", "Traditional"],
"chevaliers_de_la_table_ronde.sg",
"greensleeves.sg",
"vent_frais.sg",
["section", "Example"],
"example-fr.sg",
"example-en.sg"]
}

2
patacrep/data/templates/default.tex

@ -117,9 +117,11 @@
(* block chords *)
% list of chords
\ifchorded
\ifdiagram
\phantomsection
\addcontentsline{toc}{section}{\chordlistname}
\chords
\fi
\fi
(* endblock *)

49
patacrep/encoding.py

@ -0,0 +1,49 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Dealing with encoding problems."""
import codecs
import chardet
import logging
from unidecode import unidecode as unidecode_orig
LOGGER = logging.getLogger(__name__)
def open_read(filename, mode='r'):
    """Open a file for reading, guessing the right encoding.

    The whole file is read once so that chardet can guess its encoding,
    then it is reopened through codecs.open() with that encoding.
    Undecodable bytes are replaced rather than raising an error.

    Arguments:
    - filename: path of the file to open;
    - mode: mode passed to codecs.open().

    Return a fileobject, reading unicode strings. The caller is responsible
    for closing it.
    """
    # Detection works on raw bytes, hence binary mode; the 'with' statement
    # makes sure this probing handle is closed.
    with open(filename, "rb") as raw_file:
        detected = chardet.detect(raw_file.read())['encoding']
    return codecs.open(
        filename,
        mode=mode,
        # chardet reports None when it cannot guess (e.g. empty file).
        # Without an encoding, codecs.open() would return a plain file
        # object yielding undecoded data, so fall back to UTF-8.
        encoding=detected or 'utf-8',
        errors='replace',
    )
def basestring2unicode(arg):
    """Return the unicode version of the argument, guessing original encoding.

    - A unicode argument is returned unchanged.
    - A byte string is decoded with the encoding guessed by chardet (UTF-8
      if chardet cannot guess); undecodable bytes are replaced.
    - For any other type, a warning is logged and an empty unicode string
      is returned.
    """
    if isinstance(arg, unicode):
        return arg
    elif isinstance(arg, basestring):
        # chardet reports None when it cannot guess (e.g. empty string), and
        # decode() does not accept None as an encoding: fall back to UTF-8.
        detected = chardet.detect(arg)['encoding'] or 'utf-8'
        return arg.decode(encoding=detected, errors='replace')
    else:
        LOGGER.warning("Cannot decode string {}. Ignored.".format(str(arg)))
        # Keep the return type consistent: always a unicode string.
        return u""
def list2unicode(arg):
    """Convert every item of a list to unicode, guessing original encodings.

    'arg' is an iterable of strings. Each item goes through
    basestring2unicode(), and the converted items are returned as a new
    list, in the same order.
    """
    converted = []
    for item in arg:
        converted.append(basestring2unicode(item))
    return converted
def unidecode(arg):
    """Apply the unidecode library to 'arg', returning a unicode object."""
    # unidecode_orig is the unidecode() function of the 'unidecode' package;
    # its result is explicitly converted to unicode.
    decoded = unidecode_orig(arg)
    return unicode(decoded)

34
patacrep/index.py

@ -8,19 +8,18 @@ the original makeindex program written in C that produces an index file (.sbx)
from a file generated by the latex compilation of the songbook (.sxd).
"""
from unidecode import unidecode
import locale
import re
import codecs
from patacrep import authors
from patacrep import encoding
from patacrep.plastex import simpleparse
EOL = u"\n"
# Pattern set to ignore latex command in title prefix
KEYWORD_PATTERN = re.compile(r"^%(\w+)\s?(.*)$", re.LOCALE)
FIRST_LETTER_PATTERN = re.compile(r"^(?:\{?\\\w+\}?)*[^\w]*(\w)", re.LOCALE)
KEYWORD_PATTERN = re.compile(ur"^%(\w+)\s?(.*)$", re.LOCALE)
FIRST_LETTER_PATTERN = re.compile(ur"^(?:\{?\\\w+\}?)*[^\w]*(\w)", re.LOCALE)
def sortkey(value):
@ -31,7 +30,7 @@ def sortkey(value):
the sort with latex escape sequences.
"""
return locale.strxfrm(
unidecode(simpleparse(value).replace(' ', 'A')).lower()
encoding.unidecode(simpleparse(value).replace(' ', 'A')).lower()
)
@ -41,9 +40,14 @@ def process_sxd(filename):
Return an Index object.
"""
data = []
with codecs.open(filename, 'r', 'iso-8859-1') as index_file:
index_file = None
try:
index_file = encoding.open_read(filename, 'r')
for line in index_file:
data.append(line.strip().encode('utf-8'))
data.append(line.strip())
finally:
if index_file:
index_file.close()
i = 1
idx = Index(data[0])
@ -82,7 +86,7 @@ class Index(object):
def get_first_letter(key):
"""Return the uppercase first letter of key."""
letter = FIRST_LETTER_PATTERN.match(key).group(1)
if re.match(r'\d', letter):
if re.match(ur'\d', letter):
letter = '0-9'
return letter.upper()
@ -98,7 +102,7 @@ class Index(object):
if 'prefix' in self.keywords:
for prefix in self.keywords['prefix']:
self.prefix_patterns.append(re.compile(
r"^({prefix})(\b|\\)(\s*.*)$".format(prefix=prefix),
ur"^({prefix})(\b|\\)(\s*.*)$".format(prefix=prefix),
re.LOCALE
))
@ -126,10 +130,10 @@ class Index(object):
if self.indextype == "TITLE":
# Removing prefixes before titles
for pattern in self.prefix_patterns:
match = pattern.match(key.encode('utf-8'))
match = pattern.match(key)
if match:
self._raw_add(
r"\indextitle{{{}}}{{{}}}".format(
ur"\indextitle{{{}}}{{{}}}".format(
match.group(1).strip(),
(match.group(2) + match.group(3)).strip(),
),
@ -149,12 +153,10 @@ class Index(object):
@staticmethod
def ref_to_str(ref):
"""Return the LaTeX code corresponding to the reference."""
return r'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref)
return ur'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref)
def entry_to_str(self, key, entry):
"""Return the LaTeX code corresponding to the entry."""
if not isinstance(key, unicode):
key = unicode(key, "UTF-8")
return unicode(ur'\idxentry{{{0}}}{{{1}}}' + EOL).format(
key,
ur'\\'.join([self.ref_to_str(ref) for ref in entry]),
@ -166,10 +168,10 @@ class Index(object):
Here, an index block is a letter, and all data beginning with this
letter.
"""
string = r'\begin{idxblock}{' + letter + '}' + EOL
string = ur'\begin{idxblock}{' + letter + '}' + EOL
for key in sorted(entries.keys(), key=sortkey):
string += self.entry_to_str(key, entries[key])
string += r'\end{idxblock}' + EOL
string += ur'\end{idxblock}' + EOL
return string
def entries_to_str(self):

6
patacrep/plastex.py

@ -6,11 +6,11 @@
from plasTeX.TeX import TeX
from plasTeX.Base.LaTeX import Sentences
import codecs
import locale
import os
import sys
from patacrep import encoding
def process_unbr_spaces(node):
#pylint: disable=line-too-long
@ -39,8 +39,6 @@ def simpleparse(text):
"""Parse a simple LaTeX string.
"""
tex = TeX()
if not isinstance(text, unicode):
text = text.decode("utf-8")
tex.input(text)
doc = tex.parse()
return process_unbr_spaces(doc.textContent)
@ -66,7 +64,7 @@ class SongParser(object):
def parse(cls, filename):
"""Parse a TeX file, and return its plasTeX representation."""
tex = cls.create_tex()
tex.input(codecs.open(filename, 'r', 'utf-8', 'replace'))
tex.input(encoding.open_read(filename, 'r'))
return tex.parse()

2
patacrep/plastex_chord.py

@ -74,7 +74,7 @@ class Chord(Command):
@property
def source(self):
"""Return chord LaTeX code."""
return r'\[{}]'.format(self.chord)
return ur'\[{}]'.format(self.chord)
class BeginChordOrDisplayMath(BeginDisplayMath):
r"""Wrapper to BeginDisplayMath

14
patacrep/plastex_songs.py

@ -6,6 +6,7 @@
import plasTeX
from patacrep import encoding
from patacrep.plastex import process_unbr_spaces
@ -28,8 +29,9 @@ def split_linebreak(texlist):
return_list.append(current)
current = []
else:
current.append(
process_unbr_spaces(token).textContent.encode('utf-8'))
current.append(encoding.basestring2unicode(
process_unbr_spaces(token).textContent
))
if current:
return_list.append(current)
return return_list
@ -49,15 +51,17 @@ class beginsong(plasTeX.Command): # pylint: disable=invalid-name,too-many-public
titles = []
for tokens in split_linebreak(self.attributes['titles'].allChildNodes):
titles.append("".join(tokens))
self.attributes['titles'] = titles
self.attributes['titles'] = encoding.list2unicode(titles)
# Parsing keyval arguments
args = {}
for (key, val) in self.attributes['args'].iteritems():
if isinstance(val, plasTeX.DOM.Element):
args[key] = process_unbr_spaces(val).textContent.encode('utf-8')
args[key] = encoding.basestring2unicode(
process_unbr_spaces(val).textContent
)
elif isinstance(val, basestring):
args[key] = val.encode('utf-8')
args[key] = encoding.basestring2unicode(val)
else:
args[key] = unicode(val)
self.attributes['args'] = args

26
patacrep/songs.py

@ -3,9 +3,9 @@
"""Song management."""
from unidecode import unidecode
import errno
import hashlib
import logging
import os
import re
@ -17,10 +17,11 @@ except ImportError:
from patacrep.authors import processauthors
from patacrep.plastex import parsetex
LOGGER = logging.getLogger(__name__)
def cached_name(datadir, filename):
"""Return the filename of the cache version of the file."""
fullpath = os.path.join(datadir, '.cache', filename)
fullpath = os.path.abspath(os.path.join(datadir, '.cache', filename))
directory = os.path.dirname(fullpath)
try:
os.makedirs(directory)
@ -96,7 +97,11 @@ class Song(object):
open(self.fullpath, 'rb').read()
).hexdigest()
if os.path.exists(cached_name(datadir, subpath)):
cached = pickle.load(open(cached_name(datadir, subpath), 'rb'))
try:
cached = pickle.load(open(
cached_name(datadir, subpath),
'rb',
))
if (
cached['_filehash'] == self._filehash
and cached['_version'] == self.CACHE_VERSION
@ -104,6 +109,10 @@ class Song(object):
for attribute in self.cached_attributes:
setattr(self, attribute, cached[attribute])
return
except: # pylint: disable=bare-except
LOGGER.warning("Could not use cached version of {}.".format(
self.fullpath
))
# Data extraction from the song with plastex
data = parsetex(self.fullpath)
@ -111,7 +120,7 @@ class Song(object):
self.datadir = datadir
self.unprefixed_titles = [
unprefixed_title(
unidecode(unicode(title, "utf-8")),
title,
config['titleprefixwords']
)
for title
@ -136,6 +145,13 @@ class Song(object):
if self.datadir:
cached = {}
for attribute in self.cached_attributes:
if attribute == "args":
cached[attribute] = dict([
(key, u"{}".format(value)) # Force conversion to unicode
for (key, value)
in self.args.iteritems()
])
else:
cached[attribute] = getattr(self, attribute)
pickle.dump(
cached,
@ -149,7 +165,7 @@ def unprefixed_title(title, prefixes):
"""Remove the first prefix of the list in the beginning of title (if any).
"""
for prefix in prefixes:
match = re.compile(r"^(%s)\b\s*(.*)$" % prefix, re.LOCALE).match(title)
match = re.compile(ur"^(%s)\b\s*(.*)$" % prefix, re.LOCALE).match(title)
if match:
return match.group(2)
return title

27
patacrep/templates.py

@ -6,23 +6,23 @@ from jinja2 import Environment, FileSystemLoader, ChoiceLoader, \
TemplateNotFound, nodes
from jinja2.ext import Extension
from jinja2.meta import find_referenced_templates as find_templates
import codecs
import os
import re
import json
from patacrep import encoding
from patacrep import errors
_LATEX_SUBS = (
(re.compile(r'\\'), r'\\textbackslash'),
(re.compile(r'([{}_#%&$])'), r'\\\1'),
(re.compile(r'~'), r'\~{}'),
(re.compile(r'\^'), r'\^{}'),
(re.compile(r'"'), r"''"),
(re.compile(r'\.\.\.+'), r'\\ldots'),
(re.compile(ur'\\'), ur'\\textbackslash'),
(re.compile(ur'([{}_#%&$])'), ur'\\\1'),
(re.compile(ur'~'), ur'\~{}'),
(re.compile(ur'\^'), ur'\^{}'),
(re.compile(ur'"'), ur"''"),
(re.compile(ur'\.\.\.+'), ur'\\ldots'),
)
_VARIABLE_REGEXP = re.compile(r"""
_VARIABLE_REGEXP = re.compile(ur"""
\(\*\ *variables\ *\*\) # Match (* variables *)
( # Match and capture the following:
(?: # Start of non-capturing group, used to match a single character
@ -177,12 +177,10 @@ class TexRenderer(object):
"""
subvariables = {}
template_file = None
templatename = self.texenv.get_template(template).filename
with codecs.open(
templatename,
'r',
'utf-8'
) as template_file:
try:
template_file = encoding.open_read(templatename, 'r')
content = template_file.read()
subtemplates = list(find_templates(self.texenv.parse(content)))
match = re.findall(_VARIABLE_REGEXP, content)
@ -202,6 +200,9 @@ class TexRenderer(object):
jsonstring=var,
)
)
finally:
if template_file:
template_file.close()
return (subvariables, subtemplates)

11
songbook

@ -15,6 +15,7 @@ import sys
from patacrep.build import SongbookBuilder, DEFAULT_STEPS
from patacrep import __STR_VERSION__
from patacrep import errors
from patacrep import encoding
# Logging configuration
logging.basicConfig(level=logging.INFO)
@ -100,13 +101,17 @@ def main():
basename = os.path.basename(songbook_path)[:-3]
songbook_file = None
try:
with open(songbook_path) as songbook_file:
songbook_file = encoding.open_read(songbook_path)
songbook = json.load(songbook_file)
except Exception as error: # pylint: disable=broad-except
LOGGER.error(error)
LOGGER.error("Error while loading file '{}'.".format(songbook_path))
sys.exit(1)
finally:
if songbook_file:
songbook_file.close()
# Gathering datadirs
datadirs = []
@ -124,9 +129,9 @@ def main():
)
for path in songbook['datadir']
]
if not datadirs:
# Default value
datadirs = [os.path.dirname(os.path.abspath(songbook_path))]
datadirs.append(os.path.dirname(os.path.abspath(songbook_path)))
songbook['datadir'] = datadirs
try:

Loading…
Cancel
Save