Browse Source

Merge pull request #51 from patacrep/cache

Mise en place d'un cache
pull/58/head
Luthaf 11 years ago
parent
commit
a3beec2148
  1. 38
      patacrep/authors.py
  2. 3
      patacrep/build.py
  3. 7
      patacrep/content/cwd.py
  4. 4
      patacrep/content/include.py
  5. 25
      patacrep/content/song.py
  6. 8
      patacrep/content/sorted.py
  7. 7
      patacrep/content/tex.py
  8. 1
      patacrep/data/examples/.gitignore
  9. 30
      patacrep/files.py
  10. 61
      patacrep/index.py
  11. 1
      patacrep/plastex.py
  12. 134
      patacrep/songs.py

38
patacrep/authors.py

@ -64,7 +64,7 @@ def split_author_names(string):
brace_count += 1 brace_count += 1
if char == "{": if char == "{":
brace_count -= 1 brace_count -= 1
return string[:last_space], string[last_space:] return string[last_space:], string[:last_space]
def split_sep_author(string, sep): def split_sep_author(string, sep):
@ -162,23 +162,6 @@ def processauthors_clean_authors(authors_list):
if author.lstrip() if author.lstrip()
] ]
def processauthors_invert_names(authors_list):
"""Move first names after last names
See docstring of processauthors() for more information.
"""
dest = []
for author in authors_list:
first, last = split_author_names(author)
if first:
dest.append(ur"\indexauthor{{{first}}}{{{last}}}".format(
first=first.strip(),
last=last.strip(),
))
else:
dest.append(last.lstrip())
return dest
def processauthors(authors_string, after=None, ignore=None, sep=None): def processauthors(authors_string, after=None, ignore=None, sep=None):
r"""Return a list of authors r"""Return a list of authors
@ -210,10 +193,12 @@ def processauthors(authors_string, after=None, ignore=None, sep=None):
4) Strings containing words of "ignore" are dropped. 4) Strings containing words of "ignore" are dropped.
# ["William Blake", "Hubert Parry", "The Royal\ Choir~of~Nowhere"] # ["William Blake", "Hubert Parry", "The Royal\ Choir~of~Nowhere"]
5) First and last names are processed through LaTeX command \indexauthor 5) First and last names are split
(which will, by default, invert first and last names). # [
# ["\indexauthor{William}{Blake}", "\indexauthor{Hubert}{Parry}", # ("Blake", "William"),
# \indexthaor{The}{Royal\ Choir~of~Nowhere}"] # ("Parry", "Hubert"),
# ("Royal\ Choir~of~Nowhere", "The"),
# ]
""" """
if not sep: if not sep:
@ -223,8 +208,10 @@ def processauthors(authors_string, after=None, ignore=None, sep=None):
if not ignore: if not ignore:
ignore = [] ignore = []
return processauthors_invert_names( return [
processauthors_clean_authors( split_author_names(author)
for author
in processauthors_clean_authors(
processauthors_ignore_authors( processauthors_ignore_authors(
processauthors_remove_after( processauthors_remove_after(
processauthors_split_string( processauthors_split_string(
@ -235,5 +222,4 @@ def processauthors(authors_string, after=None, ignore=None, sep=None):
after), after),
ignore) ignore)
) )
) ]

3
patacrep/build.py

@ -13,6 +13,7 @@ from subprocess import Popen, PIPE, call
from patacrep import __DATADIR__, authors, content, errors from patacrep import __DATADIR__, authors, content, errors
from patacrep.index import process_sxd from patacrep.index import process_sxd
from patacrep.templates import TexRenderer from patacrep.templates import TexRenderer
from patacrep.songs import DataSubpath
LOGGER = logging.getLogger(__name__) LOGGER = logging.getLogger(__name__)
EOL = "\n" EOL = "\n"
@ -75,7 +76,7 @@ class Songbook(object):
self.config['datadir'] = abs_datadir self.config['datadir'] = abs_datadir
self.config['_songdir'] = [ self.config['_songdir'] = [
os.path.join(path, 'songs') DataSubpath(path, 'songs')
for path in self.config['datadir'] for path in self.config['datadir']
] ]

7
patacrep/content/cwd.py

@ -3,9 +3,8 @@
"""Change base directory before importing songs.""" """Change base directory before importing songs."""
import os
from patacrep.content import process_content from patacrep.content import process_content
from patacrep.songs import DataSubpath
#pylint: disable=unused-argument #pylint: disable=unused-argument
def parse(keyword, config, argument, contentlist): def parse(keyword, config, argument, contentlist):
@ -28,8 +27,8 @@ def parse(keyword, config, argument, contentlist):
""" """
old_songdir = config['_songdir'] old_songdir = config['_songdir']
config['_songdir'] = ( config['_songdir'] = (
[argument] + [DataSubpath("", argument)] +
[os.path.join(path, argument) for path in config['_songdir']] + [path.clone().join(argument) for path in config['_songdir']] +
config['_songdir'] config['_songdir']
) )
processed_content = process_content(contentlist, config) processed_content = process_content(contentlist, config)

4
patacrep/content/include.py

@ -17,6 +17,10 @@ from patacrep import encoding
LOGGER = logging.getLogger(__name__) LOGGER = logging.getLogger(__name__)
def load_from_datadirs(path, config=None): def load_from_datadirs(path, config=None):
"""Load 'path' from one of the datadirs.
Raise an exception if it was found in none of the datadirs of 'config'.
"""
for datadir in config.get("datadir", []): for datadir in config.get("datadir", []):
filepath = os.path.join(datadir, path) filepath = os.path.join(datadir, path)
if os.path.exists(filepath): if os.path.exists(filepath):

25
patacrep/content/song.py

@ -35,7 +35,7 @@ class SongRenderer(Content, Song):
def render(self, context): def render(self, context):
"""Return the string that will render the song.""" """Return the string that will render the song."""
return ur'\input{{{}}}'.format(files.relpath( return ur'\input{{{}}}'.format(files.relpath(
self.path, self.fullpath,
os.path.dirname(context['filename']) os.path.dirname(context['filename'])
)) ))
@ -59,21 +59,28 @@ def parse(keyword, argument, contentlist, config):
if contentlist: if contentlist:
break break
contentlist = [ contentlist = [
files.relpath(filename, songdir) filename
for filename for filename
in ( in (
files.recursive_find(songdir, "*.sg") files.recursive_find(songdir.fullpath, "*.sg")
+ files.recursive_find(songdir, "*.is") + files.recursive_find(songdir.fullpath, "*.is")
) )
] ]
for elem in contentlist: for elem in contentlist:
before = len(songlist) before = len(songlist)
for songdir in config['_songdir']: for songdir in config['_songdir']:
for filename in glob.iglob(os.path.join(songdir, elem)): if songdir.datadir and not os.path.isdir(songdir.datadir):
LOGGER.debug('Parsing file "{}"'.format(filename)) continue
song = SongRenderer(filename, config) with files.chdir(songdir.datadir):
songlist.append(song) for filename in glob.iglob(os.path.join(songdir.subpath, elem)):
config["_languages"].update(song.languages) LOGGER.debug('Parsing file "{}"'.format(filename))
song = SongRenderer(
songdir.datadir,
filename,
config,
)
songlist.append(song)
config["_languages"].update(song.languages)
if len(songlist) > before: if len(songlist) > before:
break break
if len(songlist) == before: if len(songlist) == before:

8
patacrep/content/sorted.py

@ -33,8 +33,8 @@ def normalize_field(field):
"""Return a normalized field, it being a string or a list of strings.""" """Return a normalized field, it being a string or a list of strings."""
if isinstance(field, basestring): if isinstance(field, basestring):
return normalize_string(field) return normalize_string(field)
elif isinstance(field, list): elif isinstance(field, list) or isinstance(field, tuple):
return [normalize_string(string) for string in field] return [normalize_field(string) for string in field]
def key_generator(sort): def key_generator(sort):
"""Return a function that returns the list of values used to sort the song. """Return a function that returns the list of values used to sort the song.
@ -50,7 +50,7 @@ def key_generator(sort):
if key == "@title": if key == "@title":
field = song.unprefixed_titles field = song.unprefixed_titles
elif key == "@path": elif key == "@path":
field = song.path field = song.fullpath
elif key == "by": elif key == "by":
field = song.authors field = song.authors
else: else:
@ -60,7 +60,7 @@ def key_generator(sort):
LOGGER.debug( LOGGER.debug(
"Ignoring unknown key '{}' for song {}.".format( "Ignoring unknown key '{}' for song {}.".format(
key, key,
files.relpath(song.path), files.relpath(song.fullpath),
) )
) )
field = u"" field = u""

7
patacrep/content/tex.py

@ -41,8 +41,11 @@ def parse(keyword, argument, contentlist, config):
for filename in contentlist: for filename in contentlist:
checked_file = None checked_file = None
for path in config['_songdir']: for path in config['_songdir']:
if os.path.exists(os.path.join(path, filename)): if os.path.exists(os.path.join(path.fullpath, filename)):
checked_file = os.path.relpath(os.path.join(path, filename)) checked_file = os.path.relpath(os.path.join(
path.fullpath,
filename,
))
break break
if not checked_file: if not checked_file:
LOGGER.warning( LOGGER.warning(

1
patacrep/data/examples/.gitignore

@ -0,0 +1 @@
/.cache

30
patacrep/files.py

@ -4,6 +4,7 @@
"""File system utilities.""" """File system utilities."""
from contextlib import contextmanager
import fnmatch import fnmatch
import os import os
@ -12,10 +13,14 @@ def recursive_find(root_directory, pattern):
Return a list of files matching the pattern. Return a list of files matching the pattern.
""" """
if not os.path.isdir(root_directory):
return []
matches = [] matches = []
for root, _, filenames in os.walk(root_directory): with chdir(root_directory):
for filename in fnmatch.filter(filenames, pattern): for root, _, filenames in os.walk(os.curdir):
matches.append(os.path.join(root, filename)) for filename in fnmatch.filter(filenames, pattern):
matches.append(os.path.join(root, filename))
return matches return matches
def relpath(path, start=None): def relpath(path, start=None):
@ -26,3 +31,22 @@ def relpath(path, start=None):
return os.path.relpath(path, start) return os.path.relpath(path, start)
else: else:
return os.path.abspath(path) return os.path.abspath(path)
@contextmanager
def chdir(path):
    """Temporarily change the current working directory.

    Can be used as:

        with chdir("some/directory"):
            do_stuff()

    If `path` is empty (or otherwise false), the working directory is left
    untouched and the body simply runs in place.  Otherwise the process
    moves to `path` for the duration of the `with` block and goes back to
    the directory it started from afterwards, even when the body raises.
    """
    if not path:
        yield
        return

    olddir = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        # Restore the previous directory whether the body returned normally
        # or raised; otherwise one failure would leave every later relative
        # path resolved against the wrong directory.
        os.chdir(olddir)

61
patacrep/index.py

@ -22,18 +22,6 @@ KEYWORD_PATTERN = re.compile(ur"^%(\w+)\s?(.*)$", re.LOCALE)
FIRST_LETTER_PATTERN = re.compile(ur"^(?:\{?\\\w+\}?)*[^\w]*(\w)", re.LOCALE) FIRST_LETTER_PATTERN = re.compile(ur"^(?:\{?\\\w+\}?)*[^\w]*(\w)", re.LOCALE)
def sortkey(value):
"""From a title, return something usable for sorting.
It handles locale (but
don't forget to call locale.setlocale(locale.LC_ALL, '')). It also handles
the sort with latex escape sequences.
"""
return locale.strxfrm(
encoding.unidecode(simpleparse(value).replace(' ', 'A')).lower()
)
def process_sxd(filename): def process_sxd(filename):
"""Parse sxd file. """Parse sxd file.
@ -115,12 +103,18 @@ class Index(object):
No processing is done on data. It is added raw. See add() for a No processing is done on data. It is added raw. See add() for a
similar method with processing. similar method with processing.
""" """
first = self.get_first_letter(key) first = self.get_first_letter(key[0])
if not first in self.data.keys(): if not first in self.data.keys():
self.data[first] = dict() self.data[first] = dict()
if not key in self.data[first].keys(): if not key in self.data[first].keys():
self.data[first][key] = [] self.data[first][key] = {
self.data[first][key].append({'num': number, 'link': link}) 'sortingkey': [
encoding.unidecode(simpleparse(item)).lower()
for item in key
],
'entries': [],
}
self.data[first][key]['entries'].append({'num': number, 'link': link})
def add(self, key, number, link): def add(self, key, number, link):
"""Add a song to the list. """Add a song to the list.
@ -133,15 +127,15 @@ class Index(object):
match = pattern.match(key) match = pattern.match(key)
if match: if match:
self._raw_add( self._raw_add(
ur"\indextitle{{{}}}{{{}}}".format( (
match.group(1).strip(), match.group(1).strip(),
(match.group(2) + match.group(3)).strip(), (match.group(2) + match.group(3)).strip()
), ),
number, number,
link link
) )
return return
self._raw_add(key, number, link) self._raw_add((key, ""), number, link)
if self.indextype == "AUTHOR": if self.indextype == "AUTHOR":
# Processing authors # Processing authors
@ -155,10 +149,26 @@ class Index(object):
"""Return the LaTeX code corresponding to the reference.""" """Return the LaTeX code corresponding to the reference."""
return ur'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref) return ur'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref)
def key_to_str(self, key):
    """Return the LaTeX command that renders an index key.

    `key` is indexed as a pair.  For an "AUTHOR" index, `key[0]` is passed
    as the `last` field and `key[1]` as the `first` field of
    `\\indexauthor`; when `key[1]` is empty, `key[0]` is returned as is.
    For a "TITLE" index both items are passed, in order, to `\\indextitle`.
    Any other index type yields None.
    """
    kind = self.indextype
    if kind == "AUTHOR":
        last, first = key[0], key[1]
        if not first:
            return last
        # Escaped backslash rather than a raw literal: same resulting text.
        return u"\\indexauthor{{{first}}}{{{last}}}".format(
            first=first,
            last=last,
        )
    if kind == "TITLE":
        return u"\\indextitle{{{0[0]}}}{{{0[1]}}}".format(key)
    return None
def entry_to_str(self, key, entry): def entry_to_str(self, key, entry):
"""Return the LaTeX code corresponding to the entry.""" """Return the LaTeX code corresponding to the entry."""
return unicode(ur'\idxentry{{{0}}}{{{1}}}' + EOL).format( return unicode(ur'\idxentry{{{0}}}{{{1}}}' + EOL).format(
key, self.key_to_str(key),
ur'\\'.join([self.ref_to_str(ref) for ref in entry]), ur'\\'.join([self.ref_to_str(ref) for ref in entry]),
) )
@ -168,9 +178,16 @@ class Index(object):
Here, an index block is a letter, and all data beginning with this Here, an index block is a letter, and all data beginning with this
letter. letter.
""" """
def sortkey(key):
"""Return something sortable for `entries[key]`."""
return [
locale.strxfrm(item)
for item
in entries[key]['sortingkey']
]
string = ur'\begin{idxblock}{' + letter + '}' + EOL string = ur'\begin{idxblock}{' + letter + '}' + EOL
for key in sorted(entries.keys(), key=sortkey): for key in sorted(entries, key=sortkey):
string += self.entry_to_str(key, entries[key]) string += self.entry_to_str(key, entries[key]['entries'])
string += ur'\end{idxblock}' + EOL string += ur'\end{idxblock}' + EOL
return string return string

1
patacrep/plastex.py

@ -39,6 +39,7 @@ def simpleparse(text):
"""Parse a simple LaTeX string. """Parse a simple LaTeX string.
""" """
tex = TeX() tex = TeX()
tex.disableLogging()
tex.input(text) tex.input(text)
doc = tex.parse() doc = tex.parse()
return process_unbr_spaces(doc.textContent) return process_unbr_spaces(doc.textContent)

134
patacrep/songs.py

@ -3,19 +3,121 @@
"""Song management.""" """Song management."""
import errno
import hashlib
import logging
import os
import re import re
try:
import cPickle as pickle
except ImportError:
import pickle
from patacrep.authors import processauthors from patacrep.authors import processauthors
from patacrep.plastex import parsetex from patacrep.plastex import parsetex
# pylint: disable=too-few-public-methods LOGGER = logging.getLogger(__name__)
def cached_name(datadir, filename):
    """Return the absolute path of the cache file for `filename`.

    The cache file lives under the `.cache` directory of `datadir`, at the
    same relative location as `filename`.  As a side effect, the directory
    that will contain the cache file is created if it does not exist yet.

    Raises OSError if that directory cannot be created, unless the failure
    is simply that the directory already exists.
    """
    target = os.path.abspath(os.path.join(datadir, '.cache', filename))
    parent = os.path.dirname(target)
    try:
        os.makedirs(parent)
    except OSError as error:
        # An already existing directory is exactly what we want; anything
        # else (permissions, a regular file in the way, ...) is an error.
        already_there = error.errno == errno.EEXIST and os.path.isdir(parent)
        if not already_there:
            raise
    return target
class DataSubpath(object):
    """A path split into two parts: a datadir and a subpath below it.

    - This object can represent either a file or a directory.
    - If the datadir part is the empty string, the represented path does
      not belong to a datadir.  An absolute subpath never belongs to a
      datadir, so the datadir part is emptied whenever the subpath is
      absolute, both at construction and after `join()`.
    """

    def __init__(self, datadir, subpath):
        if os.path.isabs(subpath):
            # An absolute subpath stands on its own: ignore the datadir.
            self.datadir = ""
        else:
            self.datadir = datadir
        self.subpath = subpath

    def __str__(self):
        return self.fullpath

    @property
    def fullpath(self):
        """Return the full path represented by self."""
        return os.path.join(self.datadir, self.subpath)

    def clone(self):
        """Return a new object with the same datadir and subpath."""
        return DataSubpath(self.datadir, self.subpath)

    def join(self, path):
        """Join the `path` argument to the subpath of self, in place.

        If `path` is absolute, it replaces the subpath (as `os.path.join`
        does) and the datadir part is emptied, exactly as the constructor
        would have done for an absolute subpath.

        Return self for convenience, so that calls can be chained, e.g.
        `some_path.clone().join("subdir")`.
        """
        self.subpath = os.path.join(self.subpath, path)
        if os.path.isabs(self.subpath):
            self.datadir = ""
        return self
# pylint: disable=too-few-public-methods, too-many-instance-attributes
class Song(object): class Song(object):
"""Song management""" """Song management"""
def __init__(self, filename, config): # Version format of cached song. Increment this number if we update
# information stored in cache.
CACHE_VERSION = 0
# List of attributes to cache
cached_attributes = [
"titles",
"unprefixed_titles",
"args",
"datadir",
"fullpath",
"subpath",
"languages",
"authors",
"_filehash",
"_version",
]
def __init__(self, datadir, subpath, config):
self.fullpath = os.path.join(datadir, subpath)
if datadir:
# Only songs in datadirs are cached
self._filehash = hashlib.md5(
open(self.fullpath, 'rb').read()
).hexdigest()
if os.path.exists(cached_name(datadir, subpath)):
try:
cached = pickle.load(open(
cached_name(datadir, subpath),
'rb',
))
if (
cached['_filehash'] == self._filehash
and cached['_version'] == self.CACHE_VERSION
):
for attribute in self.cached_attributes:
setattr(self, attribute, cached[attribute])
return
except: # pylint: disable=bare-except
LOGGER.warning("Could not use cached version of {}.".format(
self.fullpath
))
# Data extraction from the song with plastex # Data extraction from the song with plastex
data = parsetex(filename) data = parsetex(self.fullpath)
self.titles = data['titles'] self.titles = data['titles']
self.datadir = datadir
self.unprefixed_titles = [ self.unprefixed_titles = [
unprefixed_title( unprefixed_title(
title, title,
@ -25,7 +127,7 @@ class Song(object):
in self.titles in self.titles
] ]
self.args = data['args'] self.args = data['args']
self.path = filename self.subpath = subpath
self.languages = data['languages'] self.languages = data['languages']
if "by" in self.args.keys(): if "by" in self.args.keys():
self.authors = processauthors( self.authors = processauthors(
@ -35,8 +137,30 @@ class Song(object):
else: else:
self.authors = [] self.authors = []
self._version = self.CACHE_VERSION
self._write_cache()
def _write_cache(self):
    """Write the cacheable attributes of this song to its cache file.

    Nothing is written when `self.datadir` is empty.  Otherwise every
    attribute named in `self.cached_attributes` is collected into a plain
    dictionary, which is pickled to the file named by
    `cached_name(self.datadir, self.subpath)`.
    """
    if not self.datadir:
        return

    cached = {}
    for attribute in self.cached_attributes:
        if attribute == "args":
            # Force conversion of every value to unicode.
            # NOTE(review): presumably the raw values are parser objects
            # that do not pickle well -- confirm against parsetex().
            cached[attribute] = dict(
                (key, u"{}".format(value))
                for (key, value) in self.args.iteritems()
            )
        else:
            cached[attribute] = getattr(self, attribute)

    # Use a context manager so the cache file is flushed and closed as
    # soon as it is written, instead of whenever the handle is collected.
    with open(cached_name(self.datadir, self.subpath), 'wb') as cache_file:
        pickle.dump(cached, cache_file, protocol=-1)
def __repr__(self): def __repr__(self):
return repr((self.titles, self.args, self.path)) return repr((self.titles, self.args, self.fullpath))
def unprefixed_title(title, prefixes): def unprefixed_title(title, prefixes):
"""Remove the first prefix of the list in the beginning of title (if any). """Remove the first prefix of the list in the beginning of title (if any).

Loading…
Cancel
Save