Browse Source

Merge pull request #51 from patacrep/cache

Mise en place d'un cache
pull/58/head
Luthaf 10 years ago
parent
commit
a3beec2148
  1. 38
      patacrep/authors.py
  2. 3
      patacrep/build.py
  3. 7
      patacrep/content/cwd.py
  4. 4
      patacrep/content/include.py
  5. 25
      patacrep/content/song.py
  6. 8
      patacrep/content/sorted.py
  7. 7
      patacrep/content/tex.py
  8. 1
      patacrep/data/examples/.gitignore
  9. 30
      patacrep/files.py
  10. 61
      patacrep/index.py
  11. 1
      patacrep/plastex.py
  12. 134
      patacrep/songs.py

38
patacrep/authors.py

@ -64,7 +64,7 @@ def split_author_names(string):
brace_count += 1
if char == "{":
brace_count -= 1
return string[:last_space], string[last_space:]
return string[last_space:], string[:last_space]
def split_sep_author(string, sep):
@ -162,23 +162,6 @@ def processauthors_clean_authors(authors_list):
if author.lstrip()
]
def processauthors_invert_names(authors_list):
"""Move first names after last names
See docstring of processauthors() for more information.
"""
dest = []
for author in authors_list:
first, last = split_author_names(author)
if first:
dest.append(ur"\indexauthor{{{first}}}{{{last}}}".format(
first=first.strip(),
last=last.strip(),
))
else:
dest.append(last.lstrip())
return dest
def processauthors(authors_string, after=None, ignore=None, sep=None):
r"""Return a list of authors
@ -210,10 +193,12 @@ def processauthors(authors_string, after=None, ignore=None, sep=None):
4) Strings containing words of "ignore" are dropped.
# ["William Blake", "Hubert Parry", "The Royal\ Choir~of~Nowhere"]
5) First and last names are processed through LaTeX command \indexauthor
(which will, by default, invert first and last names).
# ["\indexauthor{William}{Blake}", "\indexauthor{Hubert}{Parry}",
# \indexthaor{The}{Royal\ Choir~of~Nowhere}"]
5) First and last names are split
# [
# ("Blake", "William"),
# ("Parry", "Hubert"),
# ("Royal\ Choir~of~Nowhere", "The"),
# ]
"""
if not sep:
@ -223,8 +208,10 @@ def processauthors(authors_string, after=None, ignore=None, sep=None):
if not ignore:
ignore = []
return processauthors_invert_names(
processauthors_clean_authors(
return [
split_author_names(author)
for author
in processauthors_clean_authors(
processauthors_ignore_authors(
processauthors_remove_after(
processauthors_split_string(
@ -235,5 +222,4 @@ def processauthors(authors_string, after=None, ignore=None, sep=None):
after),
ignore)
)
)
]

3
patacrep/build.py

@ -13,6 +13,7 @@ from subprocess import Popen, PIPE, call
from patacrep import __DATADIR__, authors, content, errors
from patacrep.index import process_sxd
from patacrep.templates import TexRenderer
from patacrep.songs import DataSubpath
LOGGER = logging.getLogger(__name__)
EOL = "\n"
@ -75,7 +76,7 @@ class Songbook(object):
self.config['datadir'] = abs_datadir
self.config['_songdir'] = [
os.path.join(path, 'songs')
DataSubpath(path, 'songs')
for path in self.config['datadir']
]

7
patacrep/content/cwd.py

@ -3,9 +3,8 @@
"""Change base directory before importing songs."""
import os
from patacrep.content import process_content
from patacrep.songs import DataSubpath
#pylint: disable=unused-argument
def parse(keyword, config, argument, contentlist):
@ -28,8 +27,8 @@ def parse(keyword, config, argument, contentlist):
"""
old_songdir = config['_songdir']
config['_songdir'] = (
[argument] +
[os.path.join(path, argument) for path in config['_songdir']] +
[DataSubpath("", argument)] +
[path.clone().join(argument) for path in config['_songdir']] +
config['_songdir']
)
processed_content = process_content(contentlist, config)

4
patacrep/content/include.py

@ -17,6 +17,10 @@ from patacrep import encoding
LOGGER = logging.getLogger(__name__)
def load_from_datadirs(path, config=None):
"""Load 'path' from one of the datadirs.
Raise an exception if it was found in none of the datadirs of 'config'.
"""
for datadir in config.get("datadir", []):
filepath = os.path.join(datadir, path)
if os.path.exists(filepath):

25
patacrep/content/song.py

@ -35,7 +35,7 @@ class SongRenderer(Content, Song):
def render(self, context):
"""Return the string that will render the song."""
return ur'\input{{{}}}'.format(files.relpath(
self.path,
self.fullpath,
os.path.dirname(context['filename'])
))
@ -59,21 +59,28 @@ def parse(keyword, argument, contentlist, config):
if contentlist:
break
contentlist = [
files.relpath(filename, songdir)
filename
for filename
in (
files.recursive_find(songdir, "*.sg")
+ files.recursive_find(songdir, "*.is")
files.recursive_find(songdir.fullpath, "*.sg")
+ files.recursive_find(songdir.fullpath, "*.is")
)
]
for elem in contentlist:
before = len(songlist)
for songdir in config['_songdir']:
for filename in glob.iglob(os.path.join(songdir, elem)):
LOGGER.debug('Parsing file "{}"'.format(filename))
song = SongRenderer(filename, config)
songlist.append(song)
config["_languages"].update(song.languages)
if songdir.datadir and not os.path.isdir(songdir.datadir):
continue
with files.chdir(songdir.datadir):
for filename in glob.iglob(os.path.join(songdir.subpath, elem)):
LOGGER.debug('Parsing file "{}"'.format(filename))
song = SongRenderer(
songdir.datadir,
filename,
config,
)
songlist.append(song)
config["_languages"].update(song.languages)
if len(songlist) > before:
break
if len(songlist) == before:

8
patacrep/content/sorted.py

@ -33,8 +33,8 @@ def normalize_field(field):
"""Return a normalized field, it being a string or a list of strings."""
if isinstance(field, basestring):
return normalize_string(field)
elif isinstance(field, list):
return [normalize_string(string) for string in field]
elif isinstance(field, list) or isinstance(field, tuple):
return [normalize_field(string) for string in field]
def key_generator(sort):
"""Return a function that returns the list of values used to sort the song.
@ -50,7 +50,7 @@ def key_generator(sort):
if key == "@title":
field = song.unprefixed_titles
elif key == "@path":
field = song.path
field = song.fullpath
elif key == "by":
field = song.authors
else:
@ -60,7 +60,7 @@ def key_generator(sort):
LOGGER.debug(
"Ignoring unknown key '{}' for song {}.".format(
key,
files.relpath(song.path),
files.relpath(song.fullpath),
)
)
field = u""

7
patacrep/content/tex.py

@ -41,8 +41,11 @@ def parse(keyword, argument, contentlist, config):
for filename in contentlist:
checked_file = None
for path in config['_songdir']:
if os.path.exists(os.path.join(path, filename)):
checked_file = os.path.relpath(os.path.join(path, filename))
if os.path.exists(os.path.join(path.fullpath, filename)):
checked_file = os.path.relpath(os.path.join(
path.fullpath,
filename,
))
break
if not checked_file:
LOGGER.warning(

1
patacrep/data/examples/.gitignore

@ -0,0 +1 @@
/.cache

30
patacrep/files.py

@ -4,6 +4,7 @@
"""File system utilities."""
from contextlib import contextmanager
import fnmatch
import os
@ -12,10 +13,14 @@ def recursive_find(root_directory, pattern):
Return a list of files matching the pattern.
"""
if not os.path.isdir(root_directory):
return []
matches = []
for root, _, filenames in os.walk(root_directory):
for filename in fnmatch.filter(filenames, pattern):
matches.append(os.path.join(root, filename))
with chdir(root_directory):
for root, _, filenames in os.walk(os.curdir):
for filename in fnmatch.filter(filenames, pattern):
matches.append(os.path.join(root, filename))
return matches
def relpath(path, start=None):
@ -26,3 +31,22 @@ def relpath(path, start=None):
return os.path.relpath(path, start)
else:
return os.path.abspath(path)
@contextmanager
def chdir(path):
    """Temporarily change the current working directory.

    Can be used as:

        with chdir("some/directory"):
            do_stuff()

    Arguments:
    - path: directory to move into. If it is empty (or otherwise falsy), the
      working directory is left untouched and the body runs where it is.

    The previous working directory is restored when the block exits, whether
    it finishes normally or raises an exception.
    """
    if not path:
        # Nothing to change, hence nothing to restore.
        yield
        return

    olddir = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        # Restore the directory even if the body raised; otherwise every
        # later relative path in the process would resolve against `path`.
        os.chdir(olddir)

61
patacrep/index.py

@ -22,18 +22,6 @@ KEYWORD_PATTERN = re.compile(ur"^%(\w+)\s?(.*)$", re.LOCALE)
FIRST_LETTER_PATTERN = re.compile(ur"^(?:\{?\\\w+\}?)*[^\w]*(\w)", re.LOCALE)
def sortkey(value):
"""From a title, return something usable for sorting.
It handles locale (but
don't forget to call locale.setlocale(locale.LC_ALL, '')). It also handles
the sort with latex escape sequences.
"""
return locale.strxfrm(
encoding.unidecode(simpleparse(value).replace(' ', 'A')).lower()
)
def process_sxd(filename):
"""Parse sxd file.
@ -115,12 +103,18 @@ class Index(object):
No processing is done on data. It is added raw. See add() for a
similar method with processing.
"""
first = self.get_first_letter(key)
first = self.get_first_letter(key[0])
if not first in self.data.keys():
self.data[first] = dict()
if not key in self.data[first].keys():
self.data[first][key] = []
self.data[first][key].append({'num': number, 'link': link})
self.data[first][key] = {
'sortingkey': [
encoding.unidecode(simpleparse(item)).lower()
for item in key
],
'entries': [],
}
self.data[first][key]['entries'].append({'num': number, 'link': link})
def add(self, key, number, link):
"""Add a song to the list.
@ -133,15 +127,15 @@ class Index(object):
match = pattern.match(key)
if match:
self._raw_add(
ur"\indextitle{{{}}}{{{}}}".format(
(
match.group(1).strip(),
(match.group(2) + match.group(3)).strip(),
),
(match.group(2) + match.group(3)).strip()
),
number,
link
)
return
self._raw_add(key, number, link)
self._raw_add((key, ""), number, link)
if self.indextype == "AUTHOR":
# Processing authors
@ -155,10 +149,26 @@ class Index(object):
"""Return the LaTeX code corresponding to the reference."""
return ur'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref)
def key_to_str(self, key):
    """Convert the key (title or author) to the LaTeX command rendering it.

    Arguments:
    - key: a pair of strings, indexed as key[0] and key[1].
      For an "AUTHOR" index, key[0] is passed as `last` and key[1] as
      `first`; when key[1] is empty, key[0] is returned as is.
      For a "TITLE" index, both items are passed, in order, to \\indextitle.

    Return the LaTeX string, or None if self.indextype is neither "AUTHOR"
    nor "TITLE".
    """
    # Literals are written as u"\\..." rather than ur"\...": the value is the
    # same, but the `ur` prefix is a syntax error on Python 3.
    if self.indextype == "AUTHOR":
        if not key[1]:
            return key[0]
        return u"\\indexauthor{{{first}}}{{{last}}}".format(
            first=key[1],
            last=key[0],
        )
    if self.indextype == "TITLE":
        return u"\\indextitle{{{0[0]}}}{{{0[1]}}}".format(key)
    # Unknown index type: no rendering is defined for it.
    return None
def entry_to_str(self, key, entry):
"""Return the LaTeX code corresponding to the entry."""
return unicode(ur'\idxentry{{{0}}}{{{1}}}' + EOL).format(
key,
self.key_to_str(key),
ur'\\'.join([self.ref_to_str(ref) for ref in entry]),
)
@ -168,9 +178,16 @@ class Index(object):
Here, an index block is a letter, and all data beginning with this
letter.
"""
def sortkey(key):
"""Return something sortable for `entries[key]`."""
return [
locale.strxfrm(item)
for item
in entries[key]['sortingkey']
]
string = ur'\begin{idxblock}{' + letter + '}' + EOL
for key in sorted(entries.keys(), key=sortkey):
string += self.entry_to_str(key, entries[key])
for key in sorted(entries, key=sortkey):
string += self.entry_to_str(key, entries[key]['entries'])
string += ur'\end{idxblock}' + EOL
return string

1
patacrep/plastex.py

@ -39,6 +39,7 @@ def simpleparse(text):
"""Parse a simple LaTeX string.
"""
tex = TeX()
tex.disableLogging()
tex.input(text)
doc = tex.parse()
return process_unbr_spaces(doc.textContent)

134
patacrep/songs.py

@ -3,19 +3,121 @@
"""Song management."""
import errno
import hashlib
import logging
import os
import re
try:
import cPickle as pickle
except ImportError:
import pickle
from patacrep.authors import processauthors
from patacrep.plastex import parsetex
# pylint: disable=too-few-public-methods
LOGGER = logging.getLogger(__name__)
def cached_name(datadir, filename):
    """Return the absolute path of the cached version of a file.

    The cache entry for `filename` lives in the `.cache` directory of
    `datadir`. As a side effect, the directory that will contain the cache
    entry is created if it does not exist yet (the entry itself is not).

    Raise OSError if that directory cannot be created.
    """
    cache_path = os.path.abspath(os.path.join(datadir, '.cache', filename))
    parent = os.path.dirname(cache_path)
    try:
        os.makedirs(parent)
    except OSError as error:
        # An already existing directory is fine; anything else is an error.
        already_there = error.errno == errno.EEXIST and os.path.isdir(parent)
        if not already_there:
            raise
    return cache_path
class DataSubpath(object):
    """A path split into two parts: a datadir, and a subpath.

    - This object can represent either a file or a directory.
    - If the datadir part is the empty string, the represented path does not
      belong to a datadir.
    """

    def __init__(self, datadir, subpath):
        # An absolute subpath stands on its own: the datadir is dropped.
        self.datadir = "" if os.path.isabs(subpath) else datadir
        self.subpath = subpath

    def __str__(self):
        return self.fullpath

    @property
    def fullpath(self):
        """Return the datadir and the subpath joined into a single path."""
        return os.path.join(self.datadir, self.subpath)

    def clone(self):
        """Return a new object with the same datadir and subpath."""
        return DataSubpath(self.datadir, self.subpath)

    def join(self, path):
        """Append "path" to the subpath, in place.

        Return self, so that calls can be chained.
        """
        self.subpath = os.path.join(self.subpath, path)
        return self
# pylint: disable=too-few-public-methods, too-many-instance-attributes
class Song(object):
"""Song management"""
def __init__(self, filename, config):
# Version format of cached song. Increment this number if we update
# information stored in cache.
CACHE_VERSION = 0
# List of attributes to cache
cached_attributes = [
"titles",
"unprefixed_titles",
"args",
"datadir",
"fullpath",
"subpath",
"languages",
"authors",
"_filehash",
"_version",
]
def __init__(self, datadir, subpath, config):
self.fullpath = os.path.join(datadir, subpath)
if datadir:
# Only songs in datadirs are cached
self._filehash = hashlib.md5(
open(self.fullpath, 'rb').read()
).hexdigest()
if os.path.exists(cached_name(datadir, subpath)):
try:
cached = pickle.load(open(
cached_name(datadir, subpath),
'rb',
))
if (
cached['_filehash'] == self._filehash
and cached['_version'] == self.CACHE_VERSION
):
for attribute in self.cached_attributes:
setattr(self, attribute, cached[attribute])
return
except: # pylint: disable=bare-except
LOGGER.warning("Could not use cached version of {}.".format(
self.fullpath
))
# Data extraction from the song with plastex
data = parsetex(filename)
data = parsetex(self.fullpath)
self.titles = data['titles']
self.datadir = datadir
self.unprefixed_titles = [
unprefixed_title(
title,
@ -25,7 +127,7 @@ class Song(object):
in self.titles
]
self.args = data['args']
self.path = filename
self.subpath = subpath
self.languages = data['languages']
if "by" in self.args.keys():
self.authors = processauthors(
@ -35,8 +137,30 @@ class Song(object):
else:
self.authors = []
self._version = self.CACHE_VERSION
self._write_cache()
def _write_cache(self):
    """If relevant, write a dumbed down version of self to the cache.

    Nothing is written when self.datadir is empty. Otherwise, every attribute
    listed in self.cached_attributes is stored in a dictionary, which is
    pickled (with the highest available protocol) to the file returned by
    cached_name(self.datadir, self.subpath).
    """
    if not self.datadir:
        return

    cached = {}
    for attribute in self.cached_attributes:
        if attribute == "args":
            # Force conversion to unicode (presumably because the raw
            # values are parser objects -- TODO confirm).
            cached[attribute] = {
                key: u"{}".format(value)
                for (key, value) in self.args.iteritems()
            }
        else:
            cached[attribute] = getattr(self, attribute)

    # The context manager closes (and flushes) the cache file, instead of
    # leaving that to the garbage collector.
    with open(cached_name(self.datadir, self.subpath), 'wb') as cache_file:
        pickle.dump(cached, cache_file, protocol=-1)
def __repr__(self):
return repr((self.titles, self.args, self.path))
return repr((self.titles, self.args, self.fullpath))
def unprefixed_title(title, prefixes):
"""Remove the first prefix of the list in the beginning of title (if any).

Loading…
Cancel
Save