Merge pull request #51 from patacrep/cache

Mise en place d'un cache
11 years ago · a3beec2148
12 changed files with 243 additions and 76 deletions
--- a/patacrep/authors.py
+++ b/patacrep/authors.py
@ -64,7 +64,7 @@ def split_author_names(string):
            brace_count += 1
        if char == "{":
            brace_count -= 1
-    return string[:last_space], string[last_space:]
+    return string[last_space:], string[:last_space]
 def split_sep_author(string, sep):
@ -162,23 +162,6 @@ def processauthors_clean_authors(authors_list):
            if author.lstrip()
            ]
 def processauthors_invert_names(authors_list):
    """Move first names after last names
    See docstring of processauthors() for more information.
    """
    dest = []
    for author in authors_list:
        first, last = split_author_names(author)
        if first:
            dest.append(ur"\indexauthor{{{first}}}{{{last}}}".format(
                first=first.strip(),
                last=last.strip(),
                ))
        else:
            dest.append(last.lstrip())
    return dest
 def processauthors(authors_string, after=None, ignore=None, sep=None):
    r"""Return a list of authors
@ -210,10 +193,12 @@ def processauthors(authors_string, after=None, ignore=None, sep=None):
    4) Strings containing words of "ignore" are dropped.
    # ["William Blake", "Hubert Parry", The Royal\ Choir~of~Nowhere"]
-    5) First and last names are processed through LaTeX command \indexauthor
+    5) First and last names are splitted
-        (which will, by default, invert first and last names).
+    # [
-    # ["\indexauthor{William}{Blake}", "\indexauthor{Hubert}{Parry}",
+    #   ("Blake", "William"),
-    # \indexthaor{The}{Royal\ Choir~of~Nowhere}"]
+    #   ("Parry", "Hubert"),
    #   ("Royal\ Choir~of~Nowhere", "The"),
    # ]
    """
    if not sep:
@ -223,8 +208,10 @@ def processauthors(authors_string, after=None, ignore=None, sep=None):
    if not ignore:
        ignore = []
-    return processauthors_invert_names(
+    return [
-            processauthors_clean_authors(
+            split_author_names(author)
            for author
            in processauthors_clean_authors(
                processauthors_ignore_authors(
                    processauthors_remove_after(
                        processauthors_split_string(
@ -235,5 +222,4 @@ def processauthors(authors_string, after=None, ignore=None, sep=None):
                        after),
                    ignore)
                )
-            )
+            ]
--- a/patacrep/build.py
+++ b/patacrep/build.py
@ -13,6 +13,7 @@ from subprocess import Popen, PIPE, call
 from patacrep import __DATADIR__, authors, content, errors
 from patacrep.index import process_sxd
 from patacrep.templates import TexRenderer
 from patacrep.songs import DataSubpath
 LOGGER = logging.getLogger(__name__)
 EOL = "\n"
@ -75,7 +76,7 @@ class Songbook(object):
        self.config['datadir'] = abs_datadir
        self.config['_songdir'] = [
-                os.path.join(path, 'songs')
+                DataSubpath(path, 'songs')
                for path in self.config['datadir']
                ]
--- a/patacrep/content/cwd.py
+++ b/patacrep/content/cwd.py
@ -3,9 +3,8 @@
 """Change base directory before importing songs."""
 import os
 from patacrep.content import process_content
 from patacrep.songs import DataSubpath
 #pylint: disable=unused-argument
 def parse(keyword, config, argument, contentlist):
@ -28,8 +27,8 @@ def parse(keyword, config, argument, contentlist):
    """
    old_songdir = config['_songdir']
    config['_songdir'] = (
-            [argument] +
+            [DataSubpath("", argument)] +
-            [os.path.join(path, argument) for path in config['_songdir']] +
+            [path.clone().join(argument) for path in config['_songdir']] +
            config['_songdir']
            )
    processed_content = process_content(contentlist, config)
--- a/patacrep/content/include.py
+++ b/patacrep/content/include.py
@ -17,6 +17,10 @@ from patacrep import encoding
 LOGGER = logging.getLogger(__name__)
 def load_from_datadirs(path, config=None):
    """Load 'path' from one of the datadirs.
    Raise an exception if it was found if none of the datadirs of 'config'.
    """
    for datadir in config.get("datadir", []):
        filepath = os.path.join(datadir, path)
        if os.path.exists(filepath):
--- a/patacrep/content/song.py
+++ b/patacrep/content/song.py
@ -35,7 +35,7 @@ class SongRenderer(Content, Song):
    def render(self, context):
        """Return the string that will render the song."""
        return ur'\input{{{}}}'.format(files.relpath(
-            self.path,
+            self.fullpath,
            os.path.dirname(context['filename'])
            ))
@ -59,21 +59,28 @@ def parse(keyword, argument, contentlist, config):
        if contentlist:
            break
        contentlist = [
-                files.relpath(filename, songdir)
+                filename
                for filename
                in (
-                    files.recursive_find(songdir, "*.sg")
+                    files.recursive_find(songdir.fullpath, "*.sg")
-                    + files.recursive_find(songdir, "*.is")
+                    + files.recursive_find(songdir.fullpath, "*.is")
                    )
                ]
    for elem in contentlist:
        before = len(songlist)
        for songdir in config['_songdir']:
-            for filename in glob.iglob(os.path.join(songdir, elem)):
+            if songdir.datadir and not os.path.isdir(songdir.datadir):
-                LOGGER.debug('Parsing file "{}"…'.format(filename))
+                continue
-                song = SongRenderer(filename, config)
+            with files.chdir(songdir.datadir):
-                songlist.append(song)
+                for filename in glob.iglob(os.path.join(songdir.subpath, elem)):
-                config["_languages"].update(song.languages)
+                    LOGGER.debug('Parsing file "{}"…'.format(filename))
                    song = SongRenderer(
                            songdir.datadir,
                            filename,
                            config,
                            )
                    songlist.append(song)
                    config["_languages"].update(song.languages)
            if len(songlist) > before:
                break
        if len(songlist) == before:
--- a/patacrep/content/sorted.py
+++ b/patacrep/content/sorted.py
@ -33,8 +33,8 @@ def normalize_field(field):
    """Return a normalized field, it being a string or a list of strings."""
    if isinstance(field, basestring):
        return normalize_string(field)
-    elif isinstance(field, list):
+    elif isinstance(field, list) or isinstance(field, tuple):
-        return [normalize_string(string) for string in field]
+        return [normalize_field(string) for string in field]
 def key_generator(sort):
    """Return a function that returns the list of values used to sort the song.
@ -50,7 +50,7 @@ def key_generator(sort):
            if key == "@title":
                field = song.unprefixed_titles
            elif key == "@path":
-                field = song.path
+                field = song.fullpath
            elif key == "by":
                field = song.authors
            else:
@ -60,7 +60,7 @@ def key_generator(sort):
                    LOGGER.debug(
                            "Ignoring unknown key '{}' for song {}.".format(
                                key,
-                                files.relpath(song.path),
+                                files.relpath(song.fullpath),
                                )
                            )
                    field = u""
--- a/patacrep/content/tex.py
+++ b/patacrep/content/tex.py
@ -41,8 +41,11 @@ def parse(keyword, argument, contentlist, config):
    for filename in contentlist:
        checked_file = None
        for path in config['_songdir']:
-            if os.path.exists(os.path.join(path, filename)):
+            if os.path.exists(os.path.join(path.fullpath, filename)):
-                checked_file = os.path.relpath(os.path.join(path, filename))
+                checked_file = os.path.relpath(os.path.join(
                    path.fullpath,
                    filename,
                    ))
                break
        if not checked_file:
            LOGGER.warning(
--- a/patacrep/data/examples/.gitignore
+++ b/patacrep/data/examples/.gitignore
@ -0,0 +1 @@
 /.cache
--- a/patacrep/files.py
+++ b/patacrep/files.py
@ -4,6 +4,7 @@
 """File system utilities."""
 from contextlib import contextmanager
 import fnmatch
 import os
@ -12,10 +13,14 @@ def recursive_find(root_directory, pattern):
    Return a list of files matching the pattern.
    """
    if not os.path.isdir(root_directory):
        return []
    matches = []
-    for root, _, filenames in os.walk(root_directory):
+    with chdir(root_directory):
-        for filename in fnmatch.filter(filenames, pattern):
+        for root, _, filenames in os.walk(os.curdir):
-            matches.append(os.path.join(root, filename))
+            for filename in fnmatch.filter(filenames, pattern):
                matches.append(os.path.join(root, filename))
    return matches
 def relpath(path, start=None):
@ -26,3 +31,22 @@ def relpath(path, start=None):
        return os.path.relpath(path, start)
    else:
        return os.path.abspath(path)
@contextmanager
def chdir(path):
    """Temporarily change the current working directory.

    Can be used as:
        with chdir("some/directory"):
            do_stuff()

    If `path` is empty (or otherwise false), the working directory is left
    untouched and the body of the `with` statement runs where it is.

    The previous working directory is restored when the block is left,
    including when the body raises an exception.
    """
    if not path:
        # Nothing to change, so nothing to remember or restore either.
        yield
        return
    olddir = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        # Without this `finally`, an exception raised inside the `with` body
        # would leave the whole process in `path`.
        os.chdir(olddir)
--- a/patacrep/index.py
+++ b/patacrep/index.py
@ -22,18 +22,6 @@ KEYWORD_PATTERN = re.compile(ur"^%(\w+)\s?(.*)$", re.LOCALE)
 FIRST_LETTER_PATTERN = re.compile(ur"^(?:\{?\\\w+\}?)*[^\w]*(\w)", re.LOCALE)
 def sortkey(value):
    """Build a locale-aware sorting key from a title.

    The value is parsed with simpleparse() (which is relied on to deal with
    LaTeX escape sequences), its spaces are replaced by the letter 'A', and
    the result goes through encoding.unidecode() and is lower-cased before
    being handed to locale.strxfrm().

    Do not forget to call locale.setlocale(locale.LC_ALL, '') beforehand.
    """
    plain = simpleparse(value)
    spaceless = plain.replace(' ', 'A')
    folded = encoding.unidecode(spaceless).lower()
    return locale.strxfrm(folded)
 def process_sxd(filename):
    """Parse sxd file.
@ -115,12 +103,18 @@ class Index(object):
        No processing is done on data. It is added raw. See add() for a
        similar method with processing.
        """
-        first = self.get_first_letter(key)
+        first = self.get_first_letter(key[0])
        if not first in self.data.keys():
            self.data[first] = dict()
        if not key in self.data[first].keys():
-            self.data[first][key] = []
+            self.data[first][key] = {
-        self.data[first][key].append({'num': number, 'link': link})
+                    'sortingkey': [
                        encoding.unidecode(simpleparse(item)).lower()
                        for item in key
                        ],
                    'entries': [],
                    }
        self.data[first][key]['entries'].append({'num': number, 'link': link})
    def add(self, key, number, link):
        """Add a song to the list.
@ -133,15 +127,15 @@ class Index(object):
                match = pattern.match(key)
                if match:
                    self._raw_add(
-                            ur"\indextitle{{{}}}{{{}}}".format(
+                            (
                                match.group(1).strip(),
-                                (match.group(2) + match.group(3)).strip(),
+                                (match.group(2) + match.group(3)).strip()
-                                ),
+                            ),
                            number,
                            link
                            )
                    return
-            self._raw_add(key, number, link)
+            self._raw_add((key, ""), number, link)
        if self.indextype == "AUTHOR":
            # Processing authors
@ -155,10 +149,26 @@ class Index(object):
        """Return the LaTeX code corresponding to the reference."""
        return ur'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref)
    def key_to_str(self, key):
        """Convert the key (title or author) to the LaTeX command rendering it.
        """
        if self.indextype == "AUTHOR":
            if key[1]:
                return ur"\indexauthor{{{first}}}{{{last}}}".format(
                    first=key[1],
                    last=key[0],
                    )
            else:
                return key[0]
        if self.indextype == "TITLE":
            return ur"\indextitle{{{0[0]}}}{{{0[1]}}}".format(key)
    def entry_to_str(self, key, entry):
        """Return the LaTeX code corresponding to the entry."""
        return unicode(ur'\idxentry{{{0}}}{{{1}}}' + EOL).format(
-                key,
+                self.key_to_str(key),
                ur'\\'.join([self.ref_to_str(ref) for ref in entry]),
                )
@ -168,9 +178,16 @@ class Index(object):
        Here, an index block is a letter, and all data beginning with this
        letter.
        """
        def sortkey(key):
            """Return something sortable for `entries[key]`."""
            return [
                    locale.strxfrm(item)
                    for item
                    in entries[key]['sortingkey']
                    ]
        string = ur'\begin{idxblock}{' + letter + '}' + EOL
-        for key in sorted(entries.keys(), key=sortkey):
+        for key in sorted(entries, key=sortkey):
-            string += self.entry_to_str(key, entries[key])
+            string += self.entry_to_str(key, entries[key]['entries'])
        string += ur'\end{idxblock}' + EOL
        return string
--- a/patacrep/plastex.py
+++ b/patacrep/plastex.py
@ -39,6 +39,7 @@ def simpleparse(text):
    """Parse a simple LaTeX string.
    """
    tex = TeX()
    tex.disableLogging()
    tex.input(text)
    doc = tex.parse()
    return process_unbr_spaces(doc.textContent)
--- a/patacrep/songs.py
+++ b/patacrep/songs.py
@ -3,19 +3,121 @@
 """Song management."""
 import errno
 import hashlib
 import logging
 import os
 import re
 try:
    import cPickle as pickle
 except ImportError:
    import pickle
 from patacrep.authors import processauthors
 from patacrep.plastex import parsetex
-# pylint: disable=too-few-public-methods
+LOGGER = logging.getLogger(__name__)
 def cached_name(datadir, filename):
    """Return the absolute path of the cached counterpart of a file.

    The cached file lives in the '.cache' directory of `datadir`, under the
    same relative name as `filename`. The directory meant to contain it is
    created if necessary; the cached file itself is not.

    Raise OSError if that directory cannot be created, unless the failure is
    merely that it already exists as a directory.
    """
    cache_file = os.path.abspath(
        os.path.join(datadir, '.cache', filename)
        )
    parent = os.path.dirname(cache_file)
    try:
        os.makedirs(parent)
    except OSError as error:
        already_there = (
            error.errno == errno.EEXIST and os.path.isdir(parent)
            )
        if not already_there:
            raise
    return cache_file
 class DataSubpath(object):
    """A path stored as two pieces: a datadir, and a subpath below it.

    - Instances may stand for a file as well as for a directory.
    - An empty datadir means that the path is not located in any datadir.
    """

    def __init__(self, datadir, subpath):
        # An absolute subpath cannot be relative to a datadir: in that case
        # the datadir part is left empty.
        self.datadir = "" if os.path.isabs(subpath) else datadir
        self.subpath = subpath

    def __str__(self):
        return self.fullpath

    @property
    def fullpath(self):
        """The datadir and the subpath, joined as a single path."""
        return os.path.join(self.datadir, self.subpath)

    def clone(self):
        """Return a new object holding the same datadir and subpath."""
        duplicate = DataSubpath(self.datadir, self.subpath)
        return duplicate

    def join(self, path):
        """Append `path` to the subpath of this object, in place.

        Return self, so that calls can be chained.
        """
        extended = os.path.join(self.subpath, path)
        self.subpath = extended
        return self
 # pylint: disable=too-few-public-methods, too-many-instance-attributes
 class Song(object):
    """Song management"""
-    def __init__(self, filename, config):
+    # Version format of cached song. Increment this number if we update
    # information stored in cache.
    CACHE_VERSION = 0
    # List of attributes to cache
    cached_attributes = [
            "titles",
            "unprefixed_titles",
            "args",
            "datadir",
            "fullpath",
            "subpath",
            "languages",
            "authors",
            "_filehash",
            "_version",
            ]
    def __init__(self, datadir, subpath, config):
        self.fullpath = os.path.join(datadir, subpath)
        if datadir:
            # Only songs in datadirs are cached
            self._filehash = hashlib.md5(
                    open(self.fullpath, 'rb').read()
                    ).hexdigest()
            if os.path.exists(cached_name(datadir, subpath)):
                try:
                    cached = pickle.load(open(
                        cached_name(datadir, subpath),
                        'rb',
                        ))
                    if (
                            cached['_filehash'] == self._filehash
                            and cached['_version'] == self.CACHE_VERSION
                            ):
                        for attribute in self.cached_attributes:
                            setattr(self, attribute, cached[attribute])
                        return
                except: # pylint: disable=bare-except
                    LOGGER.warning("Could not use cached version of {}.".format(
                        self.fullpath
                        ))
        # Data extraction from the song with plastex
-        data = parsetex(filename)
+        data = parsetex(self.fullpath)
        self.titles = data['titles']
        self.datadir = datadir
        self.unprefixed_titles = [
                unprefixed_title(
                    title,
@ -25,7 +127,7 @@ class Song(object):
                in self.titles
                ]
        self.args = data['args']
-        self.path = filename
+        self.subpath = subpath
        self.languages = data['languages']
        if "by" in self.args.keys():
            self.authors = processauthors(
@ -35,8 +137,30 @@ class Song(object):
        else:
            self.authors = []
        self._version = self.CACHE_VERSION
        self._write_cache()
    def _write_cache(self):
        """Write a simplified copy of this song to the cache, if relevant.

        Nothing is written when `self.datadir` is empty. Otherwise, every
        attribute listed in `cached_attributes` is collected into a
        dictionary (the values of `args` being converted to unicode strings
        first), and that dictionary is pickled into the file named by
        cached_name(self.datadir, self.subpath).
        """
        if not self.datadir:
            return
        cached = {}
        for attribute in self.cached_attributes:
            if attribute == "args":
                cached[attribute] = dict([
                    (key, u"{}".format(value)) # Force conversion to unicode
                    for (key, value)
                    in self.args.iteritems()
                    ])
            else:
                cached[attribute] = getattr(self, attribute)
        cache_path = cached_name(self.datadir, self.subpath)
        # Use a context manager so that the cache file is flushed and closed
        # as soon as it has been written, instead of whenever the file object
        # happens to be garbage collected.
        with open(cache_path, 'wb') as cache_file:
            pickle.dump(cached, cache_file, protocol=-1)
    def __repr__(self):
-        return repr((self.titles, self.args, self.path))
+        return repr((self.titles, self.args, self.fullpath))
 def unprefixed_title(title, prefixes):
    """Remove the first prefix of the list in the beginning of title (if any).