Les auteurs sont désormais triés par nom de famille

13 years ago · 0b962cbf46
6 changed files with 243 additions and 26 deletions
--- a/index.py
+++ b/index.py
@ -9,13 +9,15 @@
 #         src is the .sxd file generated by latex
 #

-from plasTeX.TeX import TeX
 from unidecode import unidecode
 import sys
 import re
 import locale
 import warnings

+from tools import processauthors
+from utils.plastex import simpleparse
+
 # Pattern set to ignore latex command in title prefix
 keywordPattern = re.compile(r"^%(\w+)\s?(.*)$")
 firstLetterPattern = re.compile(r"^(?:\{?\\\w+\}?)*[^\w]*(\w)")
@ -26,9 +28,7 @@ def sortkey(value):
    don't forget to call locale.setlocale(locale.LC_ALL, '')). It also handles
    the sort with  latex escape sequences.
    '''
-    tex = TeX()
-    tex.input(value)
-    return locale.strxfrm(unidecode(tex.parse().textContent.replace(' ', 'A')))
+    return locale.strxfrm(unidecode(simpleparse(value).replace(' ', 'A')))

 def processSXDEntry(tab):
    return (tab[0], tab[1], tab[2])
@ -40,9 +40,8 @@ def processSXD(filename):
        data.append(line.strip())
    file.close()

-    type = data[0]
    i = 1
-    idx = index()
+    idx = index(data[0])

    if len(data) > 1:
        while data[i].startswith('%'):
@ -54,12 +53,21 @@ def processSXD(filename):
    for i in range(i,len(data),3):
        entry = processSXDEntry(data[i:i+3])
        idx.add(entry[0],entry[1],entry[2])
+
    return idx

 class index:
-    def __init__(self):
+    def __init__(self, indextype):
        self.data = dict()
        self.keywords = dict()
+        if indextype == "TITLE INDEX DATA FILE":
+            self.indextype = "TITLE"
+        elif indextype == "SCRIPTURE INDEX DATA FILE":
+            self.indextype = "SCRIPTURE"
+        elif indextype == "AUTHOR INDEX DATA FILE":
+            self.indextype = "AUTHOR"
+        else:
+            self.indextype = ""

    def filter(self, key):
        letter = firstLetterPattern.match(key).group(1)
@ -74,16 +82,27 @@ class index:

    def compileKeywords(self):
        self.prefix_patterns = []
+        if self.indextype == "TITLE":
            if 'prefix' in self.keywords:
                for prefix in self.keywords['prefix']:
                    self.prefix_patterns.append(re.compile(r"^(%s)(\b|\\)(\s*.*)$" % prefix))

-    def add(self, key, number, link):
-        for pattern in self.prefix_patterns:
-            match = pattern.match(key)
-            if match:
-                key = "%s (%s)" % (match.group(2) + match.group(3), match.group(1))
-                break # Only one match per key
+        self.authwords = {"after": [], "ignore": [], "sep": []}
+        if self.indextype == "AUTHOR":
+            for key in self.keywords:
+                if key in self.authwords:
+                    self.authwords[key] = self.keywords[key]
+            for word in self.authwords.keys():
+                if word in self.keywords:
+                    if word == "after":
+                        self.authwords[word] = [re.compile(r"^.*%s\b(.*)" % after) for after in self.keywords[word]]
+                    elif word == "sep":
+                        self.authwords[word] = [" %s" % sep for sep in self.authwords[word]] + [","]
+                        self.authwords[word] = [re.compile(r"^(.*)%s (.*)$" % sep) for sep in self.authwords[word] ]
+                    else:
+                        self.authwords[word] = self.keywords[word]
+
+    def _raw_add(self, key, number, link):
        (first, key) = self.filter(key)
        if not self.data.has_key(first):
            self.data[first] = dict()
@ -91,6 +110,25 @@ class index:
            self.data[first][key] = []
        self.data[first][key].append({'num':number, 'link':link})

+    def add(self, key, number, link):
+        """Add one entry to the index, pre-processing `key` by index type.
+
+        Relies on `self.prefix_patterns` and `self.authwords`, which are
+        assigned by `compileKeywords`.
+
+        - "AUTHOR" index: `key` is an author string; it is expanded with
+          `processauthors` and one entry is stored per resulting author.
+        - "TITLE" index: the first matching prefix pattern moves the prefix
+          to the end of the title, between parentheses.
+        - Any other index type: `key` is stored unchanged. Previously such
+          entries were silently dropped.
+
+        `number` and `link` are passed through to `_raw_add` untouched.
+        """
+        if self.indextype == "AUTHOR":
+            # One index entry per individual author found in the string
+            for author in processauthors(key, **self.authwords):
+                self._raw_add(author, number, link)
+            return
+
+        if self.indextype == "TITLE":
+            # Moving a leading prefix word after the title
+            for pattern in self.prefix_patterns:
+                match = pattern.match(key)
+                if match:
+                    key = "%s (%s)" % (
+                            match.group(2) + match.group(3),
+                            match.group(1),
+                            )
+                    break  # Only one match per key
+
+        # Titles (rewritten or not) and all remaining index types
+        self._raw_add(key, number, link)
+
    def refToStr(self, ref):
        if sys.version_info >= (2,6):
            return '\\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref)
@ -99,9 +137,9 @@ class index:

    def entryToStr(self, key, entry):
        if sys.version_info >= (2,6):
-            return '\\idxentry{{{0}}}{{{1}}}\n'.format(key, '\\\\'.join(map(self.refToStr, entry)))
+            return unicode('\\idxentry{{{0}}}{{{1}}}\n').format(key, '\\\\'.join(map(self.refToStr, entry)))
        else:
-            return '\\idxentry{%s}{%s}\n' % (key, '\\\\'.join(map(self.refToStr, entry)))
+            return unicode('\\idxentry{%s}{%s}\n') % (key, '\\\\'.join(map(self.refToStr, entry)))

    def idxBlockToStr(self, letter, entries):
        str = '\\begin{idxblock}{'+letter+'}'+'\n'
--- a/songbook.py
+++ b/songbook.py
@ -11,7 +11,7 @@ import json
 import glob
 import re
 from subprocess import call
-from tools import recursiveFind
+from tools import recursiveFind, processauthors
 from index import *
 from unidecode import unidecode
 from utils.plastex import parsetex
@ -19,8 +19,10 @@ from utils.plastex import parsetex
 class Song:
    #: Ordre de tri
    sort = []
-    #: Préfixes à ignorer pour le tri
+    #: Préfixes à ignorer pour le tri par titres
    prefixes = []
+    #: Dictionnaire des options pour le traitement des auteurs
+    authwords = {"after": [], "ignore": [], "sep": []}

    def __init__(self, path, languages, titles, args):
        self.titles  = titles
@ -28,6 +30,14 @@ class Song:
        self.args   = args
        self.path   = path
        self.languages = languages
+        if "by" in self.args.keys():
+            self.normalized_authors = [
+                locale.strxfrm(author)
+                for author
+                in processauthors(self.args["by"], **self.authwords)
+                ]
+        else:
+            self.normalized_authors = []

    def __repr__(self):
        return repr((self.titles, self.args, self.path))
@ -40,8 +50,11 @@ class Song:
                self_key = self.normalized_titles
                other_key = other.normalized_titles
            elif key == "@path":
-                self.key = locale.strxfrm(self.path)
+                self_key = locale.strxfrm(self.path)
                other_key = locale.strxfrm(other.path)
+            elif key == "by":
+                self_key = self.normalized_authors
+                other_key = other.normalized_authors
            else:
                self_key = locale.strxfrm(self.args.get(key, ""))
                other_key = locale.strxfrm(other.args.get(key, ""))
@ -165,9 +178,13 @@ def makeTexFile(sb, library, output):
    # default value
    template = "patacrep.tmpl"
    songs = []
-    titleprefixwords = ""
+
+    prefixes_tex = ""
    prefixes = []

+    authwords_tex = ""
+    authwords = {"after": ["by"], "ignore": ["unknown"], "sep": ["and"]}
+
    # parse the songbook data
    if "template" in sb:
        template = sb["template"]
@ -178,8 +195,28 @@ def makeTexFile(sb, library, output):
    if "titleprefixwords" in sb:
        prefixes = sb["titleprefixwords"]
        for prefix in sb["titleprefixwords"]:
-            titleprefixwords += "\\titleprefixword{%s}\n" % prefix
-        sb["titleprefixwords"] = titleprefixwords
+            prefixes_tex += "\\titleprefixword{%s}\n" % prefix
+        sb["titleprefixwords"] = prefixes_tex
+    if "authwords" in sb:
+        # Populating default value
+        for key in ["after", "sep", "ignore"]:
+            if key not in sb["authwords"]:
+                sb["authwords"][key] = authwords[key]
+        # Processing authwords values
+        authwords = sb["authwords"]
+        for key in ["after", "sep", "ignore"]:
+            for word in authwords[key]:
+                if key == "after":
+                    authwords_tex += "\\auth%sword{%s}\n" % ("by", word)
+                else:
+                    authwords_tex += "\\auth%sword{%s}\n" % (key, word)
+        sb["authwords"] = authwords_tex
+    if "after" in authwords:
+        authwords["after"] = [re.compile(r"^.*%s\b(.*)" % after) for after in authwords["after"]]
+    if "sep" in authwords:
+        authwords["sep"] = [" %s" % sep for sep in authwords["sep"]] + [","]
+        authwords["sep"] = [re.compile(r"^(.*)%s (.*)$" % sep) for sep in authwords["sep"] ]
+
    if "lang" not in sb:
        sb["lang"] = "french"
    if "sort" in sb:
@ -189,6 +226,7 @@ def makeTexFile(sb, library, output):
        sort = [u"by", u"album", u"@title"]
    Song.sort = sort
    Song.prefixes = prefixes
+    Song.authwords = authwords

    parameters = parseTemplate("templates/"+template)

@ -327,7 +365,7 @@ def main():
        print "processing " + sxdFile
        idx = processSXD(sxdFile)
        indexFile = open(sxdFile[:-3]+"sbx", "w")
-        indexFile.write(idx.entriesToStr())
+        indexFile.write(idx.entriesToStr().encode('utf8'))
        indexFile.close()

    # Second pdflatex pass
--- a/templates/minimal.tmpl
+++ b/templates/minimal.tmpl
@ -31,6 +31,7 @@
 %%:  {"name":"bookoptions", "description":"Options", "type":"flag", "values":["diagram","importantdiagramonly","lilypond","pictures","tabs","repeatchords","onesongperpage"], "join":",", "mandatory":true, "default":["pictures"]},
 %%:  {"name":"mainfontsize", "description":"Font Size", "type":"font", "default":"10"},
 %%:  {"name":"titleprefixwords", "description":"Ignore some words in the beginning of song titles"},
+%%:  {"name":"authwords", "description":"Set of options to process author string (LaTeX commands authsepword, authignoreword, authbyword)"},
 %%:  {"name":"languages", "description":"List of languages used by songs", "default":""}
 %%:]
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@ -54,6 +55,7 @@
 }

 \gettitleprefixwords
+\getauthwords

 \nosongnumbers
 \pagestyle{empty}
--- a/templates/patacrep.tmpl
+++ b/templates/patacrep.tmpl
@ -44,6 +44,7 @@
 %%:  {"name":"notebgcolor", "description":"Note Shade", "type":"color", "default":"#D1E4AE"},
 %%:  {"name":"indexbgcolor", "description":"Index Shade", "type":"color", "default":"#D1E4AE"},
 %%:  {"name":"titleprefixwords", "description":"Ignore some words in the beginning of song titles"},
+%%:  {"name":"authwords", "description":"Set of options to process author string (LaTeX commands authsepword, authignoreword, authbyword)"},
 %%:  {"name":"languages", "description":"List of languages used by songs", "default":""}
 %%:]
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@ -90,6 +91,7 @@
 \renewcommand{\idxbgcolor}{IndexBgColor}

 \gettitleprefixwords
+\getauthwords

 \pagestyle{empty}

--- a/tools.py
+++ b/tools.py
@ -11,3 +11,132 @@ def recursiveFind(root_directory, pattern):
      for filename in fnmatch.filter(filenames, pattern):
         matches.append(os.path.join(root, filename))
   return matches
+
+def split_author_names(string):
+    r"""Split an author string into a (first names, last name) pair.
+
+    The cut is made right after the last "real" space. A space is not a
+    separator when it sits inside braces, or when it is the first
+    non-alphanumeric character following a backslash (i.e. it ends a
+    command name or is an escaped space).
+
+    The first element keeps its trailing space; it is empty when no
+    separating space was found.
+    Examples:
+    - Edgar Allan Poe => ("Edgar Allan ", "Poe")
+    - Edgar Allan \emph {Poe} => ("Edgar Allan ", "\emph {Poe}")
+    - The Rolling\ Stones => ("The ", "Rolling\ Stones")
+    - The {Rolling Stones} => ("The ", "{Rolling Stones}")
+    """
+    depth = 0           # brace nesting level; spaces only count at level 0
+    in_command = False  # True from a backslash up to the next non-alnum char
+    cut = 0             # position just after the last separating space
+    for position, char in enumerate(string):
+        if depth == 0:
+            if char == "\\":
+                in_command = True
+            elif in_command and not char.isalnum():
+                # This character closes the command; if it is a space, it
+                # is consumed here and never seen as a separator.
+                in_command = False
+            elif char == " ":
+                cut = position + 1
+        if char == "{":
+            depth += 1
+        elif char == "}":
+            depth -= 1
+    return string[:cut], string[cut:]
+
+def split_sep_author(string, sep):
+    """Split `string` into authors using the compiled regexp `sep`.
+
+    `sep` must define two groups: group 1 is the part before a separator,
+    group 2 the part after it. The patterns built elsewhere in this change
+    have the form "^(.*)<separator> (.*)$".
+    The string is repeatedly matched: group 2 is peeled off as one author
+    and the search continues on group 1, until `sep` no longer matches.
+
+    Returns the list of authors in the order they appear in `string`.
+    A string without separator is returned as a one-element list.
+    """
+    authors = []
+    match = sep.match(string)
+    while match:
+        authors.append(match.group(2))
+        string = match.group(1)
+        match = sep.match(string)
+    authors.append(string)
+    # Authors were collected from right to left: restore the input order
+    authors.reverse()
+    return authors
+
+def processauthors(authors_string, after=(), ignore=(), sep=()):
+    r"""Return the list of authors found in `authors_string`.
+
+    Arguments:
+    - authors_string: free-form author string;
+    - after: compiled regexps whose group 1 is the part of an author
+      string to keep (i.e. what follows a word such as "by");
+    - ignore: words; an author containing one of them is dropped;
+    - sep: compiled regexps given to `split_sep_author` to split the
+      string into several authors.
+
+    The string is processed as follows:
+
+    1) Parentheses and their content are removed (nesting is handled; a
+       closing parenthesis without opening one is kept).
+    2) The string is split with each regexp of `sep` in turn.
+    3) For each author, the first regexp of `after` that matches
+       replaces the author by its group 1.
+    4) Authors containing a word of `ignore` are dropped.
+    5) Surrounding spaces are removed, and empty authors are dropped.
+    6) First names are moved after last names ("last, first"), using
+       `split_author_names`.
+
+    For example, with `after` built from "by" and `sep` matching commas,
+    "Lyrics by William Blake (1808), music by Hubert Parry" gives the
+    authors "Blake, William" and "Parry, Hubert" (listed in the order
+    returned by `split_sep_author`).
+    """
+
+    # Removing parentheses
+    depth = 0
+    kept = []
+    for char in authors_string:
+        if char == '(':
+            depth += 1
+        elif char == ')' and depth > 0:
+            depth -= 1
+        elif depth == 0:
+            kept.append(char)
+    authors_list = ["".join(kept)]
+
+    # Splitting strings
+    for sepword in sep:
+        dest = []
+        for author in authors_list:
+            dest.extend(split_sep_author(author, sepword))
+        authors_list = dest
+
+    # Removing stuff before "after"
+    dest = []
+    for author in authors_list:
+        for afterword in after:
+            match = afterword.match(author)
+            if match:
+                author = match.group(1)
+                break
+        dest.append(author)
+    authors_list = dest
+
+    # Ignoring ignored authors
+    authors_list = [
+        author
+        for author in authors_list
+        if not any(str(ignoreword) in author for ignoreword in ignore)
+        ]
+
+    # Cleaning: removing surrounding spaces, then empty authors. Trailing
+    # spaces must go too (a removed parenthesis typically leaves one):
+    # otherwise the last space would be taken as the first/last name
+    # separator, and the last name would be empty.
+    authors_list = [author.strip() for author in authors_list]
+    authors_list = [author for author in authors_list if author]
+
+    # Moving first names after last names
+    dest = []
+    for author in authors_list:
+        first, last = split_author_names(author)
+        first = first.strip()
+        last = last.strip()
+        if first:
+            dest.append("%s, %s" % (last, first))
+        else:
+            dest.append(last)
+    return dest
--- a/utils/plastex.py
+++ b/utils/plastex.py
@ -7,6 +7,14 @@ import copy
 import os
 import sys

+def simpleparse(text):
+    """Parse a simple LaTeX string, and return its text content.
+
+    `text` is fed to a fresh `TeX` parser (presumably plasTeX's; the
+    import is not part of this hunk), and the `textContent` of the parsed
+    document is returned.
+    """
+    parser = TeX()
+    parser.input(text)
+    return parser.parse().textContent
+
 class SongParser:
    """Analyseur syntaxique de fichiers .sg"""