Les auteurs sont désormais triés par nom de famille

13 years ago · 0b962cbf46
6 changed files with 243 additions and 26 deletions
--- a/index.py
+++ b/index.py
@ -9,13 +9,15 @@
 #         src is the .sxd file generated by latex
 #
 from plasTeX.TeX import TeX
 from unidecode import unidecode
 import sys
 import re
 import locale
 import warnings
 from tools import processauthors
 from utils.plastex import simpleparse
 # Pattern set to ignore latex command in title prefix
 keywordPattern = re.compile(r"^%(\w+)\s?(.*)$")
 firstLetterPattern = re.compile(r"^(?:\{?\\\w+\}?)*[^\w]*(\w)")
@ -26,9 +28,7 @@ def sortkey(value):
    don't forget to call locale.setlocale(locale.LC_ALL, '')). It also handles
    the sort with  latex escape sequences.
    '''
-    tex = TeX()
+    return locale.strxfrm(unidecode(simpleparse(value).replace(' ', 'A')))
    tex.input(value)
    return locale.strxfrm(unidecode(tex.parse().textContent.replace(' ', 'A')))
 def processSXDEntry(tab):
    """Return the first three items of ``tab`` as a tuple.

    The caller feeds the result to ``index.add`` as ``(key, number, link)``.
    Raises ``IndexError`` when ``tab`` holds fewer than three items.
    """
    key, number, link = tab[0], tab[1], tab[2]
    return key, number, link
@ -40,9 +40,8 @@ def processSXD(filename):
        data.append(line.strip())
    file.close()
    type = data[0]
    i = 1
-    idx = index()
+    idx = index(data[0])
    if len(data) > 1:
        while data[i].startswith('%'):
@ -54,12 +53,21 @@ def processSXD(filename):
    for i in range(i,len(data),3):
        entry = processSXDEntry(data[i:i+3])
        idx.add(entry[0],entry[1],entry[2])
    return idx
 class index:
-    def __init__(self):
+    def __init__(self, indextype):
        self.data = dict()
        self.keywords = dict()
        if indextype == "TITLE INDEX DATA FILE":
            self.indextype = "TITLE"
        elif indextype == "SCRIPTURE INDEX DATA FILE":
            self.indextype = "SCRIPTURE"
        elif indextype == "AUTHOR INDEX DATA FILE":
            self.indextype = "AUTHOR"
        else:
            self.indextype = ""
    def filter(self, key):
        letter = firstLetterPattern.match(key).group(1)
@ -74,16 +82,27 @@ class index:
    def compileKeywords(self):
        self.prefix_patterns = []
        if self.indextype == "TITLE":
            if 'prefix' in self.keywords:
                for prefix in self.keywords['prefix']:
                    self.prefix_patterns.append(re.compile(r"^(%s)(\b|\\)(\s*.*)$" % prefix))
-    def add(self, key, number, link):
+        self.authwords = {"after": [], "ignore": [], "sep": []}
-        for pattern in self.prefix_patterns:
+        if self.indextype == "AUTHOR":
-            match = pattern.match(key)
+            for key in self.keywords:
-            if match:
+                if key in self.authwords:
-                key = "%s (%s)" % (match.group(2) + match.group(3), match.group(1))
+                    self.authwords[key] = self.keywords[key]
-                break # Only one match per key
+            for word in self.authwords.keys():
                if word in self.keywords:
                    if word == "after":
                        self.authwords[word] = [re.compile(r"^.*%s\b(.*)" % after) for after in self.keywords[word]]
                    elif word == "sep":
                        self.authwords[word] = [" %s" % sep for sep in self.authwords[word]] + [","]
                        self.authwords[word] = [re.compile(r"^(.*)%s (.*)$" % sep) for sep in self.authwords[word] ]
                    else:
                        self.authwords[word] = self.keywords[word]
    def _raw_add(self, key, number, link):
        (first, key) = self.filter(key)
        if not self.data.has_key(first):
            self.data[first] = dict()
@ -91,6 +110,25 @@ class index:
            self.data[first][key] = []
        self.data[first][key].append({'num':number, 'link':link})
    def add(self, key, number, link):
        """Register one index entry, preprocessing ``key`` by index type.

        - "TITLE": the first prefix pattern matching ``key`` moves the
          prefix behind the title, in parentheses; only one prefix is
          handled per title.
        - "AUTHOR": ``key`` is split by ``processauthors`` (configured with
          ``self.authwords``) and one entry is stored per author found.
        - any other type: ``key`` is stored unchanged, so that such indexes
          are not silently left empty.

        ``self.prefix_patterns`` and ``self.authwords`` are set by
        ``compileKeywords``, which therefore has to run before this method.
        """
        if self.indextype == "TITLE":
            # Removing prefixes before titles
            for pattern in self.prefix_patterns:
                match = pattern.match(key)
                if match:
                    key = "%s (%s)" % (
                            match.group(2) + match.group(3),
                            match.group(1))
                    break  # Only one match per key
            self._raw_add(key, number, link)
        elif self.indextype == "AUTHOR":
            # Processing authors
            for author in processauthors(key, **self.authwords):
                self._raw_add(author, number, link)
        else:
            # No type-specific processing: keep the entry as it is
            self._raw_add(key, number, link)
    def refToStr(self, ref):
        if sys.version_info >= (2,6):
            return '\\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref)
@ -99,9 +137,9 @@ class index:
    def entryToStr(self, key, entry):
        if sys.version_info >= (2,6):
-            return '\\idxentry{{{0}}}{{{1}}}\n'.format(key, '\\\\'.join(map(self.refToStr, entry)))
+            return unicode('\\idxentry{{{0}}}{{{1}}}\n').format(key, '\\\\'.join(map(self.refToStr, entry)))
        else:
-            return '\\idxentry{%s}{%s}\n' % (key, '\\\\'.join(map(self.refToStr, entry)))
+            return unicode('\\idxentry{%s}{%s}\n') % (key, '\\\\'.join(map(self.refToStr, entry)))
    def idxBlockToStr(self, letter, entries):
        str = '\\begin{idxblock}{'+letter+'}'+'\n'
--- a/songbook.py
+++ b/songbook.py
@ -11,7 +11,7 @@ import json
 import glob
 import re
 from subprocess import call
-from tools import recursiveFind
+from tools import recursiveFind, processauthors
 from index import *
 from unidecode import unidecode
 from utils.plastex import parsetex
@ -19,8 +19,10 @@ from utils.plastex import parsetex
 class Song:
    #: Ordre de tri
    sort = []
-    #: Préfixes à ignorer pour le tri
+    #: Préfixes à ignorer pour le tri par titres
    prefixes = []
    #: Dictionnaire des options pour le traitement des auteurs
    authwords = {"after": [], "ignore": [], "sep": []}
    def __init__(self, path, languages, titles, args):
        self.titles  = titles
@ -28,6 +30,14 @@ class Song:
        self.args   = args
        self.path   = path
        self.languages = languages
        if "by" in self.args.keys():
            self.normalized_authors = [
                locale.strxfrm(author)
                for author
                in processauthors(self.args["by"], **self.authwords)
                ]
        else:
            self.normalized_authors = []
    def __repr__(self):
        return repr((self.titles, self.args, self.path))
@ -40,8 +50,11 @@ class Song:
                self_key = self.normalized_titles
                other_key = other.normalized_titles
            elif key == "@path":
-                self.key = locale.strxfrm(self.path)
+                self_key = locale.strxfrm(self.path)
                other_key = locale.strxfrm(other.path)
            elif key == "by":
                self_key = self.normalized_authors
                other_key = other.normalized_authors
            else:
                self_key = locale.strxfrm(self.args.get(key, ""))
                other_key = locale.strxfrm(other.args.get(key, ""))
@ -165,9 +178,13 @@ def makeTexFile(sb, library, output):
    # default value
    template = "patacrep.tmpl"
    songs = []
-    titleprefixwords = ""
+
    prefixes_tex = ""
    prefixes = []
    authwords_tex = ""
    authwords = {"after": ["by"], "ignore": ["unknown"], "sep": ["and"]}
    # parse the songbook data
    if "template" in sb:
        template = sb["template"]
@ -178,8 +195,28 @@ def makeTexFile(sb, library, output):
    if "titleprefixwords" in sb:
        prefixes = sb["titleprefixwords"]
        for prefix in sb["titleprefixwords"]:
-            titleprefixwords += "\\titleprefixword{%s}\n" % prefix
+            prefixes_tex += "\\titleprefixword{%s}\n" % prefix
-        sb["titleprefixwords"] = titleprefixwords
+        sb["titleprefixwords"] = prefixes_tex
    if "authwords" in sb:
        # Populating default value
        for key in ["after", "sep", "ignore"]:
            if key not in sb["authwords"]:
                sb["authwords"][key] = authwords[key]
        # Processing authwords values
        authwords = sb["authwords"]
        for key in ["after", "sep", "ignore"]:
            for word in authwords[key]:
                if key == "after":
                    authwords_tex += "\\auth%sword{%s}\n" % ("by", word)
                else:
                    authwords_tex += "\\auth%sword{%s}\n" % (key, word)
        sb["authwords"] = authwords_tex
    if "after" in authwords:
        authwords["after"] = [re.compile(r"^.*%s\b(.*)" % after) for after in authwords["after"]]
    if "sep" in authwords:
        authwords["sep"] = [" %s" % sep for sep in authwords["sep"]] + [","]
        authwords["sep"] = [re.compile(r"^(.*)%s (.*)$" % sep) for sep in authwords["sep"] ]
    if "lang" not in sb:
        sb["lang"] = "french"
    if "sort" in sb:
@ -189,6 +226,7 @@ def makeTexFile(sb, library, output):
        sort = [u"by", u"album", u"@title"]
    Song.sort = sort
    Song.prefixes = prefixes
    Song.authwords = authwords
    parameters = parseTemplate("templates/"+template)
@ -327,7 +365,7 @@ def main():
        print "processing " + sxdFile
        idx = processSXD(sxdFile)
        indexFile = open(sxdFile[:-3]+"sbx", "w")
-        indexFile.write(idx.entriesToStr())
+        indexFile.write(idx.entriesToStr().encode('utf8'))
        indexFile.close()
    # Second pdflatex pass
--- a/templates/minimal.tmpl
+++ b/templates/minimal.tmpl
@ -31,6 +31,7 @@
 %%:  {"name":"bookoptions", "description":"Options", "type":"flag", "values":["diagram","importantdiagramonly","lilypond","pictures","tabs","repeatchords","onesongperpage"], "join":",", "mandatory":true, "default":["pictures"]},
 %%:  {"name":"mainfontsize", "description":"Font Size", "type":"font", "default":"10"},
 %%:  {"name":"titleprefixwords", "description":"Ignore some words in the beginning of song titles"},
 %%:  {"name":"authwords", "description":"Set of options to process author string (LaTeX commands authsepword, authignoreword, authbyword)"},
 %%:  {"name":"languages", "description":"List of languages used by songs", "default":""}
 %%:]
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@ -54,6 +55,7 @@
 }
 \gettitleprefixwords
 \getauthwords
 \nosongnumbers
 \pagestyle{empty}
--- a/templates/patacrep.tmpl
+++ b/templates/patacrep.tmpl
@ -44,6 +44,7 @@
 %%:  {"name":"notebgcolor", "description":"Note Shade", "type":"color", "default":"#D1E4AE"},
 %%:  {"name":"indexbgcolor", "description":"Index Shade", "type":"color", "default":"#D1E4AE"},
 %%:  {"name":"titleprefixwords", "description":"Ignore some words in the beginning of song titles"},
 %%:  {"name":"authwords", "description":"Set of options to process author string (LaTeX commands authsepword, authignoreword, authbyword)"},
 %%:  {"name":"languages", "description":"List of languages used by songs", "default":""}
 %%:]
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@ -90,6 +91,7 @@
 \renewcommand{\idxbgcolor}{IndexBgColor}
 \gettitleprefixwords
 \getauthwords
 \pagestyle{empty}
--- a/tools.py
+++ b/tools.py
@ -11,3 +11,132 @@ def recursiveFind(root_directory, pattern):
      for filename in fnmatch.filter(filenames, pattern):
         matches.append(os.path.join(root, filename))
   return matches
 def split_author_names(string):
    r"""Split an author name at its last separating space.

    A space is not a separator when it is the first non-alphanumeric
    character after a backslash (``\ `` or the space ending a command name
    such as ``\emph ``), nor when it sits inside braces.

    Returns a ``(first, last)`` tuple. ``first`` keeps its trailing space
    and is empty when the string holds no separating space.

    Examples:
    - "Edgar Allan Poe" => ("Edgar Allan ", "Poe")
    - "Edgar Allan \emph {Poe}" => ("Edgar Allan ", "\emph {Poe}")
    - "The Rolling\ Stones" => ("The ", "Rolling\ Stones")
    - "The {Rolling Stones}" => ("The ", "{Rolling Stones}")
    """
    in_command = False  # True from a backslash up to the next non-alnum char
    last_space = 0      # Index just past the last separating space
    depth = 0           # Current brace nesting; separators only at depth 0
    for position, char in enumerate(string, 1):
        if depth == 0:
            if char == "\\":
                in_command = True
            elif in_command and not char.isalnum():
                # This character ends the command; if it is a space, it
                # belongs to the command and does not separate names.
                in_command = False
            elif char == " ":
                last_space = position
        # Braces are counted after the test above, so the brace character
        # itself is still examined at the depth it was found at.
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
    return string[:last_space], string[last_space:]
 def split_sep_author(string, sep):
    """Split ``string`` on the separator matched by ``sep``.

    ``sep`` is a compiled regular expression with two groups: the text
    before the separator and the text after it. The pattern is applied
    again and again to the first group, so pieces are collected from right
    to left: the returned list starts with the last piece and ends with
    the first one. A string without separator is returned as a one-item
    list.
    """
    pieces = []
    remaining = string
    while True:
        found = sep.match(remaining)
        if not found:
            break
        remaining, tail = found.group(1), found.group(2)
        pieces.append(tail)
    pieces.append(remaining)
    return pieces
 def processauthors(authors_string, after = [], ignore = [], sep = []):
    r"""Return the list of authors found in ``authors_string``.

    Arguments:
    - authors_string: raw author field, e.g.
      "Lyrics by William Blake (from Milton, 1808), music by Hubert Parry"
    - after: compiled regular expressions; when one matches an author, the
      author is replaced by the first group of the match (the text that
      follows a word such as "by").
    - ignore: words; an author containing one of them is dropped.
    - sep: compiled regular expressions with two groups, handed to
      ``split_sep_author`` to split the string into several authors.

    The default lists are never modified by this function.

    Processing steps:
    1) Parentheses and their content are removed.
    2) The string is split with every separator of ``sep``.
    3) Everything before a word of ``after`` is removed.
    4) Authors containing a word of ``ignore`` are dropped.
    5) Surrounding spaces and stray commas are removed, and empty authors
       are dropped. Removing a parenthesised remark often leaves a trailing
       space behind; without this step that space would be taken for the
       first name / last name separator and give an empty last name.
    6) First names are moved after last names: "William Blake" becomes
       "Blake, William" (see ``split_author_names`` for how LaTeX commands
       and braces such as "The Royal\ Choir~of~Nowhere" are handled).
    """
    # 1) Removing parentheses
    depth = 0
    kept = []
    for char in authors_string:
        if char == '(':
            depth += 1
        elif char == ')' and depth > 0:
            depth -= 1
        elif depth == 0:
            kept.append(char)
    authors_list = ["".join(kept)]

    # 2) Splitting strings
    for sepword in sep:
        pieces = []
        for author in authors_list:
            pieces.extend(split_sep_author(author, sepword))
        authors_list = pieces

    # 3) Removing stuff before "after"
    trimmed = []
    for author in authors_list:
        for afterword in after:
            match = afterword.match(author)
            if match:
                author = match.group(1)
                break  # Only the first matching word is applied
        trimmed.append(author)

    # 4) Ignoring ignored authors
    authors_list = [
        author
        for author in trimmed
        if not any(str(ignoreword) in author for ignoreword in ignore)
        ]

    # 5) Cleaning: removing surrounding spaces, stray commas, empty authors
    cleaned = []
    for author in authors_list:
        author = author.strip().strip(",").strip()
        if author:
            cleaned.append(author)

    # 6) Moving first names after last names
    result = []
    for author in cleaned:
        first, last = split_author_names(author)
        first = first.strip()
        last = last.strip()
        if first:
            result.append("%(last)s, %(first)s" % {
                'first': first,
                'last': last,
                })
        else:
            result.append(last)
    return result
--- a/utils/plastex.py
+++ b/utils/plastex.py
@ -7,6 +7,14 @@ import copy
 import os
 import sys
 def simpleparse(text):
    """Parse a simple LaTeX string and return its ``textContent``.

    ``text`` is fed to a fresh ``TeX`` object (presumably the plasTeX
    parser, imported outside this excerpt) and the ``textContent``
    attribute of the parsed document is returned.
    """
    parser = TeX()
    parser.input(text)
    return parser.parse().textContent
 class SongParser:
    """Analyseur syntaxique de fichiers .sg"""