Browse Source

Les auteurs sont désormais triés par nom de famille

remotes/origin/next
Louis 12 years ago
parent
commit
0b962cbf46
  1. 68
      index.py
  2. 52
      songbook.py
  3. 2
      templates/minimal.tmpl
  4. 2
      templates/patacrep.tmpl
  5. 129
      tools.py
  6. 8
      utils/plastex.py

68
index.py

@ -9,13 +9,15 @@
# src is the .sxd file generated by latex
#
from plasTeX.TeX import TeX
from unidecode import unidecode
import sys
import re
import locale
import warnings
from tools import processauthors
from utils.plastex import simpleparse
# Pattern set to ignore latex command in title prefix
keywordPattern = re.compile(r"^%(\w+)\s?(.*)$")
firstLetterPattern = re.compile(r"^(?:\{?\\\w+\}?)*[^\w]*(\w)")
@ -26,9 +28,7 @@ def sortkey(value):
don't forget to call locale.setlocale(locale.LC_ALL, '')). It also handles
the sort with latex escape sequences.
'''
tex = TeX()
tex.input(value)
return locale.strxfrm(unidecode(tex.parse().textContent.replace(' ', 'A')))
return locale.strxfrm(unidecode(simpleparse(value).replace(' ', 'A')))
def processSXDEntry(tab):
    """Build an index entry triple from the first three items of ``tab``.

    ``tab`` must hold at least three items; an ``IndexError`` is raised
    otherwise. Any extra item is ignored.
    """
    first, second, third = tab[0], tab[1], tab[2]
    return (first, second, third)
@ -40,9 +40,8 @@ def processSXD(filename):
data.append(line.strip())
file.close()
type = data[0]
i = 1
idx = index()
idx = index(data[0])
if len(data) > 1:
while data[i].startswith('%'):
@ -54,12 +53,21 @@ def processSXD(filename):
for i in range(i,len(data),3):
entry = processSXDEntry(data[i:i+3])
idx.add(entry[0],entry[1],entry[2])
return idx
class index:
def __init__(self):
def __init__(self, indextype):
self.data = dict()
self.keywords = dict()
if indextype == "TITLE INDEX DATA FILE":
self.indextype = "TITLE"
elif indextype == "SCRIPTURE INDEX DATA FILE":
self.indextype = "SCRIPTURE"
elif indextype == "AUTHOR INDEX DATA FILE":
self.indextype = "AUTHOR"
else:
self.indextype = ""
def filter(self, key):
letter = firstLetterPattern.match(key).group(1)
@ -74,16 +82,27 @@ class index:
def compileKeywords(self):
self.prefix_patterns = []
if self.indextype == "TITLE":
if 'prefix' in self.keywords:
for prefix in self.keywords['prefix']:
self.prefix_patterns.append(re.compile(r"^(%s)(\b|\\)(\s*.*)$" % prefix))
def add(self, key, number, link):
for pattern in self.prefix_patterns:
match = pattern.match(key)
if match:
key = "%s (%s)" % (match.group(2) + match.group(3), match.group(1))
break # Only one match per key
self.authwords = {"after": [], "ignore": [], "sep": []}
if self.indextype == "AUTHOR":
for key in self.keywords:
if key in self.authwords:
self.authwords[key] = self.keywords[key]
for word in self.authwords.keys():
if word in self.keywords:
if word == "after":
self.authwords[word] = [re.compile(r"^.*%s\b(.*)" % after) for after in self.keywords[word]]
elif word == "sep":
self.authwords[word] = [" %s" % sep for sep in self.authwords[word]] + [","]
self.authwords[word] = [re.compile(r"^(.*)%s (.*)$" % sep) for sep in self.authwords[word] ]
else:
self.authwords[word] = self.keywords[word]
def _raw_add(self, key, number, link):
(first, key) = self.filter(key)
if not self.data.has_key(first):
self.data[first] = dict()
@ -91,6 +110,25 @@ class index:
self.data[first][key] = []
self.data[first][key].append({'num':number, 'link':link})
def add(self, key, number, link):
# Add an entry to the index; how `key` is handled depends on
# self.indextype. Entries are stored through self._raw_add().
if self.indextype == "TITLE":
# Removing prefixes before titles
# The first prefix pattern that matches wins: the entry is stored as
# "<rest of the title> (<prefix>)" and no other pattern is tried.
for pattern in self.prefix_patterns:
match = pattern.match(key)
if match:
self._raw_add(
"%s (%s)" % (match.group(2) + match.group(3), match.group(1)),
number, link)
return
# NOTE(review): indentation is lost in this view; presumably this is the
# fallback for titles that match no prefix pattern -- confirm its nesting.
self._raw_add(key, number, link)
if self.indextype == "AUTHOR":
# Processing authors
# One entry is added for each author returned by processauthors(), which
# receives self.authwords as keyword arguments.
for author in processauthors(
key,
**self.authwords):
self._raw_add(author, number, link)
def refToStr(self, ref):
if sys.version_info >= (2,6):
return '\\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref)
@ -99,9 +137,9 @@ class index:
def entryToStr(self, key, entry):
if sys.version_info >= (2,6):
return '\\idxentry{{{0}}}{{{1}}}\n'.format(key, '\\\\'.join(map(self.refToStr, entry)))
return unicode('\\idxentry{{{0}}}{{{1}}}\n').format(key, '\\\\'.join(map(self.refToStr, entry)))
else:
return '\\idxentry{%s}{%s}\n' % (key, '\\\\'.join(map(self.refToStr, entry)))
return unicode('\\idxentry{%s}{%s}\n') % (key, '\\\\'.join(map(self.refToStr, entry)))
def idxBlockToStr(self, letter, entries):
str = '\\begin{idxblock}{'+letter+'}'+'\n'

52
songbook.py

@ -11,7 +11,7 @@ import json
import glob
import re
from subprocess import call
from tools import recursiveFind
from tools import recursiveFind, processauthors
from index import *
from unidecode import unidecode
from utils.plastex import parsetex
@ -19,8 +19,10 @@ from utils.plastex import parsetex
class Song:
#: Ordre de tri
sort = []
#: Préfixes à ignorer pour le tri
#: Préfixes à ignorer pour le tri par titres
prefixes = []
#: Dictionnaire des options pour le traitement des auteurs
authwords = {"after": [], "ignore": [], "sep": []}
def __init__(self, path, languages, titles, args):
self.titles = titles
@ -28,6 +30,14 @@ class Song:
self.args = args
self.path = path
self.languages = languages
if "by" in self.args.keys():
self.normalized_authors = [
locale.strxfrm(author)
for author
in processauthors(self.args["by"], **self.authwords)
]
else:
self.normalized_authors = []
def __repr__(self):
return repr((self.titles, self.args, self.path))
@ -40,8 +50,11 @@ class Song:
self_key = self.normalized_titles
other_key = other.normalized_titles
elif key == "@path":
self.key = locale.strxfrm(self.path)
self_key = locale.strxfrm(self.path)
other_key = locale.strxfrm(other.path)
elif key == "by":
self_key = self.normalized_authors
other_key = other.normalized_authors
else:
self_key = locale.strxfrm(self.args.get(key, ""))
other_key = locale.strxfrm(other.args.get(key, ""))
@ -165,9 +178,13 @@ def makeTexFile(sb, library, output):
# default value
template = "patacrep.tmpl"
songs = []
titleprefixwords = ""
prefixes_tex = ""
prefixes = []
authwords_tex = ""
authwords = {"after": ["by"], "ignore": ["unknown"], "sep": ["and"]}
# parse the songbook data
if "template" in sb:
template = sb["template"]
@ -178,8 +195,28 @@ def makeTexFile(sb, library, output):
if "titleprefixwords" in sb:
prefixes = sb["titleprefixwords"]
for prefix in sb["titleprefixwords"]:
titleprefixwords += "\\titleprefixword{%s}\n" % prefix
sb["titleprefixwords"] = titleprefixwords
prefixes_tex += "\\titleprefixword{%s}\n" % prefix
sb["titleprefixwords"] = prefixes_tex
if "authwords" in sb:
# Populating default value
for key in ["after", "sep", "ignore"]:
if key not in sb["authwords"]:
sb["authwords"][key] = authwords[key]
# Processing authwords values
authwords = sb["authwords"]
for key in ["after", "sep", "ignore"]:
for word in authwords[key]:
if key == "after":
authwords_tex += "\\auth%sword{%s}\n" % ("by", word)
else:
authwords_tex += "\\auth%sword{%s}\n" % (key, word)
sb["authwords"] = authwords_tex
if "after" in authwords:
authwords["after"] = [re.compile(r"^.*%s\b(.*)" % after) for after in authwords["after"]]
if "sep" in authwords:
authwords["sep"] = [" %s" % sep for sep in authwords["sep"]] + [","]
authwords["sep"] = [re.compile(r"^(.*)%s (.*)$" % sep) for sep in authwords["sep"] ]
if "lang" not in sb:
sb["lang"] = "french"
if "sort" in sb:
@ -189,6 +226,7 @@ def makeTexFile(sb, library, output):
sort = [u"by", u"album", u"@title"]
Song.sort = sort
Song.prefixes = prefixes
Song.authwords = authwords
parameters = parseTemplate("templates/"+template)
@ -327,7 +365,7 @@ def main():
print "processing " + sxdFile
idx = processSXD(sxdFile)
indexFile = open(sxdFile[:-3]+"sbx", "w")
indexFile.write(idx.entriesToStr())
indexFile.write(idx.entriesToStr().encode('utf8'))
indexFile.close()
# Second pdflatex pass

2
templates/minimal.tmpl

@ -31,6 +31,7 @@
%%: {"name":"bookoptions", "description":"Options", "type":"flag", "values":["diagram","importantdiagramonly","lilypond","pictures","tabs","repeatchords","onesongperpage"], "join":",", "mandatory":true, "default":["pictures"]},
%%: {"name":"mainfontsize", "description":"Font Size", "type":"font", "default":"10"},
%%: {"name":"titleprefixwords", "description":"Ignore some words in the beginning of song titles"},
%%: {"name":"authwords", "description":"Set of options to process author string (LaTeX commands authsepword, authignoreword, authbyword)"},
%%: {"name":"languages", "description":"List of languages used by songs", "default":""}
%%:]
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@ -54,6 +55,7 @@
}
\gettitleprefixwords
\getauthwords
\nosongnumbers
\pagestyle{empty}

2
templates/patacrep.tmpl

@ -44,6 +44,7 @@
%%: {"name":"notebgcolor", "description":"Note Shade", "type":"color", "default":"#D1E4AE"},
%%: {"name":"indexbgcolor", "description":"Index Shade", "type":"color", "default":"#D1E4AE"},
%%: {"name":"titleprefixwords", "description":"Ignore some words in the beginning of song titles"},
%%: {"name":"authwords", "description":"Set of options to process author string (LaTeX commands authsepword, authignoreword, authbyword)"},
%%: {"name":"languages", "description":"List of languages used by songs", "default":""}
%%:]
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@ -90,6 +91,7 @@
\renewcommand{\idxbgcolor}{IndexBgColor}
\gettitleprefixwords
\getauthwords
\pagestyle{empty}

129
tools.py

@ -11,3 +11,132 @@ def recursiveFind(root_directory, pattern):
for filename in fnmatch.filter(filenames, pattern):
matches.append(os.path.join(root, filename))
return matches
def split_author_names(string):
    r"""Split an author string between first name(s) and last name.

    The last separating space marks the boundary between first and last
    name. A space is not a separator when it is enclosed in braces, or when
    it directly follows a backslash or a LaTeX command name.

    Examples (shown in the "last, first" form that callers build):
    - Edgar Allan Poe => Poe, Edgar Allan
    - Edgar Allan \emph {Poe} => \emph {Poe}, Edgar Allan
    - The Rolling\ Stones => Rolling\ Stones, The
    - The {Rolling Stones} => {Rolling Stones}, The

    Trailing whitespace is dropped before scanning. Otherwise the trailing
    space would be taken as the separator and the last name would come back
    empty ("William Blake " would give ("William Blake ", "")).

    Returns a ``(first, last)`` tuple of strings. ``first`` still ends with
    the separating space, and is empty when the string holds a single name.
    """
    string = string.rstrip()
    ignore_space = False
    last_space = 0
    brace_depth = 0
    for index, char in enumerate(string):
        # Only characters outside of any brace group can be separators.
        if brace_depth == 0:
            if char == "\\":
                # Start of a command (or escaped character): the next
                # non-alphanumeric character belongs to it.
                ignore_space = True
            elif ignore_space and not char.isalnum():
                ignore_space = False
            elif char == " ":
                # Remember the position right after the space.
                last_space = index + 1
        # The depth is updated after the check above, so the braces
        # themselves are examined with the depth of their enclosing group.
        if char == "{":
            brace_depth += 1
        elif char == "}":
            brace_depth -= 1
    return string[:last_space], string[last_space:]
def split_sep_author(string, sep):
    """Split ``string`` on every occurrence of the separator ``sep``.

    ``sep`` is a compiled regular expression with two groups: group 1 is
    the text before the separator, group 2 the text after it. The pattern
    is applied again and again to what remains on the left, so the pieces
    are returned from the rightmost one to the leftmost one.
    """
    pieces = []
    remaining = string
    while True:
        found = sep.match(remaining)
        if not found:
            break
        pieces.append(found.group(2))
        remaining = found.group(1)
    # Whatever no longer matches is the leftmost piece.
    pieces.append(remaining)
    return pieces
def processauthors(authors_string, after=(), ignore=(), sep=()):
    r"""Return the list of authors found in ``authors_string``.

    Arguments:
    - authors_string: the raw author string of a song;
    - after: compiled regular expressions whose group 1 is the text that
      follows an "after" word (such as "by");
    - ignore: words; any author containing one of them is dropped;
    - sep: compiled regular expressions with two groups (text before and
      after a separator), as expected by split_sep_author().

    For example, we are processing:
    # processauthors(
    #   "Lyrics by William Blake (from Milton, 1808), music by Hubert Parry (1916), and sung by The Royal\ Choir~of~Nowhere (just here to show you how processing is done)",
    #   after = ["by"],
    #   ignore = ["anonymous"],
    #   sep = ["and"]
    #   )
    (words are shown instead of the compiled patterns for readability)

    The "authors_string" string is processed as:

    1) First, parentheses (and their content) are removed.
    # "Lyrics by William Blake, music by Hubert Parry, and sung by The Royal\ Choir~of~Nowhere"

    2) String is split, separators being comma and words from "sep".
    # ["Lyrics by William Blake", "music by Hubert Parry", "sung by The Royal\ Choir~of~Nowhere"]

    3) Everything before words in "after" is removed.
    # ["William Blake", "Hubert Parry", "The Royal\ Choir~of~Nowhere"]

    4) Strings containing words of "ignore" are dropped.
    # ["William Blake", "Hubert Parry", "The Royal\ Choir~of~Nowhere"]

    5) First names are moved after last names
    # ["Blake, William", "Parry, Hubert", "Royal\ Choir~of~Nowhere, The"]
    """
    # Removing parentheses (and their content)
    depth = 0
    kept = []
    for char in authors_string:
        if char == '(':
            depth += 1
        elif char == ')' and depth > 0:
            depth -= 1
        elif depth == 0:
            kept.append(char)
    authors_list = ["".join(kept)]

    # Splitting strings
    for sepword in sep:
        pieces = []
        for author in authors_list:
            pieces.extend(split_sep_author(author, sepword))
        authors_list = pieces

    # Removing stuff before "after" (only the first matching word is used)
    trimmed = []
    for author in authors_list:
        for afterword in after:
            match = afterword.match(author)
            if match:
                author = match.group(1)
                break
        trimmed.append(author)
    authors_list = trimmed

    # Ignoring ignored authors
    authors_list = [
        author
        for author in authors_list
        if not any(str(ignoreword) in author for ignoreword in ignore)
    ]

    # Cleaning: removing empty authors and unnecessary spaces.
    # Both ends are stripped: removing a parenthesis or a separator leaves a
    # space *after* the name, which would otherwise be mistaken for the
    # separator between first and last name.
    authors_list = [author.strip() for author in authors_list if author.strip()]

    # Moving first names after last names
    result = []
    for author in authors_list:
        first, last = split_author_names(author)
        first = first.strip()
        last = last.strip()
        if first:
            result.append("%(last)s, %(first)s" % {
                'first': first,
                'last': last,
            })
        else:
            result.append(last)
    return result

8
utils/plastex.py

@ -7,6 +7,14 @@ import copy
import os
import sys
def simpleparse(text):
    """Return the text content of a simple LaTeX string.

    The string is fed to a fresh plasTeX ``TeX`` parser and the
    ``textContent`` of the parsed document is returned.
    """
    parser = TeX()
    parser.input(text)
    return parser.parse().textContent
class SongParser:
"""Analyseur syntaxique de fichiers .sg"""

Loading…
Cancel
Save