Browse Source

Les auteurs sont désormais triés par nom de famille

remotes/origin/next
Louis 12 years ago
parent
commit
0b962cbf46
  1. 68
      index.py
  2. 52
      songbook.py
  3. 2
      templates/minimal.tmpl
  4. 2
      templates/patacrep.tmpl
  5. 129
      tools.py
  6. 8
      utils/plastex.py

68
index.py

@ -9,13 +9,15 @@
# src is the .sxd file generated by latex # src is the .sxd file generated by latex
# #
from plasTeX.TeX import TeX
from unidecode import unidecode from unidecode import unidecode
import sys import sys
import re import re
import locale import locale
import warnings import warnings
from tools import processauthors
from utils.plastex import simpleparse
# Pattern set to ignore latex command in title prefix # Pattern set to ignore latex command in title prefix
keywordPattern = re.compile(r"^%(\w+)\s?(.*)$") keywordPattern = re.compile(r"^%(\w+)\s?(.*)$")
firstLetterPattern = re.compile(r"^(?:\{?\\\w+\}?)*[^\w]*(\w)") firstLetterPattern = re.compile(r"^(?:\{?\\\w+\}?)*[^\w]*(\w)")
@ -26,9 +28,7 @@ def sortkey(value):
don't forget to call locale.setlocale(locale.LC_ALL, '')). It also handles don't forget to call locale.setlocale(locale.LC_ALL, '')). It also handles
the sort with latex escape sequences. the sort with latex escape sequences.
''' '''
tex = TeX() return locale.strxfrm(unidecode(simpleparse(value).replace(' ', 'A')))
tex.input(value)
return locale.strxfrm(unidecode(tex.parse().textContent.replace(' ', 'A')))
def processSXDEntry(tab): def processSXDEntry(tab):
return (tab[0], tab[1], tab[2]) return (tab[0], tab[1], tab[2])
@ -40,9 +40,8 @@ def processSXD(filename):
data.append(line.strip()) data.append(line.strip())
file.close() file.close()
type = data[0]
i = 1 i = 1
idx = index() idx = index(data[0])
if len(data) > 1: if len(data) > 1:
while data[i].startswith('%'): while data[i].startswith('%'):
@ -54,12 +53,21 @@ def processSXD(filename):
for i in range(i,len(data),3): for i in range(i,len(data),3):
entry = processSXDEntry(data[i:i+3]) entry = processSXDEntry(data[i:i+3])
idx.add(entry[0],entry[1],entry[2]) idx.add(entry[0],entry[1],entry[2])
return idx return idx
class index: class index:
def __init__(self): def __init__(self, indextype):
self.data = dict() self.data = dict()
self.keywords = dict() self.keywords = dict()
if indextype == "TITLE INDEX DATA FILE":
self.indextype = "TITLE"
elif indextype == "SCRIPTURE INDEX DATA FILE":
self.indextype = "SCRIPTURE"
elif indextype == "AUTHOR INDEX DATA FILE":
self.indextype = "AUTHOR"
else:
self.indextype = ""
def filter(self, key): def filter(self, key):
letter = firstLetterPattern.match(key).group(1) letter = firstLetterPattern.match(key).group(1)
@ -74,16 +82,27 @@ class index:
def compileKeywords(self): def compileKeywords(self):
self.prefix_patterns = [] self.prefix_patterns = []
if self.indextype == "TITLE":
if 'prefix' in self.keywords: if 'prefix' in self.keywords:
for prefix in self.keywords['prefix']: for prefix in self.keywords['prefix']:
self.prefix_patterns.append(re.compile(r"^(%s)(\b|\\)(\s*.*)$" % prefix)) self.prefix_patterns.append(re.compile(r"^(%s)(\b|\\)(\s*.*)$" % prefix))
def add(self, key, number, link): self.authwords = {"after": [], "ignore": [], "sep": []}
for pattern in self.prefix_patterns: if self.indextype == "AUTHOR":
match = pattern.match(key) for key in self.keywords:
if match: if key in self.authwords:
key = "%s (%s)" % (match.group(2) + match.group(3), match.group(1)) self.authwords[key] = self.keywords[key]
break # Only one match per key for word in self.authwords.keys():
if word in self.keywords:
if word == "after":
self.authwords[word] = [re.compile(r"^.*%s\b(.*)" % after) for after in self.keywords[word]]
elif word == "sep":
self.authwords[word] = [" %s" % sep for sep in self.authwords[word]] + [","]
self.authwords[word] = [re.compile(r"^(.*)%s (.*)$" % sep) for sep in self.authwords[word] ]
else:
self.authwords[word] = self.keywords[word]
def _raw_add(self, key, number, link):
(first, key) = self.filter(key) (first, key) = self.filter(key)
if not self.data.has_key(first): if not self.data.has_key(first):
self.data[first] = dict() self.data[first] = dict()
@ -91,6 +110,25 @@ class index:
self.data[first][key] = [] self.data[first][key] = []
self.data[first][key].append({'num':number, 'link':link}) self.data[first][key].append({'num':number, 'link':link})
def add(self, key, number, link):
    """Register an index entry according to the index type.

    For a "TITLE" index, the first prefix pattern matching ``key`` moves the
    matched prefix to the end of the key, in parentheses; a key matching no
    pattern is stored unchanged. For an "AUTHOR" index, ``key`` is expanded
    by ``processauthors`` (called with ``self.authwords`` as keyword
    arguments) and every resulting author is stored as its own entry.
    Nothing is stored for any other index type.
    """
    if self.indextype == "TITLE":
        # Moving a leading prefix word behind the title; only the first
        # matching pattern is applied.
        for prefix_pattern in self.prefix_patterns:
            found = prefix_pattern.match(key)
            if not found:
                continue
            title = found.group(2) + found.group(3)
            self._raw_add("%s (%s)" % (title, found.group(1)), number, link)
            return
        self._raw_add(key, number, link)
    if self.indextype == "AUTHOR":
        # One entry per author found in the key
        authors = processauthors(key, **self.authwords)
        for author in authors:
            self._raw_add(author, number, link)
def refToStr(self, ref): def refToStr(self, ref):
if sys.version_info >= (2,6): if sys.version_info >= (2,6):
return '\\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref) return '\\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref)
@ -99,9 +137,9 @@ class index:
def entryToStr(self, key, entry): def entryToStr(self, key, entry):
if sys.version_info >= (2,6): if sys.version_info >= (2,6):
return '\\idxentry{{{0}}}{{{1}}}\n'.format(key, '\\\\'.join(map(self.refToStr, entry))) return unicode('\\idxentry{{{0}}}{{{1}}}\n').format(key, '\\\\'.join(map(self.refToStr, entry)))
else: else:
return '\\idxentry{%s}{%s}\n' % (key, '\\\\'.join(map(self.refToStr, entry))) return unicode('\\idxentry{%s}{%s}\n') % (key, '\\\\'.join(map(self.refToStr, entry)))
def idxBlockToStr(self, letter, entries): def idxBlockToStr(self, letter, entries):
str = '\\begin{idxblock}{'+letter+'}'+'\n' str = '\\begin{idxblock}{'+letter+'}'+'\n'

52
songbook.py

@ -11,7 +11,7 @@ import json
import glob import glob
import re import re
from subprocess import call from subprocess import call
from tools import recursiveFind from tools import recursiveFind, processauthors
from index import * from index import *
from unidecode import unidecode from unidecode import unidecode
from utils.plastex import parsetex from utils.plastex import parsetex
@ -19,8 +19,10 @@ from utils.plastex import parsetex
class Song: class Song:
#: Ordre de tri #: Ordre de tri
sort = [] sort = []
#: Préfixes à ignorer pour le tri #: Préfixes à ignorer pour le tri par titres
prefixes = [] prefixes = []
#: Dictionnaire des options pour le traitement des auteurs
authwords = {"after": [], "ignore": [], "sep": []}
def __init__(self, path, languages, titles, args): def __init__(self, path, languages, titles, args):
self.titles = titles self.titles = titles
@ -28,6 +30,14 @@ class Song:
self.args = args self.args = args
self.path = path self.path = path
self.languages = languages self.languages = languages
if "by" in self.args.keys():
self.normalized_authors = [
locale.strxfrm(author)
for author
in processauthors(self.args["by"], **self.authwords)
]
else:
self.normalized_authors = []
def __repr__(self): def __repr__(self):
return repr((self.titles, self.args, self.path)) return repr((self.titles, self.args, self.path))
@ -40,8 +50,11 @@ class Song:
self_key = self.normalized_titles self_key = self.normalized_titles
other_key = other.normalized_titles other_key = other.normalized_titles
elif key == "@path": elif key == "@path":
self.key = locale.strxfrm(self.path) self_key = locale.strxfrm(self.path)
other_key = locale.strxfrm(other.path) other_key = locale.strxfrm(other.path)
elif key == "by":
self_key = self.normalized_authors
other_key = other.normalized_authors
else: else:
self_key = locale.strxfrm(self.args.get(key, "")) self_key = locale.strxfrm(self.args.get(key, ""))
other_key = locale.strxfrm(other.args.get(key, "")) other_key = locale.strxfrm(other.args.get(key, ""))
@ -165,9 +178,13 @@ def makeTexFile(sb, library, output):
# default value # default value
template = "patacrep.tmpl" template = "patacrep.tmpl"
songs = [] songs = []
titleprefixwords = ""
prefixes_tex = ""
prefixes = [] prefixes = []
authwords_tex = ""
authwords = {"after": ["by"], "ignore": ["unknown"], "sep": ["and"]}
# parse the songbook data # parse the songbook data
if "template" in sb: if "template" in sb:
template = sb["template"] template = sb["template"]
@ -178,8 +195,28 @@ def makeTexFile(sb, library, output):
if "titleprefixwords" in sb: if "titleprefixwords" in sb:
prefixes = sb["titleprefixwords"] prefixes = sb["titleprefixwords"]
for prefix in sb["titleprefixwords"]: for prefix in sb["titleprefixwords"]:
titleprefixwords += "\\titleprefixword{%s}\n" % prefix prefixes_tex += "\\titleprefixword{%s}\n" % prefix
sb["titleprefixwords"] = titleprefixwords sb["titleprefixwords"] = prefixes_tex
if "authwords" in sb:
# Populating default value
for key in ["after", "sep", "ignore"]:
if key not in sb["authwords"]:
sb["authwords"][key] = authwords[key]
# Processing authwords values
authwords = sb["authwords"]
for key in ["after", "sep", "ignore"]:
for word in authwords[key]:
if key == "after":
authwords_tex += "\\auth%sword{%s}\n" % ("by", word)
else:
authwords_tex += "\\auth%sword{%s}\n" % (key, word)
sb["authwords"] = authwords_tex
if "after" in authwords:
authwords["after"] = [re.compile(r"^.*%s\b(.*)" % after) for after in authwords["after"]]
if "sep" in authwords:
authwords["sep"] = [" %s" % sep for sep in authwords["sep"]] + [","]
authwords["sep"] = [re.compile(r"^(.*)%s (.*)$" % sep) for sep in authwords["sep"] ]
if "lang" not in sb: if "lang" not in sb:
sb["lang"] = "french" sb["lang"] = "french"
if "sort" in sb: if "sort" in sb:
@ -189,6 +226,7 @@ def makeTexFile(sb, library, output):
sort = [u"by", u"album", u"@title"] sort = [u"by", u"album", u"@title"]
Song.sort = sort Song.sort = sort
Song.prefixes = prefixes Song.prefixes = prefixes
Song.authwords = authwords
parameters = parseTemplate("templates/"+template) parameters = parseTemplate("templates/"+template)
@ -327,7 +365,7 @@ def main():
print "processing " + sxdFile print "processing " + sxdFile
idx = processSXD(sxdFile) idx = processSXD(sxdFile)
indexFile = open(sxdFile[:-3]+"sbx", "w") indexFile = open(sxdFile[:-3]+"sbx", "w")
indexFile.write(idx.entriesToStr()) indexFile.write(idx.entriesToStr().encode('utf8'))
indexFile.close() indexFile.close()
# Second pdflatex pass # Second pdflatex pass

2
templates/minimal.tmpl

@ -31,6 +31,7 @@
%%: {"name":"bookoptions", "description":"Options", "type":"flag", "values":["diagram","importantdiagramonly","lilypond","pictures","tabs","repeatchords","onesongperpage"], "join":",", "mandatory":true, "default":["pictures"]}, %%: {"name":"bookoptions", "description":"Options", "type":"flag", "values":["diagram","importantdiagramonly","lilypond","pictures","tabs","repeatchords","onesongperpage"], "join":",", "mandatory":true, "default":["pictures"]},
%%: {"name":"mainfontsize", "description":"Font Size", "type":"font", "default":"10"}, %%: {"name":"mainfontsize", "description":"Font Size", "type":"font", "default":"10"},
%%: {"name":"titleprefixwords", "description":"Ignore some words in the beginning of song titles"}, %%: {"name":"titleprefixwords", "description":"Ignore some words in the beginning of song titles"},
%%: {"name":"authwords", "description":"Set of options to process author string (LaTeX commands authsepword, authignoreword, authbyword)"},
%%: {"name":"languages", "description":"List of languages used by songs", "default":""} %%: {"name":"languages", "description":"List of languages used by songs", "default":""}
%%:] %%:]
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@ -54,6 +55,7 @@
} }
\gettitleprefixwords \gettitleprefixwords
\getauthwords
\nosongnumbers \nosongnumbers
\pagestyle{empty} \pagestyle{empty}

2
templates/patacrep.tmpl

@ -44,6 +44,7 @@
%%: {"name":"notebgcolor", "description":"Note Shade", "type":"color", "default":"#D1E4AE"}, %%: {"name":"notebgcolor", "description":"Note Shade", "type":"color", "default":"#D1E4AE"},
%%: {"name":"indexbgcolor", "description":"Index Shade", "type":"color", "default":"#D1E4AE"}, %%: {"name":"indexbgcolor", "description":"Index Shade", "type":"color", "default":"#D1E4AE"},
%%: {"name":"titleprefixwords", "description":"Ignore some words in the beginning of song titles"}, %%: {"name":"titleprefixwords", "description":"Ignore some words in the beginning of song titles"},
%%: {"name":"authwords", "description":"Set of options to process author string (LaTeX commands authsepword, authignoreword, authbyword)"},
%%: {"name":"languages", "description":"List of languages used by songs", "default":""} %%: {"name":"languages", "description":"List of languages used by songs", "default":""}
%%:] %%:]
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@ -90,6 +91,7 @@
\renewcommand{\idxbgcolor}{IndexBgColor} \renewcommand{\idxbgcolor}{IndexBgColor}
\gettitleprefixwords \gettitleprefixwords
\getauthwords
\pagestyle{empty} \pagestyle{empty}

129
tools.py

@ -11,3 +11,132 @@ def recursiveFind(root_directory, pattern):
for filename in fnmatch.filter(filenames, pattern): for filename in fnmatch.filter(filenames, pattern):
matches.append(os.path.join(root, filename)) matches.append(os.path.join(root, filename))
return matches return matches
def split_author_names(string):
    r"""Split an author string into its first-name and last-name parts.

    The split happens at the last space that acts as a separator. A space is
    not a separator when it lies inside a brace group, or when it is the
    first non-alphanumeric character following a backslash (it then ends a
    command name or is an escaped space).

    Examples (written as "last, first"):
    - Edgar Allan Poe => Poe, Edgar Allan
    - Edgar Allan \emph {Poe} => \emph {Poe}, Edgar Allan
    - The Rolling\ Stones => Rolling\ Stones, The
    - The {Rolling Stones} => {Rolling Stones}, The

    Returns a ``(first, last)`` tuple. ``first`` is the text before the
    separating space, without trailing whitespace; it is empty when the
    string contains no separating space.
    """
    after_backslash = False
    split_at = 0
    depth = 0
    # Positions are 1-based so that split_at points just past the space.
    for position, char in enumerate(string, 1):
        if depth == 0:
            if char == "\\":
                after_backslash = True
            elif after_backslash and not char.isalnum():
                # First non-alphanumeric character after a backslash: it
                # closes the command (or is the escaped character itself),
                # so it is never counted as a separator.
                after_backslash = False
            elif char == " ":
                split_at = position
        # The depth is updated after the test above, so the braces
        # themselves are examined with the depth of the enclosing level.
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
    # Drop the separator space(s) left at the end of the first-name part.
    return string[:split_at].rstrip(), string[split_at:]
def split_sep_author(string, sep):
    """Split a string of authors on one separator pattern.

    ``sep`` is matched repeatedly against the string: at each match, its
    second group is collected as an author and the first group becomes the
    string to process next. What remains once ``sep`` stops matching is
    collected last.

    Returns the list of collected pieces, in the order they were found.
    """
    pieces = []
    remaining = string
    while True:
        found = sep.match(remaining)
        if not found:
            break
        pieces.append(found.group(2))
        remaining = found.group(1)
    pieces.append(remaining)
    return pieces
def processauthors(authors_string, after=(), ignore=(), sep=()):
    r"""Return the list of authors found in an author string.

    Parameters:
    - authors_string: the raw author string.
    - after: objects with a ``match()`` method (e.g. compiled regular
      expressions) whose first group is the text to keep, for instance
      ``re.compile(r"^.*by\b(.*)")``.
    - ignore: words; an author containing ``str(word)`` is dropped.
    - sep: objects with a ``match()`` method whose first and second groups
      are the texts before and after a separator, for instance
      ``re.compile(r"^(.*) and (.*)$")``.

    For example, with the two patterns above, the string
    # "Lyrics by William Blake (from Milton) and music by Hubert Parry"
    is processed as:
    1) First, parentheses (and their content) are removed.
    2) The string is split on every separator of "sep"
       (see split_sep_author for the order of the pieces).
    3) Everything matched before the kept group of the first matching
       "after" pattern is removed.
    4) Strings containing words of "ignore" are dropped.
    5) Surrounding whitespace and empty authors are removed.
    6) First names are moved after last names.
    # ["Parry, Hubert", "Blake, William"]
    """
    # Removing parentheses (characters are gathered in a list and joined
    # once, rather than growing a string one character at a time)
    depth = 0
    kept = []
    for char in authors_string:
        if char == '(':
            depth += 1
        elif char == ')' and depth > 0:
            depth -= 1
        elif depth == 0:
            kept.append(char)
    authors_list = ["".join(kept)]

    # Splitting strings
    for sepword in sep:
        pieces = []
        for author in authors_list:
            pieces.extend(split_sep_author(author, sepword))
        authors_list = pieces

    # Removing stuff before "after" (only the first matching pattern applies)
    trimmed = []
    for author in authors_list:
        for afterword in after:
            match = afterword.match(author)
            if match:
                author = match.group(1)
                break
        trimmed.append(author)
    authors_list = trimmed

    # Ignoring ignored authors
    authors_list = [
        author for author in authors_list
        if not any(str(ignoreword) in author for ignoreword in ignore)
    ]

    # Cleaning: removing empty authors and unnecessary spaces. Both ends are
    # stripped: a trailing space (typically left behind by a removed
    # parenthesis) would otherwise be taken for the first/last name
    # separator and produce an empty last name.
    authors_list = [author.strip() for author in authors_list if author.strip()]

    # Moving first names after last names
    result = []
    for author in authors_list:
        first, last = split_author_names(author)
        first, last = first.strip(), last.strip()
        if first:
            result.append("%s, %s" % (last, first))
        else:
            result.append(last)
    return result

8
utils/plastex.py

@ -7,6 +7,14 @@ import copy
import os import os
import sys import sys
def simpleparse(text):
    """Parse a simple LaTeX string and return its text content.

    The string is given as input to a fresh ``TeX`` instance, and the
    ``textContent`` of the document it parses is returned.
    """
    parser = TeX()
    parser.input(text)
    return parser.parse().textContent
class SongParser: class SongParser:
"""Analyseur syntaxique de fichiers .sg""" """Analyseur syntaxique de fichiers .sg"""

Loading…
Cancel
Save