Engine for LaTeX songbooks http://www.patacrep.com
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

198 lines
6.4 KiB

11 years ago
"""Manage indexes.
Generate index files for the songbook compilation. This is a replacement for
the original makeindex program written in C that produces an index file (.sbx)
from a file generated by the LaTeX compilation of the songbook (.sxd).
"""
12 years ago
import locale
import unidecode
import re
12 years ago
11 years ago
from patacrep import authors
from patacrep import encoding
from patacrep.latex import tex2plain
EOL = "\n"

# Matches a keyword line of the sxd file: '%', the keyword name, an optional
# whitespace character, then the keyword value (rest of the line).
#
# NOTE: no re.LOCALE flag here. Since Python 3.6, re.LOCALE is only allowed
# with bytes patterns and raises ValueError with str patterns; str patterns
# already match Unicode word characters with \w.
KEYWORD_PATTERN = re.compile(r"^%(\w+)\s?(.*)$")

# Pattern set to ignore latex command in title prefix: skips any leading
# LaTeX commands (optionally wrapped in braces) and non-word characters, and
# captures the first word character that follows.
FIRST_LETTER_PATTERN = re.compile(r"^(?:\{?\\\w+\}?)*[^\w]*(\w)")
12 years ago
def process_sxd(filename):
    """Parse an sxd file and return the corresponding Index object.

    The file is read in three parts:

    - the first line, passed to the Index constructor to select the index
      type;
    - any directly following lines starting with '%', each parsed with
      KEYWORD_PATTERN and registered through Index.add_keyword();
    - the remaining lines, consumed three at a time and passed to
      Index.add() as (key, number, link).

    Every line is stripped of surrounding whitespace before being used.

    Malformed input is tolerated rather than raising: an empty file yields
    an Index built from an empty header, '%' lines that do not match
    KEYWORD_PATTERN are skipped, and a trailing group of fewer than three
    lines (e.g. trailing blank lines) is ignored.
    """
    with encoding.open_read(filename) as index_file:
        data = [line.strip() for line in index_file]

    idx = Index(data[0] if data else "")

    # Keyword lines directly follow the header line.
    position = 1
    while position < len(data) and data[position].startswith('%'):
        match = KEYWORD_PATTERN.match(data[position])
        if match:
            idx.add_keyword(match.group(1), match.group(2))
        position += 1

    # Keywords must be compiled before entries are added, since Index.add()
    # relies on the compiled prefix patterns / author words.
    idx.compile_keywords()

    # Stop before a truncated final group so that unpacking never fails.
    for start in range(position, len(data) - 2, 3):
        key, number, link = data[start:start + 3]
        idx.add(key, number, link)

    return idx
class Index(object):
    """Title, author or scripture Index representation.

    Attributes set by the constructor:

    - data: dict mapping a block letter (see get_first_letter()) to a dict
      mapping each key (a tuple of strings) to
      {'sortingkey': [...], 'entries': [{'num': ..., 'link': ...}, ...]};
    - keywords: dict mapping a keyword name to the list of its values;
    - authwords: result of authors.compile_authwords() (AUTHOR index only);
    - prefix_patterns: compiled title prefix patterns (TITLE index only);
    - indextype: "TITLE", "SCRIPTURE", "AUTHOR" or "" (unknown header).
    """

    # Header line of the sxd file -> index type.
    _INDEX_TYPES = {
        "TITLE INDEX DATA FILE": "TITLE",
        "SCRIPTURE INDEX DATA FILE": "SCRIPTURE",
        "AUTHOR INDEX DATA FILE": "AUTHOR",
    }

    def __init__(self, indextype):
        """Create an empty index.

        `indextype` is the header line of the sxd file; an unrecognised
        header gives an index whose `indextype` attribute is "".
        """
        self.data = {}
        self.keywords = {}
        self.authwords = {}
        self.prefix_patterns = []
        self.indextype = self._INDEX_TYPES.get(indextype, "")

    @staticmethod
    def get_first_letter(key):
        """Return the uppercase first letter of key.

        The letter is the first word character found by
        FIRST_LETTER_PATTERN. Keys with no such character, and keys whose
        first word character is a digit, are all classified as '0-9'.
        """
        match = FIRST_LETTER_PATTERN.match(key)
        # classify as number all the non letter characters
        letter = match.group(1) if match else "0"
        if re.match(r'\d', letter):
            letter = '0-9'
        return letter.upper()

    def add_keyword(self, key, word):
        """Add 'word' to self.keywords[key]."""
        self.keywords.setdefault(key, []).append(word)

    def compile_keywords(self):
        """Turn keywords (self.keywords) into regular expressions.

        For a TITLE index, one pattern is appended to self.prefix_patterns
        for each 'prefix' keyword. For an AUTHOR index, self.authwords is
        set from authors.compile_authwords(self.keywords).
        """
        if self.indextype == "TITLE":
            # No re.LOCALE flag: since Python 3.6 it raises ValueError when
            # combined with a str pattern.
            # NOTE(review): the prefix is inserted unescaped, so regex
            # metacharacters in a prefix are interpreted as regex syntax.
            self.prefix_patterns.extend(
                re.compile(r"^({prefix})(\b|\\)(\s*.*)$".format(prefix=prefix))
                for prefix in self.keywords.get('prefix', [])
            )

        if self.indextype == "AUTHOR":
            self.authwords = authors.compile_authwords(self.keywords)

    def _raw_add(self, key, number, link):
        """Add a song to the list.

        No processing is done on data. It is added raw. See add() for a
        similar method with processing.

        `key` is a tuple of strings; its first item selects the block
        letter, and the whole tuple is the entry key inside that block.
        """
        first = self.get_first_letter(key[0])
        block = self.data.setdefault(first, {})
        if key not in block:
            block[key] = {
                # Sorting ignores LaTeX markup, accents and case.
                'sortingkey': [
                    unidecode.unidecode(tex2plain(item)).lower()
                    for item in key
                ],
                'entries': [],
            }
        block[key]['entries'].append({'num': number, 'link': link})

    def add(self, key, number, link):
        """Add a song to the list.

        Process data before adding it:

        - TITLE index: the first matching prefix pattern splits the title,
          which is stored under the key (title without prefix, prefix);
          a title with no matching prefix is stored under (title, "").
        - AUTHOR index: one entry is added per author returned by
          authors.processauthors().
        - Any other index type: nothing is added.
        """
        if self.indextype == "TITLE":
            # Removing prefixes before titles
            for pattern in self.prefix_patterns:
                match = pattern.match(key)
                if match:
                    self._raw_add(
                        (
                            (match.group(2) + match.group(3)).strip(),
                            match.group(1).strip(),
                        ),
                        number,
                        link
                    )
                    return
            self._raw_add((key, ""), number, link)

        if self.indextype == "AUTHOR":
            # Processing authors
            for author in authors.processauthors(key, **self.authwords):
                self._raw_add(author, number, link)

    @staticmethod
    def ref_to_str(ref):
        """Return the LaTeX code corresponding to the reference.

        `ref` is a mapping with 'link' and 'num' items.
        """
        return r'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref)

    def key_to_str(self, key):
        """Convert the key (title or author) to the LaTeX command rendering it.

        Return None for index types other than AUTHOR and TITLE.
        """
        if self.indextype == "AUTHOR":
            return r"\indexauthor{{{first}}}{{{last}}}".format(
                first=key[1],
                last=key[0],
            )
        if self.indextype == "TITLE":
            return r"\indextitle{{{0[1]}}}{{{0[0]}}}".format(key)
        return None

    def entry_to_str(self, key, entry):
        """Return the LaTeX code corresponding to the entry.

        `entry` is the list of references of `key`; they are rendered with
        ref_to_str() and separated by a LaTeX line break.
        """
        return r"""\idxentry{{
{0}
}}{{
{1}
}}""".format(
            self.key_to_str(key),
            r'\\'.join([self.ref_to_str(ref) for ref in entry]),
        )

    def idxblock_to_str(self, letter, entries):
        """Return the LaTeX code corresponding to an index block.

        Here, an index block is a letter, and all data beginning with this
        letter. Entries are sorted on their 'sortingkey', compared
        according to the current locale.
        """
        def sortkey(key):
            """Return something sortable for `entries[key]`."""
            return [
                locale.strxfrm(item)
                for item in entries[key]['sortingkey']
            ]

        parts = [r'\begin{idxblock}{' + letter + '}' + EOL]
        parts.extend(
            " " + self.entry_to_str(key, entries[key]['entries'])
            for key in sorted(entries, key=sortkey)
        )
        parts.append(EOL + r'\end{idxblock}')
        return "".join(parts)

    def entries_to_str(self):
        """Return the LaTeX code corresponding to the index.

        Blocks are emitted in sorted order of their letter, each followed
        by EOL.
        """
        return "".join(
            self.idxblock_to_str(letter, self.data[letter]) + EOL
            for letter in sorted(self.data)
        )