Engine for LaTeX songbooks http://www.patacrep.com
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

197 lines
6.7 KiB

12 years ago
#!/usr/bin/python
# -*- coding: utf-8 -*-
11 years ago
"""Manage indexes.
Generate indexes files for the songbook compilation. This is a replacement for
the original makeindex program written in C that produces an index file (.sbx)
from a file generated by the latex compilation of the songbook (.sxd).
"""
12 years ago
from unidecode import unidecode
12 years ago
import locale
import re
import codecs
12 years ago
from songbook_core.authors import processauthors
from songbook_core.plastex import simpleparse
EOL = u"\n"
12 years ago
# Keyword line of an sxd file: a leading "%", the keyword name (group 1),
# at most one whitespace character, then the rest of the line (group 2).
KEYWORD_PATTERN = re.compile(r"^%(\w+)\s?(.*)$")
# Pattern set to ignore latex command in title prefix: skips any leading run
# of "\command" tokens (each optionally wrapped in braces), then any non-word
# characters, and captures the first word character (group 1).
FIRST_LETTER_PATTERN = re.compile(r"^(?:\{?\\\w+\}?)*[^\w]*(\w)")
12 years ago
12 years ago
def sortkey(value):
    """Build a locale-aware sort key from an index entry.

    The value goes through `simpleparse` (presumably to get rid of LaTeX
    escape sequences -- see songbook_core.plastex), every space is replaced
    by the letter 'A', the result is transliterated with `unidecode` and
    finally transformed with `locale.strxfrm`.

    The caller is responsible for calling
    `locale.setlocale(locale.LC_ALL, '')` beforehand so that the current
    locale is taken into account.
    """
    parsed = simpleparse(value)
    without_spaces = parsed.replace(' ', 'A')
    transliterated = unidecode(without_spaces)
    return locale.strxfrm(transliterated)
12 years ago
def process_sxd(filename):
    """Parse sxd file.

    The file is read as UTF-8 and every line is stripped of surrounding
    whitespace. Its layout is:

    - first line: the index type header, handed to the `Index` constructor;
    - then any number of lines starting with '%': keyword declarations
      matching KEYWORD_PATTERN, registered with `Index.add_keyword`.
      Lines starting with '%' that do not match the pattern are skipped;
    - then the entries, three consecutive lines each, passed in order to
      `Index.add` as (key, number, link). A trailing group of fewer than
      three lines is ignored.

    An empty file yields an empty index of unknown type.

    Return an Index object.
    """
    with codecs.open(filename, 'r', 'utf-8') as index_file:
        data = [line.strip() for line in index_file]

    # An empty file has no header line: fall back to an untyped index
    # instead of failing on data[0].
    idx = Index(data[0] if data else "")

    position = 1
    while position < len(data) and data[position].startswith('%'):
        match = KEYWORD_PATTERN.match(data[position])
        # A '%' line that is not "%keyword value" (e.g. a bare comment)
        # carries no keyword; skip it rather than crash on a None match.
        if match is not None:
            idx.add_keyword(match.group(1), match.group(2))
        position += 1

    idx.compile_keywords()

    # Stop two lines before the end so that only complete three-line
    # records are consumed.
    for start in range(position, len(data) - 2, 3):
        key, number, link = data[start:start + 3]
        idx.add(key, number, link)

    return idx
class Index(object):
    """Title, author or scripture Index representation.

    Attributes:
        data: two-level dict, block letter -> entry key -> list of
            references, each reference being {'num': ..., 'link': ...}.
        keywords: keyword name -> list of raw values (see add_keyword()).
        prefix_patterns: compiled regular expressions built from the
            'prefix' keywords (title indexes only).
        authwords: the "after", "ignore" and "sep" settings passed as
            keyword arguments to `processauthors` (author indexes only).
        indextype: "TITLE", "SCRIPTURE", "AUTHOR" or "" when the header
            line is not recognised.
    """

    # Header line of the data file -> index type.
    _INDEX_TYPES = {
        "TITLE INDEX DATA FILE": "TITLE",
        "SCRIPTURE INDEX DATA FILE": "SCRIPTURE",
        "AUTHOR INDEX DATA FILE": "AUTHOR",
    }

    def __init__(self, indextype):
        """Create an empty index.

        `indextype` is the header line of the data file; an unknown header
        gives an index whose `indextype` is the empty string.
        """
        self.data = dict()
        self.keywords = dict()
        self.prefix_patterns = []
        self.authwords = {"after": [], "ignore": [], "sep": []}
        self.indextype = self._INDEX_TYPES.get(indextype, "")

    @staticmethod
    def get_first_letter(key):
        """Return the uppercase first letter of key.

        The letter is the one captured by FIRST_LETTER_PATTERN. A digit is
        mapped to the '0-9' block. A key in which the pattern finds no
        word character at all is also filed under '0-9' instead of raising
        an error.
        """
        match = FIRST_LETTER_PATTERN.match(key)
        if match is None:
            return '0-9'
        letter = match.group(1)
        if re.match(r'\d', letter):
            letter = '0-9'
        return letter.upper()

    def add_keyword(self, key, word):
        """Add 'word' to self.keywords[key]."""
        self.keywords.setdefault(key, []).append(word)

    def compile_keywords(self):
        """Turn keywords (self.keywords) into regular expressions.

        Keyword values are inserted verbatim (unescaped) into the regular
        expressions.

        - TITLE index: each 'prefix' keyword gives one pattern appended to
          self.prefix_patterns.
        - AUTHOR index: for each of "after", "ignore" and "sep" present in
          self.keywords, self.authwords is updated: "after" values become
          compiled patterns, "sep" values (each preceded by a space, plus
          the comma) become compiled patterns, "ignore" values are stored
          unchanged.
        """
        if self.indextype == "TITLE":
            for prefix in self.keywords.get('prefix', []):
                self.prefix_patterns.append(re.compile(
                    r"^({prefix})(\b|\\)(\s*.*)$".format(prefix=prefix)
                    ))

        if self.indextype == "AUTHOR":
            # list(): values are reassigned while walking the keys.
            for word in list(self.authwords):
                if word not in self.keywords:
                    continue
                values = self.keywords[word]
                if word == "after":
                    self.authwords[word] = [
                        re.compile(r"^.*{after}\b(.*)".format(after=after))
                        for after in values
                        ]
                elif word == "sep":
                    separators = [
                        " {sep}".format(sep=sep) for sep in values
                        ] + [","]
                    self.authwords[word] = [
                        re.compile(r"^(.*){sep} (.*)$".format(sep=sep))
                        for sep in separators
                        ]
                else:
                    self.authwords[word] = values

    def _raw_add(self, key, number, link):
        """Add a song to the list.

        No processing is done on data. It is added raw. See add() for a
        similar method with processing.
        """
        first = self.get_first_letter(key)
        block = self.data.setdefault(first, dict())
        block.setdefault(key, []).append({'num': number, 'link': link})

    def add(self, key, number, link):
        """Add a song to the list.

        Process data before adding it:

        - TITLE index: if the key matches one of self.prefix_patterns, the
          prefix is moved to the end between parentheses (only the first
          matching pattern is used); otherwise the key is added as is.
        - AUTHOR index: the key is split by `processauthors`, and one entry
          is added per returned author.
        - any other index type: nothing is added.
        """
        if self.indextype == "TITLE":
            # Removing prefixes before titles
            for pattern in self.prefix_patterns:
                match = pattern.match(key)
                if match:
                    # Text template: keeps the reformatted key the same
                    # type as unprocessed keys instead of producing UTF-8
                    # encoded byte strings.
                    self._raw_add(
                        u"{} ({})".format(
                            match.group(2) + match.group(3),
                            match.group(1)
                            ),
                        number,
                        link
                        )
                    return
            self._raw_add(key, number, link)
        elif self.indextype == "AUTHOR":
            # Processing authors
            for author in processauthors(key, **self.authwords):
                self._raw_add(author, number, link)

    @staticmethod
    def ref_to_str(ref):
        """Return the LaTeX code corresponding to the reference.

        `ref` is a mapping with the 'link' and 'num' keys.
        """
        return r'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref)

    def entry_to_str(self, key, entry):
        """Return the LaTeX code corresponding to the entry.

        `entry` is the list of references of `key`; they are joined with
        the LaTeX line break. A byte-string key is decoded as UTF-8 first.
        """
        if isinstance(key, bytes):
            key = key.decode("UTF-8")
        return (u'\\idxentry{{{0}}}{{{1}}}' + EOL).format(
            key,
            u'\\\\'.join([self.ref_to_str(ref) for ref in entry]),
            )

    def idxblock_to_str(self, letter, entries):
        """Return the LaTeX code corresponding to an index block.

        Here, an index block is a letter, and all data beginning with this
        letter. Entries are ordered with sortkey().
        """
        parts = [r'\begin{idxblock}{' + letter + '}' + EOL]
        parts.extend(
            self.entry_to_str(key, entries[key])
            for key in sorted(entries, key=sortkey)
            )
        parts.append(r'\end{idxblock}' + EOL)
        return "".join(parts)

    def entries_to_str(self):
        """Return the LaTeX code corresponding to the index.

        Blocks are written in the sorted order of their letters.
        """
        return "".join(
            self.idxblock_to_str(letter, self.data[letter])
            for letter in sorted(self.data)
            )