Engine for LaTeX songbooks http://www.patacrep.com
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

173 lines
5.9 KiB

12 years ago
#!/usr/bin/python
# -*- coding: utf-8 -*-
11 years ago
"""Manage indexes.
Generate indexes files for the songbook compilation. This is a replacement for
the original makeindex program written in C that produces an index file (.sbx)
from a file generated by the latex compilation of the songbook (.sxd).
"""
12 years ago
from unidecode import unidecode
12 years ago
import locale
import re
import sys
12 years ago
from songbook.authors import processauthors
from songbook.plastex import simpleparse
# Line terminator appended to every line written to the generated index.
EOL = "\n"
12 years ago
# Matches a "%keyword value" line of an sxd file: group 1 is the keyword
# name, group 2 is the rest of the line (after one optional whitespace).
keywordPattern = re.compile(r"^%(\w+)\s?(.*)$")
# Captures the first word character of a title, skipping any leading LaTeX
# commands (optionally wrapped in braces) and any non-word characters.
firstLetterPattern = re.compile(r"^(?:\{?\\\w+\}?)*[^\w]*(\w)")
12 years ago
def sortkey(value):
    """Return a locale-aware sort key for a title.

    The title is first run through ``simpleparse`` (which, per the original
    documentation, deals with LaTeX escape sequences), every space is then
    replaced by the letter ``A``, the result is transliterated with
    ``unidecode`` and finally transformed with ``locale.strxfrm``.

    The caller is responsible for selecting the locale beforehand, e.g. with
    ``locale.setlocale(locale.LC_ALL, '')``.
    """
    parsed = simpleparse(value)
    without_spaces = parsed.replace(' ', 'A')
    transliterated = unidecode(without_spaces)
    return locale.strxfrm(transliterated)
12 years ago
12 years ago
def processSXD(filename):
    """Parse an sxd file and return the corresponding Index object.

    Layout of the file, as read by this function:

    * the first line is the index type header, handed to ``Index``;
    * the following lines starting with ``%`` are keyword lines, parsed with
      ``keywordPattern`` and registered through ``Index.keyword``;
    * the remaining lines are consumed three at a time and handed to
      ``Index.add`` as ``(key, number, link)``.

    Every line is stripped of surrounding whitespace before being used.

    Raises IndexError if the file is empty or if the last entry has fewer
    than three lines.
    """
    # The context manager guarantees the file is closed even if reading
    # fails part-way through.
    with open(filename) as index_file:
        data = [line.strip() for line in index_file]

    idx = Index(data[0])

    # Keyword lines directly follow the header line.
    i = 1
    while len(data) > i and data[i].startswith('%'):
        keywords = keywordPattern.match(data[i]).groups()
        idx.keyword(keywords[0], keywords[1])
        i += 1

    # Keywords must be compiled before entries are added, since adding an
    # entry relies on the compiled patterns.
    idx.compileKeywords()
    for start in range(i, len(data), 3):
        entry = data[start:start + 3]
        idx.add(entry[0], entry[1], entry[2])

    return idx
class Index:
    """Title, author or scripture index representation.

    Attributes:
        data: two-level dict ``{block letter: {key: [reference, ...]}}`` where
            each reference is a dict with the keys ``'num'`` and ``'link'``.
        keywords: dict mapping a keyword name to the list of values
            registered for it with ``keyword()``.
        prefix_patterns: compiled patterns for title prefixes, filled by
            ``compileKeywords()`` for a TITLE index.
        authwords: dict with the keys ``"after"``, ``"ignore"`` and ``"sep"``
            passed as keyword arguments to ``processauthors``.
        indextype: ``"TITLE"``, ``"SCRIPTURE"``, ``"AUTHOR"`` or ``""`` when
            the header was not recognised.
    """

    # Maps the header line of an sxd file to the short index type name.
    _INDEX_TYPES = {
        "TITLE INDEX DATA FILE": "TITLE",
        "SCRIPTURE INDEX DATA FILE": "SCRIPTURE",
        "AUTHOR INDEX DATA FILE": "AUTHOR",
    }

    # Keys whose first character is a digit share the single '0-9' block.
    _DIGIT = re.compile(r'\d')

    def __init__(self, indextype):
        """Create an empty index of the type named by the header *indextype*."""
        self.data = dict()
        self.keywords = dict()
        self.prefix_patterns = []
        self.authwords = {"after": [], "ignore": [], "sep": []}
        self.indextype = self._INDEX_TYPES.get(indextype, "")

    def filter(self, key):
        """Return ``(block, key)`` where *block* is the index block of *key*.

        The block is the upper-cased first word character found by
        ``firstLetterPattern``, or ``'0-9'`` when that character is a digit.

        NOTE(review): a key without any word character makes the pattern
        fail to match and raises AttributeError -- confirm whether such keys
        can occur.
        """
        letter = firstLetterPattern.match(key).group(1)
        if self._DIGIT.match(letter):
            letter = '0-9'
        return (letter.upper(), key)

    def keyword(self, key, word):
        """Register the value *word* for the keyword named *key*."""
        self.keywords.setdefault(key, []).append(word)

    def compileKeywords(self):
        """Turn the registered keywords into the patterns used by ``add()``.

        For a TITLE index every ``prefix`` keyword becomes a compiled pattern
        appended to ``prefix_patterns``.  For an AUTHOR index the ``after``,
        ``ignore`` and ``sep`` keywords (when registered) replace the
        matching entry of ``authwords``: ``after`` and ``sep`` values are
        compiled to patterns, other values are stored unchanged.
        """
        if self.indextype == "TITLE":
            for prefix in self.keywords.get('prefix', []):
                self.prefix_patterns.append(re.compile(
                    r"^({prefix})(\b|\\)(\s*.*)$".format(prefix=prefix)
                ))

        if self.indextype == "AUTHOR":
            # Iterate over a snapshot of the keys: values are reassigned
            # inside the loop.
            for word in list(self.authwords):
                if word not in self.keywords:
                    continue
                values = self.keywords[word]
                if word == "after":
                    self.authwords[word] = [
                        re.compile(r"^.*{after}\b(.*)".format(after=after))
                        for after in values
                    ]
                elif word == "sep":
                    # Each separator word is matched with a leading space;
                    # the comma is always accepted as a separator too.
                    separators = [
                        " {sep}".format(sep=sep) for sep in values
                    ] + [","]
                    self.authwords[word] = [
                        re.compile(r"^(.*){sep} (.*)$".format(sep=sep))
                        for sep in separators
                    ]
                else:
                    self.authwords[word] = values

    def _raw_add(self, key, number, link):
        """Store the reference ``(number, link)`` under *key*, unprocessed."""
        (first, key) = self.filter(key)
        references = self.data.setdefault(first, dict()).setdefault(key, [])
        references.append({'num': number, 'link': link})

    def add(self, key, number, link):
        """Add the reference ``(number, link)`` for *key* to the index.

        TITLE index: when a prefix pattern matches, the prefix is moved
        behind the title, in parentheses; otherwise the key is stored as is.
        AUTHOR index: the key is split by ``processauthors`` and the
        reference is stored once per returned author.

        NOTE(review): for any other index type (including SCRIPTURE) nothing
        is stored -- confirm this is intended.
        """
        if self.indextype == "TITLE":
            # Removing prefixes before titles
            for pattern in self.prefix_patterns:
                match = pattern.match(key)
                if match:
                    self._raw_add(
                        "{} ({})".format(
                            match.group(2) + match.group(3),
                            match.group(1)),
                        number, link)
                    return
            self._raw_add(key, number, link)

        if self.indextype == "AUTHOR":
            # Processing authors
            for author in processauthors(key, **self.authwords):
                self._raw_add(author, number, link)

    def refToStr(self, ref):
        r"""Return the ``\hyperlink{link}{num}`` command for the reference."""
        # The former pre-2.6 "%" fallback was unreachable: add() already
        # relies on "{}" auto-numbered fields.
        return r'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref)

    def entryToStr(self, key, entry):
        r"""Return the ``\idxentry`` line for *key* and its references.

        The references are joined with ``\\`` and the line ends with EOL.
        """
        # A u'' literal gives the same unicode template as the former
        # unicode(...) call on Python 2 and also exists on Python 3.3+.
        template = u'\\idxentry{{{0}}}{{{1}}}' + EOL
        return template.format(key, r'\\'.join(map(self.refToStr, entry)))

    def idxBlockToStr(self, letter, entries):
        """Return the ``idxblock`` environment for one block of the index.

        *entries* maps each key of the block to its list of references; the
        keys are emitted in ``sortkey`` order.
        """
        parts = [r'\begin{idxblock}{' + letter + '}' + EOL]
        parts.extend(
            self.entryToStr(key, entries[key])
            for key in sorted(entries, key=sortkey)
        )
        parts.append(r'\end{idxblock}' + EOL)
        return "".join(parts)

    def entriesToStr(self):
        """Return the whole index as text, one block per sorted letter."""
        return "".join(
            self.idxBlockToStr(letter, self.data[letter])
            for letter in sorted(self.data)
        )