#!/usr/bin/python
# -*- coding: utf-8 -*-

"""Manage indexes.

Generate indexes files for the songbook compilation. This is a replacement for
the original makeindex program written in C that produces an index file (.sbx)
from a file generated by the latex compilation of the songbook (.sxd).
"""

import codecs
import locale
import re

from unidecode import unidecode

from songbook_core import authors
from songbook_core.plastex import simpleparse

EOL = u"\n"

# Keyword line of an sxd file: '%', the keyword name, at most one whitespace
# character, then the keyword value (rest of the line).
KEYWORD_PATTERN = re.compile(r"^%(\w+)\s?(.*)$")
# Pattern set to ignore latex commands (optionally wrapped in braces) and
# leading non-word characters, capturing the first word character of a key.
# NOTE(review): compiled without re.UNICODE, so on Python 2 an accented
# leading letter is skipped and the next ASCII word character is captured.
FIRST_LETTER_PATTERN = re.compile(r"^(?:\{?\\\w+\}?)*[^\w]*(\w)")


def sortkey(value):
    """From a title, return something usable for sorting.

    The value goes through simpleparse() (from songbook_core.plastex), spaces
    are replaced by 'A', the result is transliterated with unidecode(),
    lowercased and finally transformed with locale.strxfrm().

    It handles locale (but don't forget to call
    locale.setlocale(locale.LC_ALL, '')).
    """
    # NOTE(review): the space -> 'A' substitution presumably controls how
    # multi-word titles sort relative to longer words -- confirm the intent.
    plain = simpleparse(value).replace(' ', 'A')
    return locale.strxfrm(unidecode(plain).lower())


def process_sxd(filename):
    """Parse sxd file.

    The first line selects the index type, the following lines starting with
    '%' are keywords, and the remaining lines are read as consecutive
    (key, number, link) triples.

    Return an Index object.

    Raises IndexError if the file is empty or if the trailing entry has fewer
    than three lines, and AttributeError if a '%' line does not match
    KEYWORD_PATTERN.
    """
    with codecs.open(filename, 'r', 'utf-8') as index_file:
        data = [line.strip() for line in index_file]

    idx = Index(data[0])

    # Keyword lines directly follow the header line.
    position = 1
    while position < len(data) and data[position].startswith('%'):
        name, word = KEYWORD_PATTERN.match(data[position]).groups()
        idx.add_keyword(name, word)
        position += 1
    idx.compile_keywords()

    # Everything after the keywords is a flat sequence of 3-line entries.
    for start in range(position, len(data), 3):
        entry = data[start:start + 3]
        idx.add(entry[0], entry[1], entry[2])

    return idx


class Index(object):
    """Title, author or scripture Index representation."""

    def __init__(self, indextype):
        """Create an empty index.

        'indextype' is the header line of the sxd file; an unrecognized
        header gives an index whose type is the empty string.
        """
        # self.data[first letter][key] is a list of {'num': ..., 'link': ...}
        self.data = dict()
        # self.keywords[keyword name] is the list of words for this keyword
        self.keywords = dict()
        # Compiled regular expressions, filled in by compile_keywords()
        self.prefix_patterns = []

        if indextype == "TITLE INDEX DATA FILE":
            self.indextype = "TITLE"
        elif indextype == "SCRIPTURE INDEX DATA FILE":
            self.indextype = "SCRIPTURE"
        elif indextype == "AUTHOR INDEX DATA FILE":
            self.indextype = "AUTHOR"
        else:
            self.indextype = ""

    @staticmethod
    def get_first_letter(key):
        """Return the uppercase first letter of key.

        Keys whose first word character is a digit are grouped under '0-9'.

        Raises AttributeError if key contains no word character (the pattern
        does not match).
        """
        letter = FIRST_LETTER_PATTERN.match(key).group(1)
        if re.match(r'\d', letter):
            letter = '0-9'
        return letter.upper()

    def add_keyword(self, key, word):
        """Add 'word' to self.keywords[key]."""
        self.keywords.setdefault(key, []).append(word)

    def compile_keywords(self):
        """Turn keywords (self.keywords) into regular expressions.

        For a title index, each 'prefix' keyword becomes a pattern with three
        groups: the prefix, the separator (empty word boundary or a
        backslash) and the rest of the title. For an author index, the
        keywords are handed to authors.compile_authwords().
        """
        if self.indextype == "TITLE":
            for prefix in self.keywords.get('prefix', []):
                if isinstance(prefix, bytes):
                    prefix = prefix.decode('utf-8')
                # Build the pattern as text (not with a byte-string format)
                # so non-ASCII prefixes do not fail on Python 2, and compile
                # it with re.UNICODE so that '\b' treats accented letters as
                # word characters ("Le" must not be stripped from "Leçon").
                self.prefix_patterns.append(re.compile(
                    u"^(" + prefix + r")(\b|\\)(\s*.*)$",
                    re.UNICODE
                ))

        if self.indextype == "AUTHOR":
            self.authwords = authors.compile_authwords(self.keywords)

    def _raw_add(self, key, number, link):
        """Add a song to the list.

        No processing is done on data. It is added raw. See add() for a
        similar method with processing.
        """
        first = self.get_first_letter(key)
        entries = self.data.setdefault(first, dict())
        entries.setdefault(key, []).append({'num': number, 'link': link})

    def add(self, key, number, link):
        """Add a song to the list.

        Process data before adding it: in a title index, the first matching
        prefix is moved to the end of the title, between parentheses; in an
        author index, one entry is added per author returned by
        authors.processauthors(). Entries of any other index type are
        ignored.
        """
        if self.indextype == "TITLE":
            # Work on text only: byte strings are taken to be UTF-8, as in
            # entry_to_str().
            if isinstance(key, bytes):
                key = key.decode('utf-8')

            # Removing prefixes before titles
            for pattern in self.prefix_patterns:
                match = pattern.match(key)
                if match:
                    self._raw_add(
                        u"{} ({})".format(
                            match.group(2) + match.group(3),
                            match.group(1)
                        ),
                        number,
                        link
                    )
                    return
            self._raw_add(key, number, link)

        if self.indextype == "AUTHOR":
            # Processing authors
            for author in authors.processauthors(key, **self.authwords):
                self._raw_add(author, number, link)

    @staticmethod
    def ref_to_str(ref):
        """Return the LaTeX code corresponding to the reference."""
        return r'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref)

    def entry_to_str(self, key, entry):
        """Return the LaTeX code corresponding to the entry.

        'entry' is the list of references of 'key'; they are separated by
        a LaTeX line break.
        """
        if isinstance(key, bytes):
            key = key.decode("UTF-8")
        return (u'\\idxentry{{{0}}}{{{1}}}' + EOL).format(
            key,
            u'\\\\'.join(self.ref_to_str(ref) for ref in entry),
        )

    def idxblock_to_str(self, letter, entries):
        """Return the LaTeX code corresponding to an index block.

        Here, an index block is a letter, and all data beginning with this
        letter. Entries are ordered using sortkey().
        """
        parts = [r'\begin{idxblock}{' + letter + '}' + EOL]
        parts.extend(
            self.entry_to_str(key, entries[key])
            for key in sorted(entries, key=sortkey)
        )
        parts.append(r'\end{idxblock}' + EOL)
        return u"".join(parts)

    def entries_to_str(self):
        """Return the LaTeX code corresponding to the index."""
        return "".join(
            self.idxblock_to_str(letter, self.data[letter])
            for letter in sorted(self.data)
        )