patacrep/songbook/index.py


								#!/usr/bin/python

								# -*- coding: utf-8 -*-


								"""Manage indexes.


								Generate index files for the songbook compilation. This is a replacement for

								the original makeindex program written in C that produces an index file (.sbx)

								from a file generated by the latex compilation of the songbook (.sxd).

								"""


								from unidecode import unidecode

								import locale

								import re

								import sys

								#import warnings


								from songbook.authors import processauthors

								from songbook.plastex import simpleparse


								# Matches a line of the form "%<name> <value>": group 1 is the run of word
								# characters right after the '%', group 2 is the rest of the line after one
								# optional whitespace character. Used by processSXD on the '%' lines that
								# follow the first line of the file.
								keywordPattern = re.compile(r"^%(\w+)\s?(.*)$")

								# Captures (group 1) the first word character of a key, after skipping any
								# leading LaTeX commands (a backslash followed by word characters, optionally
								# wrapped in braces) and any non-word characters. Used by index.filter to
								# choose the letter block an entry is filed under.
								firstLetterPattern = re.compile(r"^(?:\{?\\\w+\}?)*[^\w]*(\w)")


								def sortkey(value):
								    """Return a locale-aware sort key for an index entry.

								    The value is passed through ``simpleparse`` (presumably to drop LaTeX
								    markup; see songbook.plastex), every space is replaced by ``'A'``, the
								    text is transliterated with ``unidecode`` and the result is transformed
								    with ``locale.strxfrm``.

								    The caller is responsible for selecting the locale first, e.g. with
								    ``locale.setlocale(locale.LC_ALL, '')``.
								    """
								    parsed = simpleparse(value)
								    without_spaces = parsed.replace(' ', 'A')
								    transliterated = unidecode(without_spaces)
								    return locale.strxfrm(transliterated)


								def processSXD(filename):
								    """Parse an sxd file and return the index object built from it.

								    Layout consumed by this function (every line is stripped first):

								    * line 1: the index type header, passed to ``index(...)``;
								    * then zero or more lines starting with ``%``: keyword declarations
								      matching ``keywordPattern``, registered with ``idx.keyword``;
								    * then the entries, three lines each, passed in order to
								      ``idx.add(key, number, link)``.

								    An empty file yields an index with an empty type header. ``%`` lines
								    that do not match ``keywordPattern`` are skipped, and a trailing
								    incomplete entry (fewer than three lines) is ignored.

								    :param filename: path of the .sxd file to read.
								    :return: the populated ``index`` instance.
								    """
								    index_file = open(filename)
								    try:
								        # try/finally guarantees the handle is released even if reading
								        # fails part-way through.
								        data = [line.strip() for line in index_file]
								    finally:
								        index_file.close()

								    if data:
								        header = data[0]
								    else:
								        header = ""
								    idx = index(header)

								    # Keyword block: consecutive '%' lines right after the header.
								    position = 1
								    while position < len(data) and data[position].startswith('%'):
								        match = keywordPattern.match(data[position])
								        if match:
								            idx.keyword(match.group(1), match.group(2))
								        position += 1

								    # Keywords must be compiled before entries are added, since add()
								    # relies on the compiled prefix/author patterns.
								    idx.compileKeywords()

								    # Stop at len(data) - 2 so that only complete three-line entries are
								    # unpacked.
								    for start in range(position, len(data) - 2, 3):
								        key, number, link = data[start:start + 3]
								        idx.add(key, number, link)

								    return idx


								class index:
								    """Title, author or scripture index representation.

								    Instance attributes:

								    * ``data``: ``{block letter: {key: [{'num': ..., 'link': ...}, ...]}}``
								    * ``keywords``: ``{keyword name: [values, in declaration order]}``
								    * ``prefix_patterns``: compiled regexes built from the ``prefix``
								      keywords (title indexes only)
								    * ``authwords``: keyword arguments handed to ``processauthors``
								      (author indexes only)
								    * ``indextype``: ``"TITLE"``, ``"SCRIPTURE"``, ``"AUTHOR"`` or ``""``
								    """

								    # Maps the first line of an sxd file to the internal index type.
								    _HEADER_TYPES = {
								        "TITLE INDEX DATA FILE": "TITLE",
								        "SCRIPTURE INDEX DATA FILE": "SCRIPTURE",
								        "AUTHOR INDEX DATA FILE": "AUTHOR",
								    }

								    def __init__(self, indextype):
								        """Create an empty index.

								        :param indextype: header line of the sxd file; unknown headers give
								            an index whose ``indextype`` is the empty string.
								        """
								        self.data = dict()
								        self.keywords = dict()
								        self.prefix_patterns = []
								        self.authwords = {"after": [], "ignore": [], "sep": []}
								        self.indextype = self._HEADER_TYPES.get(indextype, "")

								    def filter(self, key):
								        """Return ``(block letter, key)`` for an entry key.

								        The block letter is the upper-cased first word character found by
								        ``firstLetterPattern``; digits are grouped under ``'0-9'``. Keys
								        without any word character are also filed under ``'0-9'`` instead
								        of raising.
								        """
								        match = firstLetterPattern.match(key)
								        if match is None:
								            # No letter or digit at all (empty or symbol-only key).
								            return ('0-9', key)
								        letter = match.group(1)
								        if re.match(r'\d', letter):
								            letter = '0-9'
								        return (letter.upper(), key)

								    def keyword(self, key, word):
								        """Record the value ``word`` for the keyword named ``key``."""
								        self.keywords.setdefault(key, []).append(word)

								    def compileKeywords(self):
								        """Turn the recorded keywords into the patterns used by ``add``.

								        For a title index, each ``prefix`` keyword becomes a regex appended
								        to ``prefix_patterns``. For an author index, ``after`` and ``sep``
								        keywords are compiled to regexes and ``ignore`` keywords are copied
								        as-is into ``authwords``; keys without keywords keep their default
								        empty list.
								        """
								        if self.indextype == "TITLE":
								            self.prefix_patterns.extend(
								                re.compile(r"^(%s)(\b|\\)(\s*.*)$" % prefix)
								                for prefix in self.keywords.get('prefix', []))

								        if self.indextype == "AUTHOR":
								            if "after" in self.keywords:
								                self.authwords["after"] = [
								                    re.compile(r"^.*%s\b(.*)" % after)
								                    for after in self.keywords["after"]]
								            if "sep" in self.keywords:
								                # Declared separators are matched as whole words (leading
								                # space); the comma is always accepted as a separator too.
								                separators = [" %s" % sep for sep in self.keywords["sep"]]
								                separators.append(",")
								                self.authwords["sep"] = [
								                    re.compile(r"^(.*)%s (.*)$" % sep)
								                    for sep in separators]
								            if "ignore" in self.keywords:
								                self.authwords["ignore"] = self.keywords["ignore"]

								    def _raw_add(self, key, number, link):
								        """File one reference under ``key`` in the matching letter block."""
								        (first, key) = self.filter(key)
								        references = self.data.setdefault(first, dict()).setdefault(key, [])
								        references.append({'num': number, 'link': link})

								    def add(self, key, number, link):
								        """Add an entry read from the sxd file.

								        Title index: if the title starts with a declared prefix, it is
								        stored as ``"<rest> (<prefix>)"``; otherwise it is stored unchanged.
								        Author index: ``key`` is handed to ``processauthors`` together with
								        ``authwords`` and every author it returns gets its own entry.
								        Other index types store nothing.
								        """
								        if self.indextype == "TITLE":
								            # Only the first matching prefix is applied.
								            for pattern in self.prefix_patterns:
								                match = pattern.match(key)
								                if match:
								                    moved = "%s (%s)" % (match.group(2) + match.group(3),
								                                         match.group(1))
								                    self._raw_add(moved, number, link)
								                    return
								            self._raw_add(key, number, link)

								        if self.indextype == "AUTHOR":
								            for author in processauthors(key, **self.authwords):
								                self._raw_add(author, number, link)

								    def refToStr(self, ref):
								        """Format one reference dict as a LaTeX ``\\hyperlink`` command."""
								        if sys.version_info >= (2, 6):
								            return '\\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref)
								        else:
								            return '\\hyperlink{%(link)s}{%(num)s}' % ref

								    def entryToStr(self, key, entry):
								        """Format a key and its references as one ``\\idxentry`` line.

								        The references are joined with a LaTeX line break (two
								        backslashes).
								        """
								        references = '\\\\'.join(map(self.refToStr, entry))
								        # u'' literals give the same unicode template as unicode('...') on
								        # Python 2 and also exist on Python 3.
								        if sys.version_info >= (2, 6):
								            return u'\\idxentry{{{0}}}{{{1}}}\n'.format(key, references)
								        else:
								            return u'\\idxentry{%s}{%s}\n' % (key, references)

								    def idxBlockToStr(self, letter, entries):
								        """Format one letter block, entries ordered with ``sortkey``."""
								        parts = ['\\begin{idxblock}{' + letter + '}' + '\n']
								        parts.extend(self.entryToStr(key, entries[key])
								                     for key in sorted(entries, key=sortkey))
								        parts.append('\\end{idxblock}' + '\n')
								        return ''.join(parts)

								    def entriesToStr(self):
								        """Format the whole index, letter blocks in sorted order."""
								        return "".join(self.idxBlockToStr(letter, self.data[letter])
								                       for letter in sorted(self.data))