#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Generate index files for the Crep's chordbook compilation. This is a
# replacement for the original makeindex program written in C: it
# produces an index file (.sbx) from the file (.sxd) generated by the
# LaTeX compilation of the songbook.
#
# Usage: songbook-makeindex.py src
#   src is the .sxd file generated by latex
#

import sys
import re
import locale
import warnings

# Matches a "%keyword value" header line of the .sxd file.
keywordPattern = re.compile(r"^%(\w+)\s?(.*)$")
# Captures the first word character of a title, skipping leading LaTeX
# commands (optionally wrapped in braces) and non-word characters.
firstLetterPattern = re.compile(r"^(?:\{?\\\w+\}?)*[^\w]*(\w)")
# Matches "\IeC {\<sequence>}" and captures <sequence>. The leading
# backslash is escaped so that the whole LaTeX command is consumed ("\I" is
# not a valid regular-expression escape and is rejected by Python 3.7+).
iecPattern = re.compile(r"\\IeC \{\\(.*?)\}")

# Maps the escape sequence captured by iecPattern to the character used when
# sorting titles.
replacePattern = {
    '`A': 'À',
    '`a': 'à',
    '^a': 'â',
    "'a": 'á',
    "~a": 'ã',
    'oe': 'œ',
    "'e": 'é',
    "`e": 'è',
    "^e": 'ê',
    '"e': 'ë',
    "'E": 'É',
    "`E": 'È',
    "'o": 'ó',
    "^o": 'ô',
    r'"\i': 'i',
    r'^\i': 'i',
    '"u': 'ü',
    '`u': 'ù',
    '~n': 'ñ',
    "c C": 'Ç',
    "c c": 'ç',
    "textquoteright": "'",
}


def sortkey(value):
    '''
    From a title, return something usable for sorting.

    The key is built with locale.strxfrm, so it follows the current locale
    (do not forget to call locale.setlocale(locale.LC_ALL, '')). LaTeX
    "\\IeC {\\...}" escape sequences are replaced using the *replacePattern*
    dictionary, and spaces are replaced by 'A' before the transformation.

    A sequence missing from *replacePattern* triggers a warning and is
    dropped from the key; add it to the dictionary to handle it.
    '''
    def repl(match):
        try:
            return replacePattern[match.group(1).strip()]
        except KeyError:
            warnings.warn(
                "Error, no match to replace %s in %s. "
                "You should add it in the coresponding table in title_sort.py"
                % (match.group(0), match.group(1)))
            # Explicitly drop the unknown sequence from the sort key.
            return ''

    return locale.strxfrm(iecPattern.sub(repl, value).replace(' ', 'A'))


def processSXDEntry(tab):
    '''
    Return the (key, number, link) tuple made of the first three items of
    *tab*. Raises IndexError if *tab* has fewer than three items.
    '''
    return (tab[0], tab[1], tab[2])


def processSXD(filename):
    '''
    Read the .sxd file *filename* and return the corresponding index object.

    The first line of the file is skipped. The lines that follow and start
    with '%' are registered as keywords; the remaining lines are consumed
    three at a time as (key, number, link) entries. An empty file or a file
    holding only header lines yields an empty index.
    '''
    with open(filename) as sxd:
        data = [line.strip() for line in sxd]

    idx = index()

    # data[0] is the header line of the file; it is not used.
    i = 1
    while i < len(data) and data[i].startswith('%'):
        match = keywordPattern.match(data[i])
        # A '%' line that is not of the form "%keyword value" is skipped.
        if match:
            idx.keyword(match.group(1), match.group(2))
        i += 1
    idx.compileKeywords()

    for start in range(i, len(data), 3):
        record = data[start:start + 3]
        if len(record) < 3:
            # Trailing blank lines are ignored silently; anything else is
            # reported since an entry is being dropped.
            if any(record):
                warnings.warn(
                    "Ignoring incomplete index entry at the end of %s"
                    % filename)
            break
        entry = processSXDEntry(record)
        idx.add(entry[0], entry[1], entry[2])

    return idx


class index(object):
    '''
    Index entries grouped by first letter.

    *data* maps a block name (an upper-cased first letter, or '0-9') to a
    dictionary mapping each key to its list of {'num', 'link'} references.
    *keywords* maps a keyword name to the list of values registered for it.
    '''

    def __init__(self):
        # Per-instance containers: class-level dictionaries would be shared
        # by every index object.
        self.data = {}
        self.keywords = {}
        # Filled by compileKeywords(); defined here so that add() can be
        # called even when no keyword has been compiled.
        self.prefix_patterns = []

    def filter(self, key):
        '''
        Return the (block, key) pair for *key*, where block is the
        upper-cased first word character of the key, or '0-9' when that
        character is a digit.

        Raises AttributeError if *key* holds no word character.
        '''
        letter = firstLetterPattern.match(key).group(1)
        if re.match(r'\d', letter):
            letter = '0-9'
        return (letter.upper(), key)

    def keyword(self, key, word):
        '''Register *word* as one more value of the keyword *key*.'''
        self.keywords.setdefault(key, []).append(word)

    def compileKeywords(self):
        '''
        Compile one pattern per registered 'prefix' keyword value. Each
        value is inserted in the regular expression as is.
        '''
        self.prefix_patterns = [
            re.compile(r"^(%s)\b\s*(.*)$" % prefix)
            for prefix in self.keywords.get('prefix', [])
        ]

    def add(self, key, number, link):
        '''
        Add the reference (*number*, *link*) under *key*.

        If the key starts with a registered prefix, the prefix is moved to
        the end of the key between parentheses ("The Wall" becomes
        "Wall (The)") before the key is filed under its first letter.
        '''
        for pattern in self.prefix_patterns:
            match = pattern.match(key)
            if match:
                key = "%s (%s)" % (match.group(2), match.group(1))
                break  # Only one match per key

        (first, key) = self.filter(key)
        references = self.data.setdefault(first, {}).setdefault(key, [])
        references.append({'num': number, 'link': link})

    def refToStr(self, ref):
        '''Return the \\hyperlink command for the reference *ref*.'''
        return '\\hyperlink{%(link)s}{%(num)s}' % ref

    def entryToStr(self, key, entry):
        '''
        Return the \\idxentry line for *key*, whose references *entry* are
        joined by "\\\\".
        '''
        references = '\\\\'.join(map(self.refToStr, entry))
        return '\\idxentry{%s}{%s}\n' % (key, references)

    def idxBlockToStr(self, letter, entries):
        '''
        Return the idxblock environment named *letter*, with one line per
        key of *entries*, sorted with sortkey.
        '''
        parts = ['\\begin{idxblock}{' + letter + '}' + '\n']
        for key in sorted(entries, key=sortkey):
            parts.append(self.entryToStr(key, entries[key]))
        parts.append('\\end{idxblock}' + '\n')
        return ''.join(parts)

    def entriesToStr(self):
        '''Return all the index blocks, sorted by block name.'''
        return ''.join(
            self.idxBlockToStr(letter, self.data[letter])
            for letter in sorted(self.data)
        )