#!/usr/bin/python
# -*- coding: utf-8 -*-

"""Manage indexes.

Generate index files for the songbook compilation. This is a replacement for
the original makeindex program written in C that produces an index file (.sbx)
from a file generated by the LaTeX compilation of the songbook (.sxd).
"""

from unidecode import unidecode
import locale
import re
import sys

from songbook_core.authors import processauthors
from songbook_core.plastex import simpleparse

# Line terminator appended to every generated LaTeX line.
EOL = "\n"

# Matches a header line of the form "%keyword value": group 1 is the keyword
# (word characters right after the '%'), group 2 is the rest of the line after
# at most one whitespace character.
KEYWORD_PATTERN = re.compile(r"^%(\w+)\s?(.*)$")
# Captures (group 1) the first word character of a string, skipping any
# leading LaTeX commands (optionally wrapped in braces) and any non-word
# characters that follow them.
FIRST_LETTER_PATTERN = re.compile(r"^(?:\{?\\\w+\}?)*[^\w]*(\w)")


def sortkey(value):
    """Build a locale-aware sort key from an index entry.

    The value goes through simpleparse() (presumably to get rid of LaTeX
    escape sequences -- confirm against songbook_core.plastex), every space is
    replaced by 'A', the result is transliterated with unidecode() and finally
    transformed with locale.strxfrm() so that comparing keys follows the
    current locale.

    Remember to call locale.setlocale(locale.LC_ALL, '') beforehand, otherwise
    the default "C" collation is used.
    """
    parsed = simpleparse(value)
    # Spaces are turned into a letter before collation (presumably so that
    # word boundaries are not ignored by the locale -- TODO confirm).
    without_spaces = parsed.replace(' ', 'A')
    transliterated = unidecode(without_spaces)
    return locale.strxfrm(transliterated)


def process_sxd(filename):
    """Parse an sxd file and return the corresponding Index object.

    The file is read as a sequence of stripped lines:

    - the first line is passed to Index() to select the index type;
    - the following lines starting with '%' are keyword declarations
      ("%keyword value"), registered with Index.add_keyword();
    - the remaining lines are consumed three at a time and passed, in order,
      as the (key, number, link) arguments of Index.add().

    Raises IndexError if the file is empty or if the entries do not come in
    complete groups of three lines, and AttributeError if a line starting with
    '%' does not match KEYWORD_PATTERN.
    """
    # The context manager guarantees the file is closed even if reading fails.
    with open(filename) as index_file:
        data = [line.strip() for line in index_file]

    idx = Index(data[0])

    # Header: keyword declarations directly following the first line.
    position = 1
    while position < len(data) and data[position].startswith('%'):
        keyword, value = KEYWORD_PATTERN.match(data[position]).groups()
        idx.add_keyword(keyword, value)
        position += 1

    # Keywords must be compiled before entries are added, since add() relies
    # on the compiled patterns.
    idx.compile_keywords()

    # Body: one entry every three lines.
    for start in range(position, len(data), 3):
        entry = data[start:start + 3]
        idx.add(entry[0], entry[1], entry[2])

    return idx


class Index(object):
    """Title, author or scripture Index representation.

    Instance attributes:

    - indextype: "TITLE", "SCRIPTURE", "AUTHOR", or "" when the type string
      given to the constructor is not recognised;
    - data: dict mapping a block letter to a dict mapping each entry key to
      its list of references ({'num': ..., 'link': ...} dicts);
    - keywords: dict mapping a keyword name to the list of declared values;
    - prefix_patterns: compiled title-prefix patterns (TITLE index only);
    - authwords: "after", "ignore" and "sep" settings handed to
      processauthors() (AUTHOR index only).
    """

    # Maps the type string received by the constructor to the internal name.
    _INDEX_TYPES = {
        "TITLE INDEX DATA FILE": "TITLE",
        "SCRIPTURE INDEX DATA FILE": "SCRIPTURE",
        "AUTHOR INDEX DATA FILE": "AUTHOR",
    }

    def __init__(self, indextype):
        """Create an empty index.

        indextype is the index type string (e.g. "TITLE INDEX DATA FILE");
        an unrecognised string yields an index whose type is "".
        """
        self.data = dict()
        self.keywords = dict()
        self.prefix_patterns = []
        self.authwords = {"after": [], "ignore": [], "sep": []}
        self.indextype = self._INDEX_TYPES.get(indextype, "")

    @staticmethod
    def get_first_letter(key):
        """Return the uppercase first letter of key.

        Leading LaTeX commands and non-word characters are skipped (see
        FIRST_LETTER_PATTERN). If the first word character is a digit, the
        string '0-9' is returned instead.

        Raises AttributeError if key contains no word character at all, since
        the pattern then does not match.
        """
        letter = FIRST_LETTER_PATTERN.match(key).group(1)
        if re.match(r'\d', letter):
            # All entries starting with a digit share a single block.
            letter = '0-9'
        return letter.upper()

    def add_keyword(self, key, word):
        """Add 'word' to self.keywords[key], creating the list if needed."""
        self.keywords.setdefault(key, []).append(word)

    def compile_keywords(self):
        """Turn keywords (self.keywords) into regular expressions.

        - TITLE index: each 'prefix' keyword becomes a pattern appended to
          self.prefix_patterns.
        - AUTHOR index: 'after' and 'sep' keywords are compiled into
          self.authwords; 'ignore' keywords are stored as plain strings.

        Other index types are left untouched.

        NOTE: keyword values are inserted in the regular expressions as they
        are (no escaping), so regex metacharacters in a keyword are
        interpreted as such.
        """
        if self.indextype == "TITLE":
            for prefix in self.keywords.get('prefix', []):
                # group(1): prefix, group(2): '' or a backslash following
                # the prefix, group(3): the remainder of the title.
                self.prefix_patterns.append(re.compile(
                    r"^({prefix})(\b|\\)(\s*.*)$".format(prefix=prefix)
                    ))

        if self.indextype == "AUTHOR":
            if "after" in self.keywords:
                self.authwords["after"] = [
                    re.compile(r"^.*{after}\b(.*)".format(after=after))
                    for after in self.keywords["after"]
                    ]
            if "sep" in self.keywords:
                # Declared separators are words (hence the leading space);
                # the comma is always accepted as an additional separator.
                separators = [
                    " {sep}".format(sep=sep) for sep in self.keywords["sep"]
                    ] + [","]
                self.authwords["sep"] = [
                    re.compile(r"^(.*){sep} (.*)$".format(sep=sep))
                    for sep in separators
                    ]
            if "ignore" in self.keywords:
                self.authwords["ignore"] = self.keywords["ignore"]

    def _raw_add(self, key, number, link):
        """Add a song to the list.

        No processing is done on data: it is added raw, in the block given by
        get_first_letter(key). See add() for a similar method with
        processing.
        """
        first = self.get_first_letter(key)
        references = self.data.setdefault(first, dict()).setdefault(key, [])
        references.append({'num': number, 'link': link})

    def add(self, key, number, link):
        """Add a song to the list.

        Process data before adding it:

        - TITLE index: if the title starts with one of the declared
          prefixes, the prefix is moved to the end, between parentheses;
        - AUTHOR index: key is split by processauthors(), and one entry is
          added per resulting author.

        For any other index type, nothing is added.
        """
        if self.indextype == "TITLE":
            # Removing prefixes before titles: only the first matching
            # pattern is applied.
            for pattern in self.prefix_patterns:
                match = pattern.match(key)
                if match:
                    title = "{} ({})".format(
                        match.group(2) + match.group(3),
                        match.group(1),
                        )
                    self._raw_add(title, number, link)
                    return
            self._raw_add(key, number, link)
        elif self.indextype == "AUTHOR":
            # Processing authors
            for author in processauthors(key, **self.authwords):
                self._raw_add(author, number, link)

    @staticmethod
    def ref_to_str(ref):
        """Return the LaTeX code corresponding to the reference.

        ref is a mapping with 'link' and 'num' items.
        """
        return r'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref)

    def entry_to_str(self, key, entry):
        """Return the LaTeX code corresponding to the entry.

        key is the entry text and entry its list of references, which are
        joined with a LaTeX line break.
        """
        references = r'\\'.join([self.ref_to_str(ref) for ref in entry])
        # A unicode literal gives the same result as wrapping the template in
        # unicode() on Python 2, without depending on that builtin.
        return (u'\\idxentry{{{0}}}{{{1}}}' + EOL).format(key, references)

    def idxblock_to_str(self, letter, entries):
        """Return the LaTeX code corresponding to an index block.

        Here, an index block is a letter, and all data beginning with this
        letter. Entries are ordered according to sortkey().
        """
        parts = [r'\begin{idxblock}{' + letter + '}' + EOL]
        parts.extend(
            self.entry_to_str(key, entries[key])
            for key in sorted(entries, key=sortkey)
            )
        parts.append(r'\end{idxblock}' + EOL)
        return ''.join(parts)

    def entries_to_str(self):
        """Return the LaTeX code corresponding to the index.

        Blocks are emitted in the plain sorted order of their letters.
        """
        return "".join(
            self.idxblock_to_str(letter, self.data[letter])
            for letter in sorted(self.data)
            )