#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""PlasTeX module to process song files."""

from plasTeX.TeX import TeX
from plasTeX.Base.LaTeX import Sentences

import codecs
import locale
import os
import sys

def process_unbr_spaces(node):
    r"""Replace '~' and '\ ' in node by nodes that
    will be rendered as unbreakable space.

    The whole subtree is processed: ``node`` itself is checked, then every
    entry of ``node.childNodes`` is handled recursively.

    Return node object for convenience.

    This function is a workaround to a bug that has been solved since:
    - https://github.com/tiarno/plastex/commit/76bb78d5fbaac48e68025a3545286cc63cb4e7ad
    - https://github.com/tiarno/plastex/commit/682a0d223b99d6b949bacf1c974d24dc9bb1d18e

    It can be deleted once these fixes are part of the production version of
    plasTeX.
    """
    # Exact type comparison: subclasses of these node classes are not matched.
    if (type(node) == Sentences.InterWordSpace or
            (type(node) == Sentences.NoLineBreak and node.source == '~ ')):
        # U+00A0 NO-BREAK SPACE (same value as unichr(160), but the literal
        # does not depend on the Python 2-only builtin).
        node.unicode = u"\u00a0"
    for child in node.childNodes:
        process_unbr_spaces(child)

    return node

def simpleparse(text):
    """Parse a simple LaTeX string.

    ``text`` is fed to a fresh ``TeX`` object. The ``textContent`` of the
    resulting document is passed through ``process_unbr_spaces`` and
    returned.
    """
    tex = TeX()
    # Without this call the parser has no input and `text` is ignored.
    tex.input(text)
    doc = tex.parse()
    return process_unbr_spaces(doc.textContent)

class SongParser(object):
    """Parser for .sg song files."""

    def __init__(self):
        pass

    @staticmethod
    def create_tex():
        """Create a TeX object, ready to parse a tex file."""
        tex = TeX()
        tex.ownerDocument.context.loadPackage(tex, "plastex_patchedbabel")
        tex.ownerDocument.context.loadPackage(tex, "plastex_songs")
        return tex

    @classmethod
    def parse(cls, filename):
        """Parse a TeX file, and return its plasTeX representation.

        The file is decoded as UTF-8; undecodable bytes are replaced rather
        than raising an error.
        """
        tex = cls.create_tex()
        # Read-only mode is enough here, and the `with` block guarantees the
        # file handle is closed even if parsing fails.
        with codecs.open(filename, 'r', 'utf-8', 'replace') as song_file:
            tex.input(song_file)
            # NOTE(review): parsing stays inside the `with` block in case the
            # TeX object reads its input lazily -- confirm against plasTeX.
            return tex.parse()

def parsetex(filename):
    r"""Parse a .sg file.

    Return a dictionary containing the metadata read from the file. The
    keys are:
    - languages: the set of languages used (collected from the
      \selectlanguage{} commands);
    - titles: the list of titles (empty if no \beginsong was found);
    - args: the dictionary of arguments given to \beginsong (empty if no
      \beginsong was found).
    """
    # /* BEGIN plasTeX patch
    # The lines between BEGIN and END are used to circumvent a plasTeX bug.
    # It has been reported and corrected:
    # https://github.com/tiarno/plastex/commit/8f4e5a385f3cb6a04d5863f731ce24a7e856f2a4
    # To see if you can delete those lines, set your LC_TIME locale to French,
    # during a month containing diacritics (e.g. Février), and run songbook. If
    # no plasTeX bug appears, it is safe to remove those lines (keeping only
    # the call to SongParser.parse()).
    #
    # Calling setlocale() without a value returns the current setting as a
    # string that setlocale() accepts back unchanged.
    saved_lc_time = locale.setlocale(locale.LC_TIME)
    locale.setlocale(locale.LC_TIME, 'C')
    try:
        # Parsing
        doc = SongParser.parse(filename)
    finally:
        # Restore the caller's locale even when parsing raises.
        locale.setlocale(locale.LC_TIME, saved_lc_time)
    # plasTeX patch END */

    # Data extraction
    data = {
        "languages": set(),
        "titles": [],
        "args": {},
    }
    for node in doc.allChildNodes:
        if node.nodeName == "selectlanguage":
            # NOTE(review): the attribute name "lang" is assumed from the
            # babel \selectlanguage definition -- confirm against the
            # plastex_patchedbabel package.
            data["languages"].add(node.attributes["lang"])
        elif node.nodeName == "beginsong":
            data["titles"] = node.attributes["titles"]
            data["args"] = node.attributes["args"]

    return data