diff --git a/Requirements.txt b/Requirements.txt index 9678473f..4aa4a442 100644 --- a/Requirements.txt +++ b/Requirements.txt @@ -2,4 +2,3 @@ Jinja2==2.7.3 argparse==1.2.1 chardet==2.2.1 unidecode>=0.04.16 -https://github.com/tiarno/plastex/archive/master.zip \ No newline at end of file diff --git a/patacrep/index.py b/patacrep/index.py index 470bb67e..c1b74806 100644 --- a/patacrep/index.py +++ b/patacrep/index.py @@ -12,7 +12,7 @@ import re from patacrep import authors from patacrep import encoding -from patacrep.plastex import simpleparse +from patacrep.latex import latex2unicode EOL = u"\n" @@ -112,7 +112,7 @@ class Index(object): if not key in self.data[first].keys(): self.data[first][key] = { 'sortingkey': [ - encoding.unidecode(simpleparse(item)).lower() + encoding.unidecode(latex2unicode(item)).lower() for item in key ], 'entries': [], diff --git a/patacrep/latex/__init__.py b/patacrep/latex/__init__.py new file mode 100644 index 00000000..f284c094 --- /dev/null +++ b/patacrep/latex/__init__.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- + +"""Very simple LaTeX parser""" + +def latex2unicode(string): + """Convert LaTeX string to unicode""" + return u"TODO" + +def parsetex(path): + """Return a dictionary of data read from the latex file `path`. + + This function is a drop-in replacement for an old function. It should not + appear as is in the final version, once + https://github.com/patacrep/patacrep/issues/64 has been addressed. 
+ + TODO + """ + return { + 'titles': ["TODO"], + 'args': {}, + 'languages': ['french'], + } + diff --git a/patacrep/plastex.py b/patacrep/plastex.py deleted file mode 100644 index cdaa3a64..00000000 --- a/patacrep/plastex.py +++ /dev/null @@ -1,117 +0,0 @@ -# -*- coding: utf-8 -*- - -"""PlasTeX module to process song files.""" - -from plasTeX.TeX import TeX -from plasTeX.Base.LaTeX import Sentences - -import locale -import os -import sys - -from patacrep import encoding - -def process_unbr_spaces(node): - #pylint: disable=line-too-long - r"""Replace '~' and '\ ' in node by nodes that - will be rendered as unbreakable space. - - Return node object for convenience. - - This function is a workaround to a bug that has been solved since: - - https://github.com/tiarno/plastex/commit/76bb78d5fbaac48e68025a3545286cc63cb4e7ad - - https://github.com/tiarno/plastex/commit/682a0d223b99d6b949bacf1c974d24dc9bb1d18e - - It can be deleted once this bug has been merged in production version of - PlasTeX. - """ - if (type(node) == Sentences.InterWordSpace or - (type(node) == Sentences.NoLineBreak and node.source == '~ ')): - node.unicode = unichr(160) - for child in node.childNodes: - process_unbr_spaces(child) - - return node - - -def simpleparse(text): - """Parse a simple LaTeX string. 
- """ - tex = TeX() - tex.disableLogging() - tex.input(text) - doc = tex.parse() - return process_unbr_spaces(doc.textContent) - - -class SongParser(object): - """Analyseur syntaxique de fichiers .sg""" - - @staticmethod - def create_tex(): - """Create a TeX object, ready to parse a tex file.""" - tex = TeX() - tex.disableLogging() - tex.ownerDocument.context.loadBaseMacros() - sys.path.append(os.path.dirname(__file__)) - tex.ownerDocument.context.loadPackage(tex, "plastex_patchedbabel") - tex.ownerDocument.context.loadPackage(tex, "plastex_chord") - tex.ownerDocument.context.loadPackage(tex, "plastex_songs") - tex.ownerDocument.context.loadPackage(tex, "plastex_misc_commands") - sys.path.pop() - return tex - - @classmethod - def parse(cls, filename): - """Parse a TeX file, and return its plasTeX representation.""" - tex = cls.create_tex() - tex.input(encoding.open_read(filename, 'r')) - return tex.parse() - - -def parsetex(filename): - r"""Analyse syntaxique d'un fichier .sg - - Renvoie un dictionnaire contenant les métadonnées lues dans le fichier. Les - clefs sont : - - languages: l'ensemble des langages utilisés (recherche des - \selectlanguages{}) ; - - titles: la liste des titres ; - - args: le dictionnaire des paramètres passés à \beginsong. - """ - # /* BEGIN plasTeX patch - # The following lines, and another line a few lines later, are used to - # circumvent a plasTeX bug. It has been reported and corrected : - # https://github.com/tiarno/plastex/commit/8f4e5a385f3cb6a04d5863f731ce24a7e856f2a4 - # To see if you can delete those lines, set your LC_TIME locale to French, - # during a month containing diacritics (e.g. Février), and run songbook. If - # no plasTeX bug appears, it is safe to remove those lines. 
- oldlocale = locale.getlocale(locale.LC_TIME) - locale.setlocale(locale.LC_TIME, 'C') - # plasTeX patch END */ - - # Analyse syntaxique - doc = SongParser.parse(filename) - - # /* BEGIN plasTeX patch - if oldlocale[0] and oldlocale[1]: - try: - locale.setlocale(locale.LC_TIME, "%s.%s" % oldlocale) - except locale.Error: - pass # Workaround a bug on windows - # plasTeX patch END */ - - # Extraction des données - data = { - "languages": set(), - "_doc": doc, - "_filename": filename, - } - for node in doc.allChildNodes: - if node.nodeName == "selectlanguage": - data["languages"].add(node.attributes['lang']) - if node.nodeName in ["beginsong", "sortassong"]: - data["titles"] = node.attributes["titles"] - data["args"] = node.attributes["args"] - - return data diff --git a/patacrep/plastex_chord.py b/patacrep/plastex_chord.py deleted file mode 100644 index dba9f36c..00000000 --- a/patacrep/plastex_chord.py +++ /dev/null @@ -1,181 +0,0 @@ -# -*- coding: utf-8 -*- - -r"""PlasTeX module to deal with chords commands of the songs LaTeX package - -Chords are set using commands like \[C]. This package parses those commands. -""" - -import logging - -import plasTeX -from plasTeX import Command, Environment, Macro -from plasTeX.Base.LaTeX.Math import BeginDisplayMath - -LOGGER = logging.getLogger(__name__) - -# Count the number of levels of 'verse' environment: IN_VERSE==1 means that we -# are in a 'verse' environment; IN_VERSE==2 means that we are in two included -# 'verse' environment, and so on. -IN_VERSE = 0 - -def wrap_displaymath(cls): - """Decorator to store the depth of 'verse' environment - - In the invoke() method classes, global variable IN_VERSE indicates the - number of 'verse' (or 'chorus' or 'verse*') environment we are in. 
- """ - - # pylint: disable=no-init,too-few-public-methods - class WrappedClass(cls): - """Wrapper to LaTeX environment updating IN_VERSE""" - blockType = True - # pylint: disable=super-on-old-class,global-statement,no-member - def invoke(self, tex): - """Wrapper to invoke() to update global variable IN_VERSE.""" - global IN_VERSE - if self.macroMode == Macro.MODE_BEGIN: - self.ownerDocument.context.push() - self.ownerDocument.context.catcode("\n", 13) - IN_VERSE += 1 - - # Removing spaces and line breaks at the beginning of verse - token = None - for token in tex: - if not match_space(token): - break - if token is not None: - tex.pushToken(token) - - else: - self.ownerDocument.context.pop() - IN_VERSE -= 1 - return super(WrappedClass, self).invoke(tex) - return WrappedClass - -# pylint: disable=too-many-public-methods -@wrap_displaymath -class Verse(Environment): - """LaTeX 'verse' environment""" - macroName = 'verse' - -# pylint: disable=too-many-public-methods -@wrap_displaymath -class VerseStar(Environment): - """LaTeX 'verse*' environment""" - macroName = 'verse*' - -# pylint: disable=too-many-public-methods -@wrap_displaymath -class Chorus(Environment): - """LaTeX 'chorus' environment""" - macroName = 'chorus' - -def match_space(token): - """Return True if token is a space or newline character.""" - return ( - isinstance(token, plasTeX.Tokenizer.Space) - or token.nodeName == 'active::\n' - ) - -def match_closing_square_bracket(token): - """Return True if token is character ']'.""" - return token.nodeType == token.TEXT_NODE and token.nodeValue == ']' - -def match_egroup(token): - """Return True if token is of type `egroup` (end of group).""" - return isinstance(token, plasTeX.Base.Text.egroup) #pylint: disable=no-member - -def match_space_or_chord(token): - """Return True if token is a space or a chord.""" - return match_space(token) or isinstance(token, Chord) - -def parse_until(tex, end=lambda x: False): - """Parse `tex` until condition `end`, or `egroup` is 
met. - - Arguments: - - tex: object to parse - - end: function taking a token in argument, and returning a boolean. - Parsing stops when this function returns True, or an `egroup` is met. - - Return: a tuple of two items (the list of parsed tokens, last token). This - is done so that caller can decide whether they want to discard it or not. - Last token can be None if everything has been parsed without the end - condition being met. - """ - parsed = [] - last = None - for token in tex: - if end(token) or match_egroup(token): - last = token - break - elif isinstance(token, plasTeX.Base.Text.bgroup): #pylint: disable=no-member - # pylint: disable=expression-not-assigned - [token.appendChild(item) for item in parse_until(tex, match_egroup)[0]] - parsed.append(token) - return (parsed, last) - - -class Chord(Command): - """Beginning of a chord notation""" - macroName = 'chord' - macroMode = Command.MODE_NONE - -class BeginChordOrDisplayMath(BeginDisplayMath): - r"""Wrapper to BeginDisplayMath - - In a 'verse' (or 'verse*' or 'chorus') environment, the '\[' macro - displays a chord. Otherwise, it corresponds to the usual LaTeX math mode. - This class calls the right method, depending on the inclusion of this - macro in a verse environment. - """ - macroName = '[' - - def invoke(self, tex): - """Process this macro""" - if IN_VERSE: - chord = Chord() - - self.ownerDocument.context.push() #pylint: disable=no-member - self.ownerDocument.context.catcode("&", 13) #pylint: disable=no-member - chord.setAttribute( - 'name', - parse_until(tex, match_closing_square_bracket)[0], - ) - self.ownerDocument.context.pop() #pylint: disable=no-member - - token = next(iter(tex), None) - if token is None: - return [chord] - elif match_space(token): - return [chord, token] - elif ( - isinstance(token, Verse) - or isinstance(token, VerseStar) - or isinstance(token, Chorus) - ): - LOGGER.warning(( - "{} L{}: '\\end{{verse}}' (or 'verse*' or 'chorus') not " - "allowed directly after '\\['." 
- ).format(tex.filename, tex.lineNumber) - ) - return [chord] - elif isinstance(token, Chord): - token.attributes['name'] = ( - chord.attributes['name'] - + token.attributes['name'] - ) - chord = token - return [chord] - elif isinstance(token, plasTeX.Base.Text.bgroup): #pylint: disable=no-member - # pylint: disable=expression-not-assigned - [chord.appendChild(item) for item in parse_until(tex)[0]] - return [chord] - else: - chord.appendChild(token) - (parsed, last) = parse_until(tex, match_space_or_chord) - # pylint: disable=expression-not-assigned - [chord.appendChild(item) for item in parsed] - return [chord, last] - else: - return super(BeginChordOrDisplayMath, self).invoke(tex) - diff --git a/patacrep/plastex_misc_commands.py b/patacrep/plastex_misc_commands.py deleted file mode 100644 index 4b4b2602..00000000 --- a/patacrep/plastex_misc_commands.py +++ /dev/null @@ -1,15 +0,0 @@ -# -*- coding: utf-8 -*- - -"""Quick management of random LaTeX commands.""" - -from plasTeX import Command - -# pylint: disable=invalid-name,too-many-public-methods -class songcolumns(Command): - r"""Manage `\songcolumns` command""" - args = '{num:int}' - -# pylint: disable=invalid-name,too-many-public-methods -class gtab(Command): - r"""Manage `\gta` command""" - args = '{chord:str}{diagram:str}' diff --git a/patacrep/plastex_patchedbabel.py b/patacrep/plastex_patchedbabel.py deleted file mode 100644 index e20d3086..00000000 --- a/patacrep/plastex_patchedbabel.py +++ /dev/null @@ -1,58 +0,0 @@ -# -*- coding: utf-8 -*- - -r"""Patch pour le paquet Babel de PlasTeX - -Un bug dans PlasTeX intervient lorsqu'on essaye d'analyser une commande LaTeX -\selectlanguage{}, que nous voulons utiliser ici. Un patch a été proposé aux -développeurs de plasTeX, et accepté. Mais il faut que cette correction arrive -en production. En attendant, nous utilisons cette version modifiée. 
- -Dés que la correction sera entrée en production, il faudra supprimer ce -fichier, et remplater l'occurence à "patchedbabel" par "babel" dans le fichier -"plastex.py". -La correction à suveiller est la révision -41a48c0c229dd46b69fb0e3720595000a71b17d8 du fichier babel.py : -https://github.com/tiarno/plastex/commit/41a48c0c229dd46b69fb0e3720595000a71b17d8 - -# Comment vérifier si on peut supprimer ce fichier ? - -1) Remplacer l'occurence à patchedbabel par babel dans le fichier plastex.py. - -2) Générer un fichier .tex à partir d'un fichier .sb, ce dernier faisant -intervenir des chansons dans lesquelles \selectlanguage est utilisé (par -exemple, "make -B matteo.tex" ou "make -B naheulbeuk.tex" pour des fichiers pas -trop gros. - -3) Si l'erreur suivante apparaît, c'est qu'il faut encore attendre. - -> Traceback (most recent call last): -> [...] -> File "/usr/lib/pymodules/python2.7/plasTeX/Packages/babel.py", line 18, in -> invoke context.loadLanguage(self.attributes['lang'], self.ownerDocument) -> NameError: global name 'context' is not defined - -3 bis) Si elle n'apparait pas : youpi ! Supprimez ce fichier ! - -# Contact et commentaires - -Mercredi 27 mars 2013 -Louis - -""" - -from plasTeX import Command - -# pylint: disable=invalid-name,too-many-public-methods -class selectlanguage(Command): - """Patch of vanilla selectlanguage class. - - See module docstring for more information.""" - args = 'lang:str' - - def invoke(self, tex): - res = Command.invoke(self, tex) - self.ownerDocument.context.loadLanguage( # pylint: disable=no-member - self.attributes['lang'], - self.ownerDocument - ) - return res diff --git a/patacrep/plastex_songs.py b/patacrep/plastex_songs.py deleted file mode 100644 index 5bf5041a..00000000 --- a/patacrep/plastex_songs.py +++ /dev/null @@ -1,70 +0,0 @@ -# -*- coding: utf-8 -*- - -"""Module to process song LaTeX environment. 
-""" - -import plasTeX - -from patacrep import encoding -from patacrep.plastex import process_unbr_spaces - - -def split_linebreak(texlist): - """Return a list of alternative title. - - A title can be defined with alternative names : - - A real name\\ - Alternative name\\ - Another alternative name - - This function takes the object representation of a list of titles, and - return a list of titles. - """ - return_list = [] - current = [] - for token in texlist: - if token.nodeName == '\\': - return_list.append(current) - current = [] - else: - current.append(encoding.basestring2unicode( - process_unbr_spaces(token).textContent - )) - if current: - return_list.append(current) - return return_list - - -class beginsong(plasTeX.Command): # pylint: disable=invalid-name,too-many-public-methods - """Class parsing the LaTeX song environment.""" - - args = '{titles}[args:dict]' - - def invoke(self, tex): - """Parse an occurence of song environment.""" - - plasTeX.Command.invoke(self, tex) - - # Parsing title - titles = [] - for tokens in split_linebreak(self.attributes['titles'].allChildNodes): - titles.append("".join(tokens)) - self.attributes['titles'] = encoding.list2unicode(titles) - - # Parsing keyval arguments - args = {} - for (key, val) in self.attributes['args'].iteritems(): - if isinstance(val, plasTeX.DOM.Element): - args[key] = encoding.basestring2unicode( - process_unbr_spaces(val).textContent - ) - elif isinstance(val, basestring): - args[key] = encoding.basestring2unicode(val) - else: - args[key] = unicode(val) - self.attributes['args'] = args - -class sortassong(beginsong): # pylint: disable=invalid-name,too-many-public-methods - r"""Treat '\sortassong' exactly as if it were a '\beginsong'.""" - pass diff --git a/patacrep/songs.py b/patacrep/songs.py index 961d9f37..1cb69e8c 100644 --- a/patacrep/songs.py +++ b/patacrep/songs.py @@ -14,7 +14,7 @@ except ImportError: import pickle from patacrep.authors import processauthors -from patacrep.plastex import 
parsetex +from patacrep.latex import parsetex LOGGER = logging.getLogger(__name__) @@ -113,7 +113,7 @@ class Song(object): self.fullpath )) - # Data extraction from the song with plastex + # Data extraction from the latex song data = parsetex(self.fullpath) self.titles = data['titles'] self.datadir = datadir diff --git a/readme.md b/readme.md index c1b7fd8e..a4ae03a4 100644 --- a/readme.md +++ b/readme.md @@ -12,8 +12,7 @@ is precised in the header. # Python version -Patacrep is compatible with Python 2.7 (no Python3 since [one of the -library](http://plastex.sourceforge.net/) we are using is not). +Patacrep is compatible with Python 2.7 (no Python3 yet). # Download diff --git a/setup.py b/setup.py index a71f9dff..6936bd35 100755 --- a/setup.py +++ b/setup.py @@ -24,11 +24,11 @@ SETUP = {"name": 'patacrep', "scripts": ['songbook'], "requires": [ "argparse", "codecs", "distutils", "fnmatch", "glob", "json", - "locale", "logging", "os", "plasTeX", "re", "subprocess", "sys", + "locale", "logging", "os", "re", "subprocess", "sys", "textwrap", "unidecode", "jinja2", "chardet" ], "install_requires": [ - "argparse", "plasTeX", "unidecode", "jinja2", "chardet" + "argparse", "unidecode", "jinja2", "chardet" ], "package_data": {'patacrep': [ 'data/latex/*', 'data/templates/*', diff --git a/stdeb.cfg b/stdeb.cfg index 8c33d4a4..d654b578 100644 --- a/stdeb.cfg +++ b/stdeb.cfg @@ -1,5 +1,5 @@ [DEFAULT] -Depends: python-jinja2, python-pkg-resources, python-plastex, python-chardet, python-unidecode, texlive-latex-base, texlive-latex-recommended, texlive-latex-extra, lilypond, texlive-fonts-recommended +Depends: python-jinja2, python-pkg-resources, python-chardet, python-unidecode, texlive-latex-base, texlive-latex-recommended, texlive-latex-extra, lilypond, texlive-fonts-recommended Recommends: texlive-lang-english, texlive-lang-french, texlive-lang-portuguese, texlive-lang-spanish, texlive-fonts-extra XS-Python-Version: >=2.7 Section: tex