From e41590c4097d1e9915e6ba174557062f0a5bde2c Mon Sep 17 00:00:00 2001 From: Louis Date: Fri, 26 Sep 2014 22:24:45 +0200 Subject: [PATCH 01/26] Suppression de plasTeX --- Requirements.txt | 1 - patacrep/index.py | 4 +- patacrep/latex/__init__.py | 23 ++++ patacrep/plastex.py | 117 ------------------- patacrep/plastex_chord.py | 181 ------------------------------ patacrep/plastex_misc_commands.py | 15 --- patacrep/plastex_patchedbabel.py | 58 ---------- patacrep/plastex_songs.py | 70 ------------ patacrep/songs.py | 4 +- readme.md | 3 +- setup.py | 4 +- stdeb.cfg | 2 +- 12 files changed, 31 insertions(+), 451 deletions(-) create mode 100644 patacrep/latex/__init__.py delete mode 100644 patacrep/plastex.py delete mode 100644 patacrep/plastex_chord.py delete mode 100644 patacrep/plastex_misc_commands.py delete mode 100644 patacrep/plastex_patchedbabel.py delete mode 100644 patacrep/plastex_songs.py diff --git a/Requirements.txt b/Requirements.txt index 9678473f..4aa4a442 100644 --- a/Requirements.txt +++ b/Requirements.txt @@ -2,4 +2,3 @@ Jinja2==2.7.3 argparse==1.2.1 chardet==2.2.1 unidecode>=0.04.16 -https://github.com/tiarno/plastex/archive/master.zip \ No newline at end of file diff --git a/patacrep/index.py b/patacrep/index.py index 470bb67e..c1b74806 100644 --- a/patacrep/index.py +++ b/patacrep/index.py @@ -12,7 +12,7 @@ import re from patacrep import authors from patacrep import encoding -from patacrep.plastex import simpleparse +from patacrep.latex import latex2unicode EOL = u"\n" @@ -112,7 +112,7 @@ class Index(object): if not key in self.data[first].keys(): self.data[first][key] = { 'sortingkey': [ - encoding.unidecode(simpleparse(item)).lower() + encoding.unidecode(latex2unicode(item)).lower() for item in key ], 'entries': [], diff --git a/patacrep/latex/__init__.py b/patacrep/latex/__init__.py new file mode 100644 index 00000000..f284c094 --- /dev/null +++ b/patacrep/latex/__init__.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- + +"""Very simple LaTeX 
parser""" + +def latex2unicode(string): + """Convert LaTeX string to unicode""" + return u"TODO" + +def parsetex(path): + """Return a dictonary of data read from the latex file `path`. + + This file is a drop in replacement for an old function. Elle ne devrait pas + apparaitre telle quelle dans la version finale, une fois que + https://github.com/patacrep/patacrep/issues/64 aura été pris en compte. + + TODO + """ + return { + 'titles': ["TODO"], + 'args': {}, + 'languages': ['french'], + } + diff --git a/patacrep/plastex.py b/patacrep/plastex.py deleted file mode 100644 index cdaa3a64..00000000 --- a/patacrep/plastex.py +++ /dev/null @@ -1,117 +0,0 @@ -# -*- coding: utf-8 -*- - -"""PlasTeX module to process song files.""" - -from plasTeX.TeX import TeX -from plasTeX.Base.LaTeX import Sentences - -import locale -import os -import sys - -from patacrep import encoding - -def process_unbr_spaces(node): - #pylint: disable=line-too-long - r"""Replace '~' and '\ ' in node by nodes that - will be rendered as unbreakable space. - - Return node object for convenience. - - This function is a workaround to a bug that has been solved since: - - https://github.com/tiarno/plastex/commit/76bb78d5fbaac48e68025a3545286cc63cb4e7ad - - https://github.com/tiarno/plastex/commit/682a0d223b99d6b949bacf1c974d24dc9bb1d18e - - It can be deleted once this bug has been merged in production version of - PlasTeX. - """ - if (type(node) == Sentences.InterWordSpace or - (type(node) == Sentences.NoLineBreak and node.source == '~ ')): - node.unicode = unichr(160) - for child in node.childNodes: - process_unbr_spaces(child) - - return node - - -def simpleparse(text): - """Parse a simple LaTeX string. 
- """ - tex = TeX() - tex.disableLogging() - tex.input(text) - doc = tex.parse() - return process_unbr_spaces(doc.textContent) - - -class SongParser(object): - """Analyseur syntaxique de fichiers .sg""" - - @staticmethod - def create_tex(): - """Create a TeX object, ready to parse a tex file.""" - tex = TeX() - tex.disableLogging() - tex.ownerDocument.context.loadBaseMacros() - sys.path.append(os.path.dirname(__file__)) - tex.ownerDocument.context.loadPackage(tex, "plastex_patchedbabel") - tex.ownerDocument.context.loadPackage(tex, "plastex_chord") - tex.ownerDocument.context.loadPackage(tex, "plastex_songs") - tex.ownerDocument.context.loadPackage(tex, "plastex_misc_commands") - sys.path.pop() - return tex - - @classmethod - def parse(cls, filename): - """Parse a TeX file, and return its plasTeX representation.""" - tex = cls.create_tex() - tex.input(encoding.open_read(filename, 'r')) - return tex.parse() - - -def parsetex(filename): - r"""Analyse syntaxique d'un fichier .sg - - Renvoie un dictionnaire contenant les métadonnées lues dans le fichier. Les - clefs sont : - - languages: l'ensemble des langages utilisés (recherche des - \selectlanguages{}) ; - - titles: la liste des titres ; - - args: le dictionnaire des paramètres passés à \beginsong. - """ - # /* BEGIN plasTeX patch - # The following lines, and another line a few lines later, are used to - # circumvent a plasTeX bug. It has been reported and corrected : - # https://github.com/tiarno/plastex/commit/8f4e5a385f3cb6a04d5863f731ce24a7e856f2a4 - # To see if you can delete those lines, set your LC_TIME locale to French, - # during a month containing diacritics (e.g. Février), and run songbook. If - # no plasTeX bug appears, it is safe to remove those lines. 
- oldlocale = locale.getlocale(locale.LC_TIME) - locale.setlocale(locale.LC_TIME, 'C') - # plasTeX patch END */ - - # Analyse syntaxique - doc = SongParser.parse(filename) - - # /* BEGIN plasTeX patch - if oldlocale[0] and oldlocale[1]: - try: - locale.setlocale(locale.LC_TIME, "%s.%s" % oldlocale) - except locale.Error: - pass # Workaround a bug on windows - # plasTeX patch END */ - - # Extraction des données - data = { - "languages": set(), - "_doc": doc, - "_filename": filename, - } - for node in doc.allChildNodes: - if node.nodeName == "selectlanguage": - data["languages"].add(node.attributes['lang']) - if node.nodeName in ["beginsong", "sortassong"]: - data["titles"] = node.attributes["titles"] - data["args"] = node.attributes["args"] - - return data diff --git a/patacrep/plastex_chord.py b/patacrep/plastex_chord.py deleted file mode 100644 index dba9f36c..00000000 --- a/patacrep/plastex_chord.py +++ /dev/null @@ -1,181 +0,0 @@ -# -*- coding: utf-8 -*- - -r"""PlasTeX module to deal with chords commands of the songs LaTeX package - -Chords are set using commands like \[C]. This package parses those commands. -""" - -import logging - -import plasTeX -from plasTeX import Command, Environment, Macro -from plasTeX.Base.LaTeX.Math import BeginDisplayMath - -LOGGER = logging.getLogger(__name__) - -# Count the number of levels of 'verse' environment: IN_VERSE==1 means that we -# are in a 'verse' environment; IN_VERSE==2 means that we are in two included -# 'verse' environment, and so on. -IN_VERSE = 0 - -def wrap_displaymath(cls): - """Decorator to store the depth of 'verse' environment - - In the invoke() method classes, global variable IN_VERSE indicates the - number of 'verse' (or 'chorus' or 'verse*') environment we are in. 
- """ - - # pylint: disable=no-init,too-few-public-methods - class WrappedClass(cls): - """Wrapper to LaTeX environment updating IN_VERSE""" - blockType = True - # pylint: disable=super-on-old-class,global-statement,no-member - def invoke(self, tex): - """Wrapper to invoke() to update global variable IN_VERSE.""" - global IN_VERSE - if self.macroMode == Macro.MODE_BEGIN: - self.ownerDocument.context.push() - self.ownerDocument.context.catcode("\n", 13) - IN_VERSE += 1 - - # Removing spaces and line breaks at the beginning of verse - token = None - for token in tex: - if not match_space(token): - break - if token is not None: - tex.pushToken(token) - - else: - self.ownerDocument.context.pop() - IN_VERSE -= 1 - return super(WrappedClass, self).invoke(tex) - return WrappedClass - -# pylint: disable=too-many-public-methods -@wrap_displaymath -class Verse(Environment): - """LaTeX 'verse' environment""" - macroName = 'verse' - -# pylint: disable=too-many-public-methods -@wrap_displaymath -class VerseStar(Environment): - """LaTeX 'verse*' environment""" - macroName = 'verse*' - -# pylint: disable=too-many-public-methods -@wrap_displaymath -class Chorus(Environment): - """LaTeX 'chorus' environment""" - macroName = 'chorus' - -def match_space(token): - """Return True if token is a space or newline character.""" - return ( - isinstance(token, plasTeX.Tokenizer.Space) - or token.nodeName == 'active::\n' - ) - -def match_closing_square_bracket(token): - """Return True if token is character ']'.""" - return token.nodeType == token.TEXT_NODE and token.nodeValue == ']' - -def match_egroup(token): - """Return True if token is of type `egroup` (end of group).""" - return isinstance(token, plasTeX.Base.Text.egroup) #pylint: disable=no-member - -def match_space_or_chord(token): - """Return True if token is a space or a chord.""" - return match_space(token) or isinstance(token, Chord) - -def parse_until(tex, end=lambda x: False): - """Parse `tex` until condition `end`, or `egroup` is 
met. - - Arguments: - - tex: object to parse - - end: function taking a token in argument, and returning a boolean. - Parsing stops when this function returns True, or an `egroup` is met. - - Return: a tuple of two items (the list of parsed tokens, last token). This - is done so that caller can decide whether they want to discard it or not. - Last token can be None if everything has been parsed without the end - condition being met. - """ - parsed = [] - last = None - for token in tex: - if end(token) or match_egroup(token): - last = token - break - elif isinstance(token, plasTeX.Base.Text.bgroup): #pylint: disable=no-member - # pylint: disable=expression-not-assigned - [token.appendChild(item) for item in parse_until(tex, match_egroup)[0]] - parsed.append(token) - return (parsed, last) - - -class Chord(Command): - """Beginning of a chord notation""" - macroName = 'chord' - macroMode = Command.MODE_NONE - -class BeginChordOrDisplayMath(BeginDisplayMath): - r"""Wrapper to BeginDisplayMath - - In a 'verse' (or 'verse*' or 'chorus') environment, the '\[' macro - displays a chord. Otherwise, it corresponds to the usual LaTeX math mode. - This class calls the right method, depending on the inclusion of this - macro in a verse environment. - """ - macroName = '[' - - def invoke(self, tex): - """Process this macro""" - if IN_VERSE: - chord = Chord() - - self.ownerDocument.context.push() #pylint: disable=no-member - self.ownerDocument.context.catcode("&", 13) #pylint: disable=no-member - chord.setAttribute( - 'name', - parse_until(tex, match_closing_square_bracket)[0], - ) - self.ownerDocument.context.pop() #pylint: disable=no-member - - token = next(iter(tex), None) - if token is None: - return [chord] - elif match_space(token): - return [chord, token] - elif ( - isinstance(token, Verse) - or isinstance(token, VerseStar) - or isinstance(token, Chorus) - ): - LOGGER.warning(( - "{} L{}: '\\end{{verse}}' (or 'verse*' or 'chorus') not " - "allowed directly after '\\['." 
- ).format(tex.filename, tex.lineNumber) - ) - return [chord] - elif isinstance(token, Chord): - token.attributes['name'] = ( - chord.attributes['name'] - + token.attributes['name'] - ) - chord = token - return [chord] - elif isinstance(token, plasTeX.Base.Text.bgroup): #pylint: disable=no-member - # pylint: disable=expression-not-assigned - [chord.appendChild(item) for item in parse_until(tex)[0]] - return [chord] - else: - chord.appendChild(token) - (parsed, last) = parse_until(tex, match_space_or_chord) - # pylint: disable=expression-not-assigned - [chord.appendChild(item) for item in parsed] - return [chord, last] - else: - return super(BeginChordOrDisplayMath, self).invoke(tex) - diff --git a/patacrep/plastex_misc_commands.py b/patacrep/plastex_misc_commands.py deleted file mode 100644 index 4b4b2602..00000000 --- a/patacrep/plastex_misc_commands.py +++ /dev/null @@ -1,15 +0,0 @@ -# -*- coding: utf-8 -*- - -"""Quick management of random LaTeX commands.""" - -from plasTeX import Command - -# pylint: disable=invalid-name,too-many-public-methods -class songcolumns(Command): - r"""Manage `\songcolumns` command""" - args = '{num:int}' - -# pylint: disable=invalid-name,too-many-public-methods -class gtab(Command): - r"""Manage `\gta` command""" - args = '{chord:str}{diagram:str}' diff --git a/patacrep/plastex_patchedbabel.py b/patacrep/plastex_patchedbabel.py deleted file mode 100644 index e20d3086..00000000 --- a/patacrep/plastex_patchedbabel.py +++ /dev/null @@ -1,58 +0,0 @@ -# -*- coding: utf-8 -*- - -r"""Patch pour le paquet Babel de PlasTeX - -Un bug dans PlasTeX intervient lorsqu'on essaye d'analyser une commande LaTeX -\selectlanguage{}, que nous voulons utiliser ici. Un patch a été proposé aux -développeurs de plasTeX, et accepté. Mais il faut que cette correction arrive -en production. En attendant, nous utilisons cette version modifiée. 
- -Dés que la correction sera entrée en production, il faudra supprimer ce -fichier, et remplater l'occurence à "patchedbabel" par "babel" dans le fichier -"plastex.py". -La correction à suveiller est la révision -41a48c0c229dd46b69fb0e3720595000a71b17d8 du fichier babel.py : -https://github.com/tiarno/plastex/commit/41a48c0c229dd46b69fb0e3720595000a71b17d8 - -# Comment vérifier si on peut supprimer ce fichier ? - -1) Remplacer l'occurence à patchedbabel par babel dans le fichier plastex.py. - -2) Générer un fichier .tex à partir d'un fichier .sb, ce dernier faisant -intervenir des chansons dans lesquelles \selectlanguage est utilisé (par -exemple, "make -B matteo.tex" ou "make -B naheulbeuk.tex" pour des fichiers pas -trop gros. - -3) Si l'erreur suivante apparaît, c'est qu'il faut encore attendre. - -> Traceback (most recent call last): -> [...] -> File "/usr/lib/pymodules/python2.7/plasTeX/Packages/babel.py", line 18, in -> invoke context.loadLanguage(self.attributes['lang'], self.ownerDocument) -> NameError: global name 'context' is not defined - -3 bis) Si elle n'apparait pas : youpi ! Supprimez ce fichier ! - -# Contact et commentaires - -Mercredi 27 mars 2013 -Louis - -""" - -from plasTeX import Command - -# pylint: disable=invalid-name,too-many-public-methods -class selectlanguage(Command): - """Patch of vanilla selectlanguage class. - - See module docstring for more information.""" - args = 'lang:str' - - def invoke(self, tex): - res = Command.invoke(self, tex) - self.ownerDocument.context.loadLanguage( # pylint: disable=no-member - self.attributes['lang'], - self.ownerDocument - ) - return res diff --git a/patacrep/plastex_songs.py b/patacrep/plastex_songs.py deleted file mode 100644 index 5bf5041a..00000000 --- a/patacrep/plastex_songs.py +++ /dev/null @@ -1,70 +0,0 @@ -# -*- coding: utf-8 -*- - -"""Module to process song LaTeX environment. 
-""" - -import plasTeX - -from patacrep import encoding -from patacrep.plastex import process_unbr_spaces - - -def split_linebreak(texlist): - """Return a list of alternative title. - - A title can be defined with alternative names : - - A real name\\ - Alternative name\\ - Another alternative name - - This function takes the object representation of a list of titles, and - return a list of titles. - """ - return_list = [] - current = [] - for token in texlist: - if token.nodeName == '\\': - return_list.append(current) - current = [] - else: - current.append(encoding.basestring2unicode( - process_unbr_spaces(token).textContent - )) - if current: - return_list.append(current) - return return_list - - -class beginsong(plasTeX.Command): # pylint: disable=invalid-name,too-many-public-methods - """Class parsing the LaTeX song environment.""" - - args = '{titles}[args:dict]' - - def invoke(self, tex): - """Parse an occurence of song environment.""" - - plasTeX.Command.invoke(self, tex) - - # Parsing title - titles = [] - for tokens in split_linebreak(self.attributes['titles'].allChildNodes): - titles.append("".join(tokens)) - self.attributes['titles'] = encoding.list2unicode(titles) - - # Parsing keyval arguments - args = {} - for (key, val) in self.attributes['args'].iteritems(): - if isinstance(val, plasTeX.DOM.Element): - args[key] = encoding.basestring2unicode( - process_unbr_spaces(val).textContent - ) - elif isinstance(val, basestring): - args[key] = encoding.basestring2unicode(val) - else: - args[key] = unicode(val) - self.attributes['args'] = args - -class sortassong(beginsong): # pylint: disable=invalid-name,too-many-public-methods - r"""Treat '\sortassong' exactly as if it were a '\beginsong'.""" - pass diff --git a/patacrep/songs.py b/patacrep/songs.py index 961d9f37..1cb69e8c 100644 --- a/patacrep/songs.py +++ b/patacrep/songs.py @@ -14,7 +14,7 @@ except ImportError: import pickle from patacrep.authors import processauthors -from patacrep.plastex import 
parsetex +from patacrep.latex import parsetex LOGGER = logging.getLogger(__name__) @@ -113,7 +113,7 @@ class Song(object): self.fullpath )) - # Data extraction from the song with plastex + # Data extraction from the latex song data = parsetex(self.fullpath) self.titles = data['titles'] self.datadir = datadir diff --git a/readme.md b/readme.md index c1b7fd8e..a4ae03a4 100644 --- a/readme.md +++ b/readme.md @@ -12,8 +12,7 @@ is precised in the header. # Python version -Patacrep is compatible with Python 2.7 (no Python3 since [one of the -library](http://plastex.sourceforge.net/) we are using is not). +Patacrep is compatible with Python 2.7 (no Python3 yet). # Download diff --git a/setup.py b/setup.py index a71f9dff..6936bd35 100755 --- a/setup.py +++ b/setup.py @@ -24,11 +24,11 @@ SETUP = {"name": 'patacrep', "scripts": ['songbook'], "requires": [ "argparse", "codecs", "distutils", "fnmatch", "glob", "json", - "locale", "logging", "os", "plasTeX", "re", "subprocess", "sys", + "locale", "logging", "os", "re", "subprocess", "sys", "textwrap", "unidecode", "jinja2", "chardet" ], "install_requires": [ - "argparse", "plasTeX", "unidecode", "jinja2", "chardet" + "argparse", "unidecode", "jinja2", "chardet" ], "package_data": {'patacrep': [ 'data/latex/*', 'data/templates/*', diff --git a/stdeb.cfg b/stdeb.cfg index 8c33d4a4..d654b578 100644 --- a/stdeb.cfg +++ b/stdeb.cfg @@ -1,5 +1,5 @@ [DEFAULT] -Depends: python-jinja2, python-pkg-resources, python-plastex, python-chardet, python-unidecode, texlive-latex-base, texlive-latex-recommended, texlive-latex-extra, lilypond, texlive-fonts-recommended +Depends: python-jinja2, python-pkg-resources, python-chardet, python-unidecode, texlive-latex-base, texlive-latex-recommended, texlive-latex-extra, lilypond, texlive-fonts-recommended Recommends: texlive-lang-english, texlive-lang-french, texlive-lang-portuguese, texlive-lang-spanish, texlive-fonts-extra XS-Python-Version: >=2.7 Section: tex From 
2a3bee175ee635cf8d5fc30e0dc8174fd6f6accd Mon Sep 17 00:00:00 2001 From: Louis Date: Sat, 27 Sep 2014 09:43:29 +0200 Subject: [PATCH 02/26] Converted to Python3 Not fully tested: base case seems to work --- patacrep/build.py | 4 ++-- patacrep/content/__init__.py | 6 +++--- patacrep/content/cwd.py | 2 +- patacrep/content/section.py | 6 +++--- patacrep/content/song.py | 8 ++++---- patacrep/content/songsection.py | 4 ++-- patacrep/content/sorted.py | 10 +++++----- patacrep/content/tex.py | 4 ++-- patacrep/encoding.py | 29 +---------------------------- patacrep/errors.py | 5 +---- patacrep/files.py | 2 +- patacrep/index.py | 33 +++++++++++++++++---------------- patacrep/latex/__init__.py | 2 +- patacrep/songs.py | 19 ++++--------------- patacrep/templates.py | 16 ++++++++-------- readme.md | 10 +++++----- setup.py | 2 +- songbook | 6 +++--- stdeb.cfg | 4 ++-- 19 files changed, 66 insertions(+), 106 deletions(-) diff --git a/patacrep/build.py b/patacrep/build.py index c3a414d4..9f79bb6a 100644 --- a/patacrep/build.py +++ b/patacrep/build.py @@ -57,7 +57,7 @@ class Songbook(object): def _set_datadir(self): """Set the default values for datadir""" try: - if isinstance(self.config['datadir'], basestring): + if isinstance(self.config['datadir'], str): self.config['datadir'] = [self.config['datadir']] except KeyError: # No datadir in the raw_songbook self.config['datadir'] = [os.path.abspath('.')] @@ -213,7 +213,7 @@ class SongbookBuilder(object): log = '' line = process.stdout.readline() while line: - log += line + log += str(line) line = process.stdout.readline() LOGGER.debug(log) diff --git a/patacrep/content/__init__.py b/patacrep/content/__init__.py index 2f4fea0e..a53d4d7e 100755 --- a/patacrep/content/__init__.py +++ b/patacrep/content/__init__.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """Content plugin management. 
@@ -225,11 +225,11 @@ def process_content(content, config=None): """ contentlist = [] plugins = load_plugins(config) - keyword_re = re.compile(ur'^ *(?P\w*) *(\((?P.*)\))? *$') + keyword_re = re.compile(r'^ *(?P\w*) *(\((?P.*)\))? *$') if not content: content = [["song"]] for elem in content: - if isinstance(elem, basestring): + if isinstance(elem, str): elem = ["song", elem] if len(content) == 0: content = ["song"] diff --git a/patacrep/content/cwd.py b/patacrep/content/cwd.py index 338adb76..5e55d68d 100755 --- a/patacrep/content/cwd.py +++ b/patacrep/content/cwd.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """Change base directory before importing songs.""" diff --git a/patacrep/content/section.py b/patacrep/content/section.py index 2bde2eb2..96215f68 100755 --- a/patacrep/content/section.py +++ b/patacrep/content/section.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """Allow LaTeX sections (starred or not) as content of a songbook.""" @@ -26,9 +26,9 @@ class Section(Content): def render(self, __context): if self.short is None: - return ur'\{}{{{}}}'.format(self.keyword, self.name) + return r'\{}{{{}}}'.format(self.keyword, self.name) else: - return ur'\{}[{}]{{{}}}'.format(self.keyword, self.short, self.name) + return r'\{}[{}]{{{}}}'.format(self.keyword, self.short, self.name) #pylint: disable=unused-argument def parse(keyword, argument, contentlist, config): diff --git a/patacrep/content/song.py b/patacrep/content/song.py index b83fd134..02acf463 100755 --- a/patacrep/content/song.py +++ b/patacrep/content/song.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """Plugin to include songs to the songbook.""" @@ -26,15 +26,15 @@ class SongRenderer(Content, Song): indexes = context.resolve("indexes") if isinstance(indexes, jinja2.runtime.Undefined): indexes = "" - return ur'\begin{songs}{%s}' % indexes + return r'\begin{songs}{%s}' % 
indexes def end_block(self, __context): """Return the string to end a block.""" - return ur'\end{songs}' + return r'\end{songs}' def render(self, context): """Return the string that will render the song.""" - return ur'\input{{{}}}'.format(files.path2posix( + return r'\input{{{}}}'.format(files.path2posix( files.relpath( self.fullpath, os.path.dirname(context['filename']) diff --git a/patacrep/content/songsection.py b/patacrep/content/songsection.py index b4c9d446..07153591 100755 --- a/patacrep/content/songsection.py +++ b/patacrep/content/songsection.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """Allow 'songchapter' and 'songsection' as content of a songbook.""" @@ -19,7 +19,7 @@ class SongSection(Content): def render(self, __context): """Render this section or chapter.""" - return ur'\{}{{{}}}'.format(self.keyword, self.name) + return r'\{}{{{}}}'.format(self.keyword, self.name) #pylint: disable=unused-argument def parse(keyword, argument, contentlist, config): diff --git a/patacrep/content/sorted.py b/patacrep/content/sorted.py index e9a5e677..651bd07e 100755 --- a/patacrep/content/sorted.py +++ b/patacrep/content/sorted.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """Sorted list of songs. @@ -9,9 +9,9 @@ to a songbook. import locale import logging +import unidecode from patacrep import files -from patacrep import encoding from patacrep.content import ContentError from patacrep.content.song import OnlySongsError, process_songs @@ -27,11 +27,11 @@ def normalize_string(string): - lower case; - passed through locale.strxfrm(). 
""" - return locale.strxfrm(encoding.unidecode(string.lower().strip())) + return locale.strxfrm(unidecode.unidecode(string.lower().strip())) def normalize_field(field): """Return a normalized field, it being a string or a list of strings.""" - if isinstance(field, basestring): + if isinstance(field, str): return normalize_string(field) elif isinstance(field, list) or isinstance(field, tuple): return [normalize_field(string) for string in field] @@ -63,7 +63,7 @@ def key_generator(sort): files.relpath(song.fullpath), ) ) - field = u"" + field = "" songkey.append(normalize_field(field)) return songkey return ordered_song_keys diff --git a/patacrep/content/tex.py b/patacrep/content/tex.py index 5f80fcfc..38593f38 100755 --- a/patacrep/content/tex.py +++ b/patacrep/content/tex.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """Include LaTeX raw code in the songbook.""" @@ -18,7 +18,7 @@ class LaTeX(Content): self.filename = filename def render(self, context): - return ur'\input{{{}}}'.format(files.relpath( + return r'\input{{{}}}'.format(files.relpath( self.filename, os.path.dirname(context['filename']), )) diff --git a/patacrep/encoding.py b/patacrep/encoding.py index 8ba7de61..ca917295 100644 --- a/patacrep/encoding.py +++ b/patacrep/encoding.py @@ -5,7 +5,6 @@ import codecs import chardet import logging -from unidecode import unidecode as unidecode_orig LOGGER = logging.getLogger(__name__) @@ -17,32 +16,6 @@ def open_read(filename, mode='r'): return codecs.open( filename, mode=mode, - encoding=chardet.detect(open(filename, "r").read())['encoding'], + encoding=chardet.detect(open(filename, 'rb').read())['encoding'], errors='replace', ) - -def basestring2unicode(arg): - """Return the unicode version of the argument, guessing original encoding. 
- """ - if isinstance(arg, unicode): - return arg - elif isinstance(arg, basestring): - return arg.decode( - encoding=chardet.detect(arg)['encoding'], - errors='replace', - ) - else: - LOGGER.warning("Cannot decode string {}. Ignored.".format(str(arg))) - return "" - -def list2unicode(arg): - """Return the unicode version of the argument, guessing original encoding. - - Argument is a list of strings. If an item is of another type, it is - silently ignored (an empty string is returned). - """ - return [basestring2unicode(item) for item in arg] - -def unidecode(arg): - """Return a unicode version of a unidecoded string.""" - return unicode(unidecode_orig(arg)) diff --git a/patacrep/errors.py b/patacrep/errors.py index ff3d210d..a065c322 100644 --- a/patacrep/errors.py +++ b/patacrep/errors.py @@ -17,10 +17,7 @@ class SBFileError(SongbookError): self.message = message def __str__(self): - if self.message is None: - return str(self.original) - else: - return self.message + return self.message class TemplateError(SongbookError): """Error during template generation""" diff --git a/patacrep/files.py b/patacrep/files.py index 48928a00..6e02d481 100644 --- a/patacrep/files.py +++ b/patacrep/files.py @@ -16,7 +16,7 @@ def recursive_find(root_directory, pattern): matches = [] with chdir(root_directory): - for root, _, filenames in os.walk(os.curdir): + for root, __ignored, filenames in os.walk(os.curdir): for filename in fnmatch.filter(filenames, pattern): matches.append(os.path.join(root, filename)) return matches diff --git a/patacrep/index.py b/patacrep/index.py index c1b74806..ac927569 100644 --- a/patacrep/index.py +++ b/patacrep/index.py @@ -8,17 +8,18 @@ from a file generated by the latex compilation of the songbook (.sxd). 
""" import locale +import unidecode import re from patacrep import authors from patacrep import encoding from patacrep.latex import latex2unicode -EOL = u"\n" +EOL = "\n" # Pattern set to ignore latex command in title prefix -KEYWORD_PATTERN = re.compile(ur"^%(\w+)\s?(.*)$", re.LOCALE) -FIRST_LETTER_PATTERN = re.compile(ur"^(?:\{?\\\w+\}?)*[^\w]*(\w)", re.LOCALE) +KEYWORD_PATTERN = re.compile(r"^%(\w+)\s?(.*)$", re.LOCALE) +FIRST_LETTER_PATTERN = re.compile(r"^(?:\{?\\\w+\}?)*[^\w]*(\w)", re.LOCALE) def process_sxd(filename): @@ -77,13 +78,13 @@ class Index(object): except AttributeError: # classify as number all the non letter characters letter = "0" - if re.match(ur'\d', letter): + if re.match(r'\d', letter): letter = '0-9' return letter.upper() def add_keyword(self, key, word): """Add 'word' to self.keywords[key].""" - if not key in self.keywords.keys(): + if not key in self.keywords: self.keywords[key] = [] self.keywords[key].append(word) @@ -93,7 +94,7 @@ class Index(object): if 'prefix' in self.keywords: for prefix in self.keywords['prefix']: self.prefix_patterns.append(re.compile( - ur"^({prefix})(\b|\\)(\s*.*)$".format(prefix=prefix), + r"^({prefix})(\b|\\)(\s*.*)$".format(prefix=prefix), re.LOCALE )) @@ -107,12 +108,12 @@ class Index(object): similar method with processing. 
""" first = self.get_first_letter(key[0]) - if not first in self.data.keys(): + if not first in self.data: self.data[first] = dict() - if not key in self.data[first].keys(): + if not key in self.data[first]: self.data[first][key] = { 'sortingkey': [ - encoding.unidecode(latex2unicode(item)).lower() + unidecode.unidecode(latex2unicode(item)).lower() for item in key ], 'entries': [], @@ -150,26 +151,26 @@ class Index(object): @staticmethod def ref_to_str(ref): """Return the LaTeX code corresponding to the reference.""" - return ur'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref) + return r'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref) def key_to_str(self, key): """Convert the key (title or author) to the LaTeX command rendering it. """ if self.indextype == "AUTHOR": - return ur"\indexauthor{{{first}}}{{{last}}}".format( + return r"\indexauthor{{{first}}}{{{last}}}".format( first=key[1], last=key[0], ) if self.indextype == "TITLE": - return ur"\indextitle{{{0[1]}}}{{{0[0]}}}".format(key) + return r"\indextitle{{{0[1]}}}{{{0[0]}}}".format(key) def entry_to_str(self, key, entry): """Return the LaTeX code corresponding to the entry.""" - return unicode(ur'\idxentry{{{0}}}{{{1}}}' + EOL).format( + return (r'\idxentry{{{0}}}{{{1}}}' + EOL).format( self.key_to_str(key), - ur'\\'.join([self.ref_to_str(ref) for ref in entry]), + r'\\'.join([self.ref_to_str(ref) for ref in entry]), ) def idxblock_to_str(self, letter, entries): @@ -185,10 +186,10 @@ class Index(object): for item in entries[key]['sortingkey'] ] - string = ur'\begin{idxblock}{' + letter + '}' + EOL + string = r'\begin{idxblock}{' + letter + '}' + EOL for key in sorted(entries, key=sortkey): string += self.entry_to_str(key, entries[key]['entries']) - string += ur'\end{idxblock}' + EOL + string += r'\end{idxblock}' + EOL return string def entries_to_str(self): diff --git a/patacrep/latex/__init__.py b/patacrep/latex/__init__.py index f284c094..1de7d1b9 100644 --- a/patacrep/latex/__init__.py +++ 
b/patacrep/latex/__init__.py @@ -4,7 +4,7 @@ def latex2unicode(string): """Convert LaTeX string to unicode""" - return u"TODO" + return "TODO" def parsetex(path): """Return a dictonary of data read from the latex file `path`. diff --git a/patacrep/songs.py b/patacrep/songs.py index 1cb69e8c..bf3ff3a6 100644 --- a/patacrep/songs.py +++ b/patacrep/songs.py @@ -6,13 +6,9 @@ import errno import hashlib import logging import os +import pickle import re -try: - import cPickle as pickle -except ImportError: - import pickle - from patacrep.authors import processauthors from patacrep.latex import parsetex @@ -128,7 +124,7 @@ class Song(object): self.args = data['args'] self.subpath = subpath self.languages = data['languages'] - if "by" in self.args.keys(): + if "by" in self.args: self.authors = processauthors( self.args["by"], **config["_compiled_authwords"] @@ -144,14 +140,7 @@ class Song(object): if self.datadir: cached = {} for attribute in self.cached_attributes: - if attribute == "args": - cached[attribute] = dict([ - (key, u"{}".format(value)) # Force conversion to unicode - for (key, value) - in self.args.iteritems() - ]) - else: - cached[attribute] = getattr(self, attribute) + cached[attribute] = getattr(self, attribute) pickle.dump( cached, open(cached_name(self.datadir, self.subpath), 'wb'), @@ -165,7 +154,7 @@ def unprefixed_title(title, prefixes): """Remove the first prefix of the list in the beginning of title (if any). 
""" for prefix in prefixes: - match = re.compile(ur"^(%s)\b\s*(.*)$" % prefix, re.LOCALE).match(title) + match = re.compile(r"^(%s)\b\s*(.*)$" % prefix, re.LOCALE).match(title) if match: return match.group(2) return title diff --git a/patacrep/templates.py b/patacrep/templates.py index ec8ad99c..d1e9a616 100644 --- a/patacrep/templates.py +++ b/patacrep/templates.py @@ -12,15 +12,15 @@ import json from patacrep import encoding, errors, files _LATEX_SUBS = ( - (re.compile(ur'\\'), ur'\\textbackslash'), - (re.compile(ur'([{}_#%&$])'), ur'\\\1'), - (re.compile(ur'~'), ur'\~{}'), - (re.compile(ur'\^'), ur'\^{}'), - (re.compile(ur'"'), ur"''"), - (re.compile(ur'\.\.\.+'), ur'\\ldots'), + (re.compile(r'\\'), r'\\textbackslash'), + (re.compile(r'([{}_#%&$])'), r'\\\1'), + (re.compile(r'~'), r'\~{}'), + (re.compile(r'\^'), r'\^{}'), + (re.compile(r'"'), r"''"), + (re.compile(r'\.\.\.+'), r'\\ldots'), ) -_VARIABLE_REGEXP = re.compile(ur""" +_VARIABLE_REGEXP = re.compile(r""" \(\*\ *variables\ *\*\) # Match (* variables *) ( # Match and capture the following: (?: # Start of non-capturing group, used to match a single character @@ -48,7 +48,7 @@ class VariablesExtension(Extension): tags = set(['variables']) def parse(self, parser): - parser.stream.next() + next(parser.stream) parser.parse_statements( end_tokens=['name:endvariables'], drop_needle=True, diff --git a/readme.md b/readme.md index a4ae03a4..ed4b1860 100644 --- a/readme.md +++ b/readme.md @@ -12,7 +12,7 @@ is precised in the header. # Python version -Patacrep is compatible with Python 2.7 (no Python3 yet). +Patacrep is compatible with Python 3. # Download @@ -26,7 +26,7 @@ Clone Patacrep repos: Make sure you have [pip](https://pip.pypa.io/en/latest/) installed, and then run > pip install -r Requirements.txt -> python setup.py install +> python3 setup.py install # Run @@ -40,10 +40,10 @@ Look for existing songbook files in `/books/`. 
For example: # Quick and dirty deb packages -Install `python-stdeb`, then: +Install `python3-stdeb`, then: -> python setup.py --command-packages=stdeb.command bdist_deb -> sudo dpkg -i deb_dist/python-patacrep_-1_all.deb +> python3 setup.py --command-packages=stdeb.command bdist_deb +> sudo dpkg -i deb_dist/python3-patacrep_-1_all.deb # Documentation diff --git a/setup.py b/setup.py index 6936bd35..0f616b30 100755 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """Installation script for songbook. diff --git a/songbook b/songbook index bcf015ee..1bd48f92 100755 --- a/songbook +++ b/songbook @@ -1,4 +1,4 @@ -#! /usr/bin/env python2 +#! /usr/bin/env python3 # -*- coding: utf-8 -*- """Command line tool to compile songbooks using the songbook library.""" @@ -92,7 +92,7 @@ def main(): locale.setlocale(locale.LC_ALL, '') except locale.Error as error: # Locale is not installed on user's system, or wrongly configured. - sys.stderr.write("Locale error: {}\n".format(error.message)) + sys.stderr.write("Locale error: {}\n".format(str(error))) options = argument_parser(sys.argv[1:]) @@ -119,7 +119,7 @@ def main(): datadirs += [item[0] for item in options.datadir] if 'datadir' in songbook: # .sg file - if isinstance(songbook['datadir'], basestring): + if isinstance(songbook['datadir'], str): songbook['datadir'] = [songbook['datadir']] datadirs += [ os.path.join( diff --git a/stdeb.cfg b/stdeb.cfg index d654b578..ed8a5bed 100644 --- a/stdeb.cfg +++ b/stdeb.cfg @@ -1,6 +1,6 @@ [DEFAULT] -Depends: python-jinja2, python-pkg-resources, python-chardet, python-unidecode, texlive-latex-base, texlive-latex-recommended, texlive-latex-extra, lilypond, texlive-fonts-recommended +Depends: python3-jinja2, python3-pkg-resources, python3-chardet, python3-unidecode, texlive-latex-base, texlive-latex-recommended, texlive-latex-extra, lilypond, texlive-fonts-recommended Recommends: texlive-lang-english, texlive-lang-french, texlive-lang-portuguese, 
texlive-lang-spanish, texlive-fonts-extra -XS-Python-Version: >=2.7 +X-Python3-Version: Section: tex From 6c98a09216825d45a22ac16fdbcf70ab368b474f Mon Sep 17 00:00:00 2001 From: Louis Date: Sat, 27 Sep 2014 10:39:45 +0200 Subject: [PATCH 03/26] Temporary workaround to make babel work --- patacrep/latex/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/patacrep/latex/__init__.py b/patacrep/latex/__init__.py index 1de7d1b9..08d1805f 100644 --- a/patacrep/latex/__init__.py +++ b/patacrep/latex/__init__.py @@ -18,6 +18,6 @@ def parsetex(path): return { 'titles': ["TODO"], 'args': {}, - 'languages': ['french'], + 'languages': ['french', 'english', 'portuguese', 'spanish'], } From baedd3b2366868dbf20e59d9290ee2f755d9740b Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 30 Sep 2014 23:41:33 +0200 Subject: [PATCH 04/26] [WIP] LaTeX files are parsed again --- patacrep/build.py | 17 ++-- patacrep/content/sorted.py | 2 +- patacrep/index.py | 4 +- patacrep/latex/__init__.py | 27 +++--- patacrep/latex/ast.py | 49 ++++++++++ patacrep/latex/detex.py | 110 +++++++++++++++++++++ patacrep/latex/lexer.py | 124 ++++++++++++++++++++++++ patacrep/latex/parsetab.py | 72 ++++++++++++++ patacrep/latex/syntax.py | 194 +++++++++++++++++++++++++++++++++++++ patacrep/latex/testing.py | 50 ++++++++++ patacrep/songs.py | 17 ++-- 11 files changed, 632 insertions(+), 34 deletions(-) create mode 100644 patacrep/latex/ast.py create mode 100644 patacrep/latex/detex.py create mode 100644 patacrep/latex/lexer.py create mode 100644 patacrep/latex/parsetab.py create mode 100644 patacrep/latex/syntax.py create mode 100644 patacrep/latex/testing.py diff --git a/patacrep/build.py b/patacrep/build.py index 9f79bb6a..212305ca 100644 --- a/patacrep/build.py +++ b/patacrep/build.py @@ -50,7 +50,6 @@ class Songbook(object): super(Songbook, self).__init__() self.config = raw_songbook self.basename = basename - self.contentlist = [] # Some special keys have their value processed. 
self._set_datadir() @@ -86,7 +85,7 @@ class Songbook(object): - output: a file object, in which the file will be written. """ # Updating configuration - config = DEFAULT_CONFIG + config = DEFAULT_CONFIG.copy() config.update(self.config) renderer = TexRenderer( config['template'], @@ -100,18 +99,16 @@ class Songbook(object): copy.deepcopy(config['authwords']) ) - self.config = config # Configuration set - self.contentlist = content.process_content( - self.config.get('content', []), - self.config, + config['render_content'] = content.render_content + config['content'] = content.process_content( + config.get('content', []), + config, ) - self.config['render_content'] = content.render_content - self.config['content'] = self.contentlist - self.config['filename'] = output.name[:-4] + config['filename'] = output.name[:-4] - renderer.render_tex(output, self.config) + renderer.render_tex(output, config) class SongbookBuilder(object): diff --git a/patacrep/content/sorted.py b/patacrep/content/sorted.py index 651bd07e..f95065d6 100755 --- a/patacrep/content/sorted.py +++ b/patacrep/content/sorted.py @@ -55,7 +55,7 @@ def key_generator(sort): field = song.authors else: try: - field = song.args[key] + field = song.data[key] except KeyError: LOGGER.debug( "Ignoring unknown key '{}' for song {}.".format( diff --git a/patacrep/index.py b/patacrep/index.py index ac927569..c715918a 100644 --- a/patacrep/index.py +++ b/patacrep/index.py @@ -13,7 +13,7 @@ import re from patacrep import authors from patacrep import encoding -from patacrep.latex import latex2unicode +from patacrep.latex import tex2plain EOL = "\n" @@ -113,7 +113,7 @@ class Index(object): if not key in self.data[first]: self.data[first][key] = { 'sortingkey': [ - unidecode.unidecode(latex2unicode(item)).lower() + unidecode.unidecode(tex2plain(item)).lower() for item in key ], 'entries': [], diff --git a/patacrep/latex/__init__.py b/patacrep/latex/__init__.py index 08d1805f..b0826e6e 100644 --- 
a/patacrep/latex/__init__.py +++ b/patacrep/latex/__init__.py @@ -1,23 +1,26 @@ # -*- coding: utf-8 -*- +from patacrep.latex.syntax import tex2plain as syntax_tex2plain +from patacrep.latex.syntax import parsesong as syntax_parsesong +from patacrep.latex.detex import detex +from patacrep import encoding + """Very simple LaTeX parser""" -def latex2unicode(string): - """Convert LaTeX string to unicode""" - return "TODO" +def tex2plain(string): + """Render LaTeX string + + Very few commands (mostly diacritics) are interpreted. + """ + return syntax_tex2plain(string) -def parsetex(path): +def parsesong(path): """Return a dictonary of data read from the latex file `path`. This file is a drop in replacement for an old function. Elle ne devrait pas apparaitre telle quelle dans la version finale, une fois que https://github.com/patacrep/patacrep/issues/64 aura été pris en compte. - - TODO """ - return { - 'titles': ["TODO"], - 'args': {}, - 'languages': ['french', 'english', 'portuguese', 'spanish'], - } - + data = syntax_parsesong(encoding.open_read(path).read(), path) + data['@path'] = path + return data diff --git a/patacrep/latex/ast.py b/patacrep/latex/ast.py new file mode 100644 index 00000000..1f181172 --- /dev/null +++ b/patacrep/latex/ast.py @@ -0,0 +1,49 @@ +class AST: + + metadata = None + + @classmethod + def init_metadata(cls): + cls.metadata = { + '@languages': set(), + } + +class Expression(AST): + + def __init__(self, value): + super().__init__() + self.content = [value] + + def prepend(self, value): + if value is not None: + self.content.insert(0, value) + return self + + def __str__(self): + return "".join([str(item) for item in self.content]) + +class Command(AST): + + def __init__(self, name, optional, mandatory): + self.name = name + self.mandatory = mandatory + self.optional = optional + + if name == r'\selectlanguage': + self.metadata['@languages'] |= set(self.mandatory) + + def __str__(self): + if self.name in [r'\emph']: + return 
str(self.mandatory[0]) + return "{}{}{}".format( + self.name, + "".join(["[{}]".format(item) for item in self.optional]), + "".join(["{{{}}}".format(item) for item in self.mandatory]), + ) + + +class BeginSong(AST): + + def __init__(self, titles, arguments): + self.titles = titles + self.arguments = arguments diff --git a/patacrep/latex/detex.py b/patacrep/latex/detex.py new file mode 100644 index 00000000..cb9f277f --- /dev/null +++ b/patacrep/latex/detex.py @@ -0,0 +1,110 @@ +MATCH = [ + # Diacritics: a + (r"\'a", "á"), + (r"\'A", "Á"), + (r"\`a", "à"), + (r"\`A", "À"), + (r"\^a", "â"), + (r"\^A", "Â"), + (r"\"a", "ä"), + (r"\"A", "Ä"), + + # Diacritics: e + (r"\'e", "é"), + (r"\'E", "É"), + (r"\`e", "è"), + (r"\`E", "È"), + (r"\^e", "ê"), + (r"\^E", "Ê"), + (r"\"e", "ë"), + (r"\"E", "Ë"), + + # Diacritics: i + (r"\'i", "í"), + (r"\'I", "Í"), + (r"\`i", "ì"), + (r"\`I", "Ì"), + (r"\^i", "î"), + (r"\^I", "Î"), + (r"\"i", "ï"), + (r"\"I", "Ï"), + (r"\'\i", "í"), + (r"\'\I", "Í"), + (r"\`\i", "ì"), + (r"\`\I", "Ì"), + (r"\^\i", "î"), + (r"\^\I", "Î"), + (r"\"\i", "ï"), + (r"\"\I", "Ï"), + + # Diacritics: o + (r"\'o", "ó"), + (r"\'O", "Ó"), + (r"\`o", "ò"), + (r"\`O", "Ò"), + (r"\^o", "ô"), + (r"\^O", "Ô"), + (r"\"o", "ö"), + (r"\"O", "Ö"), + + # Diacritics: u + (r"\'u", "ú"), + (r"\'U", "Ú"), + (r"\`u", "ù"), + (r"\`U", "Ù"), + (r"\^u", "û"), + (r"\^U", "Û"), + (r"\"u", "ü"), + (r"\"U", "Ü"), + + # Cedille + (r"\c c", "ç"), + (r"\c C", "Ç"), + + # œ, æ + (r"\oe", "œ"), + (r"\OE", "Œ"), + (r"\ae", "æ"), + (r"\AE", "Æ"), + + # Spaces + (r"\ ", " "), + (r"\,", " "), + (r"\~", " "), + + # IeC + (r"\IeC ", ""), + + # Miscallenous + (r"\dots", "…"), + (r"\%", "%"), + (r"\&", "&"), + (r"\_", "_"), + + ] + + +def detex(arg): + if isinstance(arg, dict): + return dict([ + (key, detex(value)) + for (key, value) + in arg.items() + ]) + elif isinstance(arg, list): + return [ + detex(item) + for item + in arg + ] + elif isinstance(arg, set): + return set(detex(list(arg))) + elif 
isinstance(arg, str): + string = arg + for (latex, plain) in MATCH: + string = string.replace(latex, plain) + if '\\' in string: + print("WARNING: Remaining command in string '{}'.".format(string)) + return string.strip() + else: + return detex(str(arg)) diff --git a/patacrep/latex/lexer.py b/patacrep/latex/lexer.py new file mode 100644 index 00000000..199d6f9c --- /dev/null +++ b/patacrep/latex/lexer.py @@ -0,0 +1,124 @@ +import ply.lex as lex + +tokens = ( + 'LBRACKET', + 'RBRACKET', + 'LBRACE', + 'RBRACE', + 'COMMAND', + 'NEWLINE', + 'COMMA', + 'EQUAL', + 'CHARACTER', + 'SPACE', + 'BEGINSONG', + 'SONG_LTITLE', + 'SONG_RTITLE', + 'SONG_LOPTIONS', + 'SONG_ROPTIONS', +) + +class SimpleLexer: + + tokens = tokens + + # Regular expression rules for simple tokens + t_LBRACKET = r'\[' + t_RBRACKET = r'\]' + t_LBRACE = r'{' + t_RBRACE = r'}' + t_COMMAND = r'\\([@a-zA-Z]+|[^\\])' + t_NEWLINE = r'\\\\' + SPECIAL_CHARACTERS = ( + t_LBRACKET + + t_RBRACKET + + t_RBRACE + + t_LBRACE + + r"\\" + + r" " + + r"\n" + + r"\r" + + r"%" + + r"=" + + r"," + ) + t_CHARACTER = r'[^{}]'.format(SPECIAL_CHARACTERS) + t_EQUAL = r'=' + t_COMMA = r',' + + t_SPACE = r'[ \t\n\r]+' + + def __init__(self): + self.__class__.lexer = lex.lex(module = self) + + # Define a rule so we can track line numbers + def t_newline(self, t): + r'\n+' + t.lexer.lineno += len(t.value) + + def t_comment(self, t): + r'%.*' + pass + + # Error handling rule + def t_error(self, t): + print("Illegal character '%s'" % t.value[0]) # TODO log + t.lexer.skip(1) + +class SongLexer(SimpleLexer): + + states = ( + ('beginsong', 'inclusive'), + ) + + # State beginsong + def t_INITIAL_BEGINSONG(self, t): + r'\\beginsong' + t.lexer.push_state('beginsong') + t.lexer.open_brackets = 0 + t.lexer.open_braces = 0 + return t + + def t_beginsong_LBRACKET(self, t): + r'\[' + if t.lexer.open_brackets == 0: + t.type = 'SONG_LOPTIONS' + t.lexer.open_braces += 1 # TODO Explain + t.lexer.open_brackets += 1 + return t + + def 
t_beginsong_RBRACKET(self, t): + r'\]' + t.lexer.open_brackets -= 1 + if t.lexer.open_brackets == 0: + t.type = 'SONG_ROPTIONS' + t.lexer.open_braces -= 1 # TODO Explain + t.lexer.pop_state() + for __ignored in t.lexer: # TODO Explain + pass + return t + + def t_beginsong_LBRACE(self, t): + r'{' + if t.lexer.open_braces == 0: + t.type = 'SONG_LTITLE' + t.lexer.open_braces += 1 + return t + + def t_beginsong_RBRACE1(self, t): + r'}(?![ \t\r\n]*\[)' + t.lexer.open_braces -= 1 + t.type = 'RBRACE' + if t.lexer.open_braces == 0: + t.lexer.pop_state() + t.type = 'SONG_RTITLE' + return t + + def t_beginsong_RBRACE2(self, t): + r'}(?=[ \t\r\n]*\[)' + t.lexer.open_braces -= 1 + t.type = 'RBRACE' + if t.lexer.open_braces == 0: + t.type = 'SONG_RTITLE' + return t + diff --git a/patacrep/latex/parsetab.py b/patacrep/latex/parsetab.py new file mode 100644 index 00000000..b98cd59e --- /dev/null +++ b/patacrep/latex/parsetab.py @@ -0,0 +1,72 @@ + +# parsetab.py +# This file is automatically generated. Do not edit. 
+_tabversion = '3.2' + +_lr_method = 'LALR' + +_lr_signature = b'\xa4\n\x7f%6\xc5\x1d\x1cV\xd9V\xf5\x07\xd5\x10\x10' + +_lr_action_items = {'LBRACKET':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,],[9,9,9,9,9,9,-9,9,9,9,9,-9,9,-9,-9,-17,-21,-9,-9,-14,9,-19,-26,9,-9,-18,-20,-10,-16,-9,-12,-13,-11,9,9,9,9,9,-24,-22,-15,-25,9,-23,]),'SONG_ROPTIONS':([43,66,74,75,76,79,80,84,],[-11,70,-9,-9,-34,-28,-27,-33,]),'$end':([0,1,2,3,4,5,6,7,8,10,11,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-9,0,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,-7,-9,-14,-9,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SPACE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,74,75,81,],[10,10,10,10,10,10,-9,10,10,-9,10,-9,10,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,50,-9,-18,-20,-10,-16,-9,-12,-13,-11,50,50,50,50,50,-24,-22,-15,-25,50,-23,77,77,77,]),'COMMAND':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,],[11,11,11,11,11,11,-9,11,11,-9,11,-9,11,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,11,-9,-18,-20,-10,-16,-9,-12,-13,-11,11,11,11,11,11,-24,-22,-15,-25,11,-23,]),'RBRACKET':([1,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,38,-7,-9,-14,-9,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'LBRACE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,71,],[12,12,12,12,12,12,-9,12,12,-9,12,-9,12,-9,-9,-17,-21,-9,12,-14,-9,-19,-26,12,-9,-18,-20,-10,-16,12,-12,-13,-11,12,12,12,12,12,-24,-22,-15,-25,12,-23,12,]),'SONG_LOPTIONS':([15,33,35,60,],[-9,-2
6,54,-25,]),'CHARACTER':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,54,55,60,62,68,70,77,81,82,83,],[8,8,8,8,8,8,24,8,8,-9,8,24,8,-9,24,-17,-21,24,-9,-14,-9,-19,-26,8,-9,-18,-20,-10,-16,-9,-12,-13,-11,8,8,8,8,8,-24,-22,68,-15,-25,8,68,-23,-31,-9,68,-32,]),'NEWLINE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,50,51,52,53,55,56,57,58,59,60,61,62,69,70,],[7,7,7,7,7,7,-9,7,7,-9,7,-9,7,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,-9,-9,-18,-20,-10,-16,-9,-12,-13,-11,-9,-43,-9,-9,-9,-9,62,-24,-22,-15,-41,-40,-38,-39,-25,-42,-9,62,-23,]),'error':([71,],[74,]),'EQUAL':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,54,55,60,62,65,67,68,70,72,77,81,82,83,],[13,13,13,13,13,13,-9,13,13,-9,13,-9,13,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,13,-9,-18,-20,-10,-16,-9,-12,-13,-11,13,13,13,13,13,-24,-22,-9,-15,-25,13,-30,71,-9,-23,-29,-31,-9,-9,-32,]),'COMMA':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,74,75,76,77,78,],[16,16,16,16,16,16,-9,16,16,-9,16,-9,16,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,16,-9,-18,-20,-10,-16,-9,-12,-13,-11,16,16,16,16,16,-24,-22,-15,-25,16,-23,-9,-9,-32,-31,81,]),'RBRACE':([1,3,4,5,6,7,8,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,30,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,-7,-9,-14,-9,43,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SONG_LTITLE':([15,],[34,]),'BEGINSONG':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[15,15,15,15,15,15,-9,15,15,-9,15,-9,15,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SONG_RTITLE':([8,11,13,16,22,23,24,27,28,29,31,34,36,37,38,39,40,41,42,43,44,4
5,46,47,48,49,50,51,55,56,57,58,59,61,62,63,64,69,73,],[-9,-9,-9,-9,-17,-21,-9,-9,-14,-9,-19,-9,-18,-20,-10,-16,-9,-12,-13,-11,-9,-43,-9,-9,-9,60,-9,-9,-15,-41,-40,-38,-39,-42,-9,-37,-35,-9,-36,]),} + +_lr_action = { } +for _k, _v in _lr_action_items.items(): + for _x,_y in zip(_v[0],_v[1]): + if not _x in _lr_action: _lr_action[_x] = { } + _lr_action[_x][_k] = _y +del _lr_action_items + +_lr_goto_items = {'word':([0,1,4,5,6,7,9,10,12,14,34,44,46,47,48,50,62,],[1,1,1,1,1,1,1,1,1,1,44,44,44,44,44,44,44,]),'expression':([0,1,4,5,6,7,9,10,12,14,],[2,17,18,19,20,21,25,26,30,32,]),'dictionary_next':([74,75,],[79,80,]),'empty':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,24,27,29,34,35,40,44,46,47,48,50,51,54,62,68,69,74,75,81,82,],[3,3,3,3,3,3,23,3,3,28,3,23,3,33,23,23,39,28,45,52,39,45,45,45,45,45,63,65,45,65,63,76,76,83,65,]),'dictionary':([54,82,],[66,84,]),'songbrackets':([35,],[53,]),'command':([0,1,4,5,6,7,9,10,12,14,34,44,46,47,48,50,62,],[4,4,4,4,4,4,4,4,4,4,46,46,46,46,46,46,46,]),'identifier':([54,68,82,],[67,72,67,]),'songbraces':([15,],[35,]),'brackets_list':([11,29,],[27,42,]),'brackets':([0,1,4,5,6,7,9,10,11,12,14,29,34,44,46,47,48,50,62,],[6,6,6,6,6,6,6,6,29,6,6,29,47,47,47,47,47,47,47,]),'word_next':([8,13,16,24,],[22,31,36,37,]),'beginsong':([0,1,4,5,6,7,9,10,12,14,],[5,5,5,5,5,5,5,5,5,5,]),'title':([34,44,46,47,48,50,62,],[51,56,57,58,59,61,69,]),'titles_next':([51,69,],[64,73,]),'braces':([0,1,4,5,6,7,9,10,12,14,27,34,40,44,46,47,48,50,62,71,],[14,14,14,14,14,14,14,14,14,14,40,48,40,48,48,48,48,48,48,75,]),'separator':([74,75,81,],[78,78,82,]),'titles':([34,],[49,]),'braces_list':([27,40,],[41,55,]),} + +_lr_goto = { } +for _k, _v in _lr_goto_items.items(): + for _x,_y in zip(_v[0],_v[1]): + if not _x in _lr_goto: _lr_goto[_x] = { } + _lr_goto[_x][_k] = _y +del _lr_goto_items +_lr_productions = [ + ("S' -> expression","S'",1,None,None,None), + ('expression -> brackets 
expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',8), + ('expression -> braces expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',9), + ('expression -> command expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',10), + ('expression -> NEWLINE expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',11), + ('expression -> beginsong expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',12), + ('expression -> word expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',13), + ('expression -> SPACE expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',14), + ('expression -> empty','expression',1,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',15), + ('empty -> ','empty',0,'p_empty','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',26), + ('brackets -> LBRACKET expression RBRACKET','brackets',3,'p_brackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',30), + ('braces -> LBRACE expression RBRACE','braces',3,'p_braces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',34), + ('command -> COMMAND brackets_list braces_list','command',3,'p_command','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',38), + ('brackets_list -> brackets brackets_list','brackets_list',2,'p_brackets_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',42), + ('brackets_list -> empty','brackets_list',1,'p_brackets_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',43), + ('braces_list -> braces 
braces_list','braces_list',2,'p_braces_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',52), + ('braces_list -> empty','braces_list',1,'p_braces_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',53), + ('word -> CHARACTER word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',62), + ('word -> COMMA word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',63), + ('word -> EQUAL word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',64), + ('word_next -> CHARACTER word_next','word_next',2,'p_word_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',69), + ('word_next -> empty','word_next',1,'p_word_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',70), + ('beginsong -> BEGINSONG songbraces songbrackets','beginsong',3,'p_beginsong','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',78), + ('songbrackets -> SONG_LOPTIONS dictionary SONG_ROPTIONS','songbrackets',3,'p_songbrackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',82), + ('songbrackets -> empty','songbrackets',1,'p_songbrackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',83), + ('songbraces -> SONG_LTITLE titles SONG_RTITLE','songbraces',3,'p_songbraces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',91), + ('songbraces -> empty','songbraces',1,'p_songbraces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',92), + ('dictionary -> identifier EQUAL braces dictionary_next','dictionary',4,'p_dictionary','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',100), + ('dictionary -> identifier EQUAL error dictionary_next','dictionary',4,'p_dictionary','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',101), + ('identifier -> CHARACTER 
identifier','identifier',2,'p_identifier','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',111), + ('identifier -> empty','identifier',1,'p_identifier','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',112), + ('separator -> SPACE','separator',1,'p_separator','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',120), + ('separator -> empty','separator',1,'p_separator','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',121), + ('dictionary_next -> separator COMMA separator dictionary','dictionary_next',4,'p_dictonary_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',126), + ('dictionary_next -> empty','dictionary_next',1,'p_dictonary_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',127), + ('titles -> title titles_next','titles',2,'p_titles','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',135), + ('titles_next -> NEWLINE title titles_next','titles_next',3,'p_titles_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',139), + ('titles_next -> empty','titles_next',1,'p_titles_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',140), + ('title -> brackets title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',148), + ('title -> braces title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',149), + ('title -> command title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',150), + ('title -> word title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',151), + ('title -> SPACE title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',152), + ('title -> empty','title',1,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',153), +] diff --git a/patacrep/latex/syntax.py b/patacrep/latex/syntax.py new file mode 100644 index 
00000000..f161b19c --- /dev/null +++ b/patacrep/latex/syntax.py @@ -0,0 +1,194 @@ +import ply.yacc as yacc +import inspect # TODO supprimer + +from patacrep.latex.lexer import tokens, SimpleLexer, SongLexer +from patacrep.latex import ast +from patacrep.latex.detex import detex + +class Parser: + + def __init__(self, filename=None): + self.tokens = tokens + self.ast = ast.AST + self.ast.init_metadata() + self.filename = filename + + def __find_column(self, token): + last_cr = token.lexer.lexdata.rfind('\n',0,token.lexpos) + if last_cr < 0: + last_cr = 0 + column = (token.lexpos - last_cr) + 1 + return column + + def p_error(self, p): + print("Erreur fichier {}, ligne {}, position {}.".format( # TODO + str(self.filename), + p.lineno, + self.__find_column(p), + ) + ) + + def p_expression(self, p): + """expression : brackets expression + | braces expression + | command expression + | NEWLINE expression + | beginsong expression + | word expression + | SPACE expression + | empty + """ + if len(p) == 3: + if p[2] is None: + p[0] = ast.Expression(p[1]) + else: + p[0] = p[2].prepend(p[1]) + else: + p[0] = None + + def p_empty(self, p): + """empty :""" + return None + + def p_brackets(self, p): + """brackets : LBRACKET expression RBRACKET""" + p[0] = p[2] + + def p_braces(self, p): + """braces : LBRACE expression RBRACE""" + p[0] = p[2] + + def p_command(self, p): + """command : COMMAND brackets_list braces_list""" + p[0] = ast.Command(p[1], p[2], p[3]) + + def p_brackets_list(self, p): + """brackets_list : brackets brackets_list + | empty + """ + if len(p) == 3: + p[0] = p[2] + p[0].insert(0, p[1]) + else: + p[0] = [] + + def p_braces_list(self, p): + """braces_list : braces braces_list + | empty + """ + if len(p) == 3: + p[0] = p[2] + p[0].insert(0, p[1]) + else: + p[0] = [] + + def p_word(self, p): + """word : CHARACTER word_next + | COMMA word_next + | EQUAL word_next + """ + p[0] = p[1] + p[2] + + def p_word_next(self, p): + """word_next : CHARACTER word_next + | empty 
+ """ + if len(p) == 2: + p[0] = "" + else: + p[0] = p[1] + p[2] + + def p_beginsong(self, p): + """beginsong : BEGINSONG separator songbraces separator songbrackets""" + self.ast.metadata["@titles"] = p[3] + self.ast.metadata.update(p[5]) + + def p_songbrackets(self, p): + """songbrackets : SONG_LOPTIONS separator dictionary separator SONG_ROPTIONS + | empty + """ + if len(p) == 6: + p[0] = p[3] + else: + p[0] = {} + + def p_songbraces(self, p): + """songbraces : SONG_LTITLE separator titles separator SONG_RTITLE + | empty + """ + if len(p) == 6: + p[0] = p[3] + else: + p[0] = [] + + def p_dictionary(self, p): + """dictionary : identifier EQUAL braces dictionary_next + | identifier EQUAL error dictionary_next + """ + if isinstance(p[3], ast.Expression): + p[0] = {} + p[0][p[1]] = p[3] + p[0].update(p[4]) + else: + raise Exception("Do enclose arguments between braces.") # TODO + + def p_identifier(self, p): + """identifier : CHARACTER identifier + | empty + """ + if len(p) == 2: + p[0] = "" + else: + p[0] = p[1] + p[2] + + def p_separator(self, p): + """separator : SPACE + | empty + """ + p[0] = None + + def p_dictonary_next(self, p): + """dictionary_next : separator COMMA separator dictionary + | empty + """ + if len(p) == 5: + p[0] = p[4] + else: + p[0] = {} + + def p_titles(self, p): + """titles : title titles_next""" + p[0] = [p[1]] + p[2] + + def p_titles_next(self, p): + """titles_next : NEWLINE title titles_next + | empty + """ + if len(p) == 2: + p[0] = [] + else: + p[0] = [p[2]] + p[3] + + def p_title(self, p): + """title : brackets title + | braces title + | command title + | word title + | SPACE title + | empty + """ + if len(p) == 2: + p[0] = None + else: + if p[2] is None: + p[0] = ast.Expression(p[1]) + else: + p[0] = p[2].prepend(p[1]) + + +def tex2plain(string): + return detex(yacc.yacc(module = Parser()).parse(string, lexer = SimpleLexer().lexer)) + +def parsesong(string, filename=None): + return detex(yacc.yacc(module = 
Parser(filename)).parse(string, lexer = SongLexer().lexer).metadata) + diff --git a/patacrep/latex/testing.py b/patacrep/latex/testing.py new file mode 100644 index 00000000..f2d66510 --- /dev/null +++ b/patacrep/latex/testing.py @@ -0,0 +1,50 @@ + +# Test it out +song = r""" +\selectlanguage{french} +plop = tag +% Un commentaire +\columns{3} +\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre}[album={Tagada tsoin \"itsoin}, cov={pouf.png}] % un autre +%\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre}[album={Tagada tsoin \"itsoin}] % un autre +%\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre} + + Dans [Dm6]cette ruedots [E7] +""" + +isong = r""" +\selectlanguage{french} +\songcolumns{2} +\beginsong{Tous les bateaux, tous les oiseaux} + [by={Michel Polnareff},cov={passe-present},album={Passé Présent}] + + Dans \[Dm6]cette ruedots [E7] +""" + +tex = "D\\^iacritiqu\\'Es" + +# Give the lexer some input +#if 0: +# from syntax import parser +# print(parser.parse(data, debug=0)) +# print(parser.parse(data).song_data()) +#else: +# from lexer import SimpleLexer +# lexer.input(data) +# for tok in lexer: +# print(tok) + +from patacrep.latex import tex2plain +from patacrep.latex.syntax import parsesong +from patacrep.latex.ast import AST + +print(tex2plain(tex) == "DîacritiquÉs") +print(parsesong(song, AST)) +print({ + "@titles": ["Titre un", "Titre deux", "Tître trois", "Tpitre quatre"], + "@languages": set(["french"]), + "@path": "TODO", + "album": "Tagada tsoin ïtsoin", + "cov": "pouf.png", + } + ) diff --git a/patacrep/songs.py b/patacrep/songs.py index bf3ff3a6..43e70fe7 100644 --- a/patacrep/songs.py +++ b/patacrep/songs.py @@ -10,7 +10,7 @@ import pickle import re from patacrep.authors import processauthors -from patacrep.latex import parsetex +from patacrep.latex import parsesong LOGGER = logging.getLogger(__name__) @@ -74,7 +74,7 @@ class Song(object): cached_attributes = [ "titles", "unprefixed_titles", - 
"args", + "data", "datadir", "fullpath", "subpath", @@ -110,8 +110,9 @@ class Song(object): )) # Data extraction from the latex song - data = parsetex(self.fullpath) - self.titles = data['titles'] + self.data = parsesong(self.fullpath) + self.titles = self.data['@titles'] + self.languages = self.data['@languages'] self.datadir = datadir self.unprefixed_titles = [ unprefixed_title( @@ -121,12 +122,10 @@ class Song(object): for title in self.titles ] - self.args = data['args'] self.subpath = subpath - self.languages = data['languages'] - if "by" in self.args: + if "by" in self.data: self.authors = processauthors( - self.args["by"], + self.data["by"], **config["_compiled_authwords"] ) else: @@ -148,7 +147,7 @@ class Song(object): ) def __repr__(self): - return repr((self.titles, self.args, self.fullpath)) + return repr((self.titles, self.data, self.fullpath)) def unprefixed_title(title, prefixes): """Remove the first prefix of the list in the beginning of title (if any). From d685da1244a64cbac45a7b53fce46ffe8d682a4b Mon Sep 17 00:00:00 2001 From: Louis Date: Wed, 1 Oct 2014 10:33:42 +0200 Subject: [PATCH 05/26] Cleaning LaTeX parser --- patacrep/latex/__init__.py | 12 ++- patacrep/latex/ast.py | 15 +++ patacrep/latex/detex.py | 14 ++- patacrep/latex/lexer.py | 122 +++++++++++++--------- patacrep/latex/parsetab.py | 72 ------------- patacrep/latex/syntax.py | 204 +++++++++++++++++++++++-------------- patacrep/latex/testing.py | 50 --------- 7 files changed, 236 insertions(+), 253 deletions(-) delete mode 100644 patacrep/latex/parsetab.py delete mode 100644 patacrep/latex/testing.py diff --git a/patacrep/latex/__init__.py b/patacrep/latex/__init__.py index b0826e6e..46ac9d10 100644 --- a/patacrep/latex/__init__.py +++ b/patacrep/latex/__init__.py @@ -1,12 +1,17 @@ # -*- coding: utf-8 -*- +"""Very simple LaTeX parser + +This module uses an LALR parser to try to parse LaTeX code. 
LaTeX language +*cannot* be parsed by an LALR parser, so this is a very simple attempt, which +will work on simple cases, but not on complex ones. +""" + from patacrep.latex.syntax import tex2plain as syntax_tex2plain from patacrep.latex.syntax import parsesong as syntax_parsesong from patacrep.latex.detex import detex from patacrep import encoding -"""Very simple LaTeX parser""" - def tex2plain(string): """Render LaTeX string @@ -17,9 +22,6 @@ def tex2plain(string): def parsesong(path): """Return a dictonary of data read from the latex file `path`. - This file is a drop in replacement for an old function. Elle ne devrait pas - apparaitre telle quelle dans la version finale, une fois que - https://github.com/patacrep/patacrep/issues/64 aura été pris en compte. """ data = syntax_parsesong(encoding.open_read(path).read(), path) data['@path'] = path diff --git a/patacrep/latex/ast.py b/patacrep/latex/ast.py index 1f181172..798d4c33 100644 --- a/patacrep/latex/ast.py +++ b/patacrep/latex/ast.py @@ -1,20 +1,33 @@ +"""Abstract Syntax Tree for LaTeX code.""" + +# pylint: disable=too-few-public-methods + class AST: + """Base class for the tree.""" + # pylint: disable=no-init metadata = None @classmethod def init_metadata(cls): + """Clear metadata + + As this attribute is a class attribute, it has to be reset at each new + parsing. 
+ """ cls.metadata = { '@languages': set(), } class Expression(AST): + """LaTeX expression""" def __init__(self, value): super().__init__() self.content = [value] def prepend(self, value): + """Add a value at the beginning of the content list.""" if value is not None: self.content.insert(0, value) return self @@ -23,6 +36,7 @@ class Expression(AST): return "".join([str(item) for item in self.content]) class Command(AST): + """LaTeX command""" def __init__(self, name, optional, mandatory): self.name = name @@ -43,6 +57,7 @@ class Command(AST): class BeginSong(AST): + """Beginsong command""" def __init__(self, titles, arguments): self.titles = titles diff --git a/patacrep/latex/detex.py b/patacrep/latex/detex.py index cb9f277f..ebfd721b 100644 --- a/patacrep/latex/detex.py +++ b/patacrep/latex/detex.py @@ -1,3 +1,9 @@ +"""Render `very simple` TeX commands in a simple TeX code.""" + +import logging + +LOGGER = logging.getLogger() + MATCH = [ # Diacritics: a (r"\'a", "á"), @@ -85,6 +91,12 @@ MATCH = [ def detex(arg): + """Render very simple TeX commands from argument. + + Argument can be: + - a string: it is processed; + - a list, dict or set: its values are processed. 
+ """ if isinstance(arg, dict): return dict([ (key, detex(value)) @@ -104,7 +116,7 @@ def detex(arg): for (latex, plain) in MATCH: string = string.replace(latex, plain) if '\\' in string: - print("WARNING: Remaining command in string '{}'.".format(string)) + LOGGER.warning("Remaining command in string '{}'.".format(string)) return string.strip() else: return detex(str(arg)) diff --git a/patacrep/latex/lexer.py b/patacrep/latex/lexer.py index 199d6f9c..8954e14c 100644 --- a/patacrep/latex/lexer.py +++ b/patacrep/latex/lexer.py @@ -1,5 +1,11 @@ +"""Very simple LaTeX lexer.""" + +import logging import ply.lex as lex +LOGGER = logging.getLogger() + +#pylint: disable=invalid-name tokens = ( 'LBRACKET', 'RBRACKET', @@ -19,10 +25,10 @@ tokens = ( ) class SimpleLexer: + """Very simple LaTeX lexer.""" tokens = tokens - # Regular expression rules for simple tokens t_LBRACKET = r'\[' t_RBRACKET = r'\]' t_LBRACE = r'{' @@ -49,76 +55,96 @@ class SimpleLexer: t_SPACE = r'[ \t\n\r]+' def __init__(self): - self.__class__.lexer = lex.lex(module = self) + self.__class__.lexer = lex.lex(module=self) # Define a rule so we can track line numbers - def t_newline(self, t): + @staticmethod + def t_newline(token): r'\n+' - t.lexer.lineno += len(t.value) + token.lexer.lineno += len(token.value) - def t_comment(self, t): + @staticmethod + def t_comment(token): r'%.*' pass # Error handling rule - def t_error(self, t): - print("Illegal character '%s'" % t.value[0]) # TODO log - t.lexer.skip(1) + @staticmethod + def t_error(token): + """Manage errors""" + LOGGER.error("Illegal character '{}'".format(token.value[0])) + token.lexer.skip(1) class SongLexer(SimpleLexer): + r"""Very simple song lexer. + + In the context of this class, a "song" is some LaTeX code containing the + ``\beginsong`` (or ``\sortassong``) command. 
+ """ states = ( ('beginsong', 'inclusive'), ) # State beginsong - def t_INITIAL_BEGINSONG(self, t): - r'\\beginsong' - t.lexer.push_state('beginsong') - t.lexer.open_brackets = 0 - t.lexer.open_braces = 0 - return t - - def t_beginsong_LBRACKET(self, t): + @staticmethod + def t_INITIAL_BEGINSONG(token): + r'(\\beginsong|\\sortassong)' + token.lexer.push_state('beginsong') + token.lexer.open_brackets = 0 + token.lexer.open_braces = 0 + return token + + @staticmethod + def t_beginsong_LBRACKET(token): r'\[' - if t.lexer.open_brackets == 0: - t.type = 'SONG_LOPTIONS' - t.lexer.open_braces += 1 # TODO Explain - t.lexer.open_brackets += 1 - return t + if token.lexer.open_brackets == 0: + token.type = 'SONG_LOPTIONS' + + # Count opening and closing braces to know when to leave the + # `beginsong` state. + token.lexer.open_braces += 1 + token.lexer.open_brackets += 1 + return token - def t_beginsong_RBRACKET(self, t): + @staticmethod + def t_beginsong_RBRACKET(token): r'\]' - t.lexer.open_brackets -= 1 - if t.lexer.open_brackets == 0: - t.type = 'SONG_ROPTIONS' - t.lexer.open_braces -= 1 # TODO Explain - t.lexer.pop_state() - for __ignored in t.lexer: # TODO Explain + token.lexer.open_brackets -= 1 + if token.lexer.open_brackets == 0: + token.type = 'SONG_ROPTIONS' + token.lexer.open_braces -= 1 + token.lexer.pop_state() + for __ignored in token.lexer: + # In this parser, we only want to read metadata. So, after the + # first ``\beginsong`` command, we can stop parsing. 
pass - return t + return token - def t_beginsong_LBRACE(self, t): + @staticmethod + def t_beginsong_LBRACE(token): r'{' - if t.lexer.open_braces == 0: - t.type = 'SONG_LTITLE' - t.lexer.open_braces += 1 - return t + if token.lexer.open_braces == 0: + token.type = 'SONG_LTITLE' + token.lexer.open_braces += 1 + return token - def t_beginsong_RBRACE1(self, t): + @staticmethod + def t_beginsong_RBRACE1(token): r'}(?![ \t\r\n]*\[)' - t.lexer.open_braces -= 1 - t.type = 'RBRACE' - if t.lexer.open_braces == 0: - t.lexer.pop_state() - t.type = 'SONG_RTITLE' - return t - - def t_beginsong_RBRACE2(self, t): + token.lexer.open_braces -= 1 + token.type = 'RBRACE' + if token.lexer.open_braces == 0: + token.lexer.pop_state() + token.type = 'SONG_RTITLE' + return token + + @staticmethod + def t_beginsong_RBRACE2(token): r'}(?=[ \t\r\n]*\[)' - t.lexer.open_braces -= 1 - t.type = 'RBRACE' - if t.lexer.open_braces == 0: - t.type = 'SONG_RTITLE' - return t + token.lexer.open_braces -= 1 + token.type = 'RBRACE' + if token.lexer.open_braces == 0: + token.type = 'SONG_RTITLE' + return token diff --git a/patacrep/latex/parsetab.py b/patacrep/latex/parsetab.py deleted file mode 100644 index b98cd59e..00000000 --- a/patacrep/latex/parsetab.py +++ /dev/null @@ -1,72 +0,0 @@ - -# parsetab.py -# This file is automatically generated. Do not edit. 
-_tabversion = '3.2' - -_lr_method = 'LALR' - -_lr_signature = b'\xa4\n\x7f%6\xc5\x1d\x1cV\xd9V\xf5\x07\xd5\x10\x10' - -_lr_action_items = {'LBRACKET':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,],[9,9,9,9,9,9,-9,9,9,9,9,-9,9,-9,-9,-17,-21,-9,-9,-14,9,-19,-26,9,-9,-18,-20,-10,-16,-9,-12,-13,-11,9,9,9,9,9,-24,-22,-15,-25,9,-23,]),'SONG_ROPTIONS':([43,66,74,75,76,79,80,84,],[-11,70,-9,-9,-34,-28,-27,-33,]),'$end':([0,1,2,3,4,5,6,7,8,10,11,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-9,0,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,-7,-9,-14,-9,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SPACE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,74,75,81,],[10,10,10,10,10,10,-9,10,10,-9,10,-9,10,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,50,-9,-18,-20,-10,-16,-9,-12,-13,-11,50,50,50,50,50,-24,-22,-15,-25,50,-23,77,77,77,]),'COMMAND':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,],[11,11,11,11,11,11,-9,11,11,-9,11,-9,11,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,11,-9,-18,-20,-10,-16,-9,-12,-13,-11,11,11,11,11,11,-24,-22,-15,-25,11,-23,]),'RBRACKET':([1,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,38,-7,-9,-14,-9,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'LBRACE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,71,],[12,12,12,12,12,12,-9,12,12,-9,12,-9,12,-9,-9,-17,-21,-9,12,-14,-9,-19,-26,12,-9,-18,-20,-10,-16,12,-12,-13,-11,12,12,12,12,12,-24,-22,-15,-25,12,-23,12,]),'SONG_LOPTIONS':([15,33,35,60,],[-9,-2
6,54,-25,]),'CHARACTER':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,54,55,60,62,68,70,77,81,82,83,],[8,8,8,8,8,8,24,8,8,-9,8,24,8,-9,24,-17,-21,24,-9,-14,-9,-19,-26,8,-9,-18,-20,-10,-16,-9,-12,-13,-11,8,8,8,8,8,-24,-22,68,-15,-25,8,68,-23,-31,-9,68,-32,]),'NEWLINE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,50,51,52,53,55,56,57,58,59,60,61,62,69,70,],[7,7,7,7,7,7,-9,7,7,-9,7,-9,7,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,-9,-9,-18,-20,-10,-16,-9,-12,-13,-11,-9,-43,-9,-9,-9,-9,62,-24,-22,-15,-41,-40,-38,-39,-25,-42,-9,62,-23,]),'error':([71,],[74,]),'EQUAL':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,54,55,60,62,65,67,68,70,72,77,81,82,83,],[13,13,13,13,13,13,-9,13,13,-9,13,-9,13,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,13,-9,-18,-20,-10,-16,-9,-12,-13,-11,13,13,13,13,13,-24,-22,-9,-15,-25,13,-30,71,-9,-23,-29,-31,-9,-9,-32,]),'COMMA':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,74,75,76,77,78,],[16,16,16,16,16,16,-9,16,16,-9,16,-9,16,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,16,-9,-18,-20,-10,-16,-9,-12,-13,-11,16,16,16,16,16,-24,-22,-15,-25,16,-23,-9,-9,-32,-31,81,]),'RBRACE':([1,3,4,5,6,7,8,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,30,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,-7,-9,-14,-9,43,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SONG_LTITLE':([15,],[34,]),'BEGINSONG':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[15,15,15,15,15,15,-9,15,15,-9,15,-9,15,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SONG_RTITLE':([8,11,13,16,22,23,24,27,28,29,31,34,36,37,38,39,40,41,42,43,44,4
5,46,47,48,49,50,51,55,56,57,58,59,61,62,63,64,69,73,],[-9,-9,-9,-9,-17,-21,-9,-9,-14,-9,-19,-9,-18,-20,-10,-16,-9,-12,-13,-11,-9,-43,-9,-9,-9,60,-9,-9,-15,-41,-40,-38,-39,-42,-9,-37,-35,-9,-36,]),} - -_lr_action = { } -for _k, _v in _lr_action_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_action: _lr_action[_x] = { } - _lr_action[_x][_k] = _y -del _lr_action_items - -_lr_goto_items = {'word':([0,1,4,5,6,7,9,10,12,14,34,44,46,47,48,50,62,],[1,1,1,1,1,1,1,1,1,1,44,44,44,44,44,44,44,]),'expression':([0,1,4,5,6,7,9,10,12,14,],[2,17,18,19,20,21,25,26,30,32,]),'dictionary_next':([74,75,],[79,80,]),'empty':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,24,27,29,34,35,40,44,46,47,48,50,51,54,62,68,69,74,75,81,82,],[3,3,3,3,3,3,23,3,3,28,3,23,3,33,23,23,39,28,45,52,39,45,45,45,45,45,63,65,45,65,63,76,76,83,65,]),'dictionary':([54,82,],[66,84,]),'songbrackets':([35,],[53,]),'command':([0,1,4,5,6,7,9,10,12,14,34,44,46,47,48,50,62,],[4,4,4,4,4,4,4,4,4,4,46,46,46,46,46,46,46,]),'identifier':([54,68,82,],[67,72,67,]),'songbraces':([15,],[35,]),'brackets_list':([11,29,],[27,42,]),'brackets':([0,1,4,5,6,7,9,10,11,12,14,29,34,44,46,47,48,50,62,],[6,6,6,6,6,6,6,6,29,6,6,29,47,47,47,47,47,47,47,]),'word_next':([8,13,16,24,],[22,31,36,37,]),'beginsong':([0,1,4,5,6,7,9,10,12,14,],[5,5,5,5,5,5,5,5,5,5,]),'title':([34,44,46,47,48,50,62,],[51,56,57,58,59,61,69,]),'titles_next':([51,69,],[64,73,]),'braces':([0,1,4,5,6,7,9,10,12,14,27,34,40,44,46,47,48,50,62,71,],[14,14,14,14,14,14,14,14,14,14,40,48,40,48,48,48,48,48,48,75,]),'separator':([74,75,81,],[78,78,82,]),'titles':([34,],[49,]),'braces_list':([27,40,],[41,55,]),} - -_lr_goto = { } -for _k, _v in _lr_goto_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_goto: _lr_goto[_x] = { } - _lr_goto[_x][_k] = _y -del _lr_goto_items -_lr_productions = [ - ("S' -> expression","S'",1,None,None,None), - ('expression -> brackets 
expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',8), - ('expression -> braces expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',9), - ('expression -> command expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',10), - ('expression -> NEWLINE expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',11), - ('expression -> beginsong expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',12), - ('expression -> word expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',13), - ('expression -> SPACE expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',14), - ('expression -> empty','expression',1,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',15), - ('empty -> ','empty',0,'p_empty','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',26), - ('brackets -> LBRACKET expression RBRACKET','brackets',3,'p_brackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',30), - ('braces -> LBRACE expression RBRACE','braces',3,'p_braces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',34), - ('command -> COMMAND brackets_list braces_list','command',3,'p_command','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',38), - ('brackets_list -> brackets brackets_list','brackets_list',2,'p_brackets_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',42), - ('brackets_list -> empty','brackets_list',1,'p_brackets_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',43), - ('braces_list -> braces 
braces_list','braces_list',2,'p_braces_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',52), - ('braces_list -> empty','braces_list',1,'p_braces_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',53), - ('word -> CHARACTER word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',62), - ('word -> COMMA word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',63), - ('word -> EQUAL word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',64), - ('word_next -> CHARACTER word_next','word_next',2,'p_word_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',69), - ('word_next -> empty','word_next',1,'p_word_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',70), - ('beginsong -> BEGINSONG songbraces songbrackets','beginsong',3,'p_beginsong','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',78), - ('songbrackets -> SONG_LOPTIONS dictionary SONG_ROPTIONS','songbrackets',3,'p_songbrackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',82), - ('songbrackets -> empty','songbrackets',1,'p_songbrackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',83), - ('songbraces -> SONG_LTITLE titles SONG_RTITLE','songbraces',3,'p_songbraces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',91), - ('songbraces -> empty','songbraces',1,'p_songbraces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',92), - ('dictionary -> identifier EQUAL braces dictionary_next','dictionary',4,'p_dictionary','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',100), - ('dictionary -> identifier EQUAL error dictionary_next','dictionary',4,'p_dictionary','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',101), - ('identifier -> CHARACTER 
identifier','identifier',2,'p_identifier','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',111), - ('identifier -> empty','identifier',1,'p_identifier','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',112), - ('separator -> SPACE','separator',1,'p_separator','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',120), - ('separator -> empty','separator',1,'p_separator','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',121), - ('dictionary_next -> separator COMMA separator dictionary','dictionary_next',4,'p_dictonary_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',126), - ('dictionary_next -> empty','dictionary_next',1,'p_dictonary_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',127), - ('titles -> title titles_next','titles',2,'p_titles','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',135), - ('titles_next -> NEWLINE title titles_next','titles_next',3,'p_titles_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',139), - ('titles_next -> empty','titles_next',1,'p_titles_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',140), - ('title -> brackets title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',148), - ('title -> braces title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',149), - ('title -> command title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',150), - ('title -> word title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',151), - ('title -> SPACE title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',152), - ('title -> empty','title',1,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',153), -] diff --git a/patacrep/latex/syntax.py b/patacrep/latex/syntax.py index f161b19c..abd71903 
100644 --- a/patacrep/latex/syntax.py +++ b/patacrep/latex/syntax.py @@ -1,11 +1,28 @@ +"""Very simple LaTeX parser""" + +import logging import ply.yacc as yacc -import inspect # TODO supprimer from patacrep.latex.lexer import tokens, SimpleLexer, SongLexer from patacrep.latex import ast +from patacrep.errors import SongbookError from patacrep.latex.detex import detex +LOGGER = logging.getLogger() + +class ParsingError(SongbookError): + """Parsing error.""" + + def __init__(self, message): + super().__init__(self) + self.message = message + + def __str__(self): + return self.message + +# pylint: disable=line-too-long class Parser: + """LaTeX parser.""" def __init__(self, filename=None): self.tokens = tokens @@ -13,22 +30,26 @@ class Parser: self.ast.init_metadata() self.filename = filename - def __find_column(self, token): - last_cr = token.lexer.lexdata.rfind('\n',0,token.lexpos) + @staticmethod + def __find_column(token): + """Return the column of ``token``.""" + last_cr = token.lexer.lexdata.rfind('\n', 0, token.lexpos) if last_cr < 0: last_cr = 0 column = (token.lexpos - last_cr) + 1 return column - def p_error(self, p): - print("Erreur fichier {}, ligne {}, position {}.".format( # TODO + def p_error(self, token): + """Manage parsing errors.""" + LOGGER.error("Erreur fichier {}, ligne {}, position {}.".format( str(self.filename), - p.lineno, - self.__find_column(p), + token.lineno, + self.__find_column(token), ) ) - def p_expression(self, p): + @staticmethod + def p_expression(symbols): """expression : brackets expression | braces expression | command expression @@ -38,138 +59,155 @@ class Parser: | SPACE expression | empty """ - if len(p) == 3: - if p[2] is None: - p[0] = ast.Expression(p[1]) + if len(symbols) == 3: + if symbols[2] is None: + symbols[0] = ast.Expression(symbols[1]) else: - p[0] = p[2].prepend(p[1]) + symbols[0] = symbols[2].prepend(symbols[1]) else: - p[0] = None + symbols[0] = None - def p_empty(self, p): + @staticmethod + def 
p_empty(__symbols): """empty :""" return None - def p_brackets(self, p): + @staticmethod + def p_brackets(symbols): """brackets : LBRACKET expression RBRACKET""" - p[0] = p[2] + symbols[0] = symbols[2] - def p_braces(self, p): + @staticmethod + def p_braces(symbols): """braces : LBRACE expression RBRACE""" - p[0] = p[2] + symbols[0] = symbols[2] - def p_command(self, p): + @staticmethod + def p_command(symbols): """command : COMMAND brackets_list braces_list""" - p[0] = ast.Command(p[1], p[2], p[3]) + symbols[0] = ast.Command(symbols[1], symbols[2], symbols[3]) - def p_brackets_list(self, p): + @staticmethod + def p_brackets_list(symbols): """brackets_list : brackets brackets_list | empty """ - if len(p) == 3: - p[0] = p[2] - p[0].insert(0, p[1]) + if len(symbols) == 3: + symbols[0] = symbols[2] + symbols[0].insert(0, symbols[1]) else: - p[0] = [] + symbols[0] = [] - def p_braces_list(self, p): + @staticmethod + def p_braces_list(symbols): """braces_list : braces braces_list | empty """ - if len(p) == 3: - p[0] = p[2] - p[0].insert(0, p[1]) + if len(symbols) == 3: + symbols[0] = symbols[2] + symbols[0].insert(0, symbols[1]) else: - p[0] = [] + symbols[0] = [] - def p_word(self, p): + @staticmethod + def p_word(symbols): """word : CHARACTER word_next | COMMA word_next | EQUAL word_next """ - p[0] = p[1] + p[2] + symbols[0] = symbols[1] + symbols[2] - def p_word_next(self, p): + @staticmethod + def p_word_next(symbols): """word_next : CHARACTER word_next | empty """ - if len(p) == 2: - p[0] = "" + if len(symbols) == 2: + symbols[0] = "" else: - p[0] = p[1] + p[2] + symbols[0] = symbols[1] + symbols[2] - def p_beginsong(self, p): + def p_beginsong(self, symbols): """beginsong : BEGINSONG separator songbraces separator songbrackets""" - self.ast.metadata["@titles"] = p[3] - self.ast.metadata.update(p[5]) + self.ast.metadata["@titles"] = symbols[3] + self.ast.metadata.update(symbols[5]) - def p_songbrackets(self, p): + @staticmethod + def p_songbrackets(symbols): 
"""songbrackets : SONG_LOPTIONS separator dictionary separator SONG_ROPTIONS | empty """ - if len(p) == 6: - p[0] = p[3] + if len(symbols) == 6: + symbols[0] = symbols[3] else: - p[0] = {} + symbols[0] = {} - def p_songbraces(self, p): + @staticmethod + def p_songbraces(symbols): """songbraces : SONG_LTITLE separator titles separator SONG_RTITLE | empty """ - if len(p) == 6: - p[0] = p[3] + if len(symbols) == 6: + symbols[0] = symbols[3] else: - p[0] = [] + symbols[0] = [] - def p_dictionary(self, p): + @staticmethod + def p_dictionary(symbols): """dictionary : identifier EQUAL braces dictionary_next | identifier EQUAL error dictionary_next """ - if isinstance(p[3], ast.Expression): - p[0] = {} - p[0][p[1]] = p[3] - p[0].update(p[4]) + if isinstance(symbols[3], ast.Expression): + symbols[0] = {} + symbols[0][symbols[1]] = symbols[3] + symbols[0].update(symbols[4]) else: - raise Exception("Do enclose arguments between braces.") # TODO + raise ParsingError("Do enclose arguments between braces.") - def p_identifier(self, p): + @staticmethod + def p_identifier(symbols): """identifier : CHARACTER identifier | empty """ - if len(p) == 2: - p[0] = "" + if len(symbols) == 2: + symbols[0] = "" else: - p[0] = p[1] + p[2] + symbols[0] = symbols[1] + symbols[2] - def p_separator(self, p): + @staticmethod + def p_separator(symbols): """separator : SPACE | empty """ - p[0] = None + symbols[0] = None - def p_dictonary_next(self, p): + @staticmethod + def p_dictonary_next(symbols): """dictionary_next : separator COMMA separator dictionary | empty """ - if len(p) == 5: - p[0] = p[4] + if len(symbols) == 5: + symbols[0] = symbols[4] else: - p[0] = {} + symbols[0] = {} - def p_titles(self, p): + @staticmethod + def p_titles(symbols): """titles : title titles_next""" - p[0] = [p[1]] + p[2] + symbols[0] = [symbols[1]] + symbols[2] - def p_titles_next(self, p): + @staticmethod + def p_titles_next(symbols): """titles_next : NEWLINE title titles_next | empty """ - if len(p) == 2: - p[0] = 
[] + if len(symbols) == 2: + symbols[0] = [] else: - p[0] = [p[2]] + p[3] + symbols[0] = [symbols[2]] + symbols[3] - def p_title(self, p): + @staticmethod + def p_title(symbols): """title : brackets title | braces title | command title @@ -177,18 +215,30 @@ class Parser: | SPACE title | empty """ - if len(p) == 2: - p[0] = None + if len(symbols) == 2: + symbols[0] = None else: - if p[2] is None: - p[0] = ast.Expression(p[1]) + if symbols[2] is None: + symbols[0] = ast.Expression(symbols[1]) else: - p[0] = p[2].prepend(p[1]) + symbols[0] = symbols[2].prepend(symbols[1]) def tex2plain(string): - return detex(yacc.yacc(module = Parser()).parse(string, lexer = SimpleLexer().lexer)) + """Parse string and return its plain text version.""" + return detex( + yacc.yacc(module=Parser()).parse( + string, + lexer=SimpleLexer().lexer, + ) + ) def parsesong(string, filename=None): - return detex(yacc.yacc(module = Parser(filename)).parse(string, lexer = SongLexer().lexer).metadata) + """Parse song and return its metadata.""" + return detex( + yacc.yacc(module=Parser(filename)).parse( + string, + lexer=SongLexer().lexer, + ).metadata + ) diff --git a/patacrep/latex/testing.py b/patacrep/latex/testing.py deleted file mode 100644 index f2d66510..00000000 --- a/patacrep/latex/testing.py +++ /dev/null @@ -1,50 +0,0 @@ - -# Test it out -song = r""" -\selectlanguage{french} -plop = tag -% Un commentaire -\columns{3} -\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre}[album={Tagada tsoin \"itsoin}, cov={pouf.png}] % un autre -%\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre}[album={Tagada tsoin \"itsoin}] % un autre -%\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre} - - Dans [Dm6]cette ruedots [E7] -""" - -isong = r""" -\selectlanguage{french} -\songcolumns{2} -\beginsong{Tous les bateaux, tous les oiseaux} - [by={Michel Polnareff},cov={passe-present},album={Passé Présent}] - - Dans \[Dm6]cette ruedots [E7] -""" - -tex = 
"D\\^iacritiqu\\'Es" - -# Give the lexer some input -#if 0: -# from syntax import parser -# print(parser.parse(data, debug=0)) -# print(parser.parse(data).song_data()) -#else: -# from lexer import SimpleLexer -# lexer.input(data) -# for tok in lexer: -# print(tok) - -from patacrep.latex import tex2plain -from patacrep.latex.syntax import parsesong -from patacrep.latex.ast import AST - -print(tex2plain(tex) == "DîacritiquÉs") -print(parsesong(song, AST)) -print({ - "@titles": ["Titre un", "Titre deux", "Tître trois", "Tpitre quatre"], - "@languages": set(["french"]), - "@path": "TODO", - "album": "Tagada tsoin ïtsoin", - "cov": "pouf.png", - } - ) From 22aaf29e8cc8240869b7566c65c9d627bab81d14 Mon Sep 17 00:00:00 2001 From: Louis Date: Wed, 1 Oct 2014 10:34:04 +0200 Subject: [PATCH 06/26] Using logging module --- songbook | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/songbook b/songbook index 1bd48f92..06b79bba 100755 --- a/songbook +++ b/songbook @@ -92,7 +92,7 @@ def main(): locale.setlocale(locale.LC_ALL, '') except locale.Error as error: # Locale is not installed on user's system, or wrongly configured. - sys.stderr.write("Locale error: {}\n".format(str(error))) + LOGGER.error("Locale error: {}\n".format(str(error))) options = argument_parser(sys.argv[1:]) From 58f13856856a35fa66f2237d24f7e2b337fdc65d Mon Sep 17 00:00:00 2001 From: Louis Date: Wed, 1 Oct 2014 10:34:16 +0200 Subject: [PATCH 07/26] Handling user abort --- songbook | 3 +++ 1 file changed, 3 insertions(+) diff --git a/songbook b/songbook index 06b79bba..e3bcf555 100755 --- a/songbook +++ b/songbook @@ -145,6 +145,9 @@ def main(): "Running again with option '-v' may give more information." 
) sys.exit(1) + except KeyboardInterrupt: + LOGGER.warning("Aborted by user.") + sys.exit(1) sys.exit(0) From 7895acad0f3099121af9fe2dc9eb4ee9cdf1cf02 Mon Sep 17 00:00:00 2001 From: Louis Date: Wed, 1 Oct 2014 10:58:20 +0200 Subject: [PATCH 08/26] Deleting output and temporary files generated by yacc --- patacrep/latex/syntax.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/patacrep/latex/syntax.py b/patacrep/latex/syntax.py index abd71903..fac078dd 100644 --- a/patacrep/latex/syntax.py +++ b/patacrep/latex/syntax.py @@ -227,11 +227,15 @@ class Parser: def tex2plain(string): """Parse string and return its plain text version.""" return detex( - yacc.yacc(module=Parser()).parse( - string, - lexer=SimpleLexer().lexer, + yacc.yacc( + write_tables=0, + debug=0, + module=Parser(), + ).parse( + string, + lexer=SimpleLexer().lexer, + ) ) - ) def parsesong(string, filename=None): """Parse song and return its metadata.""" From d34441fbae3523fb558b6a891e389f4e15552eb6 Mon Sep 17 00:00:00 2001 From: Louis Date: Wed, 1 Oct 2014 11:12:47 +0200 Subject: [PATCH 09/26] Added intersong example --- patacrep/data/examples/example-all.sb | 4 +++- patacrep/data/examples/songs/intersong.is | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 patacrep/data/examples/songs/intersong.is diff --git a/patacrep/data/examples/example-all.sb b/patacrep/data/examples/example-all.sb index 327ed801..0b1252aa 100644 --- a/patacrep/data/examples/example-all.sb +++ b/patacrep/data/examples/example-all.sb @@ -10,5 +10,7 @@ "authwords" : { "sep" : ["and", "et"] }, - "datadir" : "." 
+ "datadir" : ".", + "content": [["sorted"]] + } diff --git a/patacrep/data/examples/songs/intersong.is b/patacrep/data/examples/songs/intersong.is new file mode 100644 index 00000000..dbeeb547 --- /dev/null +++ b/patacrep/data/examples/songs/intersong.is @@ -0,0 +1,6 @@ +\selectlanguage{french} +\sortassong{}[by={QQ}] +\begin{intersong} + +Lorem ipsum +\end{intersong} From 04f2554759e0eb5e30645cdc8c89649922981699 Mon Sep 17 00:00:00 2001 From: Louis Date: Wed, 1 Oct 2014 13:16:58 +0200 Subject: [PATCH 10/26] Simplification --- patacrep/latex/__init__.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/patacrep/latex/__init__.py b/patacrep/latex/__init__.py index 46ac9d10..fce5470f 100644 --- a/patacrep/latex/__init__.py +++ b/patacrep/latex/__init__.py @@ -7,18 +7,10 @@ This module uses an LALR parser to try to parse LaTeX code. LaTeX language will work on simple cases, but not on complex ones. """ -from patacrep.latex.syntax import tex2plain as syntax_tex2plain +from patacrep.latex.syntax import tex2plain from patacrep.latex.syntax import parsesong as syntax_parsesong -from patacrep.latex.detex import detex from patacrep import encoding -def tex2plain(string): - """Render LaTeX string - - Very few commands (mostly diacritics) are interpreted. - """ - return syntax_tex2plain(string) - def parsesong(path): """Return a dictonary of data read from the latex file `path`. 
From 9bc740e096d23338447aad5a58677487696fc632 Mon Sep 17 00:00:00 2001 From: Luthaf Date: Wed, 1 Oct 2014 15:34:42 +0200 Subject: [PATCH 11/26] Update setup.py --- setup.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 0f616b30..74be3198 100755 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ SETUP = {"name": 'patacrep', "author": 'The Songbook team', "author_email": 'crep@team-on-fire.com', "url": 'https://github.com/patacrep/patacrep', - "packages": ['patacrep', 'patacrep.content'], + "packages": ['patacrep', 'patacrep.content', 'patacrep.latex'], "license": "GPLv2 or any later version", "scripts": ['songbook'], "requires": [ @@ -28,7 +28,7 @@ SETUP = {"name": 'patacrep', "textwrap", "unidecode", "jinja2", "chardet" ], "install_requires": [ - "argparse", "unidecode", "jinja2", "chardet" + "argparse", "unidecode", "jinja2", "chardet", "ply" ], "package_data": {'patacrep': [ 'data/latex/*', 'data/templates/*', @@ -47,6 +47,8 @@ SETUP = {"name": 'patacrep', "Operating System :: Microsoft :: Windows", "Operating System :: MacOS :: MacOS X", "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3.3", + "Programming Language :: Python :: 3.4", "Topic :: Utilities", ], "platforms": ["GNU/Linux", "Windows", "MacOsX"] From a17519bbea0677db8e085ae7c094d3d1aecda237 Mon Sep 17 00:00:00 2001 From: Luthaf Date: Wed, 1 Oct 2014 15:34:53 +0200 Subject: [PATCH 12/26] Start Python2 compatibility mode --- patacrep/latex/detex.py | 1 + setup.py | 2 +- songbook | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/patacrep/latex/detex.py b/patacrep/latex/detex.py index ebfd721b..d99c3467 100644 --- a/patacrep/latex/detex.py +++ b/patacrep/latex/detex.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- """Render `very simple` TeX commands in a simple TeX code.""" import logging diff --git a/setup.py b/setup.py index 74be3198..7810c5ca 100755 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -#!/usr/bin/env 
python3 +#!/usr/bin/env python """Installation script for songbook. diff --git a/songbook b/songbook index e3bcf555..f8fbd2a8 100755 --- a/songbook +++ b/songbook @@ -1,4 +1,4 @@ -#! /usr/bin/env python3 +#! /usr/bin/env python # -*- coding: utf-8 -*- """Command line tool to compile songbooks using the songbook library.""" From b8ff69af51aeff396772bb1390a522425732e085 Mon Sep 17 00:00:00 2001 From: Louis Date: Wed, 1 Oct 2014 17:31:45 +0200 Subject: [PATCH 13/26] =?UTF-8?q?Utilisation=20de=20python3=20par=20d?= =?UTF-8?q?=C3=A9fault?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- songbook | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/songbook b/songbook index f8fbd2a8..e3bcf555 100755 --- a/songbook +++ b/songbook @@ -1,4 +1,4 @@ -#! /usr/bin/env python +#! /usr/bin/env python3 # -*- coding: utf-8 -*- """Command line tool to compile songbooks using the songbook library.""" From a1ea7db0598ad24463c365b8623558499bcde5d4 Mon Sep 17 00:00:00 2001 From: Luthaf Date: Fri, 17 Oct 2014 17:33:25 +0200 Subject: [PATCH 14/26] English error message --- patacrep/latex/syntax.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/patacrep/latex/syntax.py b/patacrep/latex/syntax.py index fac078dd..d989fd3a 100644 --- a/patacrep/latex/syntax.py +++ b/patacrep/latex/syntax.py @@ -41,7 +41,7 @@ class Parser: def p_error(self, token): """Manage parsing errors.""" - LOGGER.error("Erreur fichier {}, ligne {}, position {}.".format( + LOGGER.error("Error in file {}, line {} at position {}.".format( str(self.filename), token.lineno, self.__find_column(token), From 4ba66ba1ae6b1745c0f625a27e536a0139f1a755 Mon Sep 17 00:00:00 2001 From: Luthaf Date: Fri, 17 Oct 2014 17:34:07 +0200 Subject: [PATCH 15/26] UTF-8 everywhere, python2 legacy mode --- patacrep/latex/ast.py | 1 + patacrep/latex/lexer.py | 1 + 2 files changed, 2 insertions(+) diff --git a/patacrep/latex/ast.py b/patacrep/latex/ast.py index 
798d4c33..f8112525 100644 --- a/patacrep/latex/ast.py +++ b/patacrep/latex/ast.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- """Abstract Syntax Tree for LaTeX code.""" # pylint: disable=too-few-public-methods diff --git a/patacrep/latex/lexer.py b/patacrep/latex/lexer.py index 8954e14c..c90303da 100644 --- a/patacrep/latex/lexer.py +++ b/patacrep/latex/lexer.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- """Very simple LaTeX lexer.""" import logging From fe2d2da9584eb4e89f896b64ec0aa87f3f5e25eb Mon Sep 17 00:00:00 2001 From: Louis Date: Sat, 18 Oct 2014 18:39:55 +0200 Subject: [PATCH 16/26] Setting up file type plugins. --- patacrep/build.py | 21 ++++- patacrep/content/__init__.py | 51 +----------- patacrep/content/song.py | 61 +++------------ patacrep/files.py | 78 +++++++++++++++++-- patacrep/{songs.py => songs/__init__.py} | 98 ++++++++++++++++++++---- patacrep/songs/tex.py | 32 ++++++++ 6 files changed, 219 insertions(+), 122 deletions(-) rename patacrep/{songs.py => songs/__init__.py} (62%) create mode 100644 patacrep/songs/tex.py diff --git a/patacrep/build.py b/patacrep/build.py index 212305ca..be9b48f4 100644 --- a/patacrep/build.py +++ b/patacrep/build.py @@ -9,7 +9,7 @@ import logging import os.path from subprocess import Popen, PIPE, call -from patacrep import __DATADIR__, authors, content, errors +from patacrep import __DATADIR__, authors, content, errors, files from patacrep.index import process_sxd from patacrep.templates import TexRenderer from patacrep.songs import DataSubpath @@ -99,8 +99,25 @@ class Songbook(object): copy.deepcopy(config['authwords']) ) - # Configuration set + # Loading custom plugins + config['_content_plugins'] = files.load_plugins( + datadirs=config.get('datadir', []), + subdir=['content'], + variable='CONTENT_PLUGINS', + error=( + "File {filename}: Keyword '{keyword}' is already used. Ignored." 
+ ), + ) + config['_file_plugins'] = files.load_plugins( + datadirs=config.get('datadir', []), + subdir=['songs'], + variable='FILE_PLUGINS', + error=( + "File {filename}: Keyword '{keyword}' is already used. Ignored." + ), + ) + # Configuration set config['render_content'] = content.render_content config['content'] = content.process_content( config.get('content', []), diff --git a/patacrep/content/__init__.py b/patacrep/content/__init__.py index a53d4d7e..5214a55b 100755 --- a/patacrep/content/__init__.py +++ b/patacrep/content/__init__.py @@ -69,7 +69,6 @@ More documentation in the docstring of Content. """ import glob -import importlib import jinja2 import logging import os @@ -134,53 +133,6 @@ class ContentError(SongbookError): def __str__(self): return "Content: {}: {}".format(self.keyword, self.message) -def load_plugins(config): - """Load all content plugins, and return a dictionary of those plugins. - - Return value: a dictionary where: - - keys are the keywords ; - - values are functions triggered when this keyword is met. - """ - plugins = {} - directory_list = ( - [ - os.path.join(datadir, "python", "content") - for datadir in config.get('datadir', []) - ] - + [os.path.dirname(__file__)] - ) - for directory in directory_list: - if not os.path.exists(directory): - LOGGER.debug( - "Ignoring non-existent directory '%s'.", - directory - ) - continue - sys.path.append(directory) - for name in glob.glob(os.path.join(directory, '*.py')): - if name.endswith(".py") and os.path.basename(name) != "__init__.py": - if directory == os.path.dirname(__file__): - plugin = importlib.import_module( - 'patacrep.content.{}'.format( - os.path.basename(name[:-len('.py')]) - ) - ) - else: - plugin = importlib.import_module( - os.path.basename(name[:-len('.py')]) - ) - for (key, value) in plugin.CONTENT_PLUGINS.items(): - if key in plugins: - LOGGER.warning( - "File %s: Keyword '%s' is already used. 
Ignored.", - files.relpath(name), - key, - ) - continue - plugins[key] = value - del sys.path[-1] - return plugins - @jinja2.contextfunction def render_content(context, content): """Render the content of the songbook as a LaTeX code. @@ -224,7 +176,8 @@ def process_content(content, config=None): included in the .tex file. """ contentlist = [] - plugins = load_plugins(config) + plugins = config.get('_content_plugins', {}) + keyword_re = re.compile(r'^ *(?P\w*) *(\((?P.*)\))? *$') if not content: content = [["song"]] diff --git a/patacrep/content/song.py b/patacrep/content/song.py index 02acf463..50cb7349 100755 --- a/patacrep/content/song.py +++ b/patacrep/content/song.py @@ -4,42 +4,15 @@ """Plugin to include songs to the songbook.""" import glob -import jinja2 import logging import os -from patacrep.content import Content, process_content, ContentError +from patacrep.content import process_content, ContentError from patacrep import files, errors from patacrep.songs import Song LOGGER = logging.getLogger(__name__) -class SongRenderer(Content, Song): - """Render a song in the .tex file.""" - - def begin_new_block(self, previous, __context): - """Return a boolean stating if a new block is to be created.""" - return not isinstance(previous, SongRenderer) - - def begin_block(self, context): - """Return the string to begin a block.""" - indexes = context.resolve("indexes") - if isinstance(indexes, jinja2.runtime.Undefined): - indexes = "" - return r'\begin{songs}{%s}' % indexes - - def end_block(self, __context): - """Return the string to end a block.""" - return r'\end{songs}' - - def render(self, context): - """Return the string that will render the song.""" - return r'\input{{{}}}'.format(files.path2posix( - files.relpath( - self.fullpath, - os.path.dirname(context['filename']) - ))) - #pylint: disable=unused-argument def parse(keyword, argument, contentlist, config): """Parse data associated with keyword 'song'. 
@@ -51,22 +24,17 @@ def parse(keyword, argument, contentlist, config): expressions (interpreted using the glob module), referring to songs. - config: the current songbook configuration dictionary. - Return a list of SongRenderer() instances. + Return a list of Song() instances. """ if '_languages' not in config: config['_languages'] = set() songlist = [] + plugins = config.get('_file_plugins', {}) for songdir in config['_songdir']: if contentlist: break - contentlist = [ - filename - for filename - in ( - files.recursive_find(songdir.fullpath, "*.sg") - + files.recursive_find(songdir.fullpath, "*.is") - ) - ] + contentlist = files.recursive_find(songdir.fullpath, plugins.keys()) + for elem in contentlist: before = len(songlist) for songdir in config['_songdir']: @@ -74,21 +42,16 @@ def parse(keyword, argument, contentlist, config): continue with files.chdir(songdir.datadir): for filename in glob.iglob(os.path.join(songdir.subpath, elem)): - if not ( - filename.endswith('.sg') or - filename.endswith('.is') - ): + LOGGER.debug('Parsing file "{}"…'.format(filename)) + try: + renderer = plugins[filename.split('.')[-1]] + except KeyError: LOGGER.warning(( - 'File "{}" is not a ".sg" or ".is" file. Ignored.' + 'I do not know how to parse file "{}". Ignored.' 
).format(os.path.join(songdir.datadir, filename)) ) continue - LOGGER.debug('Parsing file "{}"…'.format(filename)) - song = SongRenderer( - songdir.datadir, - filename, - config, - ) + song = renderer(songdir.datadir, filename, config) songlist.append(song) config["_languages"].update(song.languages) if len(songlist) > before: @@ -129,7 +92,7 @@ def process_songs(content, config=None): item for item in contentlist - if not isinstance(item, SongRenderer) + if not isinstance(item, Song) ] if not_songs: raise OnlySongsError(not_songs) diff --git a/patacrep/files.py b/patacrep/files.py index 6e02d481..51737c23 100644 --- a/patacrep/files.py +++ b/patacrep/files.py @@ -2,23 +2,31 @@ """File system utilities.""" from contextlib import contextmanager -import fnmatch +import glob +import importlib +import logging import os import posixpath +import re +import sys -def recursive_find(root_directory, pattern): +LOGGER = logging.getLogger(__name__) + +def recursive_find(root_directory, extensions): """Recursively find files matching a pattern, from a root_directory. - Return a list of files matching the pattern. + Return a list of files matching the pattern. TODO """ if not os.path.isdir(root_directory): return [] matches = [] + pattern = re.compile(r'.*\.({})$'.format('|'.join(extensions))) with chdir(root_directory): for root, __ignored, filenames in os.walk(os.curdir): - for filename in fnmatch.filter(filenames, pattern): - matches.append(os.path.join(root, filename)) + for filename in filenames: + if pattern.match(filename): + matches.append(os.path.join(root, filename)) return matches def relpath(path, start=None): @@ -59,3 +67,63 @@ def chdir(path): os.chdir(olddir) else: yield + +def load_plugins(datadirs, subdir, variable, error): + """Load all content plugins, and return a dictionary of those plugins. + + A plugin is a .py file, submodule of `subdir`, located in one of the + directories of `datadirs`. It contains a dictionary `variable`. 
The return + value is the union of the dictionaries of the loaded plugins. + + Arguments: + - datadirs: list of directories (as strings) in which files has to be + searched. + - subdir: modules (as a list of strings) files has to be submodules of + (e.g. if `subdir` is `['first', 'second']`, search files are of the form + `first/second/*.py`. + - variable: Name of the variable holding the dictionary. + - error: Error message raised if a key appears several times. + """ + plugins = {} + directory_list = ( + [ + os.path.join(datadir, "python", *subdir) #pylint: disable=star-args + for datadir in datadirs + ] + + [os.path.dirname(__file__)] + ) + for directory in directory_list: + if not os.path.exists(directory): + LOGGER.debug( + "Ignoring non-existent directory '%s'.", + directory + ) + continue + sys.path.append(directory) + for name in glob.glob(os.path.join(directory, *(subdir + ['*.py']))): + if name.endswith(".py") and os.path.basename(name) != "__init__.py": + if directory == os.path.dirname(__file__): + plugin = importlib.import_module( + 'patacrep.{}.{}'.format( + ".".join(subdir), + os.path.basename(name[:-len('.py')]) + ) + ) + else: + plugin = importlib.import_module( + os.path.basename(name[:-len('.py')]) + ) + for (key, value) in getattr(plugin, variable, {}).items(): + if key in plugins: + LOGGER.warning( + error.format( + filename=relpath(name), + key=key, + ) + ) + continue + plugins[key] = value + del sys.path[-1] + return plugins + + diff --git a/patacrep/songs.py b/patacrep/songs/__init__.py similarity index 62% rename from patacrep/songs.py rename to patacrep/songs/__init__.py index 43e70fe7..a4a6b089 100644 --- a/patacrep/songs.py +++ b/patacrep/songs/__init__.py @@ -4,13 +4,14 @@ import errno import hashlib +import jinja2 import logging import os import pickle import re from patacrep.authors import processauthors -from patacrep.latex import parsesong +from patacrep.content import Content LOGGER = logging.getLogger(__name__) @@ -62,18 +63,32 @@ 
class DataSubpath(object): self.subpath = os.path.join(self.subpath, path) return self -# pylint: disable=too-few-public-methods, too-many-instance-attributes -class Song(object): - """Song management""" +# pylint: disable=too-many-instance-attributes +class Song(Content): + """Song (or song metadata) + + This class represents a song, bound to a file. + + - It can parse the file given in arguments. + - It can render the song as some LaTeX code. + - Its content is cached, so that if the file has not been changed, the + file is not parsed again. + + This class is inherited by classes implementing song management for + several file formats. Those subclasses must implement: + - `parse()` to parse the file; + - `render()` to render the song as LaTeX code. + """ # Version format of cached song. Increment this number if we update # information stored in cache. - CACHE_VERSION = 0 + CACHE_VERSION = 1 # List of attributes to cache cached_attributes = [ "titles", "unprefixed_titles", + "cached", "data", "datadir", "fullpath", @@ -109,10 +124,14 @@ class Song(object): self.fullpath )) - # Data extraction from the latex song - self.data = parsesong(self.fullpath) - self.titles = self.data['@titles'] - self.languages = self.data['@languages'] + # Default values + self.data = {} + self.titles = [] + self.languages = [] + self.authors = [] + + # Parsing and data processing + self.parse() self.datadir = datadir self.unprefixed_titles = [ unprefixed_title( @@ -123,13 +142,15 @@ class Song(object): in self.titles ] self.subpath = subpath - if "by" in self.data: - self.authors = processauthors( - self.data["by"], - **config["_compiled_authwords"] - ) - else: - self.authors = [] + self.authors = processauthors( + self.authors, + **config["_compiled_authwords"] + ) + + # Cache management + + #: Special attribute to allow plugins to store cached data + self.cached = None self._version = self.CACHE_VERSION self._write_cache() @@ -149,6 +170,50 @@ class Song(object): def __repr__(self): 
return repr((self.titles, self.data, self.fullpath)) + def begin_new_block(self, previous, __context): + """Return a boolean stating if a new block is to be created.""" + return not isinstance(previous, Song) + + def begin_block(self, context): + """Return the string to begin a block.""" + indexes = context.resolve("indexes") + if isinstance(indexes, jinja2.runtime.Undefined): + indexes = "" + return r'\begin{songs}{%s}' % indexes + + def end_block(self, __context): + """Return the string to end a block.""" + return r'\end{songs}' + + def render(self, __context): + """Returns the TeX code rendering the song. + + This function is to be defined by subclasses. + """ + return '' + + def parse(self): + """Parse file `self.fullpath`. + + This function is to be defined by subclasses. + + It set the following attributes: + + - titles: the list of (raw) titles. This list will be processed to + remove prefixes. + - languages: the list of languages used in the song, as languages + recognized by the LaTeX babel package. + - authors: the list of (raw) authors. This list will be processed to + 'clean' it (see function :func:`patacrep.authors.processauthors`). + - data: song metadata. Used (among others) to sort the songs. + - cached: additional data that will be cached. Thus, data stored in + this attribute must be picklable. + """ + self.data = {} + self.titles = [] + self.languages = [] + self.authors = [] + def unprefixed_title(title, prefixes): """Remove the first prefix of the list in the beginning of title (if any). 
""" @@ -158,4 +223,3 @@ def unprefixed_title(title, prefixes): return match.group(2) return title - diff --git a/patacrep/songs/tex.py b/patacrep/songs/tex.py new file mode 100644 index 00000000..b5710346 --- /dev/null +++ b/patacrep/songs/tex.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- + +"""Very simple LaTeX parsing.""" + +import os + +from patacrep import files +from patacrep.latex import parsesong +from patacrep.songs import Song + +class TexRenderer(Song): + """Renderer for song and intersong files.""" + + def parse(self): + """Parse song and set metadata.""" + self.data = parsesong(self.fullpath) + self.titles = self.data['@titles'] + self.languages = self.data['@languages'] + self.authors = self.data['by'] + + def render(self, context): + """Return the string that will render the song.""" + return r'\input{{{}}}'.format(files.path2posix( + files.relpath( + self.fullpath, + os.path.dirname(context['filename']) + ))) + +FILE_PLUGINS = { + 'sg': TexRenderer, + 'is': TexRenderer, + } From 9d5f3b270bf25167f2dd9180aae418bb0b185613 Mon Sep 17 00:00:00 2001 From: Louis Date: Sat, 18 Oct 2014 18:49:10 +0200 Subject: [PATCH 17/26] Documentation --- patacrep/files.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/patacrep/files.py b/patacrep/files.py index 51737c23..5c84e0b0 100644 --- a/patacrep/files.py +++ b/patacrep/files.py @@ -13,9 +13,13 @@ import sys LOGGER = logging.getLogger(__name__) def recursive_find(root_directory, extensions): - """Recursively find files matching a pattern, from a root_directory. + """Recursively find files with some extension, from a root_directory. - Return a list of files matching the pattern. TODO + Return a list of files matching those conditions. + + Arguments: + - `extensions`: list of accepted extensions. + - `root_directory`: root directory of the search. 
""" if not os.path.isdir(root_directory): return [] From db94cf06a37cffe0bc75ccbec7704fce3f23cccd Mon Sep 17 00:00:00 2001 From: Louis Date: Wed, 5 Nov 2014 22:27:00 +0100 Subject: [PATCH 18/26] Indentation --- patacrep/latex/syntax.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/patacrep/latex/syntax.py b/patacrep/latex/syntax.py index d989fd3a..a1be7181 100644 --- a/patacrep/latex/syntax.py +++ b/patacrep/latex/syntax.py @@ -41,11 +41,12 @@ class Parser: def p_error(self, token): """Manage parsing errors.""" - LOGGER.error("Error in file {}, line {} at position {}.".format( - str(self.filename), - token.lineno, - self.__find_column(token), - ) + LOGGER.error( + "Error in file {}, line {} at position {}.".format( + str(self.filename), + token.lineno, + self.__find_column(token), + ) ) @staticmethod From 5084a0b41870d9813096a4ade3bda5e4c1111a09 Mon Sep 17 00:00:00 2001 From: Louis Date: Wed, 5 Nov 2014 22:27:10 +0100 Subject: [PATCH 19/26] Attempt to silence yacc Reference #65 --- patacrep/latex/syntax.py | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/patacrep/latex/syntax.py b/patacrep/latex/syntax.py index a1be7181..896d8442 100644 --- a/patacrep/latex/syntax.py +++ b/patacrep/latex/syntax.py @@ -224,26 +224,33 @@ class Parser: else: symbols[0] = symbols[2].prepend(symbols[1]) +def silent_yacc(*args, **kwargs): + """Call yacc, suppressing (as far as possible) output and generated files. 
+ """ + return yacc.yacc( + write_tables=0, + debug=0, + *args, + **kwargs + ) def tex2plain(string): """Parse string and return its plain text version.""" return detex( - yacc.yacc( - write_tables=0, - debug=0, - module=Parser(), - ).parse( - string, - lexer=SimpleLexer().lexer, - ) + silent_yacc( + module=Parser(), + ).parse( + string, + lexer=SimpleLexer().lexer, ) + ) def parsesong(string, filename=None): """Parse song and return its metadata.""" return detex( - yacc.yacc(module=Parser(filename)).parse( - string, - lexer=SongLexer().lexer, - ).metadata - ) + silent_yacc(module=Parser(filename)).parse( + string, + lexer=SongLexer().lexer, + ).metadata + ) From 08aa70bd13da704ca5ab59ad056db29406a7cb70 Mon Sep 17 00:00:00 2001 From: Louis Date: Sun, 16 Nov 2014 17:38:13 +0100 Subject: [PATCH 20/26] Simplification de l'installateur --- MANIFEST.in | 2 + patacrep/songbook.py | 155 ++++++++++++++++++++++++++++++++++++++++++ readme.md | 7 -- setup.py | 91 +++++++++++-------------- songbook | 156 ++----------------------------------------- stdeb.cfg | 6 -- 6 files changed, 200 insertions(+), 217 deletions(-) create mode 100644 MANIFEST.in create mode 100755 patacrep/songbook.py delete mode 100644 stdeb.cfg diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..bca3a82b --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include LICENSE NEWS readme.md Requirements.txt stdeb.cfg +recursive-include patacrep/data * diff --git a/patacrep/songbook.py b/patacrep/songbook.py new file mode 100755 index 00000000..e3bcf555 --- /dev/null +++ b/patacrep/songbook.py @@ -0,0 +1,155 @@ +#! 
/usr/bin/env python3 +# -*- coding: utf-8 -*- + +"""Command line tool to compile songbooks using the songbook library.""" + +import argparse +import json +import locale +import logging +import os.path +import textwrap +import sys + +from patacrep.build import SongbookBuilder, DEFAULT_STEPS +from patacrep import __version__ +from patacrep import errors +from patacrep import encoding + +# Logging configuration +logging.basicConfig(level=logging.INFO) +LOGGER = logging.getLogger() + +# pylint: disable=too-few-public-methods +class ParseStepsAction(argparse.Action): + """Argparse action to split a string into a list.""" + def __call__(self, __parser, namespace, values, __option_string=None): + if not getattr(namespace, self.dest): + setattr(namespace, self.dest, []) + setattr( + namespace, + self.dest, + ( + getattr(namespace, self.dest) + + [value.strip() for value in values[0].split(',')] + ), + ) + +class VerboseAction(argparse.Action): + """Set verbosity level with option --verbose.""" + def __call__(self, *_args, **_kwargs): + LOGGER.setLevel(logging.DEBUG) + +def argument_parser(args): + """Parse arguments""" + parser = argparse.ArgumentParser(description="A song book compiler") + + parser.add_argument('--version', help='Show version', action='version', + version='%(prog)s ' + __version__) + + parser.add_argument('book', nargs=1, help=textwrap.dedent("""\ + Book to compile. + """)) + + parser.add_argument('--datadir', '-d', nargs='+', type=str, action='append', + help=textwrap.dedent("""\ + Data location. Expected (not necessarily required) + subdirectories are 'songs', 'img', 'latex', 'templates'. + """)) + + parser.add_argument('--verbose', '-v', nargs=0, action=VerboseAction, + help=textwrap.dedent("""\ + Show details about the compilation process. + """)) + + parser.add_argument('--steps', '-s', nargs=1, type=str, + action=ParseStepsAction, + help=textwrap.dedent("""\ + Steps to run. Default is "{steps}". 
+ Available steps are: + "tex" produce .tex file from templates; + "pdf" compile .tex file; + "sbx" compile index files; + "clean" remove temporary files; + any string beginning with '%%' (in this case, it will be run + in a shell). Several steps (excepted the custom shell + command) can be combinend in one --steps argument, as a + comma separated string. + """.format(steps=','.join(DEFAULT_STEPS))), + default=None, + ) + + options = parser.parse_args(args) + + return options + + +def main(): + """Main function:""" + + # set script locale to match user's + try: + locale.setlocale(locale.LC_ALL, '') + except locale.Error as error: + # Locale is not installed on user's system, or wrongly configured. + LOGGER.error("Locale error: {}\n".format(str(error))) + + options = argument_parser(sys.argv[1:]) + + songbook_path = options.book[0] + + basename = os.path.basename(songbook_path)[:-3] + + songbook_file = None + try: + songbook_file = encoding.open_read(songbook_path) + songbook = json.load(songbook_file) + except Exception as error: # pylint: disable=broad-except + LOGGER.error(error) + LOGGER.error("Error while loading file '{}'.".format(songbook_path)) + sys.exit(1) + finally: + if songbook_file: + songbook_file.close() + + # Gathering datadirs + datadirs = [] + if options.datadir: + # Command line options + datadirs += [item[0] for item in options.datadir] + if 'datadir' in songbook: + # .sg file + if isinstance(songbook['datadir'], str): + songbook['datadir'] = [songbook['datadir']] + datadirs += [ + os.path.join( + os.path.dirname(os.path.abspath(songbook_path)), + path + ) + for path in songbook['datadir'] + ] + # Default value + datadirs.append(os.path.dirname(os.path.abspath(songbook_path))) + + songbook['datadir'] = datadirs + + try: + sb_builder = SongbookBuilder(songbook, basename) + sb_builder.unsafe = True + + sb_builder.build_steps(options.steps) + except errors.SongbookError as error: + LOGGER.error(error) + if LOGGER.level >= logging.INFO: + 
LOGGER.error( + "Running again with option '-v' may give more information." + ) + sys.exit(1) + except KeyboardInterrupt: + LOGGER.warning("Aborted by user.") + sys.exit(1) + + sys.exit(0) + +if __name__ == '__main__': + main() diff --git a/readme.md b/readme.md index ed4b1860..c97fef12 100644 --- a/readme.md +++ b/readme.md @@ -38,13 +38,6 @@ Look for existing songbook files in `/books/`. For example: > /songbook /books/songbook_en.sb > songbook_en.pdf -# Quick and dirty deb packages - -Install `python3-stdeb`, then: - -> python3 setup.py --command-packages=stdeb.command bdist_deb -> sudo dpkg -i deb_dist/python3-patacrep_-1_all.deb - # Documentation - Compiled, but may be outdated: http://www.patacrep.com/data/documents/doc_en.pdf diff --git a/setup.py b/setup.py index 7810c5ca..34705187 100755 --- a/setup.py +++ b/setup.py @@ -6,57 +6,42 @@ $ python setup.py install """ from patacrep import __version__ -from setuptools import setup - -import sys -import os -import site - - -SETUP = {"name": 'patacrep', - "version": __version__, - "description": 'Songbook compilation chain', - "author": 'The Songbook team', - "author_email": 'crep@team-on-fire.com', - "url": 'https://github.com/patacrep/patacrep', - "packages": ['patacrep', 'patacrep.content', 'patacrep.latex'], - "license": "GPLv2 or any later version", - "scripts": ['songbook'], - "requires": [ - "argparse", "codecs", "distutils", "fnmatch", "glob", "json", - "locale", "logging", "os", "re", "subprocess", "sys", - "textwrap", "unidecode", "jinja2", "chardet" - ], - "install_requires": [ - "argparse", "unidecode", "jinja2", "chardet", "ply" +from setuptools import setup, find_packages + +setup( + name='patacrep', + version=__version__, + description='Songbook compilation chain', + author='The Songbook team', + author_email='crep@team-on-fire.com', + url='https://github.com/patacrep/patacrep', + packages=find_packages(), + license="GPLv2 or any later version", + requires=[ + "argparse", "codecs", "distutils", 
"fnmatch", "glob", "json", + "locale", "logging", "os", "re", "subprocess", "sys", + "textwrap", "unidecode", "jinja2", "chardet" + ], + install_requires=[ + "argparse", "unidecode", "jinja2", "chardet", "ply" + ], + include_package_data=True, + entry_points={ + 'console_scripts': [ + "songbook = patacrep.songbook:main", ], - "package_data": {'patacrep': [ 'data/latex/*', - 'data/templates/*', - 'data/examples/*.sb', - 'data/examples/*/*.sg', - 'data/examples/*/*.ly', - 'data/examples/*/*.jpg', - 'data/examples/*/*.png', - 'data/examples/*/*.png', - 'data/examples/*/*/header']}, - "classifiers": [ - "Environment :: Console", - "License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)", - "Natural Language :: English", - "Operating System :: POSIX :: Linux", - "Operating System :: Microsoft :: Windows", - "Operating System :: MacOS :: MacOS X", - "Programming Language :: Python :: 2.7", - "Programming Language :: Python :: 3.3", - "Programming Language :: Python :: 3.4", - "Topic :: Utilities", - ], - "platforms": ["GNU/Linux", "Windows", "MacOsX"] -} - -if sys.platform.startswith('win32'): - from shutil import copy - copy("songbook", "songbook.py") - SETUP["scripts"] = ['songbook.py'] - -setup(**SETUP) + }, + classifiers=[ + "Environment :: Console", + "License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)", + "Natural Language :: English", + "Operating System :: POSIX :: Linux", + "Operating System :: Microsoft :: Windows", + "Operating System :: MacOS :: MacOS X", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3.3", + "Programming Language :: Python :: 3.4", + "Topic :: Utilities", + ], + platforms=["GNU/Linux", "Windows", "MacOsX"] +) diff --git a/songbook b/songbook index e3bcf555..80e12f88 100755 --- a/songbook +++ b/songbook @@ -1,155 +1,9 @@ #! 
/usr/bin/env python3 -# -*- coding: utf-8 -*- -"""Command line tool to compile songbooks using the songbook library.""" - -import argparse -import json -import locale -import logging -import os.path -import textwrap -import sys - -from patacrep.build import SongbookBuilder, DEFAULT_STEPS -from patacrep import __version__ -from patacrep import errors -from patacrep import encoding - -# Logging configuration -logging.basicConfig(level=logging.INFO) -LOGGER = logging.getLogger() - -# pylint: disable=too-few-public-methods -class ParseStepsAction(argparse.Action): - """Argparse action to split a string into a list.""" - def __call__(self, __parser, namespace, values, __option_string=None): - if not getattr(namespace, self.dest): - setattr(namespace, self.dest, []) - setattr( - namespace, - self.dest, - ( - getattr(namespace, self.dest) - + [value.strip() for value in values[0].split(',')] - ), - ) - -class VerboseAction(argparse.Action): - """Set verbosity level with option --verbose.""" - def __call__(self, *_args, **_kwargs): - LOGGER.setLevel(logging.DEBUG) - -def argument_parser(args): - """Parse arguments""" - parser = argparse.ArgumentParser(description="A song book compiler") - - parser.add_argument('--version', help='Show version', action='version', - version='%(prog)s ' + __version__) - - parser.add_argument('book', nargs=1, help=textwrap.dedent("""\ - Book to compile. - """)) - - parser.add_argument('--datadir', '-d', nargs='+', type=str, action='append', - help=textwrap.dedent("""\ - Data location. Expected (not necessarily required) - subdirectories are 'songs', 'img', 'latex', 'templates'. - """)) - - parser.add_argument('--verbose', '-v', nargs=0, action=VerboseAction, - help=textwrap.dedent("""\ - Show details about the compilation process. - """)) - - parser.add_argument('--steps', '-s', nargs=1, type=str, - action=ParseStepsAction, - help=textwrap.dedent("""\ - Steps to run. Default is "{steps}". 
- Available steps are: - "tex" produce .tex file from templates; - "pdf" compile .tex file; - "sbx" compile index files; - "clean" remove temporary files; - any string beginning with '%%' (in this case, it will be run - in a shell). Several steps (excepted the custom shell - command) can be combinend in one --steps argument, as a - comma separated string. - """.format(steps=','.join(DEFAULT_STEPS))), - default=None, - ) - - options = parser.parse_args(args) +# Do not edit this file. This file is just a helper file for development test. +# It is not part of the distributed software. - return options - - -def main(): - """Main function:""" - - # set script locale to match user's - try: - locale.setlocale(locale.LC_ALL, '') - except locale.Error as error: - # Locale is not installed on user's system, or wrongly configured. - LOGGER.error("Locale error: {}\n".format(str(error))) - - options = argument_parser(sys.argv[1:]) - - songbook_path = options.book[0] - - basename = os.path.basename(songbook_path)[:-3] - - songbook_file = None - try: - songbook_file = encoding.open_read(songbook_path) - songbook = json.load(songbook_file) - except Exception as error: # pylint: disable=broad-except - LOGGER.error(error) - LOGGER.error("Error while loading file '{}'.".format(songbook_path)) - sys.exit(1) - finally: - if songbook_file: - songbook_file.close() - - # Gathering datadirs - datadirs = [] - if options.datadir: - # Command line options - datadirs += [item[0] for item in options.datadir] - if 'datadir' in songbook: - # .sg file - if isinstance(songbook['datadir'], str): - songbook['datadir'] = [songbook['datadir']] - datadirs += [ - os.path.join( - os.path.dirname(os.path.abspath(songbook_path)), - path - ) - for path in songbook['datadir'] - ] - # Default value - datadirs.append(os.path.dirname(os.path.abspath(songbook_path))) - - songbook['datadir'] = datadirs - - try: - sb_builder = SongbookBuilder(songbook, basename) - sb_builder.unsafe = True - - 
sb_builder.build_steps(options.steps) - except errors.SongbookError as error: - LOGGER.error(error) - if LOGGER.level >= logging.INFO: - LOGGER.error( - "Running again with option '-v' may give more information." - ) - sys.exit(1) - except KeyboardInterrupt: - LOGGER.warning("Aborted by user.") - sys.exit(1) - - sys.exit(0) +"""Command line tool to compile songbooks using the songbook library.""" -if __name__ == '__main__': - main() +from patacrep.songbook import main +main() diff --git a/stdeb.cfg b/stdeb.cfg deleted file mode 100644 index ed8a5bed..00000000 --- a/stdeb.cfg +++ /dev/null @@ -1,6 +0,0 @@ -[DEFAULT] -Depends: python3-jinja2, python3-pkg-resources, python3-chardet, python3-unidecode, texlive-latex-base, texlive-latex-recommended, texlive-latex-extra, lilypond, texlive-fonts-recommended -Recommends: texlive-lang-english, texlive-lang-french, texlive-lang-portuguese, texlive-lang-spanish, texlive-fonts-extra -X-Python3-Version: -Section: tex - From e38359b3658c9238de63a37e465b92c772312271 Mon Sep 17 00:00:00 2001 From: Louis Date: Sun, 16 Nov 2014 17:49:05 +0100 Subject: [PATCH 21/26] Oubli --- MANIFEST.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index bca3a82b..fe4c1ab7 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,2 @@ -include LICENSE NEWS readme.md Requirements.txt stdeb.cfg +include LICENSE NEWS readme.md Requirements.txt recursive-include patacrep/data * From 063c46098e9d4595e4e4bb8ce67939d5d012a190 Mon Sep 17 00:00:00 2001 From: Louis Date: Sun, 16 Nov 2014 17:51:16 +0100 Subject: [PATCH 22/26] L'installateur utilise aussi Python3 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 34705187..deb256f9 100755 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """Installation script for songbook. 
From cb62c288cd356293698a6d78eaa9a6812a476f91 Mon Sep 17 00:00:00 2001 From: Louis Date: Wed, 19 Nov 2014 09:59:46 +0100 Subject: [PATCH 23/26] =?UTF-8?q?Python2=20n'est=20plus=20support=C3=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index deb256f9..6d1233e0 100755 --- a/setup.py +++ b/setup.py @@ -38,7 +38,6 @@ setup( "Operating System :: POSIX :: Linux", "Operating System :: Microsoft :: Windows", "Operating System :: MacOS :: MacOS X", - "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Topic :: Utilities", From d40ae1cea6d4672e0b70e77a81dea063b1836420 Mon Sep 17 00:00:00 2001 From: Louis Date: Sat, 22 Nov 2014 10:46:20 +0100 Subject: [PATCH 24/26] Datadir and fullpath are no longer stored in cached songs Closes #68 --- patacrep/songs/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/patacrep/songs/__init__.py b/patacrep/songs/__init__.py index a4a6b089..512a2ced 100644 --- a/patacrep/songs/__init__.py +++ b/patacrep/songs/__init__.py @@ -90,8 +90,6 @@ class Song(Content): "unprefixed_titles", "cached", "data", - "datadir", - "fullpath", "subpath", "languages", "authors", @@ -101,6 +99,7 @@ class Song(Content): def __init__(self, datadir, subpath, config): self.fullpath = os.path.join(datadir, subpath) + self.datadir = datadir if datadir: # Only songs in datadirs are cached self._filehash = hashlib.md5( From 607e064bfb45b768c5fce74d6509cce1ec371f94 Mon Sep 17 00:00:00 2001 From: Louis Date: Sat, 22 Nov 2014 11:02:13 +0100 Subject: [PATCH 25/26] Transformation de encoding.open_read en un 'context manager' --- patacrep/content/include.py | 7 ++----- patacrep/encoding.py | 8 ++++++-- patacrep/index.py | 6 +----- patacrep/latex/__init__.py | 3 ++- patacrep/songbook.py | 8 ++------ patacrep/templates.py | 41 
++++++++++++++++--------------------- 6 files changed, 31 insertions(+), 42 deletions(-) diff --git a/patacrep/content/include.py b/patacrep/content/include.py index dcc969a9..9fc73621 100644 --- a/patacrep/content/include.py +++ b/patacrep/content/include.py @@ -48,15 +48,12 @@ def parse(keyword, config, argument, contentlist): filepath = load_from_datadirs(path, config) content_file = None try: - content_file = encoding.open_read(filepath, 'r') - new_content = json.load(content_file) + with encoding.open_read(filepath, 'r') as content_file: + new_content = json.load(content_file) except Exception as error: # pylint: disable=broad-except LOGGER.error(error) LOGGER.error("Error while loading file '{}'.".format(filepath)) sys.exit(1) - finally: - if content_file: - content_file.close() config["datadir"].append(os.path.abspath(os.path.dirname(filepath))) new_contentlist += process_content(new_content, config) diff --git a/patacrep/encoding.py b/patacrep/encoding.py index ca917295..a1084aa8 100644 --- a/patacrep/encoding.py +++ b/patacrep/encoding.py @@ -5,17 +5,21 @@ import codecs import chardet import logging +import contextlib LOGGER = logging.getLogger(__name__) + +@contextlib.contextmanager def open_read(filename, mode='r'): """Open a file for reading, guessing the right encoding. Return a fileobject, reading unicode strings. 
""" - return codecs.open( + with codecs.open( filename, mode=mode, encoding=chardet.detect(open(filename, 'rb').read())['encoding'], errors='replace', - ) + ) as fileobject: + yield fileobject diff --git a/patacrep/index.py b/patacrep/index.py index c715918a..f111f059 100644 --- a/patacrep/index.py +++ b/patacrep/index.py @@ -29,13 +29,9 @@ def process_sxd(filename): """ data = [] index_file = None - try: - index_file = encoding.open_read(filename, 'r') + with encoding.open_read(filename, 'r') as index_file: for line in index_file: data.append(line.strip()) - finally: - if index_file: - index_file.close() i = 1 idx = Index(data[0]) diff --git a/patacrep/latex/__init__.py b/patacrep/latex/__init__.py index fce5470f..f923b822 100644 --- a/patacrep/latex/__init__.py +++ b/patacrep/latex/__init__.py @@ -15,6 +15,7 @@ def parsesong(path): """Return a dictonary of data read from the latex file `path`. """ - data = syntax_parsesong(encoding.open_read(path).read(), path) + with encoding.open_read(path) as songfile: + data = syntax_parsesong(songfile.read(), path) data['@path'] = path return data diff --git a/patacrep/songbook.py b/patacrep/songbook.py index e3bcf555..dfa21651 100755 --- a/patacrep/songbook.py +++ b/patacrep/songbook.py @@ -100,17 +100,13 @@ def main(): basename = os.path.basename(songbook_path)[:-3] - songbook_file = None try: - songbook_file = encoding.open_read(songbook_path) - songbook = json.load(songbook_file) + with encoding.open_read(songbook_path) as songbook_file: + songbook = json.load(songbook_file) except Exception as error: # pylint: disable=broad-except LOGGER.error(error) LOGGER.error("Error while loading file '{}'.".format(songbook_path)) sys.exit(1) - finally: - if songbook_file: - songbook_file.close() # Gathering datadirs datadirs = [] diff --git a/patacrep/templates.py b/patacrep/templates.py index d1e9a616..55a354fb 100644 --- a/patacrep/templates.py +++ b/patacrep/templates.py @@ -186,32 +186,27 @@ class TexRenderer(object): """ 
subvariables = {} - template_file = None templatename = self.texenv.get_template(template).filename - try: - template_file = encoding.open_read(templatename, 'r') + with encoding.open_read(templatename, 'r') as template_file: content = template_file.read() - subtemplates = list(find_templates(self.texenv.parse(content))) - match = re.findall(_VARIABLE_REGEXP, content) - if match: - for var in match: - try: - subvariables.update(json.loads(var)) - except ValueError as exception: - raise errors.TemplateError( - exception, - ( - "Error while parsing json in file " - "{filename}. The json string was:" - "\n'''\n{jsonstring}\n'''" - ).format( - filename=templatename, - jsonstring=var, - ) + subtemplates = list(find_templates(self.texenv.parse(content))) + match = re.findall(_VARIABLE_REGEXP, content) + if match: + for var in match: + try: + subvariables.update(json.loads(var)) + except ValueError as exception: + raise errors.TemplateError( + exception, + ( + "Error while parsing json in file " + "{filename}. 
The json string was:" + "\n'''\n{jsonstring}\n'''" + ).format( + filename=templatename, + jsonstring=var, ) - finally: - if template_file: - template_file.close() + ) return (subvariables, subtemplates) From 88ba800fb36c07ae2e735ae19c8d8b2a5477329b Mon Sep 17 00:00:00 2001 From: Louis Date: Sat, 22 Nov 2014 11:27:54 +0100 Subject: [PATCH 26/26] Added option 'encoding' to songbook fiels --- patacrep/build.py | 2 ++ patacrep/content/include.py | 5 ++++- patacrep/encoding.py | 10 ++++++++-- patacrep/index.py | 2 +- patacrep/latex/__init__.py | 4 ++-- patacrep/songbook.py | 10 ++++++++-- patacrep/songs/__init__.py | 2 ++ patacrep/songs/tex.py | 2 +- patacrep/templates.py | 12 +++++++++--- 9 files changed, 37 insertions(+), 12 deletions(-) diff --git a/patacrep/build.py b/patacrep/build.py index be9b48f4..1bc683a4 100644 --- a/patacrep/build.py +++ b/patacrep/build.py @@ -33,6 +33,7 @@ DEFAULT_CONFIG = { 'lang': 'english', 'content': [], 'titleprefixwords': [], + 'encoding': None, } @@ -91,6 +92,7 @@ class Songbook(object): config['template'], config['datadir'], config['lang'], + config['encoding'], ) config.update(renderer.get_variables()) config.update(self.config) diff --git a/patacrep/content/include.py b/patacrep/content/include.py index 9fc73621..03492544 100644 --- a/patacrep/content/include.py +++ b/patacrep/content/include.py @@ -48,7 +48,10 @@ def parse(keyword, config, argument, contentlist): filepath = load_from_datadirs(path, config) content_file = None try: - with encoding.open_read(filepath, 'r') as content_file: + with encoding.open_read( + filepath, + encoding=config['encoding'] + ) as content_file: new_content = json.load(content_file) except Exception as error: # pylint: disable=broad-except LOGGER.error(error) diff --git a/patacrep/encoding.py b/patacrep/encoding.py index a1084aa8..a0501e4d 100644 --- a/patacrep/encoding.py +++ b/patacrep/encoding.py @@ -11,15 +11,21 @@ LOGGER = logging.getLogger(__name__) @contextlib.contextmanager -def 
open_read(filename, mode='r'): +def open_read(filename, mode='r', encoding=None): """Open a file for reading, guessing the right encoding. Return a fileobject, reading unicode strings. + If `encoding` is set, use it as the encoding (do not guess). """ + if encoding is None: + fileencoding = chardet.detect(open(filename, 'rb').read())['encoding'] + else: + fileencoding = encoding + with codecs.open( filename, mode=mode, - encoding=chardet.detect(open(filename, 'rb').read())['encoding'], + encoding=fileencoding, errors='replace', ) as fileobject: yield fileobject diff --git a/patacrep/index.py b/patacrep/index.py index f111f059..351e155e 100644 --- a/patacrep/index.py +++ b/patacrep/index.py @@ -29,7 +29,7 @@ def process_sxd(filename): """ data = [] index_file = None - with encoding.open_read(filename, 'r') as index_file: + with encoding.open_read(filename) as index_file: for line in index_file: data.append(line.strip()) diff --git a/patacrep/latex/__init__.py b/patacrep/latex/__init__.py index f923b822..8d0efed9 100644 --- a/patacrep/latex/__init__.py +++ b/patacrep/latex/__init__.py @@ -11,11 +11,11 @@ from patacrep.latex.syntax import tex2plain from patacrep.latex.syntax import parsesong as syntax_parsesong from patacrep import encoding -def parsesong(path): +def parsesong(path, fileencoding=None): """Return a dictonary of data read from the latex file `path`. 
""" - with encoding.open_read(path) as songfile: + with encoding.open_read(path, encoding=fileencoding) as songfile: data = syntax_parsesong(songfile.read(), path) data['@path'] = path return data diff --git a/patacrep/songbook.py b/patacrep/songbook.py index dfa21651..8417be91 100755 --- a/patacrep/songbook.py +++ b/patacrep/songbook.py @@ -14,7 +14,7 @@ import sys from patacrep.build import SongbookBuilder, DEFAULT_STEPS from patacrep import __version__ from patacrep import errors -from patacrep import encoding +import patacrep.encoding # Logging configuration logging.basicConfig(level=logging.INFO) @@ -101,8 +101,14 @@ def main(): basename = os.path.basename(songbook_path)[:-3] try: - with encoding.open_read(songbook_path) as songbook_file: + with patacrep.encoding.open_read(songbook_path) as songbook_file: songbook = json.load(songbook_file) + if 'encoding' in songbook: + with patacrep.encoding.open_read( + songbook_path, + encoding=songbook['encoding'] + ) as songbook_file: + songbook = json.load(songbook_file) except Exception as error: # pylint: disable=broad-except LOGGER.error(error) LOGGER.error("Error while loading file '{}'.".format(songbook_path)) diff --git a/patacrep/songs/__init__.py b/patacrep/songs/__init__.py index 512a2ced..0a1072f9 100644 --- a/patacrep/songs/__init__.py +++ b/patacrep/songs/__init__.py @@ -100,6 +100,8 @@ class Song(Content): def __init__(self, datadir, subpath, config): self.fullpath = os.path.join(datadir, subpath) self.datadir = datadir + self.encoding = config["encoding"] + if datadir: # Only songs in datadirs are cached self._filehash = hashlib.md5( diff --git a/patacrep/songs/tex.py b/patacrep/songs/tex.py index b5710346..c2a51d62 100644 --- a/patacrep/songs/tex.py +++ b/patacrep/songs/tex.py @@ -13,7 +13,7 @@ class TexRenderer(Song): def parse(self): """Parse song and set metadata.""" - self.data = parsesong(self.fullpath) + self.data = parsesong(self.fullpath, self.encoding) self.titles = self.data['@titles'] 
self.languages = self.data['@languages'] self.authors = self.data['by'] diff --git a/patacrep/templates.py b/patacrep/templates.py index 55a354fb..be507a27 100644 --- a/patacrep/templates.py +++ b/patacrep/templates.py @@ -9,7 +9,8 @@ import os import re import json -from patacrep import encoding, errors, files +from patacrep import errors, files +import patacrep.encoding _LATEX_SUBS = ( (re.compile(r'\\'), r'\\textbackslash'), @@ -67,7 +68,7 @@ def _escape_tex(value): class TexRenderer(object): """Render a template to a LaTeX file.""" - def __init__(self, template, datadirs, lang): + def __init__(self, template, datadirs, lang, encoding=None): '''Start a new jinja2 environment for .tex creation. Arguments: @@ -75,8 +76,10 @@ class TexRenderer(object): - datadirs: list of locations of the data directory (which may contain file /templates/