Browse Source

Suppression de plasTeX

pull/66/head
Louis 10 years ago
parent
commit
e41590c409
  1. 1
      Requirements.txt
  2. 4
      patacrep/index.py
  3. 23
      patacrep/latex/__init__.py
  4. 117
      patacrep/plastex.py
  5. 181
      patacrep/plastex_chord.py
  6. 15
      patacrep/plastex_misc_commands.py
  7. 58
      patacrep/plastex_patchedbabel.py
  8. 70
      patacrep/plastex_songs.py
  9. 4
      patacrep/songs.py
  10. 3
      readme.md
  11. 4
      setup.py
  12. 2
      stdeb.cfg

1
Requirements.txt

@ -2,4 +2,3 @@ Jinja2==2.7.3
argparse==1.2.1
chardet==2.2.1
unidecode>=0.04.16
https://github.com/tiarno/plastex/archive/master.zip

4
patacrep/index.py

@ -12,7 +12,7 @@ import re
from patacrep import authors
from patacrep import encoding
from patacrep.plastex import simpleparse
from patacrep.latex import latex2unicode
EOL = u"\n"
@ -112,7 +112,7 @@ class Index(object):
if not key in self.data[first].keys():
self.data[first][key] = {
'sortingkey': [
encoding.unidecode(simpleparse(item)).lower()
encoding.unidecode(latex2unicode(item)).lower()
for item in key
],
'entries': [],

23
patacrep/latex/__init__.py

@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
"""Very simple LaTeX parser"""
def latex2unicode(string):
    """Return the unicode rendering of the LaTeX snippet `string`.

    Placeholder implementation: the argument is currently ignored and the
    fixed marker ``u"TODO"`` is returned whatever the input is.
    """
    placeholder = u"TODO"
    return placeholder
def parsetex(path):
    """Return a dictionary of data read from the LaTeX file `path`.

    This function is a drop-in replacement for an older function. It should
    not appear as is in the final version, once
    https://github.com/patacrep/patacrep/issues/64 has been addressed.

    Placeholder implementation: `path` is not read, and the same hard-coded
    metadata (keys 'titles', 'args' and 'languages') is returned for every
    file. A new dictionary is built on each call.

    TODO
    """
    metadata = {}
    metadata['titles'] = ["TODO"]
    metadata['args'] = {}
    metadata['languages'] = ['french']
    return metadata

117
patacrep/plastex.py

@ -1,117 +0,0 @@
# -*- coding: utf-8 -*-
"""PlasTeX module to process song files."""
from plasTeX.TeX import TeX
from plasTeX.Base.LaTeX import Sentences
import locale
import os
import sys
from patacrep import encoding
def process_unbr_spaces(node):
    #pylint: disable=line-too-long
    r"""Make '~' and '\ ' nodes of a tree render as unbreakable spaces.

    The node itself is checked, then every item of `node.childNodes` is
    processed recursively. A matching node gets its `unicode` attribute set
    to character 160 (no-break space). The tree is modified in place, and
    `node` is returned for convenience.

    This function is a workaround to a bug that has been solved since:
    - https://github.com/tiarno/plastex/commit/76bb78d5fbaac48e68025a3545286cc63cb4e7ad
    - https://github.com/tiarno/plastex/commit/682a0d223b99d6b949bacf1c974d24dc9bb1d18e
    It can be deleted once this fix has reached the production version of
    PlasTeX.
    """
    node_type = type(node)
    if node_type == Sentences.InterWordSpace:
        is_unbreakable = True
    else:
        # Only the exact "~ " source is treated as an unbreakable space.
        is_unbreakable = (
            node_type == Sentences.NoLineBreak and node.source == '~ '
        )
    if is_unbreakable:
        node.unicode = unichr(160)
    for child in node.childNodes:
        process_unbr_spaces(child)
    return node
def simpleparse(text):
    """Parse the simple LaTeX string `text`.

    A fresh `TeX` parser (with logging disabled) is fed with `text`; the
    `textContent` of the parsed document is passed through
    `process_unbr_spaces()` and returned.
    """
    parser = TeX()
    parser.disableLogging()
    parser.input(text)
    content = parser.parse().textContent
    return process_unbr_spaces(content)
class SongParser(object):
    """Parser for .sg song files."""

    @staticmethod
    def create_tex():
        """Create a TeX object, ready to parse a tex file.

        The base macros are loaded, then the plasTeX packages shipped next to
        this module. The directory of this module is appended to `sys.path`
        only while those packages are loaded, and is removed again even if
        loading one of them raises.
        """
        tex = TeX()
        tex.disableLogging()
        tex.ownerDocument.context.loadBaseMacros()
        sys.path.append(os.path.dirname(__file__))
        try:
            for package in (
                    "plastex_patchedbabel",
                    "plastex_chord",
                    "plastex_songs",
                    "plastex_misc_commands",
                ):
                tex.ownerDocument.context.loadPackage(tex, package)
        finally:
            # Never leave the temporary entry behind, whatever happened.
            sys.path.pop()
        return tex

    @classmethod
    def parse(cls, filename):
        """Parse a TeX file, and return its plasTeX representation."""
        tex = cls.create_tex()
        # NOTE(review): the object returned by encoding.open_read() is never
        # explicitly closed here -- confirm whether plasTeX takes care of it.
        tex.input(encoding.open_read(filename, 'r'))
        return tex.parse()
def parsetex(filename):
    r"""Parse a .sg file.

    Return a dictionary containing the metadata read from the file. Keys are:
    - languages: the set of languages used (from the \selectlanguage{}
      commands);
    - titles: the list of titles;
    - args: the dictionary of parameters given to \beginsong;
    - _doc: the parsed plasTeX document;
    - _filename: the `filename` argument.

    'titles' and 'args' are only set when a `beginsong` or `sortassong` node
    is found in the document.
    """
    # /* BEGIN plasTeX patch
    # The following lines, and the `finally` clause below, are used to
    # circumvent a plasTeX bug. It has been reported and corrected :
    # https://github.com/tiarno/plastex/commit/8f4e5a385f3cb6a04d5863f731ce24a7e856f2a4
    # To see if you can delete those lines, set your LC_TIME locale to French,
    # during a month containing diacritics (e.g. Février), and run songbook. If
    # no plasTeX bug appears, it is safe to remove those lines.
    oldlocale = locale.getlocale(locale.LC_TIME)
    locale.setlocale(locale.LC_TIME, 'C')
    # plasTeX patch END */

    try:
        # Parsing
        doc = SongParser.parse(filename)
    finally:
        # /* BEGIN plasTeX patch
        # Restore the caller's locale even when parsing fails, so that an
        # error in one song does not leave the process in the 'C' locale.
        if oldlocale[0] and oldlocale[1]:
            try:
                locale.setlocale(locale.LC_TIME, "%s.%s" % oldlocale)
            except locale.Error:
                pass # Workaround a bug on windows
        # plasTeX patch END */

    # Data extraction
    data = {
        "languages": set(),
        "_doc": doc,
        "_filename": filename,
        }
    for node in doc.allChildNodes:
        if node.nodeName == "selectlanguage":
            data["languages"].add(node.attributes['lang'])
        if node.nodeName in ["beginsong", "sortassong"]:
            data["titles"] = node.attributes["titles"]
            data["args"] = node.attributes["args"]
    return data

181
patacrep/plastex_chord.py

@ -1,181 +0,0 @@
# -*- coding: utf-8 -*-
r"""PlasTeX module to deal with chords commands of the songs LaTeX package
Chords are set using commands like \[C]. This package parses those commands.
"""
import logging
import plasTeX
from plasTeX import Command, Environment, Macro
from plasTeX.Base.LaTeX.Math import BeginDisplayMath
LOGGER = logging.getLogger(__name__)
# Count the number of levels of 'verse' environment: IN_VERSE==1 means that we
# are in a 'verse' environment; IN_VERSE==2 means that we are in two included
# 'verse' environment, and so on.
IN_VERSE = 0
def wrap_displaymath(cls):
    """Decorator to store the depth of 'verse' environment

    Return a subclass of `cls` whose invoke() method keeps the global
    variable IN_VERSE up to date: IN_VERSE is the number of 'verse' (or
    'chorus' or 'verse*') environments we are currently in.
    """

    # pylint: disable=no-init,too-few-public-methods
    class WrappedClass(cls):
        """Wrapper to LaTeX environment updating IN_VERSE"""
        blockType = True

        # pylint: disable=super-on-old-class,global-statement,no-member
        def invoke(self, tex):
            """Wrapper to invoke() to update global variable IN_VERSE.

            When the environment begins, a new context is pushed in which the
            newline character gets category code 13, the counter is
            incremented, and leading spaces and line breaks are skipped. When
            it ends, the context is popped and the counter is decremented. In
            both cases the parent invoke() is then called and its result
            returned.
            """
            global IN_VERSE
            if self.macroMode == Macro.MODE_BEGIN:
                self.ownerDocument.context.push()
                # Category code 13: newlines then show up as 'active::\n'
                # tokens, which is what match_space() looks for.
                self.ownerDocument.context.catcode("\n", 13)
                IN_VERSE += 1

                # Removing spaces and line breaks at the beginning of verse
                token = None
                for token in tex:
                    if not match_space(token):
                        break
                # Put back the last token read (None only if `tex` yielded
                # nothing at all).
                if token is not None:
                    tex.pushToken(token)
            else:
                self.ownerDocument.context.pop()
                IN_VERSE -= 1
            return super(WrappedClass, self).invoke(tex)
    return WrappedClass
# pylint: disable=too-many-public-methods
@wrap_displaymath
class Verse(Environment):
    """LaTeX 'verse' environment

    Wrapped by `wrap_displaymath`, so that entering and leaving it updates
    the IN_VERSE counter.
    """
    macroName = 'verse'
# pylint: disable=too-many-public-methods
@wrap_displaymath
class VerseStar(Environment):
    """LaTeX 'verse*' environment

    Wrapped by `wrap_displaymath`, so that entering and leaving it updates
    the IN_VERSE counter.
    """
    macroName = 'verse*'
# pylint: disable=too-many-public-methods
@wrap_displaymath
class Chorus(Environment):
    """LaTeX 'chorus' environment

    Wrapped by `wrap_displaymath`, so that entering and leaving it updates
    the IN_VERSE counter.
    """
    macroName = 'chorus'
def match_space(token):
    """Tell whether `token` is a space or a newline character.

    A newline is recognised by its node name, 'active::\\n'.
    """
    if isinstance(token, plasTeX.Tokenizer.Space):
        return True
    return token.nodeName == 'active::\n'
def match_closing_square_bracket(token):
    """Tell whether `token` is a text node holding the character ']'."""
    is_text = token.nodeType == token.TEXT_NODE
    return is_text and token.nodeValue == ']'
def match_egroup(token):
    """Tell whether `token` is an `egroup` (end of group) instance."""
    egroup_class = plasTeX.Base.Text.egroup #pylint: disable=no-member
    return isinstance(token, egroup_class)
def match_space_or_chord(token):
    """Tell whether `token` is a space (see `match_space()`) or a `Chord`."""
    if match_space(token):
        return True
    return isinstance(token, Chord)
def parse_until(tex, end=lambda x: False):
    """Consume tokens of `tex` until `end` is satisfied or an `egroup` is met.

    Arguments:
    - tex: iterable of tokens to consume;
    - end: function taking a token as argument, and returning a boolean.
      Consumption stops at the first token for which it returns True, or at
      the first `egroup` token.

    A `bgroup` token gets the content of its group (consumed recursively up
    to the matching `egroup`) appended as children, and is itself kept in
    the result.

    Return: a tuple (list of consumed tokens, stopping token). The stopping
    token is not part of the list; it is returned so that the caller can
    decide whether to discard it. It is None if `tex` was exhausted without
    the stopping condition being met.
    """
    collected = []
    stop_token = None
    for token in tex:
        if end(token) or match_egroup(token):
            stop_token = token
            break
        if isinstance(token, plasTeX.Base.Text.bgroup): #pylint: disable=no-member
            group_content = parse_until(tex, match_egroup)[0]
            for item in group_content:
                token.appendChild(item)
        collected.append(token)
    return (collected, stop_token)
class Chord(Command):
    """Beginning of a chord notation

    Instances are created by `BeginChordOrDisplayMath.invoke()`, which stores
    the chord name in the 'name' attribute.
    """
    macroName = 'chord'
    macroMode = Command.MODE_NONE
class BeginChordOrDisplayMath(BeginDisplayMath):
    r"""Wrapper to BeginDisplayMath

    In a 'verse' (or 'verse*' or 'chorus') environment, the '\[' macro
    displays a chord. Otherwise, it corresponds to the usual LaTeX math mode.

    This class calls the right method, depending on the inclusion of this
    macro in a verse environment.
    """
    macroName = '['

    def invoke(self, tex):
        """Process this macro

        Outside of a verse (IN_VERSE is zero), defer to BeginDisplayMath.
        Inside a verse, build a `Chord` whose 'name' attribute is the list of
        tokens read up to the closing ']', then look at the next token to
        decide what the chord is attached to. Return a list of nodes, the
        chord being the first one.
        """
        if IN_VERSE:
            chord = Chord()

            # The chord name is read in a temporary context where '&' has
            # category code 13.
            self.ownerDocument.context.push() #pylint: disable=no-member
            self.ownerDocument.context.catcode("&", 13) #pylint: disable=no-member
            chord.setAttribute(
                'name',
                parse_until(tex, match_closing_square_bracket)[0],
                )
            self.ownerDocument.context.pop() #pylint: disable=no-member

            # Token following the chord (None if there is nothing left)
            token = next(iter(tex), None)
            if token is None:
                return [chord]
            elif match_space(token):
                # Chord followed by a space: the space is returned after it
                return [chord, token]
            elif (
                    isinstance(token, Verse)
                    or isinstance(token, VerseStar)
                    or isinstance(token, Chorus)
                ):
                LOGGER.warning((
                    "{} L{}: '\\end{{verse}}' (or 'verse*' or 'chorus') not "
                    "allowed directly after '\\['."
                    ).format(tex.filename, tex.lineNumber)
                    )
                return [chord]
            elif isinstance(token, Chord):
                # Two chords in a row: merge both names into the second
                # chord, and return that one only.
                token.attributes['name'] = (
                    chord.attributes['name']
                    + token.attributes['name']
                    )
                chord = token
                return [chord]
            elif isinstance(token, plasTeX.Base.Text.bgroup): #pylint: disable=no-member
                # Chord followed by a group: the tokens of the group become
                # children of the chord.
                # pylint: disable=expression-not-assigned
                [chord.appendChild(item) for item in parse_until(tex)[0]]
                return [chord]
            else:
                # Chord followed by anything else: that token and the
                # following ones, up to the next space or chord, become
                # children of the chord.
                chord.appendChild(token)
                (parsed, last) = parse_until(tex, match_space_or_chord)
                # pylint: disable=expression-not-assigned
                [chord.appendChild(item) for item in parsed]
                return [chord, last]
        else:
            return super(BeginChordOrDisplayMath, self).invoke(tex)

15
patacrep/plastex_misc_commands.py

@ -1,15 +0,0 @@
# -*- coding: utf-8 -*-
"""Quick management of random LaTeX commands."""
from plasTeX import Command
# pylint: disable=invalid-name,too-many-public-methods
class songcolumns(Command):
    r"""Manage `\songcolumns` command

    The command is declared with a single argument, `num`, typed as `int`.
    """
    args = '{num:int}'
# pylint: disable=invalid-name,too-many-public-methods
class gtab(Command):
    r"""Manage `\gtab` command

    The command is declared with two arguments, `chord` and `diagram`, both
    typed as `str`.
    """
    args = '{chord:str}{diagram:str}'

58
patacrep/plastex_patchedbabel.py

@ -1,58 +0,0 @@
# -*- coding: utf-8 -*-
r"""Patch pour le paquet Babel de PlasTeX
Un bug dans PlasTeX intervient lorsqu'on essaye d'analyser une commande LaTeX
\selectlanguage{}, que nous voulons utiliser ici. Un patch a été proposé aux
développeurs de plasTeX, et accepté. Mais il faut que cette correction arrive
en production. En attendant, nous utilisons cette version modifiée.
Dès que la correction sera entrée en production, il faudra supprimer ce
fichier, et remplacer l'occurrence de "patchedbabel" par "babel" dans le fichier
"plastex.py".
La correction à surveiller est la révision
41a48c0c229dd46b69fb0e3720595000a71b17d8 du fichier babel.py :
https://github.com/tiarno/plastex/commit/41a48c0c229dd46b69fb0e3720595000a71b17d8
# Comment vérifier si on peut supprimer ce fichier ?
1) Remplacer l'occurence à patchedbabel par babel dans le fichier plastex.py.
2) Générer un fichier .tex à partir d'un fichier .sb, ce dernier faisant
intervenir des chansons dans lesquelles \selectlanguage est utilisé (par
exemple, "make -B matteo.tex" ou "make -B naheulbeuk.tex" pour des fichiers pas
trop gros.
3) Si l'erreur suivante apparaît, c'est qu'il faut encore attendre.
> Traceback (most recent call last):
> [...]
> File "/usr/lib/pymodules/python2.7/plasTeX/Packages/babel.py", line 18, in
> invoke context.loadLanguage(self.attributes['lang'], self.ownerDocument)
> NameError: global name 'context' is not defined
3 bis) Si elle n'apparait pas : youpi ! Supprimez ce fichier !
# Contact et commentaires
Mercredi 27 mars 2013
Louis <spalax(at)gresille.org>
"""
from plasTeX import Command
# pylint: disable=invalid-name,too-many-public-methods
class selectlanguage(Command):
    """Patched version of the vanilla `selectlanguage` class.

    See module docstring for more information."""
    args = 'lang:str'

    def invoke(self, tex):
        """Invoke the command, then load the language it selects.

        Return whatever `Command.invoke()` returned.
        """
        result = Command.invoke(self, tex)
        document = self.ownerDocument
        document.context.loadLanguage( # pylint: disable=no-member
            self.attributes['lang'],
            document,
            )
        return result

70
patacrep/plastex_songs.py

@ -1,70 +0,0 @@
# -*- coding: utf-8 -*-
"""Module to process song LaTeX environment.
"""
import plasTeX
from patacrep import encoding
from patacrep.plastex import process_unbr_spaces
def split_linebreak(texlist):
    r"""Split a list of title tokens into alternative titles.

    A title can be defined with alternative names :

        A real name\\
        Alternative name\\
        Another alternative name

    `texlist` is the object representation of such a list of titles. Each
    token named '\\' closes the current title (even if it is empty); the text
    content of any other token is added to the current title.

    Return a list of titles, each title being the list of its unicode
    fragments. A trailing title is only kept if it is not empty.
    """
    titles = []
    fragments = []
    for token in texlist:
        if token.nodeName != '\\':
            text = process_unbr_spaces(token).textContent
            fragments.append(encoding.basestring2unicode(text))
            continue
        titles.append(fragments)
        fragments = []
    if fragments:
        titles.append(fragments)
    return titles
class beginsong(plasTeX.Command): # pylint: disable=invalid-name,too-many-public-methods
    """Class parsing the LaTeX song environment."""
    args = '{titles}[args:dict]'

    def invoke(self, tex):
        """Parse an occurrence of the song environment.

        After the regular command parsing, the 'titles' attribute is replaced
        by the list of alternative titles, and the 'args' attribute by a
        dictionary whose values have been converted to unicode.
        """
        plasTeX.Command.invoke(self, tex)

        # Titles: one string per alternative title
        title_nodes = self.attributes['titles'].allChildNodes
        titles = ["".join(parts) for parts in split_linebreak(title_nodes)]
        self.attributes['titles'] = encoding.list2unicode(titles)

        # Keyval arguments: the conversion depends on the type of the value
        converted = {}
        for (key, val) in self.attributes['args'].iteritems():
            if isinstance(val, plasTeX.DOM.Element):
                text = process_unbr_spaces(val).textContent
                converted[key] = encoding.basestring2unicode(text)
            elif isinstance(val, basestring):
                converted[key] = encoding.basestring2unicode(val)
            else:
                converted[key] = unicode(val)
        self.attributes['args'] = converted
class sortassong(beginsong): # pylint: disable=invalid-name,too-many-public-methods
    r"""Handle '\sortassong' exactly like '\beginsong'.

    Nothing is overridden: everything is inherited from `beginsong`.
    """

4
patacrep/songs.py

@ -14,7 +14,7 @@ except ImportError:
import pickle
from patacrep.authors import processauthors
from patacrep.plastex import parsetex
from patacrep.latex import parsetex
LOGGER = logging.getLogger(__name__)
@ -113,7 +113,7 @@ class Song(object):
self.fullpath
))
# Data extraction from the song with plastex
# Data extraction from the latex song
data = parsetex(self.fullpath)
self.titles = data['titles']
self.datadir = datadir

3
readme.md

@ -12,8 +12,7 @@ is precised in the header.
# Python version
Patacrep is compatible with Python 2.7 (no Python3 since [one of the
library](http://plastex.sourceforge.net/) we are using is not).
Patacrep is compatible with Python 2.7 (no Python3 yet).
# Download

4
setup.py

@ -24,11 +24,11 @@ SETUP = {"name": 'patacrep',
"scripts": ['songbook'],
"requires": [
"argparse", "codecs", "distutils", "fnmatch", "glob", "json",
"locale", "logging", "os", "plasTeX", "re", "subprocess", "sys",
"locale", "logging", "os", "re", "subprocess", "sys",
"textwrap", "unidecode", "jinja2", "chardet"
],
"install_requires": [
"argparse", "plasTeX", "unidecode", "jinja2", "chardet"
"argparse", "unidecode", "jinja2", "chardet"
],
"package_data": {'patacrep': [ 'data/latex/*',
'data/templates/*',

2
stdeb.cfg

@ -1,5 +1,5 @@
[DEFAULT]
Depends: python-jinja2, python-pkg-resources, python-plastex, python-chardet, python-unidecode, texlive-latex-base, texlive-latex-recommended, texlive-latex-extra, lilypond, texlive-fonts-recommended
Depends: python-jinja2, python-pkg-resources, python-chardet, python-unidecode, texlive-latex-base, texlive-latex-recommended, texlive-latex-extra, lilypond, texlive-fonts-recommended
Recommends: texlive-lang-english, texlive-lang-french, texlive-lang-portuguese, texlive-lang-spanish, texlive-fonts-extra
XS-Python-Version: >=2.7
Section: tex

Loading…
Cancel
Save