mirror of https://github.com/patacrep/patacrep.git
Louis
10 years ago
12 changed files with 31 additions and 451 deletions
@ -0,0 +1,23 @@ |
|||
# -*- coding: utf-8 -*- |
|||
|
|||
"""Very simple LaTeX parser""" |
|||
|
|||
def latex2unicode(string):
    """Return the unicode rendering of the LaTeX `string`.

    Placeholder implementation: the argument is currently ignored and the
    literal text "TODO" is always returned.
    """
    placeholder = u"TODO"
    return placeholder
|||
|
|||
def parsetex(path):
    """Return a dictionary of data read from the LaTeX file `path`.

    This function is a drop-in replacement for an older function. It should
    not stay as it is in the final version, once
    https://github.com/patacrep/patacrep/issues/64 has been taken into
    account.

    TODO: for now `path` is not read at all; fixed placeholder values are
    returned under the keys 'titles', 'args' and 'languages'.
    """
    metadata = dict(
        titles=["TODO"],
        args={},
        languages=['french'],
    )
    return metadata
|||
|
@ -1,117 +0,0 @@ |
|||
# -*- coding: utf-8 -*- |
|||
|
|||
"""PlasTeX module to process song files.""" |
|||
|
|||
from plasTeX.TeX import TeX |
|||
from plasTeX.Base.LaTeX import Sentences |
|||
|
|||
import locale |
|||
import os |
|||
import sys |
|||
|
|||
from patacrep import encoding |
|||
|
|||
def process_unbr_spaces(node):
    #pylint: disable=line-too-long
    r"""Mark '~' and '\ ' nodes so that they render as unbreakable spaces.

    The `unicode` attribute of every matching node is set to character 160
    (no-break space); the function then recurses into all child nodes.

    Return the (same) node object, for convenience.

    This function is a workaround to a bug that has been solved since:
    - https://github.com/tiarno/plastex/commit/76bb78d5fbaac48e68025a3545286cc63cb4e7ad
    - https://github.com/tiarno/plastex/commit/682a0d223b99d6b949bacf1c974d24dc9bb1d18e

    It can be deleted once this fix has reached the production version of
    PlasTeX.
    """
    node_type = type(node)
    # Exact type comparison (not isinstance): subclasses are left untouched.
    if node_type == Sentences.InterWordSpace:
        unbreakable = True
    else:
        unbreakable = (
            node_type == Sentences.NoLineBreak and node.source == '~ '
        )
    if unbreakable:
        node.unicode = unichr(160)

    for descendant in node.childNodes:
        process_unbr_spaces(descendant)

    return node
|||
|
|||
|
|||
def simpleparse(text):
    """Parse a simple LaTeX string and return its text content.

    A fresh plasTeX `TeX` object (with logging disabled) parses `text`; the
    text content of the resulting document is passed through
    `process_unbr_spaces()` and returned.
    """
    parser = TeX()
    parser.disableLogging()
    parser.input(text)
    document = parser.parse()
    content = document.textContent
    # NOTE(review): other callers apply process_unbr_spaces() to the node and
    # read `.textContent` afterwards; here the order is reversed. Confirm this
    # is intended before relying on unbreakable-space handling here.
    return process_unbr_spaces(content)
|||
|
|||
|
|||
class SongParser(object):
    """Syntax analyser for .sg song files."""

    @staticmethod
    def create_tex():
        """Create a TeX object, ready to parse a tex file.

        Base macros are loaded first, then the plasTeX packages shipped next
        to this module. The directory of this module is appended to
        `sys.path` only while those packages are being loaded.
        """
        tex = TeX()
        tex.disableLogging()
        context = tex.ownerDocument.context
        context.loadBaseMacros()
        sys.path.append(os.path.dirname(__file__))
        try:
            for package in (
                    "plastex_patchedbabel",
                    "plastex_chord",
                    "plastex_songs",
                    "plastex_misc_commands",
                ):
                context.loadPackage(tex, package)
        finally:
            # Always undo the temporary sys.path change, even if loading a
            # package raised, so a failure does not leak the extra entry.
            sys.path.pop()
        return tex

    @classmethod
    def parse(cls, filename):
        """Parse a TeX file, and return its plasTeX representation.

        Arguments:
        - filename: path of the file, opened with `encoding.open_read()`.
        """
        tex = cls.create_tex()
        # NOTE(review): the object returned by encoding.open_read() is never
        # explicitly closed here -- confirm whether it needs to be.
        tex.input(encoding.open_read(filename, 'r'))
        return tex.parse()
|||
|
|||
|
|||
def parsetex(filename):
    r"""Parse a .sg file.

    Return a dictionary containing the metadata read from the file. The keys
    are:
    - languages: the set of languages used (collected from the
      \selectlanguage{} commands);
    - titles: the list of titles;
    - args: the dictionary of parameters given to \beginsong;
    - _doc: the parsed plasTeX document;
    - _filename: the `filename` argument.

    'titles' and 'args' are only present if a `beginsong` or `sortassong`
    node was found; if several are found, the last one wins.
    """
    # /* BEGIN plasTeX patch
    # The following lines, and the `finally` clause below, are used to
    # circumvent a plasTeX bug. It has been reported and corrected :
    # https://github.com/tiarno/plastex/commit/8f4e5a385f3cb6a04d5863f731ce24a7e856f2a4
    # To see if you can delete those lines, set your LC_TIME locale to French,
    # during a month containing diacritics (e.g. Février), and run songbook. If
    # no plasTeX bug appears, it is safe to remove those lines.
    oldlocale = locale.getlocale(locale.LC_TIME)
    locale.setlocale(locale.LC_TIME, 'C')
    # plasTeX patch END */

    try:
        # Parsing
        doc = SongParser.parse(filename)
    finally:
        # /* BEGIN plasTeX patch
        # Restore the previous locale even when parsing fails, so an error
        # in one song does not leave the process with LC_TIME set to 'C'.
        if oldlocale[0] and oldlocale[1]:
            try:
                locale.setlocale(locale.LC_TIME, "%s.%s" % oldlocale)
            except locale.Error:
                pass # Workaround a bug on windows
        # plasTeX patch END */

    # Data extraction
    data = {
        "languages": set(),
        "_doc": doc,
        "_filename": filename,
        }
    for node in doc.allChildNodes:
        if node.nodeName == "selectlanguage":
            data["languages"].add(node.attributes['lang'])
        if node.nodeName in ["beginsong", "sortassong"]:
            data["titles"] = node.attributes["titles"]
            data["args"] = node.attributes["args"]

    return data
@ -1,181 +0,0 @@ |
|||
# -*- coding: utf-8 -*- |
|||
|
|||
r"""PlasTeX module to deal with chords commands of the songs LaTeX package |
|||
|
|||
Chords are set using commands like \[C]. This package parses those commands. |
|||
""" |
|||
|
|||
import logging |
|||
|
|||
import plasTeX |
|||
from plasTeX import Command, Environment, Macro |
|||
from plasTeX.Base.LaTeX.Math import BeginDisplayMath |
|||
|
|||
LOGGER = logging.getLogger(__name__) |
|||
|
|||
# Count the number of levels of 'verse' environment: IN_VERSE==1 means that we |
|||
# are in a 'verse' environment; IN_VERSE==2 means that we are in two included |
|||
# 'verse' environment, and so on. |
|||
IN_VERSE = 0 |
|||
|
|||
def wrap_displaymath(cls):
    """Class decorator keeping track of the depth of 'verse' environments.

    The returned subclass of `cls` overrides invoke() so that the global
    variable IN_VERSE counts the number of 'verse' (or 'chorus' or 'verse*')
    environments currently open.
    """

    # pylint: disable=no-init,too-few-public-methods
    class WrappedClass(cls):
        """Wrapper to LaTeX environment updating IN_VERSE"""
        blockType = True

        # pylint: disable=super-on-old-class,global-statement,no-member
        def invoke(self, tex):
            """Update global variable IN_VERSE, then delegate to `cls`.

            When the environment begins, a new context is pushed in which
            the newline character gets category code 13, and leading spaces
            and line breaks of the verse are dropped. Otherwise the context
            is popped.
            """
            global IN_VERSE
            if self.macroMode != Macro.MODE_BEGIN:
                self.ownerDocument.context.pop()
                IN_VERSE -= 1
            else:
                self.ownerDocument.context.push()
                self.ownerDocument.context.catcode("\n", 13)
                IN_VERSE += 1

                # Skip spaces and line breaks at the beginning of the verse.
                # The last token read (the first non-space one, or the very
                # last token if the stream ran out) is pushed back.
                pending = None
                for pending in tex:
                    if not match_space(pending):
                        break
                if pending is not None:
                    tex.pushToken(pending)

            return super(WrappedClass, self).invoke(tex)

    return WrappedClass
|||
|
|||
# pylint: disable=too-many-public-methods |
|||
@wrap_displaymath
class Verse(Environment):
    """LaTeX 'verse' environment.

    Wrapped by `wrap_displaymath` so that entering and leaving it updates
    the IN_VERSE depth counter.
    """
    macroName = 'verse'
|||
|
|||
# pylint: disable=too-many-public-methods |
|||
@wrap_displaymath
class VerseStar(Environment):
    """LaTeX 'verse*' environment.

    Wrapped by `wrap_displaymath` so that entering and leaving it updates
    the IN_VERSE depth counter.
    """
    macroName = 'verse*'
|||
|
|||
# pylint: disable=too-many-public-methods |
|||
@wrap_displaymath
class Chorus(Environment):
    """LaTeX 'chorus' environment.

    Wrapped by `wrap_displaymath` so that entering and leaving it updates
    the IN_VERSE depth counter.
    """
    macroName = 'chorus'
|||
|
|||
def match_space(token):
    """Tell whether `token` is a space or a newline character.

    A token matches if it is a plasTeX `Space` token, or if its node name is
    that of an active newline character.
    """
    if isinstance(token, plasTeX.Tokenizer.Space):
        return True
    return token.nodeName == 'active::\n'
|||
|
|||
def match_closing_square_bracket(token):
    """Tell whether `token` is a text node holding the character ']'."""
    if token.nodeType != token.TEXT_NODE:
        return False
    return token.nodeValue == ']'
|||
|
|||
def match_egroup(token):
    """Tell whether `token` is of type `egroup` (end of group)."""
    egroup_class = plasTeX.Base.Text.egroup #pylint: disable=no-member
    return isinstance(token, egroup_class)
|||
|
|||
def match_space_or_chord(token):
    """Tell whether `token` is a space (see `match_space`) or a `Chord`."""
    space = match_space(token)
    if space:
        return space
    return isinstance(token, Chord)
|||
|
|||
def parse_until(tex, end=lambda x: False):
    """Consume tokens of `tex` until `end` is satisfied or an `egroup` is met.

    Arguments:
    - tex: iterable of tokens to parse;
    - end: function taking a token as argument, and returning a boolean.
      Parsing stops when this function returns True, or an `egroup` is met.

    A `bgroup` token gets the content of its group (parsed recursively up to
    the matching `egroup`) appended as children before being collected.

    Return: a tuple of two items (the list of parsed tokens, last token). The
    last token is the one that stopped the parsing; it is returned so that
    the caller can decide whether to discard it or not. It is None if
    everything has been parsed without the end condition being met.
    """
    collected = []
    for token in tex:
        # `end` is tested first: `match_egroup` only runs if it is False.
        if end(token) or match_egroup(token):
            return (collected, token)
        if isinstance(token, plasTeX.Base.Text.bgroup): #pylint: disable=no-member
            for item in parse_until(tex, match_egroup)[0]:
                token.appendChild(item)
        collected.append(token)
    return (collected, None)
|||
|
|||
|
|||
class Chord(Command):
    """Node marking the beginning of a chord notation.

    Instances are created by `BeginChordOrDisplayMath` when a chord is met
    inside a verse.
    """
    macroMode = Command.MODE_NONE
    macroName = 'chord'
|||
|
|||
class BeginChordOrDisplayMath(BeginDisplayMath):
    r"""Wrapper to BeginDisplayMath

    In a 'verse' (or 'verse*' or 'chorus') environment, the '\[' macro
    displays a chord. Otherwise, it corresponds to the usual LaTeX math mode.
    This class calls the right method, depending on whether this macro is
    used inside a verse environment.
    """
    macroName = '['

    def invoke(self, tex):
        """Process this macro.

        Outside of a verse (IN_VERSE is zero), delegate to BeginDisplayMath.
        Inside a verse, build a `Chord` node whose 'name' attribute is the
        list of tokens read up to the closing ']', then decide from the next
        token what is attached to the chord. Return the list of resulting
        tokens.
        """
        if not IN_VERSE:
            return super(BeginChordOrDisplayMath, self).invoke(tex)

        chord = Chord()

        # The chord name is read in a temporary context where '&' has
        # category code 13.
        self.ownerDocument.context.push() #pylint: disable=no-member
        self.ownerDocument.context.catcode("&", 13) #pylint: disable=no-member
        name_tokens = parse_until(tex, match_closing_square_bracket)[0]
        chord.setAttribute('name', name_tokens)
        self.ownerDocument.context.pop() #pylint: disable=no-member

        token = next(iter(tex), None)

        # Nothing follows the chord.
        if token is None:
            return [chord]

        # A space follows: the chord is not attached to any text.
        if match_space(token):
            return [chord, token]

        # A verse-like environment token directly follows the chord.
        if isinstance(token, (Verse, VerseStar, Chorus)):
            LOGGER.warning((
                "{} L{}: '\\end{{verse}}' (or 'verse*' or 'chorus') not "
                "allowed directly after '\\['."
                ).format(tex.filename, tex.lineNumber)
                )
            return [chord]

        # Another chord follows: merge both names into the following chord.
        if isinstance(token, Chord):
            token.attributes['name'] = (
                chord.attributes['name']
                + token.attributes['name']
                )
            return [token]

        # A group follows: its whole content is attached to the chord.
        if isinstance(token, plasTeX.Base.Text.bgroup): #pylint: disable=no-member
            for item in parse_until(tex)[0]:
                chord.appendChild(item)
            return [chord]

        # Anything else: attach tokens up to the next space or chord.
        chord.appendChild(token)
        (parsed, last) = parse_until(tex, match_space_or_chord)
        for item in parsed:
            chord.appendChild(item)
        return [chord, last]
|||
|
@ -1,15 +0,0 @@ |
|||
# -*- coding: utf-8 -*- |
|||
|
|||
"""Quick management of random LaTeX commands.""" |
|||
|
|||
from plasTeX import Command |
|||
|
|||
# pylint: disable=invalid-name,too-many-public-methods |
|||
class songcolumns(Command):
    r"""plasTeX command class for `\songcolumns`.

    The argument specification declares one braced argument, `num`, tagged
    as `int`.
    """
    args = '{num:int}'
|||
|
|||
# pylint: disable=invalid-name,too-many-public-methods |
|||
class gtab(Command):
    r"""plasTeX command class for `\gtab`.

    The argument specification declares two braced arguments, `chord` and
    `diagram`, both tagged as `str`.
    """
    args = '{chord:str}{diagram:str}'
@ -1,58 +0,0 @@ |
|||
# -*- coding: utf-8 -*- |
|||
|
|||
r"""Patch pour le paquet Babel de PlasTeX |
|||
|
|||
Un bug dans PlasTeX intervient lorsqu'on essaye d'analyser une commande LaTeX |
|||
\selectlanguage{}, que nous voulons utiliser ici. Un patch a été proposé aux |
|||
développeurs de plasTeX, et accepté. Mais il faut que cette correction arrive |
|||
en production. En attendant, nous utilisons cette version modifiée. |
|||
|
|||
Dès que la correction sera entrée en production, il faudra supprimer ce |
|||
fichier, et remplacer l'occurrence de "patchedbabel" par "babel" dans le fichier |
|||
"plastex.py". |
|||
La correction à surveiller est la révision |
|||
41a48c0c229dd46b69fb0e3720595000a71b17d8 du fichier babel.py : |
|||
https://github.com/tiarno/plastex/commit/41a48c0c229dd46b69fb0e3720595000a71b17d8 |
|||
|
|||
# Comment vérifier si on peut supprimer ce fichier ? |
|||
|
|||
1) Remplacer l'occurrence de patchedbabel par babel dans le fichier plastex.py. |
|||
|
|||
2) Générer un fichier .tex à partir d'un fichier .sb, ce dernier faisant |
|||
intervenir des chansons dans lesquelles \selectlanguage est utilisé (par |
|||
exemple, "make -B matteo.tex" ou "make -B naheulbeuk.tex" pour des fichiers pas |
|||
trop gros. |
|||
|
|||
3) Si l'erreur suivante apparaît, c'est qu'il faut encore attendre. |
|||
|
|||
> Traceback (most recent call last): |
|||
> [...] |
|||
> File "/usr/lib/pymodules/python2.7/plasTeX/Packages/babel.py", line 18, in |
|||
> invoke context.loadLanguage(self.attributes['lang'], self.ownerDocument) |
|||
> NameError: global name 'context' is not defined |
|||
|
|||
3 bis) Si elle n'apparait pas : youpi ! Supprimez ce fichier ! |
|||
|
|||
# Contact et commentaires |
|||
|
|||
Mercredi 27 mars 2013 |
|||
Louis <spalax(at)gresille.org> |
|||
|
|||
""" |
|||
|
|||
from plasTeX import Command |
|||
|
|||
# pylint: disable=invalid-name,too-many-public-methods |
|||
class selectlanguage(Command):
    """Patched version of the vanilla selectlanguage class.

    See module docstring for more information.
    """
    args = 'lang:str'

    def invoke(self, tex):
        """Run the standard command processing, then load the language.

        The language named by the 'lang' attribute is loaded into the
        context of the owner document. The result of `Command.invoke()` is
        returned unchanged.
        """
        result = Command.invoke(self, tex)
        document = self.ownerDocument
        language = self.attributes['lang']
        document.context.loadLanguage(language, document) # pylint: disable=no-member
        return result
@ -1,70 +0,0 @@ |
|||
# -*- coding: utf-8 -*- |
|||
|
|||
"""Module to process song LaTeX environment. |
|||
""" |
|||
|
|||
import plasTeX |
|||
|
|||
from patacrep import encoding |
|||
from patacrep.plastex import process_unbr_spaces |
|||
|
|||
|
|||
def split_linebreak(texlist):
    r"""Split a list of title tokens into alternative titles.

    A title can be defined with alternative names, separated by LaTeX line
    breaks:

        A real name\\
        Alternative name\\
        Another alternative name

    This function takes the object representation of such a list of titles
    (an iterable of tokens) and returns a list with one item per title, each
    item being the list of unicode text pieces of that title. A trailing
    empty title is not included.
    """
    groups = []
    pending = []
    for token in texlist:
        if token.nodeName != '\\':
            text = process_unbr_spaces(token).textContent
            pending.append(encoding.basestring2unicode(text))
            continue
        # Line break: close the current title and start a new one.
        groups.append(pending)
        pending = []
    if pending:
        groups.append(pending)
    return groups
|||
|
|||
|
|||
class beginsong(plasTeX.Command): # pylint: disable=invalid-name,too-many-public-methods
    """Class parsing the LaTeX song environment."""

    args = '{titles}[args:dict]'

    def invoke(self, tex):
        """Parse an occurrence of the song environment.

        After the standard argument parsing, the 'titles' attribute is
        replaced by the list of (alternative) titles as unicode strings, and
        the 'args' attribute by a dictionary whose values are unicode
        strings.
        """
        plasTeX.Command.invoke(self, tex)

        # Titles: one string per alternative title.
        title_nodes = self.attributes['titles'].allChildNodes
        titles = ["".join(parts) for parts in split_linebreak(title_nodes)]
        self.attributes['titles'] = encoding.list2unicode(titles)

        # Keyval arguments: every value is converted to unicode.
        converted = {}
        for (key, val) in self.attributes['args'].iteritems():
            if isinstance(val, plasTeX.DOM.Element):
                text = encoding.basestring2unicode(
                    process_unbr_spaces(val).textContent
                    )
            elif isinstance(val, basestring):
                text = encoding.basestring2unicode(val)
            else:
                text = unicode(val)
            converted[key] = text
        self.attributes['args'] = converted
|||
|
|||
class sortassong(beginsong): # pylint: disable=invalid-name,too-many-public-methods
    r"""Handle '\sortassong' exactly like '\beginsong'.

    Everything is inherited from `beginsong`; nothing is overridden.
    """
Loading…
Reference in new issue