mirror of https://github.com/patacrep/patacrep.git
Louis
10 years ago
11 changed files with 632 additions and 34 deletions
@ -1,23 +1,26 @@ |
|||
# -*- coding: utf-8 -*- |
|||
|
|||
from patacrep.latex.syntax import tex2plain as syntax_tex2plain |
|||
from patacrep.latex.syntax import parsesong as syntax_parsesong |
|||
from patacrep.latex.detex import detex |
|||
from patacrep import encoding |
|||
|
|||
"""Very simple LaTeX parser""" |
|||
|
|||
def latex2unicode(string):
    """Placeholder for the conversion of a LaTeX string to unicode.

    The argument is currently ignored: the literal string ``"TODO"`` is
    returned whatever the input is.
    """
    placeholder = "TODO"
    return placeholder
|||
def tex2plain(string):
    """Return the plain-text rendering of the LaTeX `string`.

    Thin wrapper around `patacrep.latex.syntax.tex2plain`. Very few commands
    (mostly diacritics) are interpreted.
    """
    rendered = syntax_tex2plain(string)
    return rendered
|||
|
|||
def parsesong(path):
    """Return a dictionary of data read from the LaTeX file `path`.

    The file is opened with `encoding.open_read`, its whole content is handed
    to `patacrep.latex.syntax.parsesong`, and `path` itself is stored in the
    result under the ``'@path'`` key.

    This function is a drop-in replacement for an older one. It is not meant
    to remain as is in the final version, once
    https://github.com/patacrep/patacrep/issues/64 has been dealt with.
    """
    # Close the file as soon as it has been read instead of leaking the handle.
    # NOTE(review): assumes the object returned by encoding.open_read() is a
    # context manager, as regular file objects are -- confirm.
    with encoding.open_read(path) as songfile:
        data = syntax_parsesong(songfile.read(), path)
    data['@path'] = path
    return data
|||
|
@ -0,0 +1,49 @@ |
|||
class AST:
    """Base class of the nodes built while parsing LaTeX code.

    `metadata` is a class attribute, so it is reachable from this class and
    from the classes inheriting from it.
    """

    # Stays None until init_metadata() is called.
    metadata = None

    @classmethod
    def init_metadata(cls):
        """Replace `cls.metadata` by a new dictionary with no language."""
        fresh = {}
        fresh['@languages'] = set()
        cls.metadata = fresh
|||
|
|||
class Expression(AST):
    """Ordered sequence of parsed items, rendered by concatenation."""

    def __init__(self, value):
        """Create an expression whose only item is `value`."""
        super().__init__()
        self.content = [value]

    def prepend(self, value):
        """Insert `value` in front of the content, unless it is None.

        The expression itself is returned, so that calls can be chained.
        """
        if value is None:
            return self
        self.content.insert(0, value)
        return self

    def __str__(self):
        """Return the string forms of the items, joined without separator."""
        return "".join(map(str, self.content))
|||
|
|||
class Command(AST):
    """A LaTeX command, with its optional and mandatory arguments."""

    def __init__(self, name, optional, mandatory):
        """Store the parts of the command.

        Arguments:
        - name: name of the command, leading backslash included;
        - optional: list of the optional (bracketed) arguments;
        - mandatory: list of the mandatory (braced) arguments.

        As a side effect, the mandatory arguments of ``\\selectlanguage`` are
        added to the ``'@languages'`` set of the shared `metadata`, which must
        have been initialised (see `AST.init_metadata`) beforehand.
        """
        super().__init__()
        self.name = name
        self.mandatory = mandatory
        self.optional = optional

        if name == r'\selectlanguage':
            self.metadata['@languages'] |= set(self.mandatory)

    def __str__(self):
        """Return the command as text.

        ``\\emph`` is rendered as its first mandatory argument alone; any
        other command is rendered as LaTeX source: name, then each optional
        argument between brackets, then each mandatory one between braces.
        """
        # An argument-less `\emph` has nothing to unwrap: render it like any
        # other command rather than failing with IndexError.
        if self.name in [r'\emph'] and self.mandatory:
            return str(self.mandatory[0])
        return "{}{}{}".format(
            self.name,
            "".join(["[{}]".format(item) for item in self.optional]),
            "".join(["{{{}}}".format(item) for item in self.mandatory]),
            )
|||
|
|||
|
|||
class BeginSong(AST):
    """Node holding the titles and the arguments of a song header."""

    def __init__(self, titles, arguments):
        """Store `titles` and `arguments` unchanged on the instance."""
        self.titles, self.arguments = titles, arguments
@ -0,0 +1,110 @@ |
|||
# Pairs (LaTeX sequence, plain text replacement). The list is ordered: `detex`
# applies the pairs one after the other.
MATCH = [
    # Diacritics: a
    (r"\'a", "á"),
    (r"\'A", "Á"),
    (r"\`a", "à"),
    (r"\`A", "À"),
    (r"\^a", "â"),
    (r"\^A", "Â"),
    (r"\"a", "ä"),
    (r"\"A", "Ä"),

    # Diacritics: e
    (r"\'e", "é"),
    (r"\'E", "É"),
    (r"\`e", "è"),
    (r"\`E", "È"),
    (r"\^e", "ê"),
    (r"\^E", "Ê"),
    (r"\"e", "ë"),
    (r"\"E", "Ë"),

    # Diacritics: i (plain letter first, then the `\i` and `\I` forms)
    (r"\'i", "í"),
    (r"\'I", "Í"),
    (r"\`i", "ì"),
    (r"\`I", "Ì"),
    (r"\^i", "î"),
    (r"\^I", "Î"),
    (r"\"i", "ï"),
    (r"\"I", "Ï"),
    (r"\'\i", "í"),
    (r"\'\I", "Í"),
    (r"\`\i", "ì"),
    (r"\`\I", "Ì"),
    (r"\^\i", "î"),
    (r"\^\I", "Î"),
    (r"\"\i", "ï"),
    (r"\"\I", "Ï"),

    # Diacritics: o
    (r"\'o", "ó"),
    (r"\'O", "Ó"),
    (r"\`o", "ò"),
    (r"\`O", "Ò"),
    (r"\^o", "ô"),
    (r"\^O", "Ô"),
    (r"\"o", "ö"),
    (r"\"O", "Ö"),

    # Diacritics: u
    (r"\'u", "ú"),
    (r"\'U", "Ú"),
    (r"\`u", "ù"),
    (r"\`U", "Ù"),
    (r"\^u", "û"),
    (r"\^U", "Û"),
    (r"\"u", "ü"),
    (r"\"U", "Ü"),

    # Cedilla
    (r"\c c", "ç"),
    (r"\c C", "Ç"),

    # Ligatures: œ, æ
    (r"\oe", "œ"),
    (r"\OE", "Œ"),
    (r"\ae", "æ"),
    (r"\AE", "Æ"),

    # Spaces
    (r"\ ", " "),
    (r"\,", " "),
    # NOTE(review): in LaTeX `\~` is the tilde accent, the non-breaking space
    # being a bare `~` -- confirm that this entry is intended.
    (r"\~", " "),

    # IeC (dropped)
    (r"\IeC ", ""),

    # Miscellaneous
    (r"\dots", "…"),
    (r"\%", "%"),
    (r"\&", "&"),
    (r"\_", "_"),
    ]
|||
|
|||
|
|||
def detex(arg):
    """Recursively replace the LaTeX sequences listed in `MATCH` by plain text.

    The result depends on the type of `arg`:
    - None: the empty string (an empty parse result is None, and must not be
      rendered as the text "None");
    - dict: a new dictionary, same keys, converted values;
    - list: a new list of the converted items;
    - set: a new set of the converted items;
    - str: the string in which every pair of `MATCH` has been applied, in
      order, with `str.replace`, then stripped. A warning is printed if a
      backslash remains after the replacements;
    - anything else: the conversion of `str(arg)`.
    """
    if arg is None:
        return ""
    if isinstance(arg, dict):
        return {key: detex(value) for key, value in arg.items()}
    if isinstance(arg, list):
        return [detex(item) for item in arg]
    if isinstance(arg, set):
        return {detex(item) for item in arg}
    if isinstance(arg, str):
        string = arg
        for latex, plain in MATCH:
            string = string.replace(latex, plain)
        if '\\' in string:
            print("WARNING: Remaining command in string '{}'.".format(string))
        return string.strip()
    # Other objects (e.g. AST nodes) are converted through their string form.
    return detex(str(arg))
@ -0,0 +1,124 @@ |
|||
import ply.lex as lex |
|||
|
|||
# Names of the token types produced by the lexers.
tokens = (
    # Delimiters
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    # Commands and text
    'COMMAND',
    'NEWLINE',
    'COMMA',
    'EQUAL',
    'CHARACTER',
    'SPACE',
    # Song header
    'BEGINSONG',
    'SONG_LTITLE', 'SONG_RTITLE',
    'SONG_LOPTIONS', 'SONG_ROPTIONS',
    )
|||
|
|||
class SimpleLexer:
    """PLY lexer rules for generic LaTeX code.

    Instantiating the class builds the lexer with `lex.lex` and stores it in
    the `lexer` attribute of the class.
    """

    tokens = tokens

    # Regular expression rules for simple tokens
    t_LBRACKET = r'\['
    t_RBRACKET = r'\]'
    t_LBRACE = r'{'
    t_RBRACE = r'}'
    t_COMMAND = r'\\([@a-zA-Z]+|[^\\])'
    t_NEWLINE = r'\\\\'
    # Characters which cannot be part of a word. The tab is listed along with
    # the other blanks of t_SPACE: PLY tries the longest string rules first,
    # so a tab missing from this list is lexed as CHARACTER instead of SPACE.
    SPECIAL_CHARACTERS = (
        t_LBRACKET +
        t_RBRACKET +
        t_RBRACE +
        t_LBRACE +
        r"\\" +
        r" " +
        r"\t" +
        r"\n" +
        r"\r" +
        r"%" +
        r"=" +
        r","
        )
    t_CHARACTER = r'[^{}]'.format(SPECIAL_CHARACTERS)
    t_EQUAL = r'='
    t_COMMA = r','

    t_SPACE = r'[ \t\n\r]+'

    def __init__(self):
        """Build the PLY lexer and store it as a class attribute."""
        self.__class__.lexer = lex.lex(module = self)

    # Define a rule so we can track line numbers. No token is returned: the
    # matched newlines are discarded.
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    # Comments (from a percent sign to the end of the line) are discarded.
    def t_comment(self, t):
        r'%.*'
        pass

    # Error handling rule: report the offending character, then skip it.
    def t_error(self, t):
        print("Illegal character '%s'" % t.value[0]) # TODO log
        t.lexer.skip(1)
|||
|
|||
class SongLexer(SimpleLexer):
    """Lexer with a dedicated `beginsong` state for the song header.

    In this state, the outermost braces and brackets following `\\beginsong`
    get their own token types (SONG_LTITLE, SONG_RTITLE, SONG_LOPTIONS and
    SONG_ROPTIONS); nested ones keep the regular types. Nesting depths are
    stored on the PLY lexer object, as `open_braces` and `open_brackets`.
    """

    states = (
        ('beginsong', 'inclusive'),
        )

    # State beginsong
    # Entering the state: both nesting counters are reset.
    def t_INITIAL_BEGINSONG(self, t):
        r'\\beginsong'
        lexer = t.lexer
        lexer.push_state('beginsong')
        lexer.open_brackets = 0
        lexer.open_braces = 0
        return t

    def t_beginsong_LBRACKET(self, t):
        r'\['
        lexer = t.lexer
        if lexer.open_brackets == 0:
            t.type = 'SONG_LOPTIONS'
            # While the options are open, the brace counter is kept non-zero,
            # so that t_beginsong_LBRACE does not turn a brace found among the
            # options into SONG_LTITLE.
            lexer.open_braces += 1
        lexer.open_brackets += 1
        return t

    def t_beginsong_RBRACKET(self, t):
        r'\]'
        lexer = t.lexer
        lexer.open_brackets -= 1
        if lexer.open_brackets == 0:
            t.type = 'SONG_ROPTIONS'
            # Undo the increment done when the options were opened.
            lexer.open_braces -= 1
            lexer.pop_state()
            # Iterating over the lexer consumes every remaining token: what
            # follows the options is discarded (presumably because only the
            # song header is of interest).
            for _ in lexer:
                pass
        return t

    def t_beginsong_LBRACE(self, t):
        r'{'
        lexer = t.lexer
        if lexer.open_braces == 0:
            t.type = 'SONG_LTITLE'
        lexer.open_braces += 1
        return t

    # Closing brace NOT followed (blanks aside) by an opening bracket: when it
    # closes the outermost braces, the `beginsong` state is left.
    def t_beginsong_RBRACE1(self, t):
        r'}(?![ \t\r\n]*\[)'
        lexer = t.lexer
        lexer.open_braces -= 1
        t.type = 'RBRACE'
        if lexer.open_braces == 0:
            lexer.pop_state()
            t.type = 'SONG_RTITLE'
        return t

    # Closing brace followed (blanks aside) by an opening bracket: the state
    # is kept, since brackets are still to be read.
    def t_beginsong_RBRACE2(self, t):
        r'}(?=[ \t\r\n]*\[)'
        lexer = t.lexer
        lexer.open_braces -= 1
        t.type = 'RBRACE'
        if lexer.open_braces == 0:
            t.type = 'SONG_RTITLE'
        return t
|||
|
@ -0,0 +1,72 @@ |
|||
|
|||
# parsetab.py |
|||
# This file is automatically generated. Do not edit. |
|||
_tabversion = '3.2' |
|||
|
|||
_lr_method = 'LALR' |
|||
|
|||
_lr_signature = b'\xa4\n\x7f%6\xc5\x1d\x1cV\xd9V\xf5\x07\xd5\x10\x10' |
|||
|
|||
_lr_action_items = {'LBRACKET':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,],[9,9,9,9,9,9,-9,9,9,9,9,-9,9,-9,-9,-17,-21,-9,-9,-14,9,-19,-26,9,-9,-18,-20,-10,-16,-9,-12,-13,-11,9,9,9,9,9,-24,-22,-15,-25,9,-23,]),'SONG_ROPTIONS':([43,66,74,75,76,79,80,84,],[-11,70,-9,-9,-34,-28,-27,-33,]),'$end':([0,1,2,3,4,5,6,7,8,10,11,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-9,0,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,-7,-9,-14,-9,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SPACE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,74,75,81,],[10,10,10,10,10,10,-9,10,10,-9,10,-9,10,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,50,-9,-18,-20,-10,-16,-9,-12,-13,-11,50,50,50,50,50,-24,-22,-15,-25,50,-23,77,77,77,]),'COMMAND':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,],[11,11,11,11,11,11,-9,11,11,-9,11,-9,11,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,11,-9,-18,-20,-10,-16,-9,-12,-13,-11,11,11,11,11,11,-24,-22,-15,-25,11,-23,]),'RBRACKET':([1,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,38,-7,-9,-14,-9,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'LBRACE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,71,],[12,12,12,12,12,12,-9,12,12,-9,12,-9,12,-9,-9,-17,-21,-9,12,-14,-9,-19,-26,12,-9,-18,-20,-10,-16,12,-12,-13,-11,12,12,12,12,12,-24,-22,-15,-25,12,-23,12,]),'SONG_LOPTIONS':([15,33,35,60,],[-9,-26,54,-25,]),'CHARACTER':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,
46,47,48,50,52,53,54,55,60,62,68,70,77,81,82,83,],[8,8,8,8,8,8,24,8,8,-9,8,24,8,-9,24,-17,-21,24,-9,-14,-9,-19,-26,8,-9,-18,-20,-10,-16,-9,-12,-13,-11,8,8,8,8,8,-24,-22,68,-15,-25,8,68,-23,-31,-9,68,-32,]),'NEWLINE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,50,51,52,53,55,56,57,58,59,60,61,62,69,70,],[7,7,7,7,7,7,-9,7,7,-9,7,-9,7,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,-9,-9,-18,-20,-10,-16,-9,-12,-13,-11,-9,-43,-9,-9,-9,-9,62,-24,-22,-15,-41,-40,-38,-39,-25,-42,-9,62,-23,]),'error':([71,],[74,]),'EQUAL':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,54,55,60,62,65,67,68,70,72,77,81,82,83,],[13,13,13,13,13,13,-9,13,13,-9,13,-9,13,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,13,-9,-18,-20,-10,-16,-9,-12,-13,-11,13,13,13,13,13,-24,-22,-9,-15,-25,13,-30,71,-9,-23,-29,-31,-9,-9,-32,]),'COMMA':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,74,75,76,77,78,],[16,16,16,16,16,16,-9,16,16,-9,16,-9,16,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,16,-9,-18,-20,-10,-16,-9,-12,-13,-11,16,16,16,16,16,-24,-22,-15,-25,16,-23,-9,-9,-32,-31,81,]),'RBRACE':([1,3,4,5,6,7,8,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,30,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,-7,-9,-14,-9,43,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SONG_LTITLE':([15,],[34,]),'BEGINSONG':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[15,15,15,15,15,15,-9,15,15,-9,15,-9,15,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SONG_RTITLE':([8,11,13,16,22,23,24,27,28,29,31,34,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,56,57,58,59,61,62,63,64,69,73,],[-9,-9,-9,-9,-17,-21,-9,-9,-14,-9,-19,-9,-18,-20,-10,-16,-9,-12,-
13,-11,-9,-43,-9,-9,-9,60,-9,-9,-15,-41,-40,-38,-39,-42,-9,-37,-35,-9,-36,]),} |
|||
|
|||
_lr_action = { } |
|||
for _k, _v in _lr_action_items.items(): |
|||
for _x,_y in zip(_v[0],_v[1]): |
|||
if not _x in _lr_action: _lr_action[_x] = { } |
|||
_lr_action[_x][_k] = _y |
|||
del _lr_action_items |
|||
|
|||
_lr_goto_items = {'word':([0,1,4,5,6,7,9,10,12,14,34,44,46,47,48,50,62,],[1,1,1,1,1,1,1,1,1,1,44,44,44,44,44,44,44,]),'expression':([0,1,4,5,6,7,9,10,12,14,],[2,17,18,19,20,21,25,26,30,32,]),'dictionary_next':([74,75,],[79,80,]),'empty':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,24,27,29,34,35,40,44,46,47,48,50,51,54,62,68,69,74,75,81,82,],[3,3,3,3,3,3,23,3,3,28,3,23,3,33,23,23,39,28,45,52,39,45,45,45,45,45,63,65,45,65,63,76,76,83,65,]),'dictionary':([54,82,],[66,84,]),'songbrackets':([35,],[53,]),'command':([0,1,4,5,6,7,9,10,12,14,34,44,46,47,48,50,62,],[4,4,4,4,4,4,4,4,4,4,46,46,46,46,46,46,46,]),'identifier':([54,68,82,],[67,72,67,]),'songbraces':([15,],[35,]),'brackets_list':([11,29,],[27,42,]),'brackets':([0,1,4,5,6,7,9,10,11,12,14,29,34,44,46,47,48,50,62,],[6,6,6,6,6,6,6,6,29,6,6,29,47,47,47,47,47,47,47,]),'word_next':([8,13,16,24,],[22,31,36,37,]),'beginsong':([0,1,4,5,6,7,9,10,12,14,],[5,5,5,5,5,5,5,5,5,5,]),'title':([34,44,46,47,48,50,62,],[51,56,57,58,59,61,69,]),'titles_next':([51,69,],[64,73,]),'braces':([0,1,4,5,6,7,9,10,12,14,27,34,40,44,46,47,48,50,62,71,],[14,14,14,14,14,14,14,14,14,14,40,48,40,48,48,48,48,48,48,75,]),'separator':([74,75,81,],[78,78,82,]),'titles':([34,],[49,]),'braces_list':([27,40,],[41,55,]),} |
|||
|
|||
_lr_goto = { } |
|||
for _k, _v in _lr_goto_items.items(): |
|||
for _x,_y in zip(_v[0],_v[1]): |
|||
if not _x in _lr_goto: _lr_goto[_x] = { } |
|||
_lr_goto[_x][_k] = _y |
|||
del _lr_goto_items |
|||
_lr_productions = [ |
|||
("S' -> expression","S'",1,None,None,None), |
|||
('expression -> brackets expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',8), |
|||
('expression -> braces expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',9), |
|||
('expression -> command expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',10), |
|||
('expression -> NEWLINE expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',11), |
|||
('expression -> beginsong expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',12), |
|||
('expression -> word expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',13), |
|||
('expression -> SPACE expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',14), |
|||
('expression -> empty','expression',1,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',15), |
|||
('empty -> <empty>','empty',0,'p_empty','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',26), |
|||
('brackets -> LBRACKET expression RBRACKET','brackets',3,'p_brackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',30), |
|||
('braces -> LBRACE expression RBRACE','braces',3,'p_braces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',34), |
|||
('command -> COMMAND brackets_list braces_list','command',3,'p_command','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',38), |
|||
('brackets_list -> brackets brackets_list','brackets_list',2,'p_brackets_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',42), |
|||
('brackets_list -> empty','brackets_list',1,'p_brackets_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',43), |
|||
('braces_list -> braces braces_list','braces_list',2,'p_braces_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',52), |
|||
('braces_list -> empty','braces_list',1,'p_braces_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',53), |
|||
('word -> CHARACTER word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',62), |
|||
('word -> COMMA word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',63), |
|||
('word -> EQUAL word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',64), |
|||
('word_next -> CHARACTER word_next','word_next',2,'p_word_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',69), |
|||
('word_next -> empty','word_next',1,'p_word_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',70), |
|||
('beginsong -> BEGINSONG songbraces songbrackets','beginsong',3,'p_beginsong','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',78), |
|||
('songbrackets -> SONG_LOPTIONS dictionary SONG_ROPTIONS','songbrackets',3,'p_songbrackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',82), |
|||
('songbrackets -> empty','songbrackets',1,'p_songbrackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',83), |
|||
('songbraces -> SONG_LTITLE titles SONG_RTITLE','songbraces',3,'p_songbraces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',91), |
|||
('songbraces -> empty','songbraces',1,'p_songbraces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',92), |
|||
('dictionary -> identifier EQUAL braces dictionary_next','dictionary',4,'p_dictionary','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',100), |
|||
('dictionary -> identifier EQUAL error dictionary_next','dictionary',4,'p_dictionary','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',101), |
|||
('identifier -> CHARACTER identifier','identifier',2,'p_identifier','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',111), |
|||
('identifier -> empty','identifier',1,'p_identifier','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',112), |
|||
('separator -> SPACE','separator',1,'p_separator','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',120), |
|||
('separator -> empty','separator',1,'p_separator','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',121), |
|||
('dictionary_next -> separator COMMA separator dictionary','dictionary_next',4,'p_dictonary_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',126), |
|||
('dictionary_next -> empty','dictionary_next',1,'p_dictonary_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',127), |
|||
('titles -> title titles_next','titles',2,'p_titles','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',135), |
|||
('titles_next -> NEWLINE title titles_next','titles_next',3,'p_titles_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',139), |
|||
('titles_next -> empty','titles_next',1,'p_titles_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',140), |
|||
('title -> brackets title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',148), |
|||
('title -> braces title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',149), |
|||
('title -> command title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',150), |
|||
('title -> word title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',151), |
|||
('title -> SPACE title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',152), |
|||
('title -> empty','title',1,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',153), |
|||
] |
@ -0,0 +1,194 @@ |
|||
import ply.yacc as yacc |
|||
import inspect # TODO supprimer |
|||
|
|||
from patacrep.latex.lexer import tokens, SimpleLexer, SongLexer |
|||
from patacrep.latex import ast |
|||
from patacrep.latex.detex import detex |
|||
|
|||
class Parser:
    """PLY grammar of the subset of LaTeX read in songs.

    The docstrings of the `p_*` methods are the grammar rules read by
    `ply.yacc`: they are not documentation, and must not be reworded.
    The data gathered while parsing is stored in `self.ast.metadata`.
    """

    def __init__(self, filename=None):
        """Set up the parser, and reset the metadata shared by the AST nodes.

        `filename` is only used in error messages.
        """
        self.tokens = tokens
        self.ast = ast.AST
        self.ast.init_metadata()
        self.filename = filename

    def __find_column(self, token):
        """Return the 1-based column of `token` in its line."""
        # rfind() returns -1 when the token is on the first line, which makes
        # the line start at index 0.
        line_start = token.lexer.lexdata.rfind('\n', 0, token.lexpos) + 1
        return (token.lexpos - line_start) + 1

    def p_error(self, p):
        """Print the location of a syntax error.

        `p` is the offending token, or None when the error is met at the end
        of the input.
        """
        if p is None:
            print("Error in file {}: unexpected end of input.".format(
                str(self.filename),
                ))
            return
        print("Erreur fichier {}, ligne {}, position {}.".format( # TODO
            str(self.filename),
            p.lineno,
            self.__find_column(p),
            )
        )

    # Value: an ast.Expression holding the items in order, or None when empty.
    def p_expression(self, p):
        """expression : brackets expression
                      | braces expression
                      | command expression
                      | NEWLINE expression
                      | beginsong expression
                      | word expression
                      | SPACE expression
                      | empty
        """
        if len(p) == 3:
            if p[2] is None:
                p[0] = ast.Expression(p[1])
            else:
                p[0] = p[2].prepend(p[1])
        else:
            p[0] = None

    # Value: None.
    def p_empty(self, p):
        """empty :"""
        p[0] = None

    # Value: the enclosed expression.
    def p_brackets(self, p):
        """brackets : LBRACKET expression RBRACKET"""
        p[0] = p[2]

    # Value: the enclosed expression.
    def p_braces(self, p):
        """braces : LBRACE expression RBRACE"""
        p[0] = p[2]

    # Value: an ast.Command built from the name and both argument lists.
    def p_command(self, p):
        """command : COMMAND brackets_list braces_list"""
        p[0] = ast.Command(p[1], p[2], p[3])

    # Value: the list of the bracketed arguments, in order.
    def p_brackets_list(self, p):
        """brackets_list : brackets brackets_list
                         | empty
        """
        if len(p) == 3:
            p[0] = p[2]
            p[0].insert(0, p[1])
        else:
            p[0] = []

    # Value: the list of the braced arguments, in order.
    def p_braces_list(self, p):
        """braces_list : braces braces_list
                       | empty
        """
        if len(p) == 3:
            p[0] = p[2]
            p[0].insert(0, p[1])
        else:
            p[0] = []

    # Value: the word, as a string.
    def p_word(self, p):
        """word : CHARACTER word_next
                | COMMA word_next
                | EQUAL word_next
        """
        p[0] = p[1] + p[2]

    # Value: the remaining characters of the word, as a string.
    def p_word_next(self, p):
        """word_next : CHARACTER word_next
                     | empty
        """
        if len(p) == 2:
            p[0] = ""
        else:
            p[0] = p[1] + p[2]

    # No value is assigned to p[0]: titles and options are stored in the
    # metadata instead.
    def p_beginsong(self, p):
        """beginsong : BEGINSONG separator songbraces separator songbrackets"""
        self.ast.metadata["@titles"] = p[3]
        self.ast.metadata.update(p[5])

    # Value: the dictionary of the song options (empty without brackets).
    def p_songbrackets(self, p):
        """songbrackets : SONG_LOPTIONS separator dictionary separator SONG_ROPTIONS
                        | empty
        """
        if len(p) == 6:
            p[0] = p[3]
        else:
            p[0] = {}

    # Value: the list of the song titles (empty without braces).
    def p_songbraces(self, p):
        """songbraces : SONG_LTITLE separator titles separator SONG_RTITLE
                      | empty
        """
        if len(p) == 6:
            p[0] = p[3]
        else:
            p[0] = []

    # Value: a dictionary mapping each identifier to its braced value. An
    # exception is raised when a value is not an ast.Expression.
    def p_dictionary(self, p):
        """dictionary : identifier EQUAL braces dictionary_next
                      | identifier EQUAL error dictionary_next
        """
        if isinstance(p[3], ast.Expression):
            p[0] = {}
            p[0][p[1]] = p[3]
            p[0].update(p[4])
        else:
            raise Exception("Do enclose arguments between braces.") # TODO

    # Value: the identifier, as a string.
    def p_identifier(self, p):
        """identifier : CHARACTER identifier
                      | empty
        """
        if len(p) == 2:
            p[0] = ""
        else:
            p[0] = p[1] + p[2]

    # Value: None (separators are dropped).
    def p_separator(self, p):
        """separator : SPACE
                     | empty
        """
        p[0] = None

    # Value: the dictionary of the remaining options.
    def p_dictonary_next(self, p):
        """dictionary_next : separator COMMA separator dictionary
                           | empty
        """
        if len(p) == 5:
            p[0] = p[4]
        else:
            p[0] = {}

    # Value: the list of the titles.
    def p_titles(self, p):
        """titles : title titles_next"""
        p[0] = [p[1]] + p[2]

    # Value: the list of the remaining titles.
    def p_titles_next(self, p):
        """titles_next : NEWLINE title titles_next
                       | empty
        """
        if len(p) == 2:
            p[0] = []
        else:
            p[0] = [p[2]] + p[3]

    # Value: an ast.Expression holding the items in order, or None when empty.
    def p_title(self, p):
        """title : brackets title
                 | braces title
                 | command title
                 | word title
                 | SPACE title
                 | empty
        """
        if len(p) == 2:
            p[0] = None
        else:
            if p[2] is None:
                p[0] = ast.Expression(p[1])
            else:
                p[0] = p[2].prepend(p[1])
|||
|
|||
|
|||
def tex2plain(string):
    """Parse `string` with the simple lexer, and return its `detex` rendering."""
    parser = yacc.yacc(module = Parser())
    tree = parser.parse(string, lexer = SimpleLexer().lexer)
    return detex(tree)
|||
|
|||
def parsesong(string, filename=None):
    """Parse the song `string`, and return its metadata after `detex`.

    `filename` is only used in the error messages of the parser.
    """
    song_parser = Parser(filename)
    yacc.yacc(module = song_parser).parse(string, lexer = SongLexer().lexer)
    # The metadata is a class attribute shared by every AST node: reading it
    # from the parser rather than from the parse result gives the same
    # dictionary, and does not fail when the result is None (empty input).
    return detex(song_parser.ast.metadata)
|||
|
@ -0,0 +1,50 @@ |
|||
|
|||
# Manual check of the LaTeX parser: the values computed by the parser are
# printed next to the expected ones.
from patacrep.latex import tex2plain
from patacrep.latex.syntax import parsesong

# Song with several titles, options, a comment, and text before the header.
song = r"""
\selectlanguage{french}
plop = tag
% Un commentaire
\columns{3}
\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre}[album={Tagada tsoin \"itsoin}, cov={pouf.png}] % un autre
%\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre}[album={Tagada tsoin \"itsoin}] % un autre
%\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre}

Dans [Dm6]cette ruedots [E7]
"""

# Song whose options are on the line following the titles (not parsed below).
isong = r"""
\selectlanguage{french}
\songcolumns{2}
\beginsong{Tous les bateaux, tous les oiseaux}
  [by={Michel Polnareff},cov={passe-present},album={Passé Présent}]

Dans \[Dm6]cette ruedots [E7]
"""

tex = "D\\^iacritiqu\\'Es"

print(tex2plain(tex) == "DîacritiquÉs")
# The second argument of parsesong() is a file name, used in error messages
# only: nothing is to be given when parsing a string.
print(parsesong(song))
# Expected value, titles written as they appear in `song` once rendered.
print({
    "@titles": ["Titre un", "Titre deux", "Tître troïs", "Tître quatre"],
    "@languages": {"french"},
    "@path": "TODO",
    "album": "Tagada tsoin ïtsoin",
    "cov": "pouf.png",
    }
)
Loading…
Reference in new issue