Browse Source

[WIP] LaTeX files are parsed again

pull/66/head
Louis 11 years ago
parent
commit
baedd3b236
  1. 17
      patacrep/build.py
  2. 2
      patacrep/content/sorted.py
  3. 4
      patacrep/index.py
  4. 27
      patacrep/latex/__init__.py
  5. 49
      patacrep/latex/ast.py
  6. 110
      patacrep/latex/detex.py
  7. 124
      patacrep/latex/lexer.py
  8. 72
      patacrep/latex/parsetab.py
  9. 194
      patacrep/latex/syntax.py
  10. 50
      patacrep/latex/testing.py
  11. 17
      patacrep/songs.py

17
patacrep/build.py

@ -50,7 +50,6 @@ class Songbook(object):
super(Songbook, self).__init__() super(Songbook, self).__init__()
self.config = raw_songbook self.config = raw_songbook
self.basename = basename self.basename = basename
self.contentlist = []
# Some special keys have their value processed. # Some special keys have their value processed.
self._set_datadir() self._set_datadir()
@ -86,7 +85,7 @@ class Songbook(object):
- output: a file object, in which the file will be written. - output: a file object, in which the file will be written.
""" """
# Updating configuration # Updating configuration
config = DEFAULT_CONFIG config = DEFAULT_CONFIG.copy()
config.update(self.config) config.update(self.config)
renderer = TexRenderer( renderer = TexRenderer(
config['template'], config['template'],
@ -100,18 +99,16 @@ class Songbook(object):
copy.deepcopy(config['authwords']) copy.deepcopy(config['authwords'])
) )
self.config = config
# Configuration set # Configuration set
self.contentlist = content.process_content( config['render_content'] = content.render_content
self.config.get('content', []), config['content'] = content.process_content(
self.config, config.get('content', []),
config,
) )
self.config['render_content'] = content.render_content config['filename'] = output.name[:-4]
self.config['content'] = self.contentlist
self.config['filename'] = output.name[:-4]
renderer.render_tex(output, self.config) renderer.render_tex(output, config)
class SongbookBuilder(object): class SongbookBuilder(object):

2
patacrep/content/sorted.py

@ -55,7 +55,7 @@ def key_generator(sort):
field = song.authors field = song.authors
else: else:
try: try:
field = song.args[key] field = song.data[key]
except KeyError: except KeyError:
LOGGER.debug( LOGGER.debug(
"Ignoring unknown key '{}' for song {}.".format( "Ignoring unknown key '{}' for song {}.".format(

4
patacrep/index.py

@ -13,7 +13,7 @@ import re
from patacrep import authors from patacrep import authors
from patacrep import encoding from patacrep import encoding
from patacrep.latex import latex2unicode from patacrep.latex import tex2plain
EOL = "\n" EOL = "\n"
@ -113,7 +113,7 @@ class Index(object):
if not key in self.data[first]: if not key in self.data[first]:
self.data[first][key] = { self.data[first][key] = {
'sortingkey': [ 'sortingkey': [
unidecode.unidecode(latex2unicode(item)).lower() unidecode.unidecode(tex2plain(item)).lower()
for item in key for item in key
], ],
'entries': [], 'entries': [],

27
patacrep/latex/__init__.py

@ -1,23 +1,26 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from patacrep.latex.syntax import tex2plain as syntax_tex2plain
from patacrep.latex.syntax import parsesong as syntax_parsesong
from patacrep.latex.detex import detex
from patacrep import encoding
"""Very simple LaTeX parser""" """Very simple LaTeX parser"""
def latex2unicode(string): def tex2plain(string):
"""Convert LaTeX string to unicode""" """Render LaTeX string
return "TODO"
Very few commands (mostly diacritics) are interpreted.
"""
return syntax_tex2plain(string)
def parsetex(path): def parsesong(path):
"""Return a dictonary of data read from the latex file `path`. """Return a dictonary of data read from the latex file `path`.
This file is a drop in replacement for an old function. Elle ne devrait pas This file is a drop in replacement for an old function. Elle ne devrait pas
apparaitre telle quelle dans la version finale, une fois que apparaitre telle quelle dans la version finale, une fois que
https://github.com/patacrep/patacrep/issues/64 aura été pris en compte. https://github.com/patacrep/patacrep/issues/64 aura été pris en compte.
TODO
""" """
return { data = syntax_parsesong(encoding.open_read(path).read(), path)
'titles': ["TODO"], data['@path'] = path
'args': {}, return data
'languages': ['french', 'english', 'portuguese', 'spanish'],
}

49
patacrep/latex/ast.py

@ -0,0 +1,49 @@
class AST:
    """Base class of the nodes built while parsing LaTeX.

    The metadata collected during a parse is kept in a class attribute, so
    subclasses reach it through ``self.metadata``.
    """

    # No metadata store exists until `init_metadata` has been called.
    metadata = None

    @classmethod
    def init_metadata(cls):
        """Replace the metadata by a new dictionary with an empty language set."""
        fresh = {}
        fresh['@languages'] = set()
        cls.metadata = fresh
class Expression(AST):
    """Ordered sequence of parsed items, rendered by concatenating them."""

    def __init__(self, value):
        """Create an expression holding `value` (nothing if `value` is None)."""
        super().__init__()
        # Same rule as `prepend`: a missing item (None) is not stored,
        # otherwise `__str__` would render it as the literal text "None".
        self.content = [] if value is None else [value]

    def prepend(self, value):
        """Insert `value` in front of the content, unless it is None.

        Returns the expression itself so that calls can be chained.
        """
        if value is not None:
            self.content.insert(0, value)
        return self

    def __str__(self):
        return "".join(str(item) for item in self.content)
class Command(AST):
    """LaTeX command, with its optional and mandatory arguments.

    Arguments:
    - name: command name, including the leading backslash;
    - optional: list of the bracketed arguments;
    - mandatory: list of the braced arguments.

    An argument may be None when the corresponding group is empty.
    """

    def __init__(self, name, optional, mandatory):
        super().__init__()
        self.name = name
        self.mandatory = mandatory
        self.optional = optional
        if name == r'\selectlanguage':
            # Record the selected languages in the shared metadata. Empty
            # arguments are skipped so that no language named "None" appears.
            self.metadata['@languages'] |= {
                argument
                for argument in self.mandatory
                if argument is not None
            }

    @staticmethod
    def _argument_text(argument):
        """Return the text of `argument`; an empty argument gives ''."""
        return "" if argument is None else str(argument)

    def __str__(self):
        # `\emph` is rendered as its bare argument. Without any argument, fall
        # through to the generic rendering instead of raising IndexError.
        if self.name in [r'\emph'] and self.mandatory:
            return self._argument_text(self.mandatory[0])
        return "{}{}{}".format(
            self.name,
            "".join(
                "[{}]".format(self._argument_text(item))
                for item in self.optional
            ),
            "".join(
                "{{{}}}".format(self._argument_text(item))
                for item in self.mandatory
            ),
        )
class BeginSong(AST):
    """Node holding the titles and the arguments of a song header."""

    def __init__(self, titles, arguments):
        # Both values are stored untouched.
        self.titles, self.arguments = titles, arguments

110
patacrep/latex/detex.py

@ -0,0 +1,110 @@
# Table of (LaTeX sequence, plain text replacement) pairs. `detex` applies
# the replacements one after the other, in the order of this list.
MATCH = [
    # Diacritics: a
    (r"\'a", "á"),
    (r"\'A", "Á"),
    (r"\`a", "à"),
    (r"\`A", "À"),
    (r"\^a", "â"),
    (r"\^A", "Â"),
    (r"\"a", "ä"),
    (r"\"A", "Ä"),
    # Diacritics: e
    (r"\'e", "é"),
    (r"\'E", "É"),
    (r"\`e", "è"),
    (r"\`E", "È"),
    (r"\^e", "ê"),
    (r"\^E", "Ê"),
    (r"\"e", "ë"),
    (r"\"E", "Ë"),
    # Diacritics: i (both the plain letter and the `\i` / `\I` forms)
    (r"\'i", "í"),
    (r"\'I", "Í"),
    (r"\`i", "ì"),
    (r"\`I", "Ì"),
    (r"\^i", "î"),
    (r"\^I", "Î"),
    (r"\"i", "ï"),
    (r"\"I", "Ï"),
    (r"\'\i", "í"),
    (r"\'\I", "Í"),
    (r"\`\i", "ì"),
    (r"\`\I", "Ì"),
    (r"\^\i", "î"),
    (r"\^\I", "Î"),
    (r"\"\i", "ï"),
    (r"\"\I", "Ï"),
    # Diacritics: o
    (r"\'o", "ó"),
    (r"\'O", "Ó"),
    (r"\`o", "ò"),
    (r"\`O", "Ò"),
    (r"\^o", "ô"),
    (r"\^O", "Ô"),
    (r"\"o", "ö"),
    (r"\"O", "Ö"),
    # Diacritics: u
    (r"\'u", "ú"),
    (r"\'U", "Ú"),
    (r"\`u", "ù"),
    (r"\`U", "Ù"),
    (r"\^u", "û"),
    (r"\^U", "Û"),
    (r"\"u", "ü"),
    (r"\"U", "Ü"),
    # Cedilla
    (r"\c c", "ç"),
    (r"\c C", "Ç"),
    # œ, æ
    (r"\oe", "œ"),
    (r"\OE", "Œ"),
    (r"\ae", "æ"),
    (r"\AE", "Æ"),
    # Spaces
    (r"\ ", " "),
    (r"\,", " "),
    # NOTE(review): in LaTeX `\~` is the tilde accent and `~` the unbreakable
    # space -- confirm that replacing `\~` by a space is intended.
    (r"\~", " "),
    # IeC (removed together with the space following it)
    (r"\IeC ", ""),
    # Miscellaneous (`\dots` is dropped, not replaced by an ellipsis)
    (r"\dots", ""),
    (r"\%", "%"),
    (r"\&", "&"),
    (r"\_", "_"),
]
def detex(arg):
    """Recursively convert LaTeX markup into plain text.

    The result depends on the type of `arg`:
    - None: the empty string (a missing item must not become the text "None");
    - dict: a new dictionary with the same keys and converted values;
    - list: a new list of the converted items;
    - set: a new set of the converted items;
    - str: the string with every sequence of `MATCH` replaced, stripped of
      surrounding whitespace (a warning is printed if a backslash remains);
    - anything else: the conversion of `str(arg)`.
    """
    if arg is None:
        return ""
    if isinstance(arg, dict):
        return {key: detex(value) for (key, value) in arg.items()}
    if isinstance(arg, list):
        return [detex(item) for item in arg]
    if isinstance(arg, set):
        return {detex(item) for item in arg}
    if isinstance(arg, str):
        string = arg
        for (latex, plain) in MATCH:
            string = string.replace(latex, plain)
        if '\\' in string:
            # Some command is not covered by MATCH: tell it, but go on.
            print("WARNING: Remaining command in string '{}'.".format(string))
        return string.strip()
    # Other objects (e.g. syntax tree nodes) are rendered as text first.
    return detex(str(arg))

124
patacrep/latex/lexer.py

@ -0,0 +1,124 @@
import ply.lex as lex
# Names of the token types, shared by the lexers and the parser.
tokens = (
    # Generic LaTeX tokens
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'COMMAND', 'NEWLINE',
    'COMMA', 'EQUAL',
    'CHARACTER', 'SPACE',
    # Tokens of the song header
    'BEGINSONG',
    'SONG_LTITLE', 'SONG_RTITLE',
    'SONG_LOPTIONS', 'SONG_ROPTIONS',
)
class SimpleLexer:
    """Lexer for generic LaTeX text, built with PLY.

    NOTE: the first string of each `t_*` method is the regular expression of
    the rule, not a description; it must not be reworded.
    """

    tokens = tokens
    # Regular expression rules for simple tokens
    t_LBRACKET = r'\['
    t_RBRACKET = r'\]'
    t_LBRACE = r'{'
    t_RBRACE = r'}'
    # A backslash followed either by a run of letters or `@`, or by one single
    # character other than a backslash.
    t_COMMAND = r'\\([@a-zA-Z]+|[^\\])'
    # Two backslashes in a row.
    t_NEWLINE = r'\\\\'
    # Characters that cannot be part of a CHARACTER token; they are
    # concatenated to form the content of a negated character class.
    # NOTE(review): the tabulation is matched by t_SPACE but is not excluded
    # here -- confirm that this is intended.
    SPECIAL_CHARACTERS = (
        t_LBRACKET +
        t_RBRACKET +
        t_RBRACE +
        t_LBRACE +
        r"\\" +
        r" " +
        r"\n" +
        r"\r" +
        r"%" +
        r"=" +
        r","
    )
    t_CHARACTER = r'[^{}]'.format(SPECIAL_CHARACTERS)
    t_EQUAL = r'='
    t_COMMA = r','
    t_SPACE = r'[ \t\n\r]+'

    def __init__(self):
        # The built lexer is stored on the class, not on the instance.
        self.__class__.lexer = lex.lex(module = self)

    # Define a rule so we can track line numbers
    # Nothing is returned, so presumably no token is produced for the matched
    # newlines -- TODO confirm against the PLY documentation.
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    # Comments (from a percent sign up to the end of the line) are matched and
    # nothing is returned for them.
    def t_comment(self, t):
        r'%.*'
        pass

    # Error handling rule
    # Report the offending character, then skip it and go on.
    def t_error(self, t):
        print("Illegal character '%s'" % t.value[0]) # TODO log
        t.lexer.skip(1)
class SongLexer(SimpleLexer):
    """Lexer with an additional `beginsong` state for the song header.

    While in this state, two counters stored on the lexer (`open_braces` and
    `open_brackets`) are used to tell the outermost delimiters of the header
    (given SONG_* token types) from the nested ones.

    NOTE: the first string of each `t_*` method is the regular expression of
    the rule, not a description; it must not be reworded.
    """

    states = (
        ('beginsong', 'inclusive'),
    )

    # State beginsong
    # Enter the `beginsong` state and reset both nesting counters.
    def t_INITIAL_BEGINSONG(self, t):
        r'\\beginsong'
        t.lexer.push_state('beginsong')
        t.lexer.open_brackets = 0
        t.lexer.open_braces = 0
        return t

    # An opening bracket met while no bracket is open starts the options.
    def t_beginsong_LBRACKET(self, t):
        r'\['
        if t.lexer.open_brackets == 0:
            t.type = 'SONG_LOPTIONS'
            # The options bracket is also counted as an open brace,
            # presumably so that braces inside the options never bring the
            # brace counter back to zero (and so are never taken for the
            # title delimiters) -- TODO confirm.
            t.lexer.open_braces += 1
        t.lexer.open_brackets += 1
        return t

    # The bracket bringing the counter back to zero ends the options.
    def t_beginsong_RBRACKET(self, t):
        r'\]'
        t.lexer.open_brackets -= 1
        if t.lexer.open_brackets == 0:
            t.type = 'SONG_ROPTIONS'
            # Undo the extra brace counted when the options were opened.
            t.lexer.open_braces -= 1
            t.lexer.pop_state()
            # Iterating over the lexer consumes the tokens that follow, so
            # presumably nothing located after the options is handed to the
            # parser (only the header seems to be needed) -- TODO confirm.
            for __ignored in t.lexer:
                pass
        return t

    # An opening brace met while no brace is open starts the titles.
    def t_beginsong_LBRACE(self, t):
        r'{'
        if t.lexer.open_braces == 0:
            t.type = 'SONG_LTITLE'
        t.lexer.open_braces += 1
        return t

    # Closing brace NOT followed (after optional whitespace) by an opening
    # bracket: if it closes the titles, no options follow, and the
    # `beginsong` state is left.
    def t_beginsong_RBRACE1(self, t):
        r'}(?![ \t\r\n]*\[)'
        t.lexer.open_braces -= 1
        t.type = 'RBRACE'
        if t.lexer.open_braces == 0:
            t.lexer.pop_state()
            t.type = 'SONG_RTITLE'
        return t

    # Closing brace followed (after optional whitespace) by an opening
    # bracket: if it closes the titles, the state is kept since the bracket
    # that follows still has to be read in this state.
    def t_beginsong_RBRACE2(self, t):
        r'}(?=[ \t\r\n]*\[)'
        t.lexer.open_braces -= 1
        t.type = 'RBRACE'
        if t.lexer.open_braces == 0:
            t.type = 'SONG_RTITLE'
        return t

72
patacrep/latex/parsetab.py

@ -0,0 +1,72 @@
# parsetab.py
# This file is automatically generated. Do not edit.
_tabversion = '3.2'
_lr_method = 'LALR'
_lr_signature = b'\xa4\n\x7f%6\xc5\x1d\x1cV\xd9V\xf5\x07\xd5\x10\x10'
_lr_action_items = {'LBRACKET':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,],[9,9,9,9,9,9,-9,9,9,9,9,-9,9,-9,-9,-17,-21,-9,-9,-14,9,-19,-26,9,-9,-18,-20,-10,-16,-9,-12,-13,-11,9,9,9,9,9,-24,-22,-15,-25,9,-23,]),'SONG_ROPTIONS':([43,66,74,75,76,79,80,84,],[-11,70,-9,-9,-34,-28,-27,-33,]),'$end':([0,1,2,3,4,5,6,7,8,10,11,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-9,0,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,-7,-9,-14,-9,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SPACE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,74,75,81,],[10,10,10,10,10,10,-9,10,10,-9,10,-9,10,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,50,-9,-18,-20,-10,-16,-9,-12,-13,-11,50,50,50,50,50,-24,-22,-15,-25,50,-23,77,77,77,]),'COMMAND':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,],[11,11,11,11,11,11,-9,11,11,-9,11,-9,11,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,11,-9,-18,-20,-10,-16,-9,-12,-13,-11,11,11,11,11,11,-24,-22,-15,-25,11,-23,]),'RBRACKET':([1,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,38,-7,-9,-14,-9,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'LBRACE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,71,],[12,12,12,12,12,12,-9,12,12,-9,12,-9,12,-9,-9,-17,-21,-9,12,-14,-9,-19,-26,12,-9,-18,-20,-10,-16,12,-12,-13,-11,12,12,12,12,12,-24,-22,-15,-25,12,-23,12,]),'SONG_LOPTIONS':([15,33,35,60,],[-9,-26,54,-25,]),'CHARACTER':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,
46,47,48,50,52,53,54,55,60,62,68,70,77,81,82,83,],[8,8,8,8,8,8,24,8,8,-9,8,24,8,-9,24,-17,-21,24,-9,-14,-9,-19,-26,8,-9,-18,-20,-10,-16,-9,-12,-13,-11,8,8,8,8,8,-24,-22,68,-15,-25,8,68,-23,-31,-9,68,-32,]),'NEWLINE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,50,51,52,53,55,56,57,58,59,60,61,62,69,70,],[7,7,7,7,7,7,-9,7,7,-9,7,-9,7,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,-9,-9,-18,-20,-10,-16,-9,-12,-13,-11,-9,-43,-9,-9,-9,-9,62,-24,-22,-15,-41,-40,-38,-39,-25,-42,-9,62,-23,]),'error':([71,],[74,]),'EQUAL':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,54,55,60,62,65,67,68,70,72,77,81,82,83,],[13,13,13,13,13,13,-9,13,13,-9,13,-9,13,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,13,-9,-18,-20,-10,-16,-9,-12,-13,-11,13,13,13,13,13,-24,-22,-9,-15,-25,13,-30,71,-9,-23,-29,-31,-9,-9,-32,]),'COMMA':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,74,75,76,77,78,],[16,16,16,16,16,16,-9,16,16,-9,16,-9,16,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,16,-9,-18,-20,-10,-16,-9,-12,-13,-11,16,16,16,16,16,-24,-22,-15,-25,16,-23,-9,-9,-32,-31,81,]),'RBRACE':([1,3,4,5,6,7,8,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,30,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,-7,-9,-14,-9,43,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SONG_LTITLE':([15,],[34,]),'BEGINSONG':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[15,15,15,15,15,15,-9,15,15,-9,15,-9,15,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SONG_RTITLE':([8,11,13,16,22,23,24,27,28,29,31,34,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,56,57,58,59,61,62,63,64,69,73,],[-9,-9,-9,-9,-17,-21,-9,-9,-14,-9,-19,-9,-18,-20,-10,-16,-9,-12,-
13,-11,-9,-43,-9,-9,-9,60,-9,-9,-15,-41,-40,-38,-39,-42,-9,-37,-35,-9,-36,]),}
_lr_action = { }
for _k, _v in _lr_action_items.items():
for _x,_y in zip(_v[0],_v[1]):
if not _x in _lr_action: _lr_action[_x] = { }
_lr_action[_x][_k] = _y
del _lr_action_items
_lr_goto_items = {'word':([0,1,4,5,6,7,9,10,12,14,34,44,46,47,48,50,62,],[1,1,1,1,1,1,1,1,1,1,44,44,44,44,44,44,44,]),'expression':([0,1,4,5,6,7,9,10,12,14,],[2,17,18,19,20,21,25,26,30,32,]),'dictionary_next':([74,75,],[79,80,]),'empty':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,24,27,29,34,35,40,44,46,47,48,50,51,54,62,68,69,74,75,81,82,],[3,3,3,3,3,3,23,3,3,28,3,23,3,33,23,23,39,28,45,52,39,45,45,45,45,45,63,65,45,65,63,76,76,83,65,]),'dictionary':([54,82,],[66,84,]),'songbrackets':([35,],[53,]),'command':([0,1,4,5,6,7,9,10,12,14,34,44,46,47,48,50,62,],[4,4,4,4,4,4,4,4,4,4,46,46,46,46,46,46,46,]),'identifier':([54,68,82,],[67,72,67,]),'songbraces':([15,],[35,]),'brackets_list':([11,29,],[27,42,]),'brackets':([0,1,4,5,6,7,9,10,11,12,14,29,34,44,46,47,48,50,62,],[6,6,6,6,6,6,6,6,29,6,6,29,47,47,47,47,47,47,47,]),'word_next':([8,13,16,24,],[22,31,36,37,]),'beginsong':([0,1,4,5,6,7,9,10,12,14,],[5,5,5,5,5,5,5,5,5,5,]),'title':([34,44,46,47,48,50,62,],[51,56,57,58,59,61,69,]),'titles_next':([51,69,],[64,73,]),'braces':([0,1,4,5,6,7,9,10,12,14,27,34,40,44,46,47,48,50,62,71,],[14,14,14,14,14,14,14,14,14,14,40,48,40,48,48,48,48,48,48,75,]),'separator':([74,75,81,],[78,78,82,]),'titles':([34,],[49,]),'braces_list':([27,40,],[41,55,]),}
_lr_goto = { }
for _k, _v in _lr_goto_items.items():
for _x,_y in zip(_v[0],_v[1]):
if not _x in _lr_goto: _lr_goto[_x] = { }
_lr_goto[_x][_k] = _y
del _lr_goto_items
_lr_productions = [
("S' -> expression","S'",1,None,None,None),
('expression -> brackets expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',8),
('expression -> braces expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',9),
('expression -> command expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',10),
('expression -> NEWLINE expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',11),
('expression -> beginsong expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',12),
('expression -> word expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',13),
('expression -> SPACE expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',14),
('expression -> empty','expression',1,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',15),
('empty -> <empty>','empty',0,'p_empty','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',26),
('brackets -> LBRACKET expression RBRACKET','brackets',3,'p_brackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',30),
('braces -> LBRACE expression RBRACE','braces',3,'p_braces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',34),
('command -> COMMAND brackets_list braces_list','command',3,'p_command','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',38),
('brackets_list -> brackets brackets_list','brackets_list',2,'p_brackets_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',42),
('brackets_list -> empty','brackets_list',1,'p_brackets_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',43),
('braces_list -> braces braces_list','braces_list',2,'p_braces_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',52),
('braces_list -> empty','braces_list',1,'p_braces_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',53),
('word -> CHARACTER word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',62),
('word -> COMMA word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',63),
('word -> EQUAL word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',64),
('word_next -> CHARACTER word_next','word_next',2,'p_word_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',69),
('word_next -> empty','word_next',1,'p_word_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',70),
('beginsong -> BEGINSONG songbraces songbrackets','beginsong',3,'p_beginsong','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',78),
('songbrackets -> SONG_LOPTIONS dictionary SONG_ROPTIONS','songbrackets',3,'p_songbrackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',82),
('songbrackets -> empty','songbrackets',1,'p_songbrackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',83),
('songbraces -> SONG_LTITLE titles SONG_RTITLE','songbraces',3,'p_songbraces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',91),
('songbraces -> empty','songbraces',1,'p_songbraces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',92),
('dictionary -> identifier EQUAL braces dictionary_next','dictionary',4,'p_dictionary','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',100),
('dictionary -> identifier EQUAL error dictionary_next','dictionary',4,'p_dictionary','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',101),
('identifier -> CHARACTER identifier','identifier',2,'p_identifier','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',111),
('identifier -> empty','identifier',1,'p_identifier','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',112),
('separator -> SPACE','separator',1,'p_separator','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',120),
('separator -> empty','separator',1,'p_separator','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',121),
('dictionary_next -> separator COMMA separator dictionary','dictionary_next',4,'p_dictonary_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',126),
('dictionary_next -> empty','dictionary_next',1,'p_dictonary_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',127),
('titles -> title titles_next','titles',2,'p_titles','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',135),
('titles_next -> NEWLINE title titles_next','titles_next',3,'p_titles_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',139),
('titles_next -> empty','titles_next',1,'p_titles_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',140),
('title -> brackets title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',148),
('title -> braces title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',149),
('title -> command title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',150),
('title -> word title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',151),
('title -> SPACE title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',152),
('title -> empty','title',1,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',153),
]

194
patacrep/latex/syntax.py

@ -0,0 +1,194 @@
import ply.yacc as yacc
import inspect # TODO supprimer
from patacrep.latex.lexer import tokens, SimpleLexer, SongLexer
from patacrep.latex import ast
from patacrep.latex.detex import detex
class Parser:
    """LaTeX parser, to be used as the `module` argument of `yacc.yacc()`.

    NOTE: the docstrings of the `p_*` methods (but `p_error`) are the grammar
    rules; they are not descriptions and must not be reworded.
    """

    def __init__(self, filename=None):
        """Prepare a parser; `filename` is only used in error messages."""
        self.tokens = tokens
        self.ast = ast.AST
        # The metadata is stored on the AST class: reset it for this parse.
        self.ast.init_metadata()
        self.filename = filename

    def __find_column(self, token):
        # Return the column of `token` in its line, starting from 1.
        # `rfind` gives -1 on the first line, so adding one gives the index of
        # the first character of the line in every case.
        line_start = token.lexer.lexdata.rfind('\n', 0, token.lexpos) + 1
        return (token.lexpos - line_start) + 1

    def p_error(self, p):
        # Report a syntax error. `p` is the offending token, or None when the
        # error is met at the end of the input: there is no position to
        # report then.
        if p is None:
            print("Erreur fichier {}, fin de fichier inattendue.".format( # TODO
                str(self.filename),
                )
            )
            return
        print("Erreur fichier {}, ligne {}, position {}.".format( # TODO
            str(self.filename),
            p.lineno,
            self.__find_column(p),
            )
        )

    # Items are gathered from right to left: each rule prepends its first
    # symbol to the expression built from the remaining ones.
    def p_expression(self, p):
        """expression : brackets expression
                      | braces expression
                      | command expression
                      | NEWLINE expression
                      | beginsong expression
                      | word expression
                      | SPACE expression
                      | empty
        """
        if len(p) == 3:
            if p[2] is None:
                p[0] = ast.Expression(p[1])
            else:
                p[0] = p[2].prepend(p[1])
        else:
            p[0] = None

    def p_empty(self, p):
        """empty :"""
        return None

    def p_brackets(self, p):
        """brackets : LBRACKET expression RBRACKET"""
        p[0] = p[2]

    def p_braces(self, p):
        """braces : LBRACE expression RBRACE"""
        p[0] = p[2]

    def p_command(self, p):
        """command : COMMAND brackets_list braces_list"""
        p[0] = ast.Command(p[1], p[2], p[3])

    def p_brackets_list(self, p):
        """brackets_list : brackets brackets_list
                         | empty
        """
        if len(p) == 3:
            p[0] = p[2]
            p[0].insert(0, p[1])
        else:
            p[0] = []

    def p_braces_list(self, p):
        """braces_list : braces braces_list
                       | empty
        """
        if len(p) == 3:
            p[0] = p[2]
            p[0].insert(0, p[1])
        else:
            p[0] = []

    def p_word(self, p):
        """word : CHARACTER word_next
                | COMMA word_next
                | EQUAL word_next
        """
        p[0] = p[1] + p[2]

    def p_word_next(self, p):
        """word_next : CHARACTER word_next
                     | empty
        """
        if len(p) == 2:
            p[0] = ""
        else:
            p[0] = p[1] + p[2]

    # The song header produces no node: titles and options are stored in the
    # metadata instead.
    def p_beginsong(self, p):
        """beginsong : BEGINSONG separator songbraces separator songbrackets"""
        self.ast.metadata["@titles"] = p[3]
        self.ast.metadata.update(p[5])

    def p_songbrackets(self, p):
        """songbrackets : SONG_LOPTIONS separator dictionary separator SONG_ROPTIONS
                        | empty
        """
        if len(p) == 6:
            p[0] = p[3]
        else:
            p[0] = {}

    def p_songbraces(self, p):
        """songbraces : SONG_LTITLE separator titles separator SONG_RTITLE
                      | empty
        """
        if len(p) == 6:
            p[0] = p[3]
        else:
            p[0] = []

    # Values of the options must be enclosed between braces; anything else is
    # rejected with an exception.
    def p_dictionary(self, p):
        """dictionary : identifier EQUAL braces dictionary_next
                      | identifier EQUAL error dictionary_next
        """
        if isinstance(p[3], ast.Expression):
            p[0] = {}
            p[0][p[1]] = p[3]
            p[0].update(p[4])
        else:
            raise Exception("Do enclose arguments between braces.") # TODO

    def p_identifier(self, p):
        """identifier : CHARACTER identifier
                      | empty
        """
        if len(p) == 2:
            p[0] = ""
        else:
            p[0] = p[1] + p[2]

    def p_separator(self, p):
        """separator : SPACE
                     | empty
        """
        p[0] = None

    def p_dictonary_next(self, p):
        """dictionary_next : separator COMMA separator dictionary
                           | empty
        """
        if len(p) == 5:
            p[0] = p[4]
        else:
            p[0] = {}

    def p_titles(self, p):
        """titles : title titles_next"""
        p[0] = [p[1]] + p[2]

    def p_titles_next(self, p):
        """titles_next : NEWLINE title titles_next
                       | empty
        """
        if len(p) == 2:
            p[0] = []
        else:
            p[0] = [p[2]] + p[3]

    def p_title(self, p):
        """title : brackets title
                 | braces title
                 | command title
                 | word title
                 | SPACE title
                 | empty
        """
        if len(p) == 2:
            p[0] = None
        else:
            if p[2] is None:
                p[0] = ast.Expression(p[1])
            else:
                p[0] = p[2].prepend(p[1])
def tex2plain(string):
    """Parse `string` as LaTeX, and return it rendered as plain text.

    An input that contains nothing to render gives the empty string.
    """
    parser = yacc.yacc(module=Parser())
    tree = parser.parse(string, lexer=SimpleLexer().lexer)
    if tree is None:
        # Nothing was parsed (e.g. empty string): do not let the missing
        # tree be rendered as the text "None".
        return ""
    return detex(tree)
def parsesong(string, filename=None):
    """Parse the song `string`, and return its metadata as plain text.

    Arguments:
    - string: content of the song file;
    - filename: name of the file, only used in error messages.

    The metadata is read from the parser rather than from the parsed tree,
    since the tree is None when there was nothing to parse.
    """
    song_parser = Parser(filename)
    yacc.yacc(module=song_parser).parse(string, lexer=SongLexer().lexer)
    return detex(song_parser.ast.metadata)

50
patacrep/latex/testing.py

@ -0,0 +1,50 @@
# Test it out
# Scratch script: it prints the result of the parser on sample data, next to
# the expected values, for a visual comparison (nothing is asserted).

# Sample song: preamble commands, a comment, and a header with several titles
# and options.
song = r"""
\selectlanguage{french}
plop = tag
% Un commentaire
\columns{3}
\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre}[album={Tagada tsoin \"itsoin}, cov={pouf.png}] % un autre
%\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre}[album={Tagada tsoin \"itsoin}] % un autre
%\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre}
Dans [Dm6]cette ruedots [E7]
"""
# Second sample song, with the options on their own line. It is not used by
# the statements below.
isong = r"""
\selectlanguage{french}
\songcolumns{2}
\beginsong{Tous les bateaux, tous les oiseaux}
[by={Michel Polnareff},cov={passe-present},album={Passé Présent}]
Dans \[Dm6]cette ruedots [E7]
"""
# Sample string with diacritics, for `tex2plain`.
tex = "D\\^iacritiqu\\'Es"
# Give the lexer some input
#if 0:
# from syntax import parser
# print(parser.parse(data, debug=0))
# print(parser.parse(data).song_data())
#else:
# from lexer import SimpleLexer
# lexer.input(data)
# for tok in lexer:
# print(tok)
from patacrep.latex import tex2plain
from patacrep.latex.syntax import parsesong
from patacrep.latex.ast import AST
# Prints True if the diacritics are rendered as expected.
print(tex2plain(tex) == "DîacritiquÉs")
# NOTE(review): the second argument of `parsesong` is the file name, but the
# AST class is given here -- confirm what was meant.
print(parsesong(song, AST))
# Expected result, printed for comparison with the line above.
# NOTE(review): "Tpitre quatre" looks like a typo for "Tître quatre" --
# confirm.
print({
    "@titles": ["Titre un", "Titre deux", "Tître trois", "Tpitre quatre"],
    "@languages": set(["french"]),
    "@path": "TODO",
    "album": "Tagada tsoin ïtsoin",
    "cov": "pouf.png",
    }
)

17
patacrep/songs.py

@ -10,7 +10,7 @@ import pickle
import re import re
from patacrep.authors import processauthors from patacrep.authors import processauthors
from patacrep.latex import parsetex from patacrep.latex import parsesong
LOGGER = logging.getLogger(__name__) LOGGER = logging.getLogger(__name__)
@ -74,7 +74,7 @@ class Song(object):
cached_attributes = [ cached_attributes = [
"titles", "titles",
"unprefixed_titles", "unprefixed_titles",
"args", "data",
"datadir", "datadir",
"fullpath", "fullpath",
"subpath", "subpath",
@ -110,8 +110,9 @@ class Song(object):
)) ))
# Data extraction from the latex song # Data extraction from the latex song
data = parsetex(self.fullpath) self.data = parsesong(self.fullpath)
self.titles = data['titles'] self.titles = self.data['@titles']
self.languages = self.data['@languages']
self.datadir = datadir self.datadir = datadir
self.unprefixed_titles = [ self.unprefixed_titles = [
unprefixed_title( unprefixed_title(
@ -121,12 +122,10 @@ class Song(object):
for title for title
in self.titles in self.titles
] ]
self.args = data['args']
self.subpath = subpath self.subpath = subpath
self.languages = data['languages'] if "by" in self.data:
if "by" in self.args:
self.authors = processauthors( self.authors = processauthors(
self.args["by"], self.data["by"],
**config["_compiled_authwords"] **config["_compiled_authwords"]
) )
else: else:
@ -148,7 +147,7 @@ class Song(object):
) )
def __repr__(self): def __repr__(self):
return repr((self.titles, self.args, self.fullpath)) return repr((self.titles, self.data, self.fullpath))
def unprefixed_title(title, prefixes): def unprefixed_title(title, prefixes):
"""Remove the first prefix of the list in the beginning of title (if any). """Remove the first prefix of the list in the beginning of title (if any).

Loading…
Cancel
Save