Browse Source

[WIP] LaTeX files are parsed again

pull/66/head
Louis 10 years ago
parent
commit
baedd3b236
  1. 17
      patacrep/build.py
  2. 2
      patacrep/content/sorted.py
  3. 4
      patacrep/index.py
  4. 27
      patacrep/latex/__init__.py
  5. 49
      patacrep/latex/ast.py
  6. 110
      patacrep/latex/detex.py
  7. 124
      patacrep/latex/lexer.py
  8. 72
      patacrep/latex/parsetab.py
  9. 194
      patacrep/latex/syntax.py
  10. 50
      patacrep/latex/testing.py
  11. 17
      patacrep/songs.py

17
patacrep/build.py

@ -50,7 +50,6 @@ class Songbook(object):
super(Songbook, self).__init__()
self.config = raw_songbook
self.basename = basename
self.contentlist = []
# Some special keys have their value processed.
self._set_datadir()
@ -86,7 +85,7 @@ class Songbook(object):
- output: a file object, in which the file will be written.
"""
# Updating configuration
config = DEFAULT_CONFIG
config = DEFAULT_CONFIG.copy()
config.update(self.config)
renderer = TexRenderer(
config['template'],
@ -100,18 +99,16 @@ class Songbook(object):
copy.deepcopy(config['authwords'])
)
self.config = config
# Configuration set
self.contentlist = content.process_content(
self.config.get('content', []),
self.config,
config['render_content'] = content.render_content
config['content'] = content.process_content(
config.get('content', []),
config,
)
self.config['render_content'] = content.render_content
self.config['content'] = self.contentlist
self.config['filename'] = output.name[:-4]
config['filename'] = output.name[:-4]
renderer.render_tex(output, self.config)
renderer.render_tex(output, config)
class SongbookBuilder(object):

2
patacrep/content/sorted.py

@ -55,7 +55,7 @@ def key_generator(sort):
field = song.authors
else:
try:
field = song.args[key]
field = song.data[key]
except KeyError:
LOGGER.debug(
"Ignoring unknown key '{}' for song {}.".format(

4
patacrep/index.py

@ -13,7 +13,7 @@ import re
from patacrep import authors
from patacrep import encoding
from patacrep.latex import latex2unicode
from patacrep.latex import tex2plain
EOL = "\n"
@ -113,7 +113,7 @@ class Index(object):
if not key in self.data[first]:
self.data[first][key] = {
'sortingkey': [
unidecode.unidecode(latex2unicode(item)).lower()
unidecode.unidecode(tex2plain(item)).lower()
for item in key
],
'entries': [],

27
patacrep/latex/__init__.py

@ -1,23 +1,26 @@
# -*- coding: utf-8 -*-
from patacrep.latex.syntax import tex2plain as syntax_tex2plain
from patacrep.latex.syntax import parsesong as syntax_parsesong
from patacrep.latex.detex import detex
from patacrep import encoding
"""Very simple LaTeX parser"""
def latex2unicode(string):
"""Convert LaTeX string to unicode"""
return "TODO"
def tex2plain(string):
    """Return `string` rendered as plain text.

    Only a handful of LaTeX commands, diacritics for the most part, are
    interpreted.
    """
    plain = syntax_tex2plain(string)
    return plain
def parsetex(path):
def parsesong(path):
"""Return a dictonary of data read from the latex file `path`.
This file is a drop in replacement for an old function. Elle ne devrait pas
apparaitre telle quelle dans la version finale, une fois que
https://github.com/patacrep/patacrep/issues/64 aura été pris en compte.
TODO
"""
return {
'titles': ["TODO"],
'args': {},
'languages': ['french', 'english', 'portuguese', 'spanish'],
}
data = syntax_parsesong(encoding.open_read(path).read(), path)
data['@path'] = path
return data

49
patacrep/latex/ast.py

@ -0,0 +1,49 @@
class AST:
    """Base class of the nodes produced while parsing a LaTeX string.

    `metadata` is a class attribute. It is `None` until `init_metadata()` has
    been called.
    """

    metadata = None

    @classmethod
    def init_metadata(cls):
        """Bind `metadata` to a new dictionary holding an empty language set."""
        fresh = dict()
        fresh['@languages'] = set()
        cls.metadata = fresh
class Expression(AST):
    """Ordered sequence of parsed items, rendered as their concatenation."""

    def __init__(self, value):
        """Create an expression whose only item is `value`.

        A `None` value is not stored, which is what `prepend()` already does.
        Storing it would make `str()` render the literal text "None".
        """
        super().__init__()
        self.content = [] if value is None else [value]

    def prepend(self, value):
        """Insert `value` before the current items, unless it is `None`.

        Return the expression itself, so that calls can be chained.
        """
        if value is not None:
            self.content.insert(0, value)
        return self

    def __str__(self):
        """Return the string forms of all the items, joined without separator."""
        return "".join(str(item) for item in self.content)
class Command(AST):
    """LaTeX command together with its optional and mandatory arguments.

    Building a `selectlanguage` command adds its mandatory arguments to the
    `@languages` set of `metadata`.
    """

    def __init__(self, name, optional, mandatory):
        """Store the command.

        Arguments:
        - name: name of the command, leading backslash included;
        - optional: list of the arguments given between brackets;
        - mandatory: list of the arguments given between braces.
        """
        self.name = name
        self.mandatory = mandatory
        self.optional = optional
        if name == r'\selectlanguage':
            self.metadata['@languages'] |= set(self.mandatory)

    def __str__(self):
        """Return the command as LaTeX source.

        An `emph` command is rendered as its first mandatory argument alone.
        When it has no mandatory argument, it is rendered like any other
        command instead of raising `IndexError`.
        """
        def text(item):
            # None stands for an empty argument: render it as nothing rather
            # than as the literal text "None".
            return "" if item is None else str(item)

        if self.name in [r'\emph'] and self.mandatory:
            return text(self.mandatory[0])
        return "{}{}{}".format(
            self.name,
            "".join("[{}]".format(text(item)) for item in self.optional),
            "".join("{{{}}}".format(text(item)) for item in self.mandatory),
        )
class BeginSong(AST):
    """Node pairing a list of titles with a set of arguments."""

    def __init__(self, titles, arguments):
        """Keep `titles` and `arguments` as attributes of the same names."""
        self.arguments = arguments
        self.titles = titles

110
patacrep/latex/detex.py

@ -0,0 +1,110 @@
# Ordered table of (LaTeX sequence, plain-text replacement) pairs.
# detex() applies the pairs one after the other with str.replace(), so an
# earlier pair is substituted before a later one is searched for.
MATCH = [
    # Diacritics: a
    (r"\'a", "á"),
    (r"\'A", "Á"),
    (r"\`a", "à"),
    (r"\`A", "À"),
    (r"\^a", "â"),
    (r"\^A", "Â"),
    (r"\"a", "ä"),
    (r"\"A", "Ä"),
    # Diacritics: e
    (r"\'e", "é"),
    (r"\'E", "É"),
    (r"\`e", "è"),
    (r"\`E", "È"),
    (r"\^e", "ê"),
    (r"\^E", "Ê"),
    (r"\"e", "ë"),
    (r"\"E", "Ë"),
    # Diacritics: i (both the plain letter and the `\i` / `\I` command forms)
    (r"\'i", "í"),
    (r"\'I", "Í"),
    (r"\`i", "ì"),
    (r"\`I", "Ì"),
    (r"\^i", "î"),
    (r"\^I", "Î"),
    (r"\"i", "ï"),
    (r"\"I", "Ï"),
    (r"\'\i", "í"),
    (r"\'\I", "Í"),
    (r"\`\i", "ì"),
    (r"\`\I", "Ì"),
    (r"\^\i", "î"),
    (r"\^\I", "Î"),
    (r"\"\i", "ï"),
    (r"\"\I", "Ï"),
    # Diacritics: o
    (r"\'o", "ó"),
    (r"\'O", "Ó"),
    (r"\`o", "ò"),
    (r"\`O", "Ò"),
    (r"\^o", "ô"),
    (r"\^O", "Ô"),
    (r"\"o", "ö"),
    (r"\"O", "Ö"),
    # Diacritics: u
    (r"\'u", "ú"),
    (r"\'U", "Ú"),
    (r"\`u", "ù"),
    (r"\`U", "Ù"),
    (r"\^u", "û"),
    (r"\^U", "Û"),
    (r"\"u", "ü"),
    (r"\"U", "Ü"),
    # Cedilla (only the form with a space between the command and the letter)
    (r"\c c", "ç"),
    (r"\c C", "Ç"),
    # œ, æ
    (r"\oe", "œ"),
    (r"\OE", "Œ"),
    (r"\ae", "æ"),
    (r"\AE", "Æ"),
    # Spaces
    # NOTE(review): in LaTeX `\~` is the tilde accent and `~` alone is the
    # unbreakable space; confirm that mapping `\~` to a space is intended.
    (r"\ ", " "),
    (r"\,", " "),
    (r"\~", " "),
    # IeC (removed together with the space that follows it)
    (r"\IeC ", ""),
    # Miscellaneous
    # `\dots` is dropped, not turned into an ellipsis.
    (r"\dots", ""),
    (r"\%", "%"),
    (r"\&", "&"),
    (r"\_", "_"),
]
def detex(arg):
    """Recursively replace the LaTeX sequences of `MATCH` by plain text.

    The result depends on the type of `arg`:
    - None: the empty string;
    - dict: a new dict with the same keys, and converted values;
    - list: a new list of the converted items;
    - set: a new set of the converted items;
    - str: the string in which every pair of `MATCH` has been applied in
      order, stripped of leading and trailing whitespace. A warning is
      printed if a backslash is left;
    - anything else: `str(arg)`, processed as a string.
    """
    if arg is None:
        # `str(None)` would leak the literal text "None" into the result.
        return ""
    if isinstance(arg, dict):
        return {key: detex(value) for (key, value) in arg.items()}
    if isinstance(arg, list):
        return [detex(item) for item in arg]
    if isinstance(arg, set):
        return {detex(item) for item in arg}
    if isinstance(arg, str):
        string = arg
        for (latex, plain) in MATCH:
            string = string.replace(latex, plain)
        if '\\' in string:
            print("WARNING: Remaining command in string '{}'.".format(string))
        return string.strip()
    return detex(str(arg))

124
patacrep/latex/lexer.py

@ -0,0 +1,124 @@
import ply.lex as lex
# Names of all the token types that the lexers of this module can produce.
tokens = (
    # Grouping delimiters
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    # Commands, and the double backslash
    'COMMAND', 'NEWLINE',
    # Text
    'COMMA', 'EQUAL', 'CHARACTER', 'SPACE',
    # Song header
    'BEGINSONG',
    'SONG_LTITLE', 'SONG_RTITLE',
    'SONG_LOPTIONS', 'SONG_ROPTIONS',
)
class SimpleLexer:
    """PLY lexer splitting a LaTeX string into the tokens listed in `tokens`.

    Attributes named `t_*` are token rules read by `ply.lex`. PLY tries the
    rules defined as functions first, in definition order, then the rules
    defined as strings, by decreasing length of their regular expression.
    The docstring of a rule function is its regular expression, so such
    functions are documented with comments only.
    """

    tokens = tokens

    # Regular expression rules for simple tokens
    t_LBRACKET = r'\['
    t_RBRACKET = r'\]'
    t_LBRACE = r'{'
    t_RBRACE = r'}'
    # A backslash followed either by a name, or by one character that is not
    # a backslash.
    t_COMMAND = r'\\([@a-zA-Z]+|[^\\])'
    # Two backslashes in a row.
    t_NEWLINE = r'\\\\'

    # Characters that can never be part of a CHARACTER token.
    # The tab is listed with the other whitespace characters: the CHARACTER
    # rule is longer than the SPACE rule, hence tried first, and would
    # otherwise turn a tab into text.
    SPECIAL_CHARACTERS = (
        t_LBRACKET +
        t_RBRACKET +
        t_RBRACE +
        t_LBRACE +
        r"\\" +
        r" " +
        r"\t" +
        r"\n" +
        r"\r" +
        r"%" +
        r"=" +
        r","
    )
    t_CHARACTER = r'[^{}]'.format(SPECIAL_CHARACTERS)
    t_EQUAL = r'='
    t_COMMA = r','
    t_SPACE = r'[ \t\n\r]+'

    def __init__(self):
        # The lexer is built from this instance, and stored on the class.
        self.__class__.lexer = lex.lex(module = self)

    # Define a rule so we can track line numbers
    # Nothing is returned: a run of newlines matched here yields no token.
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    # A comment runs from `%` to the end of the line, and yields no token.
    def t_comment(self, t):
        r'%.*'
        pass

    # Error handling rule
    # The offending character is reported, then skipped.
    def t_error(self, t):
        print("Illegal character '%s'" % t.value[0]) # TODO log
        t.lexer.skip(1)
class SongLexer(SimpleLexer):
    """Lexer that also recognises the header of a song.

    A `beginsong` command pushes the `beginsong` state. In that state, the
    outermost braces are retyped SONG_LTITLE / SONG_RTITLE, and the outermost
    brackets SONG_LOPTIONS / SONG_ROPTIONS. Two counters stored on the lexer,
    `open_braces` and `open_brackets`, track the nesting depth.

    The docstring of a rule function is its regular expression, so rules are
    documented with comments only.
    """

    # One additional lexer state, declared as inclusive.
    states = (
        ('beginsong', 'inclusive'),
    )

    # State beginsong
    # Entering the state resets both depth counters.
    def t_INITIAL_BEGINSONG(self, t):
        r'\\beginsong'
        t.lexer.push_state('beginsong')
        t.lexer.open_brackets = 0
        t.lexer.open_braces = 0
        return t

    # The outermost opening bracket starts the options.
    def t_beginsong_LBRACKET(self, t):
        r'\['
        if t.lexer.open_brackets == 0:
            t.type = 'SONG_LOPTIONS'
            # While the options are open, the brace depth is kept above zero,
            # so that t_beginsong_LBRACE does not retype a brace of the
            # options as SONG_LTITLE.
            t.lexer.open_braces += 1 # TODO Explain
        t.lexer.open_brackets += 1
        return t

    # The bracket closing the options ends the header.
    def t_beginsong_RBRACKET(self, t):
        r'\]'
        t.lexer.open_brackets -= 1
        if t.lexer.open_brackets == 0:
            t.type = 'SONG_ROPTIONS'
            # Undo the increment done when the options were opened.
            t.lexer.open_braces -= 1 # TODO Explain
            t.lexer.pop_state()
            # Iterating over the lexer consumes every remaining token.
            # NOTE(review): presumably because nothing after the header is
            # needed by the parser -- confirm.
            for __ignored in t.lexer: # TODO Explain
                pass
        return t

    # An opening brace met at depth zero starts the titles.
    def t_beginsong_LBRACE(self, t):
        r'{'
        if t.lexer.open_braces == 0:
            t.type = 'SONG_LTITLE'
        t.lexer.open_braces += 1
        return t

    # Closing brace that is NOT followed by an opening bracket (whitespace
    # ignored). If it closes the titles, there are no options: the state is
    # left at once.
    def t_beginsong_RBRACE1(self, t):
        r'}(?![ \t\r\n]*\[)'
        t.lexer.open_braces -= 1
        t.type = 'RBRACE'
        if t.lexer.open_braces == 0:
            t.lexer.pop_state()
            t.type = 'SONG_RTITLE'
        return t

    # Closing brace that IS followed by an opening bracket (whitespace
    # ignored). The state is kept, so that the bracket is handled by
    # t_beginsong_LBRACKET.
    def t_beginsong_RBRACE2(self, t):
        r'}(?=[ \t\r\n]*\[)'
        t.lexer.open_braces -= 1
        t.type = 'RBRACE'
        if t.lexer.open_braces == 0:
            t.type = 'SONG_RTITLE'
        return t

72
patacrep/latex/parsetab.py

@ -0,0 +1,72 @@
# parsetab.py
# This file is automatically generated. Do not edit.
_tabversion = '3.2'
_lr_method = 'LALR'
_lr_signature = b'\xa4\n\x7f%6\xc5\x1d\x1cV\xd9V\xf5\x07\xd5\x10\x10'
_lr_action_items = {'LBRACKET':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,],[9,9,9,9,9,9,-9,9,9,9,9,-9,9,-9,-9,-17,-21,-9,-9,-14,9,-19,-26,9,-9,-18,-20,-10,-16,-9,-12,-13,-11,9,9,9,9,9,-24,-22,-15,-25,9,-23,]),'SONG_ROPTIONS':([43,66,74,75,76,79,80,84,],[-11,70,-9,-9,-34,-28,-27,-33,]),'$end':([0,1,2,3,4,5,6,7,8,10,11,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-9,0,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,-7,-9,-14,-9,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SPACE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,74,75,81,],[10,10,10,10,10,10,-9,10,10,-9,10,-9,10,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,50,-9,-18,-20,-10,-16,-9,-12,-13,-11,50,50,50,50,50,-24,-22,-15,-25,50,-23,77,77,77,]),'COMMAND':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,],[11,11,11,11,11,11,-9,11,11,-9,11,-9,11,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,11,-9,-18,-20,-10,-16,-9,-12,-13,-11,11,11,11,11,11,-24,-22,-15,-25,11,-23,]),'RBRACKET':([1,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,38,-7,-9,-14,-9,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'LBRACE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,71,],[12,12,12,12,12,12,-9,12,12,-9,12,-9,12,-9,-9,-17,-21,-9,12,-14,-9,-19,-26,12,-9,-18,-20,-10,-16,12,-12,-13,-11,12,12,12,12,12,-24,-22,-15,-25,12,-23,12,]),'SONG_LOPTIONS':([15,33,35,60,],[-9,-26,54,-25,]),'CHARACTER':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,
46,47,48,50,52,53,54,55,60,62,68,70,77,81,82,83,],[8,8,8,8,8,8,24,8,8,-9,8,24,8,-9,24,-17,-21,24,-9,-14,-9,-19,-26,8,-9,-18,-20,-10,-16,-9,-12,-13,-11,8,8,8,8,8,-24,-22,68,-15,-25,8,68,-23,-31,-9,68,-32,]),'NEWLINE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,50,51,52,53,55,56,57,58,59,60,61,62,69,70,],[7,7,7,7,7,7,-9,7,7,-9,7,-9,7,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,-9,-9,-18,-20,-10,-16,-9,-12,-13,-11,-9,-43,-9,-9,-9,-9,62,-24,-22,-15,-41,-40,-38,-39,-25,-42,-9,62,-23,]),'error':([71,],[74,]),'EQUAL':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,54,55,60,62,65,67,68,70,72,77,81,82,83,],[13,13,13,13,13,13,-9,13,13,-9,13,-9,13,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,13,-9,-18,-20,-10,-16,-9,-12,-13,-11,13,13,13,13,13,-24,-22,-9,-15,-25,13,-30,71,-9,-23,-29,-31,-9,-9,-32,]),'COMMA':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,74,75,76,77,78,],[16,16,16,16,16,16,-9,16,16,-9,16,-9,16,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,16,-9,-18,-20,-10,-16,-9,-12,-13,-11,16,16,16,16,16,-24,-22,-15,-25,16,-23,-9,-9,-32,-31,81,]),'RBRACE':([1,3,4,5,6,7,8,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,30,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,-7,-9,-14,-9,43,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SONG_LTITLE':([15,],[34,]),'BEGINSONG':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[15,15,15,15,15,15,-9,15,15,-9,15,-9,15,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SONG_RTITLE':([8,11,13,16,22,23,24,27,28,29,31,34,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,56,57,58,59,61,62,63,64,69,73,],[-9,-9,-9,-9,-17,-21,-9,-9,-14,-9,-19,-9,-18,-20,-10,-16,-9,-12,-
13,-11,-9,-43,-9,-9,-9,60,-9,-9,-15,-41,-40,-38,-39,-42,-9,-37,-35,-9,-36,]),}
_lr_action = { }
for _k, _v in _lr_action_items.items():
for _x,_y in zip(_v[0],_v[1]):
if not _x in _lr_action: _lr_action[_x] = { }
_lr_action[_x][_k] = _y
del _lr_action_items
_lr_goto_items = {'word':([0,1,4,5,6,7,9,10,12,14,34,44,46,47,48,50,62,],[1,1,1,1,1,1,1,1,1,1,44,44,44,44,44,44,44,]),'expression':([0,1,4,5,6,7,9,10,12,14,],[2,17,18,19,20,21,25,26,30,32,]),'dictionary_next':([74,75,],[79,80,]),'empty':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,24,27,29,34,35,40,44,46,47,48,50,51,54,62,68,69,74,75,81,82,],[3,3,3,3,3,3,23,3,3,28,3,23,3,33,23,23,39,28,45,52,39,45,45,45,45,45,63,65,45,65,63,76,76,83,65,]),'dictionary':([54,82,],[66,84,]),'songbrackets':([35,],[53,]),'command':([0,1,4,5,6,7,9,10,12,14,34,44,46,47,48,50,62,],[4,4,4,4,4,4,4,4,4,4,46,46,46,46,46,46,46,]),'identifier':([54,68,82,],[67,72,67,]),'songbraces':([15,],[35,]),'brackets_list':([11,29,],[27,42,]),'brackets':([0,1,4,5,6,7,9,10,11,12,14,29,34,44,46,47,48,50,62,],[6,6,6,6,6,6,6,6,29,6,6,29,47,47,47,47,47,47,47,]),'word_next':([8,13,16,24,],[22,31,36,37,]),'beginsong':([0,1,4,5,6,7,9,10,12,14,],[5,5,5,5,5,5,5,5,5,5,]),'title':([34,44,46,47,48,50,62,],[51,56,57,58,59,61,69,]),'titles_next':([51,69,],[64,73,]),'braces':([0,1,4,5,6,7,9,10,12,14,27,34,40,44,46,47,48,50,62,71,],[14,14,14,14,14,14,14,14,14,14,40,48,40,48,48,48,48,48,48,75,]),'separator':([74,75,81,],[78,78,82,]),'titles':([34,],[49,]),'braces_list':([27,40,],[41,55,]),}
_lr_goto = { }
for _k, _v in _lr_goto_items.items():
for _x,_y in zip(_v[0],_v[1]):
if not _x in _lr_goto: _lr_goto[_x] = { }
_lr_goto[_x][_k] = _y
del _lr_goto_items
_lr_productions = [
("S' -> expression","S'",1,None,None,None),
('expression -> brackets expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',8),
('expression -> braces expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',9),
('expression -> command expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',10),
('expression -> NEWLINE expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',11),
('expression -> beginsong expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',12),
('expression -> word expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',13),
('expression -> SPACE expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',14),
('expression -> empty','expression',1,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',15),
('empty -> <empty>','empty',0,'p_empty','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',26),
('brackets -> LBRACKET expression RBRACKET','brackets',3,'p_brackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',30),
('braces -> LBRACE expression RBRACE','braces',3,'p_braces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',34),
('command -> COMMAND brackets_list braces_list','command',3,'p_command','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',38),
('brackets_list -> brackets brackets_list','brackets_list',2,'p_brackets_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',42),
('brackets_list -> empty','brackets_list',1,'p_brackets_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',43),
('braces_list -> braces braces_list','braces_list',2,'p_braces_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',52),
('braces_list -> empty','braces_list',1,'p_braces_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',53),
('word -> CHARACTER word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',62),
('word -> COMMA word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',63),
('word -> EQUAL word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',64),
('word_next -> CHARACTER word_next','word_next',2,'p_word_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',69),
('word_next -> empty','word_next',1,'p_word_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',70),
('beginsong -> BEGINSONG songbraces songbrackets','beginsong',3,'p_beginsong','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',78),
('songbrackets -> SONG_LOPTIONS dictionary SONG_ROPTIONS','songbrackets',3,'p_songbrackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',82),
('songbrackets -> empty','songbrackets',1,'p_songbrackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',83),
('songbraces -> SONG_LTITLE titles SONG_RTITLE','songbraces',3,'p_songbraces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',91),
('songbraces -> empty','songbraces',1,'p_songbraces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',92),
('dictionary -> identifier EQUAL braces dictionary_next','dictionary',4,'p_dictionary','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',100),
('dictionary -> identifier EQUAL error dictionary_next','dictionary',4,'p_dictionary','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',101),
('identifier -> CHARACTER identifier','identifier',2,'p_identifier','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',111),
('identifier -> empty','identifier',1,'p_identifier','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',112),
('separator -> SPACE','separator',1,'p_separator','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',120),
('separator -> empty','separator',1,'p_separator','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',121),
('dictionary_next -> separator COMMA separator dictionary','dictionary_next',4,'p_dictonary_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',126),
('dictionary_next -> empty','dictionary_next',1,'p_dictonary_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',127),
('titles -> title titles_next','titles',2,'p_titles','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',135),
('titles_next -> NEWLINE title titles_next','titles_next',3,'p_titles_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',139),
('titles_next -> empty','titles_next',1,'p_titles_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',140),
('title -> brackets title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',148),
('title -> braces title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',149),
('title -> command title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',150),
('title -> word title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',151),
('title -> SPACE title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',152),
('title -> empty','title',1,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',153),
]

194
patacrep/latex/syntax.py

@ -0,0 +1,194 @@
import ply.yacc as yacc
import inspect # TODO supprimer
from patacrep.latex.lexer import tokens, SimpleLexer, SongLexer
from patacrep.latex import ast
from patacrep.latex.detex import detex
class Parser:
    """PLY grammar of the LaTeX subset read by this package.

    Methods named `p_*` are grammar rules read by `ply.yacc`: their docstring
    is the rule itself, so they are documented with comments only. Song
    metadata (titles, options) is stored in the `metadata` attribute of
    `self.ast` while parsing.
    """

    def __init__(self, filename=None):
        """Reset the metadata, and remember `filename` for error messages."""
        self.tokens = tokens
        self.ast = ast.AST
        self.ast.init_metadata()
        self.filename = filename

    def __find_column(self, token):
        """Return the column of `token` in its line, counted from 1."""
        # rfind() returns -1 when the token is on the first line, which makes
        # `line_start` 0. Otherwise `line_start` is the index of the first
        # character after the newline, so that this character is in column 1
        # as well.
        line_start = token.lexer.lexdata.rfind('\n', 0, token.lexpos) + 1
        return (token.lexpos - line_start) + 1

    def p_error(self, p):
        # Report a syntax error. PLY gives None instead of a token when the
        # error is an unexpected end of input: there is no position to report.
        if p is None:
            print("Syntax error in file {}: unexpected end of input.".format(
                str(self.filename),
                )
            )
            return
        print("Erreur fichier {}, ligne {}, position {}.".format( # TODO
            str(self.filename),
            p.lineno,
            self.__find_column(p),
            )
        )

    # An expression is a right-recursive list of items. It is None when
    # empty, and an ast.Expression otherwise.
    def p_expression(self, p):
        """expression : brackets expression
                      | braces expression
                      | command expression
                      | NEWLINE expression
                      | beginsong expression
                      | word expression
                      | SPACE expression
                      | empty
        """
        if len(p) == 3:
            if p[2] is None:
                p[0] = ast.Expression(p[1])
            else:
                p[0] = p[2].prepend(p[1])
        else:
            p[0] = None

    def p_empty(self, p):
        """empty :"""
        return None

    # The value of a group is the value of the expression it encloses.
    def p_brackets(self, p):
        """brackets : LBRACKET expression RBRACKET"""
        p[0] = p[2]

    def p_braces(self, p):
        """braces : LBRACE expression RBRACE"""
        p[0] = p[2]

    def p_command(self, p):
        """command : COMMAND brackets_list braces_list"""
        p[0] = ast.Command(p[1], p[2], p[3])

    # Lists of groups are built from the right: the head is inserted in
    # front of the list built for the tail.
    def p_brackets_list(self, p):
        """brackets_list : brackets brackets_list
                         | empty
        """
        if len(p) == 3:
            p[0] = p[2]
            p[0].insert(0, p[1])
        else:
            p[0] = []

    def p_braces_list(self, p):
        """braces_list : braces braces_list
                       | empty
        """
        if len(p) == 3:
            p[0] = p[2]
            p[0].insert(0, p[1])
        else:
            p[0] = []

    # A word is a string: the concatenation of its characters.
    def p_word(self, p):
        """word : CHARACTER word_next
                | COMMA word_next
                | EQUAL word_next
        """
        p[0] = p[1] + p[2]

    def p_word_next(self, p):
        """word_next : CHARACTER word_next
                     | empty
        """
        if len(p) == 2:
            p[0] = ""
        else:
            p[0] = p[1] + p[2]

    # The header of a song has no value of its own: titles and options are
    # stored in the metadata.
    def p_beginsong(self, p):
        """beginsong : BEGINSONG separator songbraces separator songbrackets"""
        self.ast.metadata["@titles"] = p[3]
        self.ast.metadata.update(p[5])

    # Options of a song: a dictionary, empty when there are no options.
    def p_songbrackets(self, p):
        """songbrackets : SONG_LOPTIONS separator dictionary separator SONG_ROPTIONS
                        | empty
        """
        if len(p) == 6:
            p[0] = p[3]
        else:
            p[0] = {}

    # Titles of a song: a list, empty when there are no titles.
    def p_songbraces(self, p):
        """songbraces : SONG_LTITLE separator titles separator SONG_RTITLE
                      | empty
        """
        if len(p) == 6:
            p[0] = p[3]
        else:
            p[0] = []

    def p_dictionary(self, p):
        """dictionary : identifier EQUAL braces dictionary_next
                      | identifier EQUAL error dictionary_next
        """
        value = p[3]
        if value is None:
            # Empty braces (`key={}`) have the value None: they are an empty
            # argument, not a missing one.
            value = ast.Expression("")
        if isinstance(value, ast.Expression):
            p[0] = {}
            p[0][p[1]] = value
            # Keys met later in the list override this one.
            p[0].update(p[4])
        else:
            raise Exception("Do enclose arguments between braces.") # TODO

    def p_identifier(self, p):
        """identifier : CHARACTER identifier
                      | empty
        """
        if len(p) == 2:
            p[0] = ""
        else:
            p[0] = p[1] + p[2]

    # Optional whitespace, which has no value.
    def p_separator(self, p):
        """separator : SPACE
                     | empty
        """
        p[0] = None

    def p_dictonary_next(self, p):
        """dictionary_next : separator COMMA separator dictionary
                           | empty
        """
        if len(p) == 5:
            p[0] = p[4]
        else:
            p[0] = {}

    # Titles are separated by NEWLINE tokens.
    def p_titles(self, p):
        """titles : title titles_next"""
        p[0] = [p[1]] + p[2]

    def p_titles_next(self, p):
        """titles_next : NEWLINE title titles_next
                       | empty
        """
        if len(p) == 2:
            p[0] = []
        else:
            p[0] = [p[2]] + p[3]

    # A title is built like an expression, but accepts neither NEWLINE nor
    # beginsong.
    def p_title(self, p):
        """title : brackets title
                 | braces title
                 | command title
                 | word title
                 | SPACE title
                 | empty
        """
        if len(p) == 2:
            p[0] = None
        else:
            if p[2] is None:
                p[0] = ast.Expression(p[1])
            else:
                p[0] = p[2].prepend(p[1])
def tex2plain(string):
    """Parse `string` with the simple lexer, and return it as plain text."""
    parser = yacc.yacc(module = Parser())
    lexer = SimpleLexer().lexer
    tree = parser.parse(string, lexer = lexer)
    return detex(tree)
def parsesong(string, filename=None):
    """Parse the song `string`, and return its metadata as plain text.

    Arguments:
    - string: content of the song;
    - filename: name of the file the song was read from, used in error
      messages only.

    Return the metadata gathered while parsing, processed by `detex()`.
    """
    parser = Parser(filename)
    yacc.yacc(module = parser).parse(string, lexer = SongLexer().lexer)
    # The metadata is read from the parser, not from the parse result: the
    # result is None when the input holds no item at all, and None has no
    # `metadata` attribute.
    return detex(parser.ast.metadata)

50
patacrep/latex/testing.py

@ -0,0 +1,50 @@
# Scratch script exercising the LaTeX parser by hand: it prints the result of
# a few calls, to be compared by eye with the expected values.

# Song with a header made of several titles and two options.
song = r"""
\selectlanguage{french}
plop = tag
% Un commentaire
\columns{3}
\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre}[album={Tagada tsoin \"itsoin}, cov={pouf.png}] % un autre
%\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre}[album={Tagada tsoin \"itsoin}] % un autre
%\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre}
Dans [Dm6]cette ruedots [E7]
"""
# Song whose options are on the line following the titles. Not used below.
isong = r"""
\selectlanguage{french}
\songcolumns{2}
\beginsong{Tous les bateaux, tous les oiseaux}
[by={Michel Polnareff},cov={passe-present},album={Passé Présent}]
Dans \[Dm6]cette ruedots [E7]
"""
# Short string containing two diacritic commands.
tex = "D\\^iacritiqu\\'Es"
# Former way of running the lexer and the parser, kept commented out.
#if 0:
# from syntax import parser
# print(parser.parse(data, debug=0))
# print(parser.parse(data).song_data())
#else:
# from lexer import SimpleLexer
# lexer.input(data)
# for tok in lexer:
# print(tok)
from patacrep.latex import tex2plain
from patacrep.latex.syntax import parsesong
from patacrep.latex.ast import AST
# Prints True if the diacritics of `tex` are converted.
print(tex2plain(tex) == "DîacritiquÉs")
# NOTE(review): the second argument of parsesong() is named `filename`, yet
# the AST class is given here -- confirm.
print(parsesong(song, AST))
# Value that the previous call is expected to print.
# NOTE(review): "Tître trois" and "Tpitre quatre" do not look like what the
# titles "Tître t{ro}ïs" and "T\^itre quatre" of `song` should give, and
# parsesong() as called above sets no "@path" key -- confirm.
print({
    "@titles": ["Titre un", "Titre deux", "Tître trois", "Tpitre quatre"],
    "@languages": set(["french"]),
    "@path": "TODO",
    "album": "Tagada tsoin ïtsoin",
    "cov": "pouf.png",
    }
)

17
patacrep/songs.py

@ -10,7 +10,7 @@ import pickle
import re
from patacrep.authors import processauthors
from patacrep.latex import parsetex
from patacrep.latex import parsesong
LOGGER = logging.getLogger(__name__)
@ -74,7 +74,7 @@ class Song(object):
cached_attributes = [
"titles",
"unprefixed_titles",
"args",
"data",
"datadir",
"fullpath",
"subpath",
@ -110,8 +110,9 @@ class Song(object):
))
# Data extraction from the latex song
data = parsetex(self.fullpath)
self.titles = data['titles']
self.data = parsesong(self.fullpath)
self.titles = self.data['@titles']
self.languages = self.data['@languages']
self.datadir = datadir
self.unprefixed_titles = [
unprefixed_title(
@ -121,12 +122,10 @@ class Song(object):
for title
in self.titles
]
self.args = data['args']
self.subpath = subpath
self.languages = data['languages']
if "by" in self.args:
if "by" in self.data:
self.authors = processauthors(
self.args["by"],
self.data["by"],
**config["_compiled_authwords"]
)
else:
@ -148,7 +147,7 @@ class Song(object):
)
def __repr__(self):
return repr((self.titles, self.args, self.fullpath))
return repr((self.titles, self.data, self.fullpath))
def unprefixed_title(title, prefixes):
"""Remove the first prefix of the list in the beginning of title (if any).

Loading…
Cancel
Save