From baedd3b2366868dbf20e59d9290ee2f755d9740b Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 30 Sep 2014 23:41:33 +0200 Subject: [PATCH] [WIP] LaTeX files are parsed again --- patacrep/build.py | 17 ++-- patacrep/content/sorted.py | 2 +- patacrep/index.py | 4 +- patacrep/latex/__init__.py | 27 +++--- patacrep/latex/ast.py | 49 ++++++++++ patacrep/latex/detex.py | 110 +++++++++++++++++++++ patacrep/latex/lexer.py | 124 ++++++++++++++++++++++++ patacrep/latex/parsetab.py | 72 ++++++++++++++ patacrep/latex/syntax.py | 194 +++++++++++++++++++++++++++++++++++++ patacrep/latex/testing.py | 50 ++++++++++ patacrep/songs.py | 17 ++-- 11 files changed, 632 insertions(+), 34 deletions(-) create mode 100644 patacrep/latex/ast.py create mode 100644 patacrep/latex/detex.py create mode 100644 patacrep/latex/lexer.py create mode 100644 patacrep/latex/parsetab.py create mode 100644 patacrep/latex/syntax.py create mode 100644 patacrep/latex/testing.py diff --git a/patacrep/build.py b/patacrep/build.py index 9f79bb6a..212305ca 100644 --- a/patacrep/build.py +++ b/patacrep/build.py @@ -50,7 +50,6 @@ class Songbook(object): super(Songbook, self).__init__() self.config = raw_songbook self.basename = basename - self.contentlist = [] # Some special keys have their value processed. self._set_datadir() @@ -86,7 +85,7 @@ class Songbook(object): - output: a file object, in which the file will be written. """ # Updating configuration - config = DEFAULT_CONFIG + config = DEFAULT_CONFIG.copy() config.update(self.config) renderer = TexRenderer( config['template'], @@ -100,18 +99,16 @@ class Songbook(object): copy.deepcopy(config['authwords']) ) - self.config = config # Configuration set - self.contentlist = content.process_content( - self.config.get('content', []), - self.config, + config['render_content'] = content.render_content + config['content'] = content.process_content( + config.get('content', []), + config, ) - self.config['render_content'] = content.render_content - self.config['content'] = self.contentlist - self.config['filename'] = output.name[:-4] + config['filename'] = output.name[:-4] - renderer.render_tex(output, self.config) + renderer.render_tex(output, config) class SongbookBuilder(object): diff --git a/patacrep/content/sorted.py b/patacrep/content/sorted.py index 651bd07e..f95065d6 100755 --- a/patacrep/content/sorted.py +++ b/patacrep/content/sorted.py @@ -55,7 +55,7 @@ def key_generator(sort): field = song.authors else: try: - field = song.args[key] + field = song.data[key] except KeyError: LOGGER.debug( "Ignoring unknown key '{}' for song {}.".format( diff --git a/patacrep/index.py b/patacrep/index.py index ac927569..c715918a 100644 --- a/patacrep/index.py +++ b/patacrep/index.py @@ -13,7 +13,7 @@ import re from patacrep import authors from patacrep import encoding -from patacrep.latex import latex2unicode +from patacrep.latex import tex2plain EOL = "\n" @@ -113,7 +113,7 @@ class Index(object): if not key in self.data[first]: self.data[first][key] = { 'sortingkey': [ - unidecode.unidecode(latex2unicode(item)).lower() + unidecode.unidecode(tex2plain(item)).lower() for item in key ], 'entries': [], diff --git a/patacrep/latex/__init__.py b/patacrep/latex/__init__.py index 08d1805f..b0826e6e 100644 --- a/patacrep/latex/__init__.py +++ b/patacrep/latex/__init__.py @@ -1,23 +1,26 @@ # -*- coding: utf-8 -*- +from patacrep.latex.syntax import tex2plain as syntax_tex2plain +from patacrep.latex.syntax import parsesong as syntax_parsesong +from patacrep.latex.detex import detex +from patacrep import encoding + """Very simple LaTeX parser""" -def latex2unicode(string): - """Convert LaTeX string to unicode""" - return "TODO" +def tex2plain(string): + """Render LaTeX string + + Very few commands (mostly diacritics) are interpreted. + """ + return syntax_tex2plain(string) -def parsetex(path): +def parsesong(path): """Return a dictonary of data read from the latex file `path`. This file is a drop in replacement for an old function. Elle ne devrait pas apparaitre telle quelle dans la version finale, une fois que https://github.com/patacrep/patacrep/issues/64 aura été pris en compte. - - TODO """ - return { - 'titles': ["TODO"], - 'args': {}, - 'languages': ['french', 'english', 'portuguese', 'spanish'], - } - + data = syntax_parsesong(encoding.open_read(path).read(), path) + data['@path'] = path + return data diff --git a/patacrep/latex/ast.py b/patacrep/latex/ast.py new file mode 100644 index 00000000..1f181172 --- /dev/null +++ b/patacrep/latex/ast.py @@ -0,0 +1,49 @@ +class AST: + + metadata = None + + @classmethod + def init_metadata(cls): + cls.metadata = { + '@languages': set(), + } + +class Expression(AST): + + def __init__(self, value): + super().__init__() + self.content = [value] + + def prepend(self, value): + if value is not None: + self.content.insert(0, value) + return self + + def __str__(self): + return "".join([str(item) for item in self.content]) + +class Command(AST): + + def __init__(self, name, optional, mandatory): + self.name = name + self.mandatory = mandatory + self.optional = optional + + if name == r'\selectlanguage': + self.metadata['@languages'] |= set(self.mandatory) + + def __str__(self): + if self.name in [r'\emph']: + return str(self.mandatory[0]) + return "{}{}{}".format( + self.name, + "".join(["[{}]".format(item) for item in self.optional]), + "".join(["{{{}}}".format(item) for item in self.mandatory]), + ) + + +class BeginSong(AST): + + def __init__(self, titles, arguments): + self.titles = titles + self.arguments = arguments diff --git a/patacrep/latex/detex.py b/patacrep/latex/detex.py new file mode 100644 index 00000000..cb9f277f --- /dev/null +++ b/patacrep/latex/detex.py @@ -0,0 +1,110 @@ +MATCH = [ + # Diacritics: a + (r"\'a", "á"), + (r"\'A", "Á"), + (r"\`a", "à"), + (r"\`A", "À"), + (r"\^a", "â"), + (r"\^A", "Â"), + (r"\"a", "ä"), + (r"\"A", "Ä"), + + # Diacritics: e + (r"\'e", "é"), + (r"\'E", "É"), + (r"\`e", "è"), + (r"\`E", "È"), + (r"\^e", "ê"), + (r"\^E", "Ê"), + (r"\"e", "ë"), + (r"\"E", "Ë"), + + # Diacritics: i + (r"\'i", "í"), + (r"\'I", "Í"), + (r"\`i", "ì"), + (r"\`I", "Ì"), + (r"\^i", "î"), + (r"\^I", "Î"), + (r"\"i", "ï"), + (r"\"I", "Ï"), + (r"\'\i", "í"), + (r"\'\I", "Í"), + (r"\`\i", "ì"), + (r"\`\I", "Ì"), + (r"\^\i", "î"), + (r"\^\I", "Î"), + (r"\"\i", "ï"), + (r"\"\I", "Ï"), + + # Diacritics: o + (r"\'o", "ó"), + (r"\'O", "Ó"), + (r"\`o", "ò"), + (r"\`O", "Ò"), + (r"\^o", "ô"), + (r"\^O", "Ô"), + (r"\"o", "ö"), + (r"\"O", "Ö"), + + # Diacritics: u + (r"\'u", "ú"), + (r"\'U", "Ú"), + (r"\`u", "ù"), + (r"\`U", "Ù"), + (r"\^u", "û"), + (r"\^U", "Û"), + (r"\"u", "ü"), + (r"\"U", "Ü"), + + # Cedille + (r"\c c", "ç"), + (r"\c C", "Ç"), + + # œ, æ + (r"\oe", "œ"), + (r"\OE", "Œ"), + (r"\ae", "æ"), + (r"\AE", "Æ"), + + # Spaces + (r"\ ", " "), + (r"\,", " "), + (r"\~", " "), + + # IeC + (r"\IeC ", ""), + + # Miscallenous + (r"\dots", "…"), + (r"\%", "%"), + (r"\&", "&"), + (r"\_", "_"), + + ] + + +def detex(arg): + if isinstance(arg, dict): + return dict([ + (key, detex(value)) + for (key, value) + in arg.items() + ]) + elif isinstance(arg, list): + return [ + detex(item) + for item + in arg + ] + elif isinstance(arg, set): + return set(detex(list(arg))) + elif isinstance(arg, str): + string = arg + for (latex, plain) in MATCH: + string = string.replace(latex, plain) + if '\\' in string: + print("WARNING: Remaining command in string '{}'.".format(string)) + return string.strip() + else: + return detex(str(arg)) diff --git a/patacrep/latex/lexer.py b/patacrep/latex/lexer.py new file mode 100644 index 00000000..199d6f9c --- /dev/null +++ b/patacrep/latex/lexer.py @@ -0,0 +1,124 @@ +import ply.lex as lex + +tokens = ( + 'LBRACKET', + 'RBRACKET', + 'LBRACE', + 'RBRACE', + 'COMMAND', + 'NEWLINE', + 'COMMA', + 'EQUAL', + 'CHARACTER', + 'SPACE', + 'BEGINSONG', + 'SONG_LTITLE', + 'SONG_RTITLE', + 'SONG_LOPTIONS', + 'SONG_ROPTIONS', +) + +class SimpleLexer: + + tokens = tokens + + # Regular expression rules for simple tokens + t_LBRACKET = r'\[' + t_RBRACKET = r'\]' + t_LBRACE = r'{' + t_RBRACE = r'}' + t_COMMAND = r'\\([@a-zA-Z]+|[^\\])' + t_NEWLINE = r'\\\\' + SPECIAL_CHARACTERS = ( + t_LBRACKET + + t_RBRACKET + + t_RBRACE + + t_LBRACE + + r"\\" + + r" " + + r"\n" + + r"\r" + + r"%" + + r"=" + + r"," + ) + t_CHARACTER = r'[^{}]'.format(SPECIAL_CHARACTERS) + t_EQUAL = r'=' + t_COMMA = r',' + + t_SPACE = r'[ \t\n\r]+' + + def __init__(self): + self.__class__.lexer = lex.lex(module = self) + + # Define a rule so we can track line numbers + def t_newline(self, t): + r'\n+' + t.lexer.lineno += len(t.value) + + def t_comment(self, t): + r'%.*' + pass + + # Error handling rule + def t_error(self, t): + print("Illegal character '%s'" % t.value[0]) # TODO log + t.lexer.skip(1) + +class SongLexer(SimpleLexer): + + states = ( + ('beginsong', 'inclusive'), + ) + + # State beginsong + def t_INITIAL_BEGINSONG(self, t): + r'\\beginsong' + t.lexer.push_state('beginsong') + t.lexer.open_brackets = 0 + t.lexer.open_braces = 0 + return t + + def t_beginsong_LBRACKET(self, t): + r'\[' + if t.lexer.open_brackets == 0: + t.type = 'SONG_LOPTIONS' + t.lexer.open_braces += 1 # TODO Explain + t.lexer.open_brackets += 1 + return t + + def t_beginsong_RBRACKET(self, t): + r'\]' + t.lexer.open_brackets -= 1 + if t.lexer.open_brackets == 0: + t.type = 'SONG_ROPTIONS' + t.lexer.open_braces -= 1 # TODO Explain + t.lexer.pop_state() + for __ignored in t.lexer: # TODO Explain + pass + return t + + def t_beginsong_LBRACE(self, t): + r'{' + if t.lexer.open_braces == 0: + t.type = 'SONG_LTITLE' + t.lexer.open_braces += 1 + return t + + def t_beginsong_RBRACE1(self, t): + r'}(?![ \t\r\n]*\[)' + t.lexer.open_braces -= 1 + t.type = 'RBRACE' + if t.lexer.open_braces == 0: + t.lexer.pop_state() + t.type = 'SONG_RTITLE' + return t + + def t_beginsong_RBRACE2(self, t): + r'}(?=[ \t\r\n]*\[)' + t.lexer.open_braces -= 1 + t.type = 'RBRACE' + if t.lexer.open_braces == 0: + t.type = 'SONG_RTITLE' + return t + diff --git a/patacrep/latex/parsetab.py b/patacrep/latex/parsetab.py new file mode 100644 index 00000000..b98cd59e --- /dev/null +++ b/patacrep/latex/parsetab.py @@ -0,0 +1,72 @@ + +# parsetab.py +# This file is automatically generated. Do not edit. +_tabversion = '3.2' + +_lr_method = 'LALR' + +_lr_signature = b'\xa4\n\x7f%6\xc5\x1d\x1cV\xd9V\xf5\x07\xd5\x10\x10' + +_lr_action_items = {'LBRACKET':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,],[9,9,9,9,9,9,-9,9,9,9,9,-9,9,-9,-9,-17,-21,-9,-9,-14,9,-19,-26,9,-9,-18,-20,-10,-16,-9,-12,-13,-11,9,9,9,9,9,-24,-22,-15,-25,9,-23,]),'SONG_ROPTIONS':([43,66,74,75,76,79,80,84,],[-11,70,-9,-9,-34,-28,-27,-33,]),'$end':([0,1,2,3,4,5,6,7,8,10,11,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-9,0,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,-7,-9,-14,-9,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SPACE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,74,75,81,],[10,10,10,10,10,10,-9,10,10,-9,10,-9,10,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,50,-9,-18,-20,-10,-16,-9,-12,-13,-11,50,50,50,50,50,-24,-22,-15,-25,50,-23,77,77,77,]),'COMMAND':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,],[11,11,11,11,11,11,-9,11,11,-9,11,-9,11,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,11,-9,-18,-20,-10,-16,-9,-12,-13,-11,11,11,11,11,11,-24,-22,-15,-25,11,-23,]),'RBRACKET':([1,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,38,-7,-9,-14,-9,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'LBRACE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,71,],[12,12,12,12,12,12,-9,12,12,-9,12,-9,12,-9,-9,-17,-21,-9,12,-14,-9,-19,-26,12,-9,-18,-20,-10,-16,12,-12,-13,-11,12,12,12,12,12,-24,-22,-15,-25,12,-23,12,]),'SONG_LOPTIONS':([15,33,35,60,],[-9,-26,54,-25,]),'CHARACTER':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,54,55,60,62,68,70,77,81,82,83,],[8,8,8,8,8,8,24,8,8,-9,8,24,8,-9,24,-17,-21,24,-9,-14,-9,-19,-26,8,-9,-18,-20,-10,-16,-9,-12,-13,-11,8,8,8,8,8,-24,-22,68,-15,-25,8,68,-23,-31,-9,68,-32,]),'NEWLINE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,50,51,52,53,55,56,57,58,59,60,61,62,69,70,],[7,7,7,7,7,7,-9,7,7,-9,7,-9,7,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,-9,-9,-18,-20,-10,-16,-9,-12,-13,-11,-9,-43,-9,-9,-9,-9,62,-24,-22,-15,-41,-40,-38,-39,-25,-42,-9,62,-23,]),'error':([71,],[74,]),'EQUAL':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,54,55,60,62,65,67,68,70,72,77,81,82,83,],[13,13,13,13,13,13,-9,13,13,-9,13,-9,13,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,13,-9,-18,-20,-10,-16,-9,-12,-13,-11,13,13,13,13,13,-24,-22,-9,-15,-25,13,-30,71,-9,-23,-29,-31,-9,-9,-32,]),'COMMA':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,74,75,76,77,78,],[16,16,16,16,16,16,-9,16,16,-9,16,-9,16,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,16,-9,-18,-20,-10,-16,-9,-12,-13,-11,16,16,16,16,16,-24,-22,-15,-25,16,-23,-9,-9,-32,-31,81,]),'RBRACE':([1,3,4,5,6,7,8,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,30,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,-7,-9,-14,-9,43,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SONG_LTITLE':([15,],[34,]),'BEGINSONG':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[15,15,15,15,15,15,-9,15,15,-9,15,-9,15,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SONG_RTITLE':([8,11,13,16,22,23,24,27,28,29,31,34,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,56,57,58,59,61,62,63,64,69,73,],[-9,-9,-9,-9,-17,-21,-9,-9,-14,-9,-19,-9,-18,-20,-10,-16,-9,-12,-13,-11,-9,-43,-9,-9,-9,60,-9,-9,-15,-41,-40,-38,-39,-42,-9,-37,-35,-9,-36,]),} + +_lr_action = { } +for _k, _v in _lr_action_items.items(): + for _x,_y in zip(_v[0],_v[1]): + if not _x in _lr_action: _lr_action[_x] = { } + _lr_action[_x][_k] = _y +del _lr_action_items + +_lr_goto_items = {'word':([0,1,4,5,6,7,9,10,12,14,34,44,46,47,48,50,62,],[1,1,1,1,1,1,1,1,1,1,44,44,44,44,44,44,44,]),'expression':([0,1,4,5,6,7,9,10,12,14,],[2,17,18,19,20,21,25,26,30,32,]),'dictionary_next':([74,75,],[79,80,]),'empty':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,24,27,29,34,35,40,44,46,47,48,50,51,54,62,68,69,74,75,81,82,],[3,3,3,3,3,3,23,3,3,28,3,23,3,33,23,23,39,28,45,52,39,45,45,45,45,45,63,65,45,65,63,76,76,83,65,]),'dictionary':([54,82,],[66,84,]),'songbrackets':([35,],[53,]),'command':([0,1,4,5,6,7,9,10,12,14,34,44,46,47,48,50,62,],[4,4,4,4,4,4,4,4,4,4,46,46,46,46,46,46,46,]),'identifier':([54,68,82,],[67,72,67,]),'songbraces':([15,],[35,]),'brackets_list':([11,29,],[27,42,]),'brackets':([0,1,4,5,6,7,9,10,11,12,14,29,34,44,46,47,48,50,62,],[6,6,6,6,6,6,6,6,29,6,6,29,47,47,47,47,47,47,47,]),'word_next':([8,13,16,24,],[22,31,36,37,]),'beginsong':([0,1,4,5,6,7,9,10,12,14,],[5,5,5,5,5,5,5,5,5,5,]),'title':([34,44,46,47,48,50,62,],[51,56,57,58,59,61,69,]),'titles_next':([51,69,],[64,73,]),'braces':([0,1,4,5,6,7,9,10,12,14,27,34,40,44,46,47,48,50,62,71,],[14,14,14,14,14,14,14,14,14,14,40,48,40,48,48,48,48,48,48,75,]),'separator':([74,75,81,],[78,78,82,]),'titles':([34,],[49,]),'braces_list':([27,40,],[41,55,]),} + +_lr_goto = { } +for _k, _v in _lr_goto_items.items(): + for _x,_y in zip(_v[0],_v[1]): + if not _x in _lr_goto: _lr_goto[_x] = { } + _lr_goto[_x][_k] = _y +del _lr_goto_items +_lr_productions = [ + ("S' -> expression","S'",1,None,None,None), + ('expression -> brackets expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',8), + ('expression -> braces expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',9), + ('expression -> command expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',10), + ('expression -> NEWLINE expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',11), + ('expression -> beginsong expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',12), + ('expression -> word expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',13), + ('expression -> SPACE expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',14), + ('expression -> empty','expression',1,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',15), + ('empty -> ','empty',0,'p_empty','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',26), + ('brackets -> LBRACKET expression RBRACKET','brackets',3,'p_brackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',30), + ('braces -> LBRACE expression RBRACE','braces',3,'p_braces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',34), + ('command -> COMMAND brackets_list braces_list','command',3,'p_command','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',38), + ('brackets_list -> brackets brackets_list','brackets_list',2,'p_brackets_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',42), + ('brackets_list -> empty','brackets_list',1,'p_brackets_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',43), + ('braces_list -> braces braces_list','braces_list',2,'p_braces_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',52), + ('braces_list -> empty','braces_list',1,'p_braces_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',53), + ('word -> CHARACTER word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',62), + ('word -> COMMA word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',63), + ('word -> EQUAL word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',64), + ('word_next -> CHARACTER word_next','word_next',2,'p_word_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',69), + ('word_next -> empty','word_next',1,'p_word_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',70), + ('beginsong -> BEGINSONG songbraces songbrackets','beginsong',3,'p_beginsong','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',78), + ('songbrackets -> SONG_LOPTIONS dictionary SONG_ROPTIONS','songbrackets',3,'p_songbrackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',82), + ('songbrackets -> empty','songbrackets',1,'p_songbrackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',83), + ('songbraces -> SONG_LTITLE titles SONG_RTITLE','songbraces',3,'p_songbraces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',91), + ('songbraces -> empty','songbraces',1,'p_songbraces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',92), + ('dictionary -> identifier EQUAL braces dictionary_next','dictionary',4,'p_dictionary','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',100), + ('dictionary -> identifier EQUAL error dictionary_next','dictionary',4,'p_dictionary','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',101), + ('identifier -> CHARACTER identifier','identifier',2,'p_identifier','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',111), + ('identifier -> empty','identifier',1,'p_identifier','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',112), + ('separator -> SPACE','separator',1,'p_separator','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',120), + ('separator -> empty','separator',1,'p_separator','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',121), + ('dictionary_next -> separator COMMA separator dictionary','dictionary_next',4,'p_dictonary_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',126), + ('dictionary_next -> empty','dictionary_next',1,'p_dictonary_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',127), + ('titles -> title titles_next','titles',2,'p_titles','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',135), + ('titles_next -> NEWLINE title titles_next','titles_next',3,'p_titles_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',139), + ('titles_next -> empty','titles_next',1,'p_titles_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',140), + ('title -> brackets title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',148), + ('title -> braces title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',149), + ('title -> command title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',150), + ('title -> word title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',151), + ('title -> SPACE title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',152), + ('title -> empty','title',1,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',153), +] diff --git a/patacrep/latex/syntax.py b/patacrep/latex/syntax.py new file mode 100644 index 00000000..f161b19c --- /dev/null +++ b/patacrep/latex/syntax.py @@ -0,0 +1,194 @@ +import ply.yacc as yacc +import inspect # TODO supprimer + +from patacrep.latex.lexer import tokens, SimpleLexer, SongLexer +from patacrep.latex import ast +from patacrep.latex.detex import detex + +class Parser: + + def __init__(self, filename=None): + self.tokens = tokens + self.ast = ast.AST + self.ast.init_metadata() + self.filename = filename + + def __find_column(self, token): + last_cr = token.lexer.lexdata.rfind('\n',0,token.lexpos) + if last_cr < 0: + last_cr = 0 + column = (token.lexpos - last_cr) + 1 + return column + + def p_error(self, p): + print("Erreur fichier {}, ligne {}, position {}.".format( # TODO + str(self.filename), + p.lineno, + self.__find_column(p), + ) + ) + + def p_expression(self, p): + """expression : brackets expression + | braces expression + | command expression + | NEWLINE expression + | beginsong expression + | word expression + | SPACE expression + | empty + """ + if len(p) == 3: + if p[2] is None: + p[0] = ast.Expression(p[1]) + else: + p[0] = p[2].prepend(p[1]) + else: + p[0] = None + + def p_empty(self, p): + """empty :""" + return None + + def p_brackets(self, p): + """brackets : LBRACKET expression RBRACKET""" + p[0] = p[2] + + def p_braces(self, p): + """braces : LBRACE expression RBRACE""" + p[0] = p[2] + + def p_command(self, p): + """command : COMMAND brackets_list braces_list""" + p[0] = ast.Command(p[1], p[2], p[3]) + + def p_brackets_list(self, p): + """brackets_list : brackets brackets_list + | empty + """ + if len(p) == 3: + p[0] = p[2] + p[0].insert(0, p[1]) + else: + p[0] = [] + + def p_braces_list(self, p): + """braces_list : braces braces_list + | empty + """ + if len(p) == 3: + p[0] = p[2] + p[0].insert(0, p[1]) + else: + p[0] = [] + + def p_word(self, p): + """word : CHARACTER word_next + | COMMA word_next + | EQUAL word_next + """ + p[0] = p[1] + p[2] + + def p_word_next(self, p): + """word_next : CHARACTER word_next + | empty + """ + if len(p) == 2: + p[0] = "" + else: + p[0] = p[1] + p[2] + + def p_beginsong(self, p): + """beginsong : BEGINSONG separator songbraces separator songbrackets""" + self.ast.metadata["@titles"] = p[3] + self.ast.metadata.update(p[5]) + + def p_songbrackets(self, p): + """songbrackets : SONG_LOPTIONS separator dictionary separator SONG_ROPTIONS + | empty + """ + if len(p) == 6: + p[0] = p[3] + else: + p[0] = {} + + def p_songbraces(self, p): + """songbraces : SONG_LTITLE separator titles separator SONG_RTITLE + | empty + """ + if len(p) == 6: + p[0] = p[3] + else: + p[0] = [] + + def p_dictionary(self, p): + """dictionary : identifier EQUAL braces dictionary_next + | identifier EQUAL error dictionary_next + """ + if isinstance(p[3], ast.Expression): + p[0] = {} + p[0][p[1]] = p[3] + p[0].update(p[4]) + else: + raise Exception("Do enclose arguments between braces.") # TODO + + def p_identifier(self, p): + """identifier : CHARACTER identifier + | empty + """ + if len(p) == 2: + p[0] = "" + else: + p[0] = p[1] + p[2] + + def p_separator(self, p): + """separator : SPACE + | empty + """ + p[0] = None + + def p_dictonary_next(self, p): + """dictionary_next : separator COMMA separator dictionary + | empty + """ + if len(p) == 5: + p[0] = p[4] + else: + p[0] = {} + + def p_titles(self, p): + """titles : title titles_next""" + p[0] = [p[1]] + p[2] + + def p_titles_next(self, p): + """titles_next : NEWLINE title titles_next + | empty + """ + if len(p) == 2: + p[0] = [] + else: + p[0] = [p[2]] + p[3] + + def p_title(self, p): + """title : brackets title + | braces title + | command title + | word title + | SPACE title + | empty + """ + if len(p) == 2: + p[0] = None + else: + if p[2] is None: + p[0] = ast.Expression(p[1]) + else: + p[0] = p[2].prepend(p[1]) + + +def tex2plain(string): + return detex(yacc.yacc(module = Parser()).parse(string, lexer = SimpleLexer().lexer)) + +def parsesong(string, filename=None): + return detex(yacc.yacc(module = Parser(filename)).parse(string, lexer = SongLexer().lexer).metadata) + diff --git a/patacrep/latex/testing.py b/patacrep/latex/testing.py new file mode 100644 index 00000000..f2d66510 --- /dev/null +++ b/patacrep/latex/testing.py @@ -0,0 +1,50 @@ + +# Test it out +song = r""" +\selectlanguage{french} +plop = tag +% Un commentaire +\columns{3} +\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre}[album={Tagada tsoin \"itsoin}, cov={pouf.png}] % un autre +%\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre}[album={Tagada tsoin \"itsoin}] % un autre +%\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre} + + Dans [Dm6]cette ruedots [E7] +""" + +isong = r""" +\selectlanguage{french} +\songcolumns{2} +\beginsong{Tous les bateaux, tous les oiseaux} + [by={Michel Polnareff},cov={passe-present},album={Passé Présent}] + + Dans \[Dm6]cette ruedots [E7] +""" + +tex = "D\\^iacritiqu\\'Es" + +# Give the lexer some input +#if 0: +# from syntax import parser +# print(parser.parse(data, debug=0)) +# print(parser.parse(data).song_data()) +#else: +# from lexer import SimpleLexer +# lexer.input(data) +# for tok in lexer: +# print(tok) + +from patacrep.latex import tex2plain +from patacrep.latex.syntax import parsesong +from patacrep.latex.ast import AST + +print(tex2plain(tex) == "DîacritiquÉs") +print(parsesong(song, AST)) +print({ + "@titles": ["Titre un", "Titre deux", "Tître trois", "Tpitre quatre"], + "@languages": set(["french"]), + "@path": "TODO", + "album": "Tagada tsoin ïtsoin", + "cov": "pouf.png", + } + ) diff --git a/patacrep/songs.py b/patacrep/songs.py index bf3ff3a6..43e70fe7 100644 --- a/patacrep/songs.py +++ b/patacrep/songs.py @@ -10,7 +10,7 @@ import pickle import re from patacrep.authors import processauthors -from patacrep.latex import parsetex +from patacrep.latex import parsesong LOGGER = logging.getLogger(__name__) @@ -74,7 +74,7 @@ class Song(object): cached_attributes = [ "titles", "unprefixed_titles", - "args", + "data", "datadir", "fullpath", "subpath", @@ -110,8 +110,9 @@ class Song(object): )) # Data extraction from the latex song - data = parsetex(self.fullpath) - self.titles = data['titles'] + self.data = parsesong(self.fullpath) + self.titles = self.data['@titles'] + self.languages = self.data['@languages'] self.datadir = datadir self.unprefixed_titles = [ unprefixed_title( @@ -121,12 +122,10 @@ class Song(object): for title in self.titles ] - self.args = data['args'] self.subpath = subpath - self.languages = data['languages'] - if "by" in self.args: + if "by" in self.data: self.authors = processauthors( - self.args["by"], + self.data["by"], **config["_compiled_authwords"] ) else: @@ -148,7 +147,7 @@ class Song(object): ) def __repr__(self): - return repr((self.titles, self.args, self.fullpath)) + return repr((self.titles, self.data, self.fullpath)) def unprefixed_title(title, prefixes): """Remove the first prefix of the list in the beginning of title (if any).