From d685da1244a64cbac45a7b53fce46ffe8d682a4b Mon Sep 17 00:00:00 2001 From: Louis Date: Wed, 1 Oct 2014 10:33:42 +0200 Subject: [PATCH] Cleaning LaTeX parser --- patacrep/latex/__init__.py | 12 ++- patacrep/latex/ast.py | 15 +++ patacrep/latex/detex.py | 14 ++- patacrep/latex/lexer.py | 122 +++++++++++++--------- patacrep/latex/parsetab.py | 72 ------------- patacrep/latex/syntax.py | 204 +++++++++++++++++++++++-------------- patacrep/latex/testing.py | 50 --------- 7 files changed, 236 insertions(+), 253 deletions(-) delete mode 100644 patacrep/latex/parsetab.py delete mode 100644 patacrep/latex/testing.py diff --git a/patacrep/latex/__init__.py b/patacrep/latex/__init__.py index b0826e6e..46ac9d10 100644 --- a/patacrep/latex/__init__.py +++ b/patacrep/latex/__init__.py @@ -1,12 +1,17 @@ # -*- coding: utf-8 -*- +"""Very simple LaTeX parser + +This module uses an LALR parser to try to parse LaTeX code. LaTeX language +*cannot* be parsed by an LALR parser, so this is a very simple attemps, which +will work on simple cases, but not on complex ones. +""" + from patacrep.latex.syntax import tex2plain as syntax_tex2plain from patacrep.latex.syntax import parsesong as syntax_parsesong from patacrep.latex.detex import detex from patacrep import encoding -"""Very simple LaTeX parser""" - def tex2plain(string): """Render LaTeX string @@ -17,9 +22,6 @@ def tex2plain(string): def parsesong(path): """Return a dictonary of data read from the latex file `path`. - This file is a drop in replacement for an old function. Elle ne devrait pas - apparaitre telle quelle dans la version finale, une fois que - https://github.com/patacrep/patacrep/issues/64 aura été pris en compte. """ data = syntax_parsesong(encoding.open_read(path).read(), path) data['@path'] = path diff --git a/patacrep/latex/ast.py b/patacrep/latex/ast.py index 1f181172..798d4c33 100644 --- a/patacrep/latex/ast.py +++ b/patacrep/latex/ast.py @@ -1,20 +1,33 @@ +"""Abstract Syntax Tree for LaTeX code.""" + +# pylint: disable=too-few-public-methods + class AST: + """Base class for the tree.""" + # pylint: disable=no-init metadata = None @classmethod def init_metadata(cls): + """Clear metadata + + As this attribute is a class attribute, it as to be reset at each new + parsing. + """ cls.metadata = { '@languages': set(), } class Expression(AST): + """LaTeX expression""" def __init__(self, value): super().__init__() self.content = [value] def prepend(self, value): + """Add a value at the beginning of the content list.""" if value is not None: self.content.insert(0, value) return self @@ -23,6 +36,7 @@ class Expression(AST): return "".join([str(item) for item in self.content]) class Command(AST): + """LaTeX command""" def __init__(self, name, optional, mandatory): self.name = name @@ -43,6 +57,7 @@ class Command(AST): class BeginSong(AST): + """Beginsong command""" def __init__(self, titles, arguments): self.titles = titles diff --git a/patacrep/latex/detex.py b/patacrep/latex/detex.py index cb9f277f..ebfd721b 100644 --- a/patacrep/latex/detex.py +++ b/patacrep/latex/detex.py @@ -1,3 +1,9 @@ +"""Render `very simple` TeX commands in a simple TeX code.""" + +import logging + +LOGGER = logging.getLogger() + MATCH = [ # Diacritics: a (r"\'a", "á"), @@ -85,6 +91,12 @@ MATCH = [ def detex(arg): + """Render very simple TeX commands from argument. + + Argument can be: + - a string: it is processed; + - a list, dict or set: its values are processed. + """ if isinstance(arg, dict): return dict([ (key, detex(value)) @@ -104,7 +116,7 @@ def detex(arg): for (latex, plain) in MATCH: string = string.replace(latex, plain) if '\\' in string: - print("WARNING: Remaining command in string '{}'.".format(string)) + LOGGER.warning("Remaining command in string '{}'.".format(string)) return string.strip() else: return detex(str(arg)) diff --git a/patacrep/latex/lexer.py b/patacrep/latex/lexer.py index 199d6f9c..8954e14c 100644 --- a/patacrep/latex/lexer.py +++ b/patacrep/latex/lexer.py @@ -1,5 +1,11 @@ +"""Very simple LaTeX lexer.""" + +import logging import ply.lex as lex +LOGGER = logging.getLogger() + +#pylint: disable=invalid-name tokens = ( 'LBRACKET', 'RBRACKET', @@ -19,10 +25,10 @@ tokens = ( ) class SimpleLexer: + """Very simple LaTeX lexer.""" tokens = tokens - # Regular expression rules for simple tokens t_LBRACKET = r'\[' t_RBRACKET = r'\]' t_LBRACE = r'{' @@ -49,76 +55,96 @@ class SimpleLexer: t_SPACE = r'[ \t\n\r]+' def __init__(self): - self.__class__.lexer = lex.lex(module = self) + self.__class__.lexer = lex.lex(module=self) # Define a rule so we can track line numbers - def t_newline(self, t): + @staticmethod + def t_newline(token): r'\n+' - t.lexer.lineno += len(t.value) + token.lexer.lineno += len(token.value) - def t_comment(self, t): + @staticmethod + def t_comment(token): r'%.*' pass # Error handling rule - def t_error(self, t): - print("Illegal character '%s'" % t.value[0]) # TODO log - t.lexer.skip(1) + @staticmethod + def t_error(token): + """Manage errors""" + LOGGER.error("Illegal character '{}'".format(token.value[0])) + token.lexer.skip(1) class SongLexer(SimpleLexer): + r"""Very simple song lexer. + + In the context of this class, a "song" is some LaTeX code containing the + ``\beginsong`` (or ``\sortassong``) command. + """ states = ( ('beginsong', 'inclusive'), ) # State beginsong - def t_INITIAL_BEGINSONG(self, t): - r'\\beginsong' - t.lexer.push_state('beginsong') - t.lexer.open_brackets = 0 - t.lexer.open_braces = 0 - return t - - def t_beginsong_LBRACKET(self, t): + @staticmethod + def t_INITIAL_BEGINSONG(token): + r'(\\beginsong|\\sortassong)' + token.lexer.push_state('beginsong') + token.lexer.open_brackets = 0 + token.lexer.open_braces = 0 + return token + + @staticmethod + def t_beginsong_LBRACKET(token): r'\[' - if t.lexer.open_brackets == 0: - t.type = 'SONG_LOPTIONS' - t.lexer.open_braces += 1 # TODO Explain - t.lexer.open_brackets += 1 - return t + if token.lexer.open_brackets == 0: + token.type = 'SONG_LOPTIONS' + + # Count opening and closing braces to know when to leave the + # `beginsong` state. + token.lexer.open_braces += 1 + token.lexer.open_brackets += 1 + return token - def t_beginsong_RBRACKET(self, t): + @staticmethod + def t_beginsong_RBRACKET(token): r'\]' - t.lexer.open_brackets -= 1 - if t.lexer.open_brackets == 0: - t.type = 'SONG_ROPTIONS' - t.lexer.open_braces -= 1 # TODO Explain - t.lexer.pop_state() - for __ignored in t.lexer: # TODO Explain + token.lexer.open_brackets -= 1 + if token.lexer.open_brackets == 0: + token.type = 'SONG_ROPTIONS' + token.lexer.open_braces -= 1 + token.lexer.pop_state() + for __ignored in token.lexer: + # In this parser, we only want to read metadata. So, after the + # first ``\beginsong`` command, we can stop parsing. pass - return t + return token - def t_beginsong_LBRACE(self, t): + @staticmethod + def t_beginsong_LBRACE(token): r'{' - if t.lexer.open_braces == 0: - t.type = 'SONG_LTITLE' - t.lexer.open_braces += 1 - return t + if token.lexer.open_braces == 0: + token.type = 'SONG_LTITLE' + token.lexer.open_braces += 1 + return token - def t_beginsong_RBRACE1(self, t): + @staticmethod + def t_beginsong_RBRACE1(token): r'}(?![ \t\r\n]*\[)' - t.lexer.open_braces -= 1 - t.type = 'RBRACE' - if t.lexer.open_braces == 0: - t.lexer.pop_state() - t.type = 'SONG_RTITLE' - return t - - def t_beginsong_RBRACE2(self, t): + token.lexer.open_braces -= 1 + token.type = 'RBRACE' + if token.lexer.open_braces == 0: + token.lexer.pop_state() + token.type = 'SONG_RTITLE' + return token + + @staticmethod + def t_beginsong_RBRACE2(token): r'}(?=[ \t\r\n]*\[)' - t.lexer.open_braces -= 1 - t.type = 'RBRACE' - if t.lexer.open_braces == 0: - t.type = 'SONG_RTITLE' - return t + token.lexer.open_braces -= 1 + token.type = 'RBRACE' + if token.lexer.open_braces == 0: + token.type = 'SONG_RTITLE' + return token diff --git a/patacrep/latex/parsetab.py b/patacrep/latex/parsetab.py deleted file mode 100644 index b98cd59e..00000000 --- a/patacrep/latex/parsetab.py +++ /dev/null @@ -1,72 +0,0 @@ - -# parsetab.py -# This file is automatically generated. Do not edit. -_tabversion = '3.2' - -_lr_method = 'LALR' - -_lr_signature = b'\xa4\n\x7f%6\xc5\x1d\x1cV\xd9V\xf5\x07\xd5\x10\x10' - -_lr_action_items = {'LBRACKET':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,],[9,9,9,9,9,9,-9,9,9,9,9,-9,9,-9,-9,-17,-21,-9,-9,-14,9,-19,-26,9,-9,-18,-20,-10,-16,-9,-12,-13,-11,9,9,9,9,9,-24,-22,-15,-25,9,-23,]),'SONG_ROPTIONS':([43,66,74,75,76,79,80,84,],[-11,70,-9,-9,-34,-28,-27,-33,]),'$end':([0,1,2,3,4,5,6,7,8,10,11,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-9,0,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,-7,-9,-14,-9,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SPACE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,74,75,81,],[10,10,10,10,10,10,-9,10,10,-9,10,-9,10,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,50,-9,-18,-20,-10,-16,-9,-12,-13,-11,50,50,50,50,50,-24,-22,-15,-25,50,-23,77,77,77,]),'COMMAND':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,],[11,11,11,11,11,11,-9,11,11,-9,11,-9,11,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,11,-9,-18,-20,-10,-16,-9,-12,-13,-11,11,11,11,11,11,-24,-22,-15,-25,11,-23,]),'RBRACKET':([1,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,38,-7,-9,-14,-9,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'LBRACE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,71,],[12,12,12,12,12,12,-9,12,12,-9,12,-9,12,-9,-9,-17,-21,-9,12,-14,-9,-19,-26,12,-9,-18,-20,-10,-16,12,-12,-13,-11,12,12,12,12,12,-24,-22,-15,-25,12,-23,12,]),'SONG_LOPTIONS':([15,33,35,60,],[-9,-26,54,-25,]),'CHARACTER':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,54,55,60,62,68,70,77,81,82,83,],[8,8,8,8,8,8,24,8,8,-9,8,24,8,-9,24,-17,-21,24,-9,-14,-9,-19,-26,8,-9,-18,-20,-10,-16,-9,-12,-13,-11,8,8,8,8,8,-24,-22,68,-15,-25,8,68,-23,-31,-9,68,-32,]),'NEWLINE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,50,51,52,53,55,56,57,58,59,60,61,62,69,70,],[7,7,7,7,7,7,-9,7,7,-9,7,-9,7,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,-9,-9,-18,-20,-10,-16,-9,-12,-13,-11,-9,-43,-9,-9,-9,-9,62,-24,-22,-15,-41,-40,-38,-39,-25,-42,-9,62,-23,]),'error':([71,],[74,]),'EQUAL':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,54,55,60,62,65,67,68,70,72,77,81,82,83,],[13,13,13,13,13,13,-9,13,13,-9,13,-9,13,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,13,-9,-18,-20,-10,-16,-9,-12,-13,-11,13,13,13,13,13,-24,-22,-9,-15,-25,13,-30,71,-9,-23,-29,-31,-9,-9,-32,]),'COMMA':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,74,75,76,77,78,],[16,16,16,16,16,16,-9,16,16,-9,16,-9,16,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,16,-9,-18,-20,-10,-16,-9,-12,-13,-11,16,16,16,16,16,-24,-22,-15,-25,16,-23,-9,-9,-32,-31,81,]),'RBRACE':([1,3,4,5,6,7,8,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,30,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,-7,-9,-14,-9,43,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SONG_LTITLE':([15,],[34,]),'BEGINSONG':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[15,15,15,15,15,15,-9,15,15,-9,15,-9,15,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SONG_RTITLE':([8,11,13,16,22,23,24,27,28,29,31,34,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,56,57,58,59,61,62,63,64,69,73,],[-9,-9,-9,-9,-17,-21,-9,-9,-14,-9,-19,-9,-18,-20,-10,-16,-9,-12,-13,-11,-9,-43,-9,-9,-9,60,-9,-9,-15,-41,-40,-38,-39,-42,-9,-37,-35,-9,-36,]),} - -_lr_action = { } -for _k, _v in _lr_action_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_action: _lr_action[_x] = { } - _lr_action[_x][_k] = _y -del _lr_action_items - -_lr_goto_items = {'word':([0,1,4,5,6,7,9,10,12,14,34,44,46,47,48,50,62,],[1,1,1,1,1,1,1,1,1,1,44,44,44,44,44,44,44,]),'expression':([0,1,4,5,6,7,9,10,12,14,],[2,17,18,19,20,21,25,26,30,32,]),'dictionary_next':([74,75,],[79,80,]),'empty':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,24,27,29,34,35,40,44,46,47,48,50,51,54,62,68,69,74,75,81,82,],[3,3,3,3,3,3,23,3,3,28,3,23,3,33,23,23,39,28,45,52,39,45,45,45,45,45,63,65,45,65,63,76,76,83,65,]),'dictionary':([54,82,],[66,84,]),'songbrackets':([35,],[53,]),'command':([0,1,4,5,6,7,9,10,12,14,34,44,46,47,48,50,62,],[4,4,4,4,4,4,4,4,4,4,46,46,46,46,46,46,46,]),'identifier':([54,68,82,],[67,72,67,]),'songbraces':([15,],[35,]),'brackets_list':([11,29,],[27,42,]),'brackets':([0,1,4,5,6,7,9,10,11,12,14,29,34,44,46,47,48,50,62,],[6,6,6,6,6,6,6,6,29,6,6,29,47,47,47,47,47,47,47,]),'word_next':([8,13,16,24,],[22,31,36,37,]),'beginsong':([0,1,4,5,6,7,9,10,12,14,],[5,5,5,5,5,5,5,5,5,5,]),'title':([34,44,46,47,48,50,62,],[51,56,57,58,59,61,69,]),'titles_next':([51,69,],[64,73,]),'braces':([0,1,4,5,6,7,9,10,12,14,27,34,40,44,46,47,48,50,62,71,],[14,14,14,14,14,14,14,14,14,14,40,48,40,48,48,48,48,48,48,75,]),'separator':([74,75,81,],[78,78,82,]),'titles':([34,],[49,]),'braces_list':([27,40,],[41,55,]),} - -_lr_goto = { } -for _k, _v in _lr_goto_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_goto: _lr_goto[_x] = { } - _lr_goto[_x][_k] = _y -del _lr_goto_items -_lr_productions = [ - ("S' -> expression","S'",1,None,None,None), - ('expression -> brackets expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',8), - ('expression -> braces expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',9), - ('expression -> command expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',10), - ('expression -> NEWLINE expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',11), - ('expression -> beginsong expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',12), - ('expression -> word expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',13), - ('expression -> SPACE expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',14), - ('expression -> empty','expression',1,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',15), - ('empty -> ','empty',0,'p_empty','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',26), - ('brackets -> LBRACKET expression RBRACKET','brackets',3,'p_brackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',30), - ('braces -> LBRACE expression RBRACE','braces',3,'p_braces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',34), - ('command -> COMMAND brackets_list braces_list','command',3,'p_command','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',38), - ('brackets_list -> brackets brackets_list','brackets_list',2,'p_brackets_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',42), - ('brackets_list -> empty','brackets_list',1,'p_brackets_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',43), - ('braces_list -> braces braces_list','braces_list',2,'p_braces_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',52), - ('braces_list -> empty','braces_list',1,'p_braces_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',53), - ('word -> CHARACTER word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',62), - ('word -> COMMA word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',63), - ('word -> EQUAL word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',64), - ('word_next -> CHARACTER word_next','word_next',2,'p_word_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',69), - ('word_next -> empty','word_next',1,'p_word_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',70), - ('beginsong -> BEGINSONG songbraces songbrackets','beginsong',3,'p_beginsong','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',78), - ('songbrackets -> SONG_LOPTIONS dictionary SONG_ROPTIONS','songbrackets',3,'p_songbrackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',82), - ('songbrackets -> empty','songbrackets',1,'p_songbrackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',83), - ('songbraces -> SONG_LTITLE titles SONG_RTITLE','songbraces',3,'p_songbraces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',91), - ('songbraces -> empty','songbraces',1,'p_songbraces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',92), - ('dictionary -> identifier EQUAL braces dictionary_next','dictionary',4,'p_dictionary','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',100), - ('dictionary -> identifier EQUAL error dictionary_next','dictionary',4,'p_dictionary','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',101), - ('identifier -> CHARACTER identifier','identifier',2,'p_identifier','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',111), - ('identifier -> empty','identifier',1,'p_identifier','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',112), - ('separator -> SPACE','separator',1,'p_separator','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',120), - ('separator -> empty','separator',1,'p_separator','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',121), - ('dictionary_next -> separator COMMA separator dictionary','dictionary_next',4,'p_dictonary_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',126), - ('dictionary_next -> empty','dictionary_next',1,'p_dictonary_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',127), - ('titles -> title titles_next','titles',2,'p_titles','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',135), - ('titles_next -> NEWLINE title titles_next','titles_next',3,'p_titles_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',139), - ('titles_next -> empty','titles_next',1,'p_titles_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',140), - ('title -> brackets title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',148), - ('title -> braces title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',149), - ('title -> command title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',150), - ('title -> word title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',151), - ('title -> SPACE title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',152), - ('title -> empty','title',1,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',153), -] diff --git a/patacrep/latex/syntax.py b/patacrep/latex/syntax.py index f161b19c..abd71903 100644 --- a/patacrep/latex/syntax.py +++ b/patacrep/latex/syntax.py @@ -1,11 +1,28 @@ +"""Very simple LaTeX parser""" + +import logging import ply.yacc as yacc -import inspect # TODO supprimer from patacrep.latex.lexer import tokens, SimpleLexer, SongLexer from patacrep.latex import ast +from patacrep.errors import SongbookError from patacrep.latex.detex import detex +LOGGER = logging.getLogger() + +class ParsingError(SongbookError): + """Parsing error.""" + + def __init__(self, message): + super().__init__(self) + self.message = message + + def __str__(self): + return self.message + +# pylint: disable=line-too-long class Parser: + """LaTeX parser.""" def __init__(self, filename=None): self.tokens = tokens @@ -13,22 +30,26 @@ class Parser: self.ast.init_metadata() self.filename = filename - def __find_column(self, token): - last_cr = token.lexer.lexdata.rfind('\n',0,token.lexpos) + @staticmethod + def __find_column(token): + """Return the column of ``token``.""" + last_cr = token.lexer.lexdata.rfind('\n', 0, token.lexpos) if last_cr < 0: last_cr = 0 column = (token.lexpos - last_cr) + 1 return column - def p_error(self, p): - print("Erreur fichier {}, ligne {}, position {}.".format( # TODO + def p_error(self, token): + """Manage parsing errors.""" + LOGGER.error("Erreur fichier {}, ligne {}, position {}.".format( str(self.filename), - p.lineno, - self.__find_column(p), + token.lineno, + self.__find_column(token), ) ) - def p_expression(self, p): + @staticmethod + def p_expression(symbols): """expression : brackets expression | braces expression | command expression @@ -38,138 +59,155 @@ class Parser: | SPACE expression | empty """ - if len(p) == 3: - if p[2] is None: - p[0] = ast.Expression(p[1]) + if len(symbols) == 3: + if symbols[2] is None: + symbols[0] = ast.Expression(symbols[1]) else: - p[0] = p[2].prepend(p[1]) + symbols[0] = symbols[2].prepend(symbols[1]) else: - p[0] = None + symbols[0] = None - def p_empty(self, p): + @staticmethod + def p_empty(__symbols): """empty :""" return None - def p_brackets(self, p): + @staticmethod + def p_brackets(symbols): """brackets : LBRACKET expression RBRACKET""" - p[0] = p[2] + symbols[0] = symbols[2] - def p_braces(self, p): + @staticmethod + def p_braces(symbols): """braces : LBRACE expression RBRACE""" - p[0] = p[2] + symbols[0] = symbols[2] - def p_command(self, p): + @staticmethod + def p_command(symbols): """command : COMMAND brackets_list braces_list""" - p[0] = ast.Command(p[1], p[2], p[3]) + symbols[0] = ast.Command(symbols[1], symbols[2], symbols[3]) - def p_brackets_list(self, p): + @staticmethod + def p_brackets_list(symbols): """brackets_list : brackets brackets_list | empty """ - if len(p) == 3: - p[0] = p[2] - p[0].insert(0, p[1]) + if len(symbols) == 3: + symbols[0] = symbols[2] + symbols[0].insert(0, symbols[1]) else: - p[0] = [] + symbols[0] = [] - def p_braces_list(self, p): + @staticmethod + def p_braces_list(symbols): """braces_list : braces braces_list | empty """ - if len(p) == 3: - p[0] = p[2] - p[0].insert(0, p[1]) + if len(symbols) == 3: + symbols[0] = symbols[2] + symbols[0].insert(0, symbols[1]) else: - p[0] = [] + symbols[0] = [] - def p_word(self, p): + @staticmethod + def p_word(symbols): """word : CHARACTER word_next | COMMA word_next | EQUAL word_next """ - p[0] = p[1] + p[2] + symbols[0] = symbols[1] + symbols[2] - def p_word_next(self, p): + @staticmethod + def p_word_next(symbols): """word_next : CHARACTER word_next | empty """ - if len(p) == 2: - p[0] = "" + if len(symbols) == 2: + symbols[0] = "" else: - p[0] = p[1] + p[2] + symbols[0] = symbols[1] + symbols[2] - def p_beginsong(self, p): + def p_beginsong(self, symbols): """beginsong : BEGINSONG separator songbraces separator songbrackets""" - self.ast.metadata["@titles"] = p[3] - self.ast.metadata.update(p[5]) + self.ast.metadata["@titles"] = symbols[3] + self.ast.metadata.update(symbols[5]) - def p_songbrackets(self, p): + @staticmethod + def p_songbrackets(symbols): """songbrackets : SONG_LOPTIONS separator dictionary separator SONG_ROPTIONS | empty """ - if len(p) == 6: - p[0] = p[3] + if len(symbols) == 6: + symbols[0] = symbols[3] else: - p[0] = {} + symbols[0] = {} - def p_songbraces(self, p): + @staticmethod + def p_songbraces(symbols): """songbraces : SONG_LTITLE separator titles separator SONG_RTITLE | empty """ - if len(p) == 6: - p[0] = p[3] + if len(symbols) == 6: + symbols[0] = symbols[3] else: - p[0] = [] + symbols[0] = [] - def p_dictionary(self, p): + @staticmethod + def p_dictionary(symbols): """dictionary : identifier EQUAL braces dictionary_next | identifier EQUAL error dictionary_next """ - if isinstance(p[3], ast.Expression): - p[0] = {} - p[0][p[1]] = p[3] - p[0].update(p[4]) + if isinstance(symbols[3], ast.Expression): + symbols[0] = {} + symbols[0][symbols[1]] = symbols[3] + symbols[0].update(symbols[4]) else: - raise Exception("Do enclose arguments between braces.") # TODO + raise ParsingError("Do enclose arguments between braces.") - def p_identifier(self, p): + @staticmethod + def p_identifier(symbols): """identifier : CHARACTER identifier | empty """ - if len(p) == 2: - p[0] = "" + if len(symbols) == 2: + symbols[0] = "" else: - p[0] = p[1] + p[2] + symbols[0] = symbols[1] + symbols[2] - def p_separator(self, p): + @staticmethod + def p_separator(symbols): """separator : SPACE | empty """ - p[0] = None + symbols[0] = None - def p_dictonary_next(self, p): + @staticmethod + def p_dictonary_next(symbols): """dictionary_next : separator COMMA separator dictionary | empty """ - if len(p) == 5: - p[0] = p[4] + if len(symbols) == 5: + symbols[0] = symbols[4] else: - p[0] = {} + symbols[0] = {} - def p_titles(self, p): + @staticmethod + def p_titles(symbols): """titles : title titles_next""" - p[0] = [p[1]] + p[2] + symbols[0] = [symbols[1]] + symbols[2] - def p_titles_next(self, p): + @staticmethod + def p_titles_next(symbols): """titles_next : NEWLINE title titles_next | empty """ - if len(p) == 2: - p[0] = [] + if len(symbols) == 2: + symbols[0] = [] else: - p[0] = [p[2]] + p[3] + symbols[0] = [symbols[2]] + symbols[3] - def p_title(self, p): + @staticmethod + def p_title(symbols): """title : brackets title | braces title | command title @@ -177,18 +215,30 @@ class Parser: | SPACE title | empty """ - if len(p) == 2: - p[0] = None + if len(symbols) == 2: + symbols[0] = None else: - if p[2] is None: - p[0] = ast.Expression(p[1]) + if symbols[2] is None: + symbols[0] = ast.Expression(symbols[1]) else: - p[0] = p[2].prepend(p[1]) + symbols[0] = symbols[2].prepend(symbols[1]) def tex2plain(string): - return detex(yacc.yacc(module = Parser()).parse(string, lexer = SimpleLexer().lexer)) + """Parse string and return its plain text version.""" + return detex( + yacc.yacc(module=Parser()).parse( + string, + lexer=SimpleLexer().lexer, + ) + ) def parsesong(string, filename=None): - return detex(yacc.yacc(module = Parser(filename)).parse(string, lexer = SongLexer().lexer).metadata) + """Parse song and return its metadata.""" + return detex( + yacc.yacc(module=Parser(filename)).parse( + string, + lexer=SongLexer().lexer, + ).metadata + ) diff --git a/patacrep/latex/testing.py b/patacrep/latex/testing.py deleted file mode 100644 index f2d66510..00000000 --- a/patacrep/latex/testing.py +++ /dev/null @@ -1,50 +0,0 @@ - -# Test it out -song = r""" -\selectlanguage{french} -plop = tag -% Un commentaire -\columns{3} -\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre}[album={Tagada tsoin \"itsoin}, cov={pouf.png}] % un autre -%\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre}[album={Tagada tsoin \"itsoin}] % un autre -%\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre} - - Dans [Dm6]cette ruedots [E7] -""" - -isong = r""" -\selectlanguage{french} -\songcolumns{2} -\beginsong{Tous les bateaux, tous les oiseaux} - [by={Michel Polnareff},cov={passe-present},album={Passé Présent}] - - Dans \[Dm6]cette ruedots [E7] -""" - -tex = "D\\^iacritiqu\\'Es" - -# Give the lexer some input -#if 0: -# from syntax import parser -# print(parser.parse(data, debug=0)) -# print(parser.parse(data).song_data()) -#else: -# from lexer import SimpleLexer -# lexer.input(data) -# for tok in lexer: -# print(tok) - -from patacrep.latex import tex2plain -from patacrep.latex.syntax import parsesong -from patacrep.latex.ast import AST - -print(tex2plain(tex) == "DîacritiquÉs") -print(parsesong(song, AST)) -print({ - "@titles": ["Titre un", "Titre deux", "Tître trois", "Tpitre quatre"], - "@languages": set(["french"]), - "@path": "TODO", - "album": "Tagada tsoin ïtsoin", - "cov": "pouf.png", - } - )