Browse Source

Cleaning LaTeX parser

pull/66/head
Louis 10 years ago
parent
commit
d685da1244
  1. 12
      patacrep/latex/__init__.py
  2. 15
      patacrep/latex/ast.py
  3. 14
      patacrep/latex/detex.py
  4. 120
      patacrep/latex/lexer.py
  5. 72
      patacrep/latex/parsetab.py
  6. 202
      patacrep/latex/syntax.py
  7. 50
      patacrep/latex/testing.py

12
patacrep/latex/__init__.py

@ -1,12 +1,17 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
"""Very simple LaTeX parser
This module uses an LALR parser to try to parse LaTeX code. LaTeX language
*cannot* be parsed by an LALR parser, so this is a very simple attempt, which
will work on simple cases, but not on complex ones.
"""
from patacrep.latex.syntax import tex2plain as syntax_tex2plain from patacrep.latex.syntax import tex2plain as syntax_tex2plain
from patacrep.latex.syntax import parsesong as syntax_parsesong from patacrep.latex.syntax import parsesong as syntax_parsesong
from patacrep.latex.detex import detex from patacrep.latex.detex import detex
from patacrep import encoding from patacrep import encoding
"""Very simple LaTeX parser"""
def tex2plain(string): def tex2plain(string):
"""Render LaTeX string """Render LaTeX string
@ -17,9 +22,6 @@ def tex2plain(string):
def parsesong(path): def parsesong(path):
"""Return a dictionary of data read from the latex file `path`. """Return a dictionary of data read from the latex file `path`.
This file is a drop in replacement for an old function. Elle ne devrait pas
apparaitre telle quelle dans la version finale, une fois que
https://github.com/patacrep/patacrep/issues/64 aura été pris en compte.
""" """
data = syntax_parsesong(encoding.open_read(path).read(), path) data = syntax_parsesong(encoding.open_read(path).read(), path)
data['@path'] = path data['@path'] = path

15
patacrep/latex/ast.py

@ -1,20 +1,33 @@
"""Abstract Syntax Tree for LaTeX code."""
# pylint: disable=too-few-public-methods
class AST: class AST:
"""Base class for the tree."""
# pylint: disable=no-init
metadata = None metadata = None
@classmethod @classmethod
def init_metadata(cls): def init_metadata(cls):
"""Clear metadata
As this attribute is a class attribute, it has to be reset at each new
parsing.
"""
cls.metadata = { cls.metadata = {
'@languages': set(), '@languages': set(),
} }
class Expression(AST): class Expression(AST):
"""LaTeX expression"""
def __init__(self, value): def __init__(self, value):
super().__init__() super().__init__()
self.content = [value] self.content = [value]
def prepend(self, value): def prepend(self, value):
"""Add a value at the beginning of the content list."""
if value is not None: if value is not None:
self.content.insert(0, value) self.content.insert(0, value)
return self return self
@ -23,6 +36,7 @@ class Expression(AST):
return "".join([str(item) for item in self.content]) return "".join([str(item) for item in self.content])
class Command(AST): class Command(AST):
"""LaTeX command"""
def __init__(self, name, optional, mandatory): def __init__(self, name, optional, mandatory):
self.name = name self.name = name
@ -43,6 +57,7 @@ class Command(AST):
class BeginSong(AST): class BeginSong(AST):
"""Beginsong command"""
def __init__(self, titles, arguments): def __init__(self, titles, arguments):
self.titles = titles self.titles = titles

14
patacrep/latex/detex.py

@ -1,3 +1,9 @@
"""Render `very simple` TeX commands in a simple TeX code."""
import logging
LOGGER = logging.getLogger()
MATCH = [ MATCH = [
# Diacritics: a # Diacritics: a
(r"\'a", "á"), (r"\'a", "á"),
@ -85,6 +91,12 @@ MATCH = [
def detex(arg): def detex(arg):
"""Render very simple TeX commands from argument.
Argument can be:
- a string: it is processed;
- a list, dict or set: its values are processed.
"""
if isinstance(arg, dict): if isinstance(arg, dict):
return dict([ return dict([
(key, detex(value)) (key, detex(value))
@ -104,7 +116,7 @@ def detex(arg):
for (latex, plain) in MATCH: for (latex, plain) in MATCH:
string = string.replace(latex, plain) string = string.replace(latex, plain)
if '\\' in string: if '\\' in string:
print("WARNING: Remaining command in string '{}'.".format(string)) LOGGER.warning("Remaining command in string '{}'.".format(string))
return string.strip() return string.strip()
else: else:
return detex(str(arg)) return detex(str(arg))

120
patacrep/latex/lexer.py

@ -1,5 +1,11 @@
"""Very simple LaTeX lexer."""
import logging
import ply.lex as lex import ply.lex as lex
LOGGER = logging.getLogger()
#pylint: disable=invalid-name
tokens = ( tokens = (
'LBRACKET', 'LBRACKET',
'RBRACKET', 'RBRACKET',
@ -19,10 +25,10 @@ tokens = (
) )
class SimpleLexer: class SimpleLexer:
"""Very simple LaTeX lexer."""
tokens = tokens tokens = tokens
# Regular expression rules for simple tokens
t_LBRACKET = r'\[' t_LBRACKET = r'\['
t_RBRACKET = r'\]' t_RBRACKET = r'\]'
t_LBRACE = r'{' t_LBRACE = r'{'
@ -52,73 +58,93 @@ class SimpleLexer:
self.__class__.lexer = lex.lex(module=self) self.__class__.lexer = lex.lex(module=self)
# Define a rule so we can track line numbers # Define a rule so we can track line numbers
def t_newline(self, t): @staticmethod
def t_newline(token):
r'\n+' r'\n+'
t.lexer.lineno += len(t.value) token.lexer.lineno += len(token.value)
def t_comment(self, t): @staticmethod
def t_comment(token):
r'%.*' r'%.*'
pass pass
# Error handling rule # Error handling rule
def t_error(self, t): @staticmethod
print("Illegal character '%s'" % t.value[0]) # TODO log def t_error(token):
t.lexer.skip(1) """Manage errors"""
LOGGER.error("Illegal character '{}'".format(token.value[0]))
token.lexer.skip(1)
class SongLexer(SimpleLexer): class SongLexer(SimpleLexer):
r"""Very simple song lexer.
In the context of this class, a "song" is some LaTeX code containing the
``\beginsong`` (or ``\sortassong``) command.
"""
states = ( states = (
('beginsong', 'inclusive'), ('beginsong', 'inclusive'),
) )
# State beginsong # State beginsong
def t_INITIAL_BEGINSONG(self, t): @staticmethod
r'\\beginsong' def t_INITIAL_BEGINSONG(token):
t.lexer.push_state('beginsong') r'(\\beginsong|\\sortassong)'
t.lexer.open_brackets = 0 token.lexer.push_state('beginsong')
t.lexer.open_braces = 0 token.lexer.open_brackets = 0
return t token.lexer.open_braces = 0
return token
def t_beginsong_LBRACKET(self, t):
@staticmethod
def t_beginsong_LBRACKET(token):
r'\[' r'\['
if t.lexer.open_brackets == 0: if token.lexer.open_brackets == 0:
t.type = 'SONG_LOPTIONS' token.type = 'SONG_LOPTIONS'
t.lexer.open_braces += 1 # TODO Explain
t.lexer.open_brackets += 1 # Count opening and closing braces to know when to leave the
return t # `beginsong` state.
token.lexer.open_braces += 1
token.lexer.open_brackets += 1
return token
def t_beginsong_RBRACKET(self, t): @staticmethod
def t_beginsong_RBRACKET(token):
r'\]' r'\]'
t.lexer.open_brackets -= 1 token.lexer.open_brackets -= 1
if t.lexer.open_brackets == 0: if token.lexer.open_brackets == 0:
t.type = 'SONG_ROPTIONS' token.type = 'SONG_ROPTIONS'
t.lexer.open_braces -= 1 # TODO Explain token.lexer.open_braces -= 1
t.lexer.pop_state() token.lexer.pop_state()
for __ignored in t.lexer: # TODO Explain for __ignored in token.lexer:
# In this parser, we only want to read metadata. So, after the
# first ``\beginsong`` command, we can stop parsing.
pass pass
return t return token
def t_beginsong_LBRACE(self, t): @staticmethod
def t_beginsong_LBRACE(token):
r'{' r'{'
if t.lexer.open_braces == 0: if token.lexer.open_braces == 0:
t.type = 'SONG_LTITLE' token.type = 'SONG_LTITLE'
t.lexer.open_braces += 1 token.lexer.open_braces += 1
return t return token
def t_beginsong_RBRACE1(self, t): @staticmethod
def t_beginsong_RBRACE1(token):
r'}(?![ \t\r\n]*\[)' r'}(?![ \t\r\n]*\[)'
t.lexer.open_braces -= 1 token.lexer.open_braces -= 1
t.type = 'RBRACE' token.type = 'RBRACE'
if t.lexer.open_braces == 0: if token.lexer.open_braces == 0:
t.lexer.pop_state() token.lexer.pop_state()
t.type = 'SONG_RTITLE' token.type = 'SONG_RTITLE'
return t return token
def t_beginsong_RBRACE2(self, t): @staticmethod
def t_beginsong_RBRACE2(token):
r'}(?=[ \t\r\n]*\[)' r'}(?=[ \t\r\n]*\[)'
t.lexer.open_braces -= 1 token.lexer.open_braces -= 1
t.type = 'RBRACE' token.type = 'RBRACE'
if t.lexer.open_braces == 0: if token.lexer.open_braces == 0:
t.type = 'SONG_RTITLE' token.type = 'SONG_RTITLE'
return t return token

72
patacrep/latex/parsetab.py

@ -1,72 +0,0 @@
# parsetab.py
# This file is automatically generated. Do not edit.
_tabversion = '3.2'
_lr_method = 'LALR'
_lr_signature = b'\xa4\n\x7f%6\xc5\x1d\x1cV\xd9V\xf5\x07\xd5\x10\x10'
_lr_action_items = {'LBRACKET':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,],[9,9,9,9,9,9,-9,9,9,9,9,-9,9,-9,-9,-17,-21,-9,-9,-14,9,-19,-26,9,-9,-18,-20,-10,-16,-9,-12,-13,-11,9,9,9,9,9,-24,-22,-15,-25,9,-23,]),'SONG_ROPTIONS':([43,66,74,75,76,79,80,84,],[-11,70,-9,-9,-34,-28,-27,-33,]),'$end':([0,1,2,3,4,5,6,7,8,10,11,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-9,0,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,-7,-9,-14,-9,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SPACE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,74,75,81,],[10,10,10,10,10,10,-9,10,10,-9,10,-9,10,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,50,-9,-18,-20,-10,-16,-9,-12,-13,-11,50,50,50,50,50,-24,-22,-15,-25,50,-23,77,77,77,]),'COMMAND':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,],[11,11,11,11,11,11,-9,11,11,-9,11,-9,11,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,11,-9,-18,-20,-10,-16,-9,-12,-13,-11,11,11,11,11,11,-24,-22,-15,-25,11,-23,]),'RBRACKET':([1,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,38,-7,-9,-14,-9,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'LBRACE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,71,],[12,12,12,12,12,12,-9,12,12,-9,12,-9,12,-9,-9,-17,-21,-9,12,-14,-9,-19,-26,12,-9,-18,-20,-10,-16,12,-12,-13,-11,12,12,12,12,12,-24,-22,-15,-25,12,-23,12,]),'SONG_LOPTIONS':([15,33,35,60,],[-9,-26,54,-25,]),'CHARACTER':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,
46,47,48,50,52,53,54,55,60,62,68,70,77,81,82,83,],[8,8,8,8,8,8,24,8,8,-9,8,24,8,-9,24,-17,-21,24,-9,-14,-9,-19,-26,8,-9,-18,-20,-10,-16,-9,-12,-13,-11,8,8,8,8,8,-24,-22,68,-15,-25,8,68,-23,-31,-9,68,-32,]),'NEWLINE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,50,51,52,53,55,56,57,58,59,60,61,62,69,70,],[7,7,7,7,7,7,-9,7,7,-9,7,-9,7,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,-9,-9,-18,-20,-10,-16,-9,-12,-13,-11,-9,-43,-9,-9,-9,-9,62,-24,-22,-15,-41,-40,-38,-39,-25,-42,-9,62,-23,]),'error':([71,],[74,]),'EQUAL':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,54,55,60,62,65,67,68,70,72,77,81,82,83,],[13,13,13,13,13,13,-9,13,13,-9,13,-9,13,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,13,-9,-18,-20,-10,-16,-9,-12,-13,-11,13,13,13,13,13,-24,-22,-9,-15,-25,13,-30,71,-9,-23,-29,-31,-9,-9,-32,]),'COMMA':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,74,75,76,77,78,],[16,16,16,16,16,16,-9,16,16,-9,16,-9,16,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,16,-9,-18,-20,-10,-16,-9,-12,-13,-11,16,16,16,16,16,-24,-22,-15,-25,16,-23,-9,-9,-32,-31,81,]),'RBRACE':([1,3,4,5,6,7,8,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,30,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,-7,-9,-14,-9,43,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SONG_LTITLE':([15,],[34,]),'BEGINSONG':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[15,15,15,15,15,15,-9,15,15,-9,15,-9,15,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SONG_RTITLE':([8,11,13,16,22,23,24,27,28,29,31,34,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,56,57,58,59,61,62,63,64,69,73,],[-9,-9,-9,-9,-17,-21,-9,-9,-14,-9,-19,-9,-18,-20,-10,-16,-9,-12,-
13,-11,-9,-43,-9,-9,-9,60,-9,-9,-15,-41,-40,-38,-39,-42,-9,-37,-35,-9,-36,]),}
_lr_action = { }
for _k, _v in _lr_action_items.items():
for _x,_y in zip(_v[0],_v[1]):
if not _x in _lr_action: _lr_action[_x] = { }
_lr_action[_x][_k] = _y
del _lr_action_items
_lr_goto_items = {'word':([0,1,4,5,6,7,9,10,12,14,34,44,46,47,48,50,62,],[1,1,1,1,1,1,1,1,1,1,44,44,44,44,44,44,44,]),'expression':([0,1,4,5,6,7,9,10,12,14,],[2,17,18,19,20,21,25,26,30,32,]),'dictionary_next':([74,75,],[79,80,]),'empty':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,24,27,29,34,35,40,44,46,47,48,50,51,54,62,68,69,74,75,81,82,],[3,3,3,3,3,3,23,3,3,28,3,23,3,33,23,23,39,28,45,52,39,45,45,45,45,45,63,65,45,65,63,76,76,83,65,]),'dictionary':([54,82,],[66,84,]),'songbrackets':([35,],[53,]),'command':([0,1,4,5,6,7,9,10,12,14,34,44,46,47,48,50,62,],[4,4,4,4,4,4,4,4,4,4,46,46,46,46,46,46,46,]),'identifier':([54,68,82,],[67,72,67,]),'songbraces':([15,],[35,]),'brackets_list':([11,29,],[27,42,]),'brackets':([0,1,4,5,6,7,9,10,11,12,14,29,34,44,46,47,48,50,62,],[6,6,6,6,6,6,6,6,29,6,6,29,47,47,47,47,47,47,47,]),'word_next':([8,13,16,24,],[22,31,36,37,]),'beginsong':([0,1,4,5,6,7,9,10,12,14,],[5,5,5,5,5,5,5,5,5,5,]),'title':([34,44,46,47,48,50,62,],[51,56,57,58,59,61,69,]),'titles_next':([51,69,],[64,73,]),'braces':([0,1,4,5,6,7,9,10,12,14,27,34,40,44,46,47,48,50,62,71,],[14,14,14,14,14,14,14,14,14,14,40,48,40,48,48,48,48,48,48,75,]),'separator':([74,75,81,],[78,78,82,]),'titles':([34,],[49,]),'braces_list':([27,40,],[41,55,]),}
_lr_goto = { }
for _k, _v in _lr_goto_items.items():
for _x,_y in zip(_v[0],_v[1]):
if not _x in _lr_goto: _lr_goto[_x] = { }
_lr_goto[_x][_k] = _y
del _lr_goto_items
_lr_productions = [
("S' -> expression","S'",1,None,None,None),
('expression -> brackets expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',8),
('expression -> braces expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',9),
('expression -> command expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',10),
('expression -> NEWLINE expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',11),
('expression -> beginsong expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',12),
('expression -> word expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',13),
('expression -> SPACE expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',14),
('expression -> empty','expression',1,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',15),
('empty -> <empty>','empty',0,'p_empty','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',26),
('brackets -> LBRACKET expression RBRACKET','brackets',3,'p_brackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',30),
('braces -> LBRACE expression RBRACE','braces',3,'p_braces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',34),
('command -> COMMAND brackets_list braces_list','command',3,'p_command','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',38),
('brackets_list -> brackets brackets_list','brackets_list',2,'p_brackets_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',42),
('brackets_list -> empty','brackets_list',1,'p_brackets_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',43),
('braces_list -> braces braces_list','braces_list',2,'p_braces_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',52),
('braces_list -> empty','braces_list',1,'p_braces_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',53),
('word -> CHARACTER word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',62),
('word -> COMMA word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',63),
('word -> EQUAL word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',64),
('word_next -> CHARACTER word_next','word_next',2,'p_word_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',69),
('word_next -> empty','word_next',1,'p_word_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',70),
('beginsong -> BEGINSONG songbraces songbrackets','beginsong',3,'p_beginsong','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',78),
('songbrackets -> SONG_LOPTIONS dictionary SONG_ROPTIONS','songbrackets',3,'p_songbrackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',82),
('songbrackets -> empty','songbrackets',1,'p_songbrackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',83),
('songbraces -> SONG_LTITLE titles SONG_RTITLE','songbraces',3,'p_songbraces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',91),
('songbraces -> empty','songbraces',1,'p_songbraces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',92),
('dictionary -> identifier EQUAL braces dictionary_next','dictionary',4,'p_dictionary','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',100),
('dictionary -> identifier EQUAL error dictionary_next','dictionary',4,'p_dictionary','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',101),
('identifier -> CHARACTER identifier','identifier',2,'p_identifier','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',111),
('identifier -> empty','identifier',1,'p_identifier','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',112),
('separator -> SPACE','separator',1,'p_separator','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',120),
('separator -> empty','separator',1,'p_separator','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',121),
('dictionary_next -> separator COMMA separator dictionary','dictionary_next',4,'p_dictonary_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',126),
('dictionary_next -> empty','dictionary_next',1,'p_dictonary_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',127),
('titles -> title titles_next','titles',2,'p_titles','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',135),
('titles_next -> NEWLINE title titles_next','titles_next',3,'p_titles_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',139),
('titles_next -> empty','titles_next',1,'p_titles_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',140),
('title -> brackets title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',148),
('title -> braces title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',149),
('title -> command title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',150),
('title -> word title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',151),
('title -> SPACE title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',152),
('title -> empty','title',1,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',153),
]

202
patacrep/latex/syntax.py

@ -1,11 +1,28 @@
"""Very simple LaTeX parser"""
import logging
import ply.yacc as yacc import ply.yacc as yacc
import inspect # TODO supprimer
from patacrep.latex.lexer import tokens, SimpleLexer, SongLexer from patacrep.latex.lexer import tokens, SimpleLexer, SongLexer
from patacrep.latex import ast from patacrep.latex import ast
from patacrep.errors import SongbookError
from patacrep.latex.detex import detex from patacrep.latex.detex import detex
LOGGER = logging.getLogger()
class ParsingError(SongbookError):
    """Parsing error.

    The human-readable description is stored in ``message``, which is also
    what ``str()`` returns for this exception.
    """

    def __init__(self, message):
        # Pass the message (not the exception instance itself) to the base
        # class, so that ``args`` does not hold a reference back to ``self``.
        super().__init__(message)
        self.message = message

    def __str__(self):
        return self.message
# pylint: disable=line-too-long
class Parser: class Parser:
"""LaTeX parser."""
def __init__(self, filename=None): def __init__(self, filename=None):
self.tokens = tokens self.tokens = tokens
@ -13,22 +30,26 @@ class Parser:
self.ast.init_metadata() self.ast.init_metadata()
self.filename = filename self.filename = filename
def __find_column(self, token): @staticmethod
def __find_column(token):
"""Return the column of ``token``."""
last_cr = token.lexer.lexdata.rfind('\n', 0, token.lexpos) last_cr = token.lexer.lexdata.rfind('\n', 0, token.lexpos)
if last_cr < 0: if last_cr < 0:
last_cr = 0 last_cr = 0
column = (token.lexpos - last_cr) + 1 column = (token.lexpos - last_cr) + 1
return column return column
def p_error(self, p): def p_error(self, token):
print("Erreur fichier {}, ligne {}, position {}.".format( # TODO """Manage parsing errors."""
LOGGER.error("Erreur fichier {}, ligne {}, position {}.".format(
str(self.filename), str(self.filename),
p.lineno, token.lineno,
self.__find_column(p), self.__find_column(token),
) )
) )
def p_expression(self, p): @staticmethod
def p_expression(symbols):
"""expression : brackets expression """expression : brackets expression
| braces expression | braces expression
| command expression | command expression
@ -38,138 +59,155 @@ class Parser:
| SPACE expression | SPACE expression
| empty | empty
""" """
if len(p) == 3: if len(symbols) == 3:
if p[2] is None: if symbols[2] is None:
p[0] = ast.Expression(p[1]) symbols[0] = ast.Expression(symbols[1])
else: else:
p[0] = p[2].prepend(p[1]) symbols[0] = symbols[2].prepend(symbols[1])
else: else:
p[0] = None symbols[0] = None
def p_empty(self, p): @staticmethod
def p_empty(__symbols):
"""empty :""" """empty :"""
return None return None
def p_brackets(self, p): @staticmethod
def p_brackets(symbols):
"""brackets : LBRACKET expression RBRACKET""" """brackets : LBRACKET expression RBRACKET"""
p[0] = p[2] symbols[0] = symbols[2]
def p_braces(self, p): @staticmethod
def p_braces(symbols):
"""braces : LBRACE expression RBRACE""" """braces : LBRACE expression RBRACE"""
p[0] = p[2] symbols[0] = symbols[2]
def p_command(self, p): @staticmethod
def p_command(symbols):
"""command : COMMAND brackets_list braces_list""" """command : COMMAND brackets_list braces_list"""
p[0] = ast.Command(p[1], p[2], p[3]) symbols[0] = ast.Command(symbols[1], symbols[2], symbols[3])
def p_brackets_list(self, p): @staticmethod
def p_brackets_list(symbols):
"""brackets_list : brackets brackets_list """brackets_list : brackets brackets_list
| empty | empty
""" """
if len(p) == 3: if len(symbols) == 3:
p[0] = p[2] symbols[0] = symbols[2]
p[0].insert(0, p[1]) symbols[0].insert(0, symbols[1])
else: else:
p[0] = [] symbols[0] = []
def p_braces_list(self, p): @staticmethod
def p_braces_list(symbols):
"""braces_list : braces braces_list """braces_list : braces braces_list
| empty | empty
""" """
if len(p) == 3: if len(symbols) == 3:
p[0] = p[2] symbols[0] = symbols[2]
p[0].insert(0, p[1]) symbols[0].insert(0, symbols[1])
else: else:
p[0] = [] symbols[0] = []
def p_word(self, p): @staticmethod
def p_word(symbols):
"""word : CHARACTER word_next """word : CHARACTER word_next
| COMMA word_next | COMMA word_next
| EQUAL word_next | EQUAL word_next
""" """
p[0] = p[1] + p[2] symbols[0] = symbols[1] + symbols[2]
def p_word_next(self, p): @staticmethod
def p_word_next(symbols):
"""word_next : CHARACTER word_next """word_next : CHARACTER word_next
| empty | empty
""" """
if len(p) == 2: if len(symbols) == 2:
p[0] = "" symbols[0] = ""
else: else:
p[0] = p[1] + p[2] symbols[0] = symbols[1] + symbols[2]
def p_beginsong(self, p): def p_beginsong(self, symbols):
"""beginsong : BEGINSONG separator songbraces separator songbrackets""" """beginsong : BEGINSONG separator songbraces separator songbrackets"""
self.ast.metadata["@titles"] = p[3] self.ast.metadata["@titles"] = symbols[3]
self.ast.metadata.update(p[5]) self.ast.metadata.update(symbols[5])
def p_songbrackets(self, p): @staticmethod
def p_songbrackets(symbols):
"""songbrackets : SONG_LOPTIONS separator dictionary separator SONG_ROPTIONS """songbrackets : SONG_LOPTIONS separator dictionary separator SONG_ROPTIONS
| empty | empty
""" """
if len(p) == 6: if len(symbols) == 6:
p[0] = p[3] symbols[0] = symbols[3]
else: else:
p[0] = {} symbols[0] = {}
def p_songbraces(self, p): @staticmethod
def p_songbraces(symbols):
"""songbraces : SONG_LTITLE separator titles separator SONG_RTITLE """songbraces : SONG_LTITLE separator titles separator SONG_RTITLE
| empty | empty
""" """
if len(p) == 6: if len(symbols) == 6:
p[0] = p[3] symbols[0] = symbols[3]
else: else:
p[0] = [] symbols[0] = []
def p_dictionary(self, p): @staticmethod
def p_dictionary(symbols):
"""dictionary : identifier EQUAL braces dictionary_next """dictionary : identifier EQUAL braces dictionary_next
| identifier EQUAL error dictionary_next | identifier EQUAL error dictionary_next
""" """
if isinstance(p[3], ast.Expression): if isinstance(symbols[3], ast.Expression):
p[0] = {} symbols[0] = {}
p[0][p[1]] = p[3] symbols[0][symbols[1]] = symbols[3]
p[0].update(p[4]) symbols[0].update(symbols[4])
else: else:
raise Exception("Do enclose arguments between braces.") # TODO raise ParsingError("Do enclose arguments between braces.")
def p_identifier(self, p): @staticmethod
def p_identifier(symbols):
"""identifier : CHARACTER identifier """identifier : CHARACTER identifier
| empty | empty
""" """
if len(p) == 2: if len(symbols) == 2:
p[0] = "" symbols[0] = ""
else: else:
p[0] = p[1] + p[2] symbols[0] = symbols[1] + symbols[2]
def p_separator(self, p): @staticmethod
def p_separator(symbols):
"""separator : SPACE """separator : SPACE
| empty | empty
""" """
p[0] = None symbols[0] = None
def p_dictonary_next(self, p): @staticmethod
def p_dictonary_next(symbols):
"""dictionary_next : separator COMMA separator dictionary """dictionary_next : separator COMMA separator dictionary
| empty | empty
""" """
if len(p) == 5: if len(symbols) == 5:
p[0] = p[4] symbols[0] = symbols[4]
else: else:
p[0] = {} symbols[0] = {}
def p_titles(self, p): @staticmethod
def p_titles(symbols):
"""titles : title titles_next""" """titles : title titles_next"""
p[0] = [p[1]] + p[2] symbols[0] = [symbols[1]] + symbols[2]
def p_titles_next(self, p): @staticmethod
def p_titles_next(symbols):
"""titles_next : NEWLINE title titles_next """titles_next : NEWLINE title titles_next
| empty | empty
""" """
if len(p) == 2: if len(symbols) == 2:
p[0] = [] symbols[0] = []
else: else:
p[0] = [p[2]] + p[3] symbols[0] = [symbols[2]] + symbols[3]
def p_title(self, p): @staticmethod
def p_title(symbols):
"""title : brackets title """title : brackets title
| braces title | braces title
| command title | command title
@ -177,18 +215,30 @@ class Parser:
| SPACE title | SPACE title
| empty | empty
""" """
if len(p) == 2: if len(symbols) == 2:
p[0] = None symbols[0] = None
else: else:
if p[2] is None: if symbols[2] is None:
p[0] = ast.Expression(p[1]) symbols[0] = ast.Expression(symbols[1])
else: else:
p[0] = p[2].prepend(p[1]) symbols[0] = symbols[2].prepend(symbols[1])
def tex2plain(string): def tex2plain(string):
return detex(yacc.yacc(module = Parser()).parse(string, lexer = SimpleLexer().lexer)) """Parse string and return its plain text version."""
return detex(
yacc.yacc(module=Parser()).parse(
string,
lexer=SimpleLexer().lexer,
)
)
def parsesong(string, filename=None): def parsesong(string, filename=None):
return detex(yacc.yacc(module = Parser(filename)).parse(string, lexer = SongLexer().lexer).metadata) """Parse song and return its metadata."""
return detex(
yacc.yacc(module=Parser(filename)).parse(
string,
lexer=SongLexer().lexer,
).metadata
)

50
patacrep/latex/testing.py

@ -1,50 +0,0 @@
# Ad-hoc manual check of the LaTeX parser: defines a few sample inputs, runs
# them through `tex2plain` and `parsesong`, and prints the results.
# Test it out
# Sample LaTeX source containing a comment, a few commands and a `\beginsong`
# command with several titles and bracketed options.
song = r"""
\selectlanguage{french}
plop = tag
% Un commentaire
\columns{3}
\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre}[album={Tagada tsoin \"itsoin}, cov={pouf.png}] % un autre
%\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre}[album={Tagada tsoin \"itsoin}] % un autre
%\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre}
Dans [Dm6]cette ruedots [E7]
"""
# Second sample, where the bracketed options of `\beginsong` start on the
# following line.
# NOTE(review): `isong` is not referenced anywhere else in this script.
isong = r"""
\selectlanguage{french}
\songcolumns{2}
\beginsong{Tous les bateaux, tous les oiseaux}
[by={Michel Polnareff},cov={passe-present},album={Passé Présent}]
Dans \[Dm6]cette ruedots [E7]
"""
# Short string with TeX diacritic commands, used as input for `tex2plain`.
tex = "D\\^iacritiqu\\'Es"
# Give the lexer some input
#if 0:
# from syntax import parser
# print(parser.parse(data, debug=0))
# print(parser.parse(data).song_data())
#else:
# from lexer import SimpleLexer
# lexer.input(data)
# for tok in lexer:
# print(tok)
from patacrep.latex import tex2plain
from patacrep.latex.syntax import parsesong
from patacrep.latex.ast import AST
# Print whether the rendered string equals the expected plain text.
print(tex2plain(tex) == "DîacritiquÉs")
# NOTE(review): the `AST` class is passed as the second positional argument of
# `parsesong` -- confirm this is intended.
print(parsesong(song, AST))
# Presumably the expected metadata, printed for manual comparison with the
# output above; nothing is asserted.
print({
"@titles": ["Titre un", "Titre deux", "Tître trois", "Tpitre quatre"],
"@languages": set(["french"]),
"@path": "TODO",
"album": "Tagada tsoin ïtsoin",
"cov": "pouf.png",
}
)
Loading…
Cancel
Save