Browse Source

Cleaning LaTeX parser

pull/66/head
Louis 10 years ago
parent
commit
d685da1244
  1. 12
      patacrep/latex/__init__.py
  2. 15
      patacrep/latex/ast.py
  3. 14
      patacrep/latex/detex.py
  4. 120
      patacrep/latex/lexer.py
  5. 72
      patacrep/latex/parsetab.py
  6. 202
      patacrep/latex/syntax.py
  7. 50
      patacrep/latex/testing.py

12
patacrep/latex/__init__.py

@ -1,12 +1,17 @@
# -*- coding: utf-8 -*-
"""Very simple LaTeX parser
This module uses an LALR parser to try to parse LaTeX code. LaTeX language
*cannot* be parsed by an LALR parser, so this is a very simple attempt, which
will work on simple cases, but not on complex ones.
"""
from patacrep.latex.syntax import tex2plain as syntax_tex2plain
from patacrep.latex.syntax import parsesong as syntax_parsesong
from patacrep.latex.detex import detex
from patacrep import encoding
"""Very simple LaTeX parser"""
def tex2plain(string):
"""Render LaTeX string
@ -17,9 +22,6 @@ def tex2plain(string):
def parsesong(path):
"""Return a dictionary of data read from the latex file `path`.
This file is a drop in replacement for an old function. It should not
appear as is in the final version, once
https://github.com/patacrep/patacrep/issues/64 has been addressed.
"""
data = syntax_parsesong(encoding.open_read(path).read(), path)
data['@path'] = path

15
patacrep/latex/ast.py

@ -1,20 +1,33 @@
"""Abstract Syntax Tree for LaTeX code."""
# pylint: disable=too-few-public-methods
class AST:
"""Base class for the tree."""
# pylint: disable=no-init
metadata = None
@classmethod
def init_metadata(cls):
"""Clear metadata
As this attribute is a class attribute, it has to be reset at each new
parsing.
"""
cls.metadata = {
'@languages': set(),
}
class Expression(AST):
"""LaTeX expression"""
def __init__(self, value):
super().__init__()
self.content = [value]
def prepend(self, value):
"""Add a value at the beginning of the content list."""
if value is not None:
self.content.insert(0, value)
return self
@ -23,6 +36,7 @@ class Expression(AST):
return "".join([str(item) for item in self.content])
class Command(AST):
"""LaTeX command"""
def __init__(self, name, optional, mandatory):
self.name = name
@ -43,6 +57,7 @@ class Command(AST):
class BeginSong(AST):
"""Beginsong command"""
def __init__(self, titles, arguments):
self.titles = titles

14
patacrep/latex/detex.py

@ -1,3 +1,9 @@
"""Render `very simple` TeX commands in a simple TeX code."""
import logging
LOGGER = logging.getLogger()
MATCH = [
# Diacritics: a
(r"\'a", "á"),
@ -85,6 +91,12 @@ MATCH = [
def detex(arg):
"""Render very simple TeX commands from argument.
Argument can be:
- a string: it is processed;
- a list, dict or set: its values are processed.
"""
if isinstance(arg, dict):
return dict([
(key, detex(value))
@ -104,7 +116,7 @@ def detex(arg):
for (latex, plain) in MATCH:
string = string.replace(latex, plain)
if '\\' in string:
print("WARNING: Remaining command in string '{}'.".format(string))
LOGGER.warning("Remaining command in string '{}'.".format(string))
return string.strip()
else:
return detex(str(arg))

120
patacrep/latex/lexer.py

@ -1,5 +1,11 @@
"""Very simple LaTeX lexer."""
import logging
import ply.lex as lex
LOGGER = logging.getLogger()
#pylint: disable=invalid-name
tokens = (
'LBRACKET',
'RBRACKET',
@ -19,10 +25,10 @@ tokens = (
)
class SimpleLexer:
"""Very simple LaTeX lexer."""
tokens = tokens
# Regular expression rules for simple tokens
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'{'
@ -52,73 +58,93 @@ class SimpleLexer:
self.__class__.lexer = lex.lex(module=self)
# Define a rule so we can track line numbers
def t_newline(self, t):
@staticmethod
def t_newline(token):
r'\n+'
t.lexer.lineno += len(t.value)
token.lexer.lineno += len(token.value)
def t_comment(self, t):
@staticmethod
def t_comment(token):
r'%.*'
pass
# Error handling rule
def t_error(self, t):
print("Illegal character '%s'" % t.value[0]) # TODO log
t.lexer.skip(1)
@staticmethod
def t_error(token):
"""Manage errors"""
LOGGER.error("Illegal character '{}'".format(token.value[0]))
token.lexer.skip(1)
class SongLexer(SimpleLexer):
r"""Very simple song lexer.
In the context of this class, a "song" is some LaTeX code containing the
``\beginsong`` (or ``\sortassong``) command.
"""
states = (
('beginsong', 'inclusive'),
)
# State beginsong
def t_INITIAL_BEGINSONG(self, t):
r'\\beginsong'
t.lexer.push_state('beginsong')
t.lexer.open_brackets = 0
t.lexer.open_braces = 0
return t
def t_beginsong_LBRACKET(self, t):
@staticmethod
def t_INITIAL_BEGINSONG(token):
r'(\\beginsong|\\sortassong)'
token.lexer.push_state('beginsong')
token.lexer.open_brackets = 0
token.lexer.open_braces = 0
return token
@staticmethod
def t_beginsong_LBRACKET(token):
r'\['
if t.lexer.open_brackets == 0:
t.type = 'SONG_LOPTIONS'
t.lexer.open_braces += 1 # TODO Explain
t.lexer.open_brackets += 1
return t
if token.lexer.open_brackets == 0:
token.type = 'SONG_LOPTIONS'
# Count opening and closing braces to know when to leave the
# `beginsong` state.
token.lexer.open_braces += 1
token.lexer.open_brackets += 1
return token
def t_beginsong_RBRACKET(self, t):
@staticmethod
def t_beginsong_RBRACKET(token):
r'\]'
t.lexer.open_brackets -= 1
if t.lexer.open_brackets == 0:
t.type = 'SONG_ROPTIONS'
t.lexer.open_braces -= 1 # TODO Explain
t.lexer.pop_state()
for __ignored in t.lexer: # TODO Explain
token.lexer.open_brackets -= 1
if token.lexer.open_brackets == 0:
token.type = 'SONG_ROPTIONS'
token.lexer.open_braces -= 1
token.lexer.pop_state()
for __ignored in token.lexer:
# In this parser, we only want to read metadata. So, after the
# first ``\beginsong`` command, we can stop parsing.
pass
return t
return token
def t_beginsong_LBRACE(self, t):
@staticmethod
def t_beginsong_LBRACE(token):
r'{'
if t.lexer.open_braces == 0:
t.type = 'SONG_LTITLE'
t.lexer.open_braces += 1
return t
if token.lexer.open_braces == 0:
token.type = 'SONG_LTITLE'
token.lexer.open_braces += 1
return token
def t_beginsong_RBRACE1(self, t):
@staticmethod
def t_beginsong_RBRACE1(token):
r'}(?![ \t\r\n]*\[)'
t.lexer.open_braces -= 1
t.type = 'RBRACE'
if t.lexer.open_braces == 0:
t.lexer.pop_state()
t.type = 'SONG_RTITLE'
return t
def t_beginsong_RBRACE2(self, t):
token.lexer.open_braces -= 1
token.type = 'RBRACE'
if token.lexer.open_braces == 0:
token.lexer.pop_state()
token.type = 'SONG_RTITLE'
return token
@staticmethod
def t_beginsong_RBRACE2(token):
r'}(?=[ \t\r\n]*\[)'
t.lexer.open_braces -= 1
t.type = 'RBRACE'
if t.lexer.open_braces == 0:
t.type = 'SONG_RTITLE'
return t
token.lexer.open_braces -= 1
token.type = 'RBRACE'
if token.lexer.open_braces == 0:
token.type = 'SONG_RTITLE'
return token

72
patacrep/latex/parsetab.py

@ -1,72 +0,0 @@
# parsetab.py
# This file is automatically generated. Do not edit.
_tabversion = '3.2'
_lr_method = 'LALR'
_lr_signature = b'\xa4\n\x7f%6\xc5\x1d\x1cV\xd9V\xf5\x07\xd5\x10\x10'
_lr_action_items = {'LBRACKET':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,],[9,9,9,9,9,9,-9,9,9,9,9,-9,9,-9,-9,-17,-21,-9,-9,-14,9,-19,-26,9,-9,-18,-20,-10,-16,-9,-12,-13,-11,9,9,9,9,9,-24,-22,-15,-25,9,-23,]),'SONG_ROPTIONS':([43,66,74,75,76,79,80,84,],[-11,70,-9,-9,-34,-28,-27,-33,]),'$end':([0,1,2,3,4,5,6,7,8,10,11,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-9,0,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,-7,-9,-14,-9,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SPACE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,74,75,81,],[10,10,10,10,10,10,-9,10,10,-9,10,-9,10,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,50,-9,-18,-20,-10,-16,-9,-12,-13,-11,50,50,50,50,50,-24,-22,-15,-25,50,-23,77,77,77,]),'COMMAND':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,],[11,11,11,11,11,11,-9,11,11,-9,11,-9,11,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,11,-9,-18,-20,-10,-16,-9,-12,-13,-11,11,11,11,11,11,-24,-22,-15,-25,11,-23,]),'RBRACKET':([1,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,38,-7,-9,-14,-9,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'LBRACE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,71,],[12,12,12,12,12,12,-9,12,12,-9,12,-9,12,-9,-9,-17,-21,-9,12,-14,-9,-19,-26,12,-9,-18,-20,-10,-16,12,-12,-13,-11,12,12,12,12,12,-24,-22,-15,-25,12,-23,12,]),'SONG_LOPTIONS':([15,33,35,60,],[-9,-26,54,-25,]),'CHARACTER':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,
46,47,48,50,52,53,54,55,60,62,68,70,77,81,82,83,],[8,8,8,8,8,8,24,8,8,-9,8,24,8,-9,24,-17,-21,24,-9,-14,-9,-19,-26,8,-9,-18,-20,-10,-16,-9,-12,-13,-11,8,8,8,8,8,-24,-22,68,-15,-25,8,68,-23,-31,-9,68,-32,]),'NEWLINE':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,50,51,52,53,55,56,57,58,59,60,61,62,69,70,],[7,7,7,7,7,7,-9,7,7,-9,7,-9,7,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,-9,-9,-18,-20,-10,-16,-9,-12,-13,-11,-9,-43,-9,-9,-9,-9,62,-24,-22,-15,-41,-40,-38,-39,-25,-42,-9,62,-23,]),'error':([71,],[74,]),'EQUAL':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,54,55,60,62,65,67,68,70,72,77,81,82,83,],[13,13,13,13,13,13,-9,13,13,-9,13,-9,13,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,13,-9,-18,-20,-10,-16,-9,-12,-13,-11,13,13,13,13,13,-24,-22,-9,-15,-25,13,-30,71,-9,-23,-29,-31,-9,-9,-32,]),'COMMA':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,50,52,53,55,60,62,70,74,75,76,77,78,],[16,16,16,16,16,16,-9,16,16,-9,16,-9,16,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,16,-9,-18,-20,-10,-16,-9,-12,-13,-11,16,16,16,16,16,-24,-22,-15,-25,16,-23,-9,-9,-32,-31,81,]),'RBRACE':([1,3,4,5,6,7,8,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,30,31,32,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[-9,-8,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-6,-3,-5,-1,-4,-17,-21,-9,-7,-9,-14,-9,43,-19,-2,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SONG_LTITLE':([15,],[34,]),'BEGINSONG':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,22,23,24,27,28,29,31,33,35,36,37,38,39,40,41,42,43,52,53,55,60,70,],[15,15,15,15,15,15,-9,15,15,-9,15,-9,15,-9,-9,-17,-21,-9,-9,-14,-9,-19,-26,-9,-18,-20,-10,-16,-9,-12,-13,-11,-24,-22,-15,-25,-23,]),'SONG_RTITLE':([8,11,13,16,22,23,24,27,28,29,31,34,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,55,56,57,58,59,61,62,63,64,69,73,],[-9,-9,-9,-9,-17,-21,-9,-9,-14,-9,-19,-9,-18,-20,-10,-16,-9,-12,-
13,-11,-9,-43,-9,-9,-9,60,-9,-9,-15,-41,-40,-38,-39,-42,-9,-37,-35,-9,-36,]),}
_lr_action = { }
for _k, _v in _lr_action_items.items():
for _x,_y in zip(_v[0],_v[1]):
if not _x in _lr_action: _lr_action[_x] = { }
_lr_action[_x][_k] = _y
del _lr_action_items
_lr_goto_items = {'word':([0,1,4,5,6,7,9,10,12,14,34,44,46,47,48,50,62,],[1,1,1,1,1,1,1,1,1,1,44,44,44,44,44,44,44,]),'expression':([0,1,4,5,6,7,9,10,12,14,],[2,17,18,19,20,21,25,26,30,32,]),'dictionary_next':([74,75,],[79,80,]),'empty':([0,1,4,5,6,7,8,9,10,11,12,13,14,15,16,24,27,29,34,35,40,44,46,47,48,50,51,54,62,68,69,74,75,81,82,],[3,3,3,3,3,3,23,3,3,28,3,23,3,33,23,23,39,28,45,52,39,45,45,45,45,45,63,65,45,65,63,76,76,83,65,]),'dictionary':([54,82,],[66,84,]),'songbrackets':([35,],[53,]),'command':([0,1,4,5,6,7,9,10,12,14,34,44,46,47,48,50,62,],[4,4,4,4,4,4,4,4,4,4,46,46,46,46,46,46,46,]),'identifier':([54,68,82,],[67,72,67,]),'songbraces':([15,],[35,]),'brackets_list':([11,29,],[27,42,]),'brackets':([0,1,4,5,6,7,9,10,11,12,14,29,34,44,46,47,48,50,62,],[6,6,6,6,6,6,6,6,29,6,6,29,47,47,47,47,47,47,47,]),'word_next':([8,13,16,24,],[22,31,36,37,]),'beginsong':([0,1,4,5,6,7,9,10,12,14,],[5,5,5,5,5,5,5,5,5,5,]),'title':([34,44,46,47,48,50,62,],[51,56,57,58,59,61,69,]),'titles_next':([51,69,],[64,73,]),'braces':([0,1,4,5,6,7,9,10,12,14,27,34,40,44,46,47,48,50,62,71,],[14,14,14,14,14,14,14,14,14,14,40,48,40,48,48,48,48,48,48,75,]),'separator':([74,75,81,],[78,78,82,]),'titles':([34,],[49,]),'braces_list':([27,40,],[41,55,]),}
_lr_goto = { }
for _k, _v in _lr_goto_items.items():
for _x,_y in zip(_v[0],_v[1]):
if not _x in _lr_goto: _lr_goto[_x] = { }
_lr_goto[_x][_k] = _y
del _lr_goto_items
_lr_productions = [
("S' -> expression","S'",1,None,None,None),
('expression -> brackets expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',8),
('expression -> braces expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',9),
('expression -> command expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',10),
('expression -> NEWLINE expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',11),
('expression -> beginsong expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',12),
('expression -> word expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',13),
('expression -> SPACE expression','expression',2,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',14),
('expression -> empty','expression',1,'p_expression','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',15),
('empty -> <empty>','empty',0,'p_empty','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',26),
('brackets -> LBRACKET expression RBRACKET','brackets',3,'p_brackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',30),
('braces -> LBRACE expression RBRACE','braces',3,'p_braces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',34),
('command -> COMMAND brackets_list braces_list','command',3,'p_command','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',38),
('brackets_list -> brackets brackets_list','brackets_list',2,'p_brackets_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',42),
('brackets_list -> empty','brackets_list',1,'p_brackets_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',43),
('braces_list -> braces braces_list','braces_list',2,'p_braces_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',52),
('braces_list -> empty','braces_list',1,'p_braces_list','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',53),
('word -> CHARACTER word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',62),
('word -> COMMA word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',63),
('word -> EQUAL word_next','word',2,'p_word','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',64),
('word_next -> CHARACTER word_next','word_next',2,'p_word_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',69),
('word_next -> empty','word_next',1,'p_word_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',70),
('beginsong -> BEGINSONG songbraces songbrackets','beginsong',3,'p_beginsong','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',78),
('songbrackets -> SONG_LOPTIONS dictionary SONG_ROPTIONS','songbrackets',3,'p_songbrackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',82),
('songbrackets -> empty','songbrackets',1,'p_songbrackets','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',83),
('songbraces -> SONG_LTITLE titles SONG_RTITLE','songbraces',3,'p_songbraces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',91),
('songbraces -> empty','songbraces',1,'p_songbraces','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',92),
('dictionary -> identifier EQUAL braces dictionary_next','dictionary',4,'p_dictionary','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',100),
('dictionary -> identifier EQUAL error dictionary_next','dictionary',4,'p_dictionary','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',101),
('identifier -> CHARACTER identifier','identifier',2,'p_identifier','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',111),
('identifier -> empty','identifier',1,'p_identifier','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',112),
('separator -> SPACE','separator',1,'p_separator','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',120),
('separator -> empty','separator',1,'p_separator','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',121),
('dictionary_next -> separator COMMA separator dictionary','dictionary_next',4,'p_dictonary_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',126),
('dictionary_next -> empty','dictionary_next',1,'p_dictonary_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',127),
('titles -> title titles_next','titles',2,'p_titles','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',135),
('titles_next -> NEWLINE title titles_next','titles_next',3,'p_titles_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',139),
('titles_next -> empty','titles_next',1,'p_titles_next','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',140),
('title -> brackets title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',148),
('title -> braces title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',149),
('title -> command title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',150),
('title -> word title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',151),
('title -> SPACE title','title',2,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',152),
('title -> empty','title',1,'p_title','/home/louis/projets/patacrep/patacrep/patacrep/latex/syntax.py',153),
]

202
patacrep/latex/syntax.py

@ -1,11 +1,28 @@
"""Very simple LaTeX parser"""
import logging
import ply.yacc as yacc
import inspect # TODO supprimer
from patacrep.latex.lexer import tokens, SimpleLexer, SongLexer
from patacrep.latex import ast
from patacrep.errors import SongbookError
from patacrep.latex.detex import detex
LOGGER = logging.getLogger()
class ParsingError(SongbookError):
    """Error raised when LaTeX code cannot be parsed.

    The human-readable description is kept in the ``message`` attribute and
    is what ``str()`` returns for this exception.
    """

    def __init__(self, message):
        # Hand the message (not the exception instance itself) to the base
        # class, so the exception arguments carry the text rather than a
        # self-reference.
        super().__init__(message)
        self.message = message

    def __str__(self):
        return self.message
# pylint: disable=line-too-long
class Parser:
"""LaTeX parser."""
def __init__(self, filename=None):
self.tokens = tokens
@ -13,22 +30,26 @@ class Parser:
self.ast.init_metadata()
self.filename = filename
def __find_column(self, token):
@staticmethod
def __find_column(token):
"""Return the column of ``token``."""
last_cr = token.lexer.lexdata.rfind('\n', 0, token.lexpos)
if last_cr < 0:
last_cr = 0
column = (token.lexpos - last_cr) + 1
return column
def p_error(self, p):
print("Erreur fichier {}, ligne {}, position {}.".format( # TODO
def p_error(self, token):
"""Manage parsing errors."""
LOGGER.error("Erreur fichier {}, ligne {}, position {}.".format(
str(self.filename),
p.lineno,
self.__find_column(p),
token.lineno,
self.__find_column(token),
)
)
def p_expression(self, p):
@staticmethod
def p_expression(symbols):
"""expression : brackets expression
| braces expression
| command expression
@ -38,138 +59,155 @@ class Parser:
| SPACE expression
| empty
"""
if len(p) == 3:
if p[2] is None:
p[0] = ast.Expression(p[1])
if len(symbols) == 3:
if symbols[2] is None:
symbols[0] = ast.Expression(symbols[1])
else:
p[0] = p[2].prepend(p[1])
symbols[0] = symbols[2].prepend(symbols[1])
else:
p[0] = None
symbols[0] = None
def p_empty(self, p):
@staticmethod
def p_empty(__symbols):
"""empty :"""
return None
def p_brackets(self, p):
@staticmethod
def p_brackets(symbols):
"""brackets : LBRACKET expression RBRACKET"""
p[0] = p[2]
symbols[0] = symbols[2]
def p_braces(self, p):
@staticmethod
def p_braces(symbols):
"""braces : LBRACE expression RBRACE"""
p[0] = p[2]
symbols[0] = symbols[2]
def p_command(self, p):
@staticmethod
def p_command(symbols):
"""command : COMMAND brackets_list braces_list"""
p[0] = ast.Command(p[1], p[2], p[3])
symbols[0] = ast.Command(symbols[1], symbols[2], symbols[3])
def p_brackets_list(self, p):
@staticmethod
def p_brackets_list(symbols):
"""brackets_list : brackets brackets_list
| empty
"""
if len(p) == 3:
p[0] = p[2]
p[0].insert(0, p[1])
if len(symbols) == 3:
symbols[0] = symbols[2]
symbols[0].insert(0, symbols[1])
else:
p[0] = []
symbols[0] = []
def p_braces_list(self, p):
@staticmethod
def p_braces_list(symbols):
"""braces_list : braces braces_list
| empty
"""
if len(p) == 3:
p[0] = p[2]
p[0].insert(0, p[1])
if len(symbols) == 3:
symbols[0] = symbols[2]
symbols[0].insert(0, symbols[1])
else:
p[0] = []
symbols[0] = []
def p_word(self, p):
@staticmethod
def p_word(symbols):
"""word : CHARACTER word_next
| COMMA word_next
| EQUAL word_next
"""
p[0] = p[1] + p[2]
symbols[0] = symbols[1] + symbols[2]
def p_word_next(self, p):
@staticmethod
def p_word_next(symbols):
"""word_next : CHARACTER word_next
| empty
"""
if len(p) == 2:
p[0] = ""
if len(symbols) == 2:
symbols[0] = ""
else:
p[0] = p[1] + p[2]
symbols[0] = symbols[1] + symbols[2]
def p_beginsong(self, p):
def p_beginsong(self, symbols):
"""beginsong : BEGINSONG separator songbraces separator songbrackets"""
self.ast.metadata["@titles"] = p[3]
self.ast.metadata.update(p[5])
self.ast.metadata["@titles"] = symbols[3]
self.ast.metadata.update(symbols[5])
def p_songbrackets(self, p):
@staticmethod
def p_songbrackets(symbols):
"""songbrackets : SONG_LOPTIONS separator dictionary separator SONG_ROPTIONS
| empty
"""
if len(p) == 6:
p[0] = p[3]
if len(symbols) == 6:
symbols[0] = symbols[3]
else:
p[0] = {}
symbols[0] = {}
def p_songbraces(self, p):
@staticmethod
def p_songbraces(symbols):
"""songbraces : SONG_LTITLE separator titles separator SONG_RTITLE
| empty
"""
if len(p) == 6:
p[0] = p[3]
if len(symbols) == 6:
symbols[0] = symbols[3]
else:
p[0] = []
symbols[0] = []
def p_dictionary(self, p):
@staticmethod
def p_dictionary(symbols):
"""dictionary : identifier EQUAL braces dictionary_next
| identifier EQUAL error dictionary_next
"""
if isinstance(p[3], ast.Expression):
p[0] = {}
p[0][p[1]] = p[3]
p[0].update(p[4])
if isinstance(symbols[3], ast.Expression):
symbols[0] = {}
symbols[0][symbols[1]] = symbols[3]
symbols[0].update(symbols[4])
else:
raise Exception("Do enclose arguments between braces.") # TODO
raise ParsingError("Do enclose arguments between braces.")
def p_identifier(self, p):
@staticmethod
def p_identifier(symbols):
"""identifier : CHARACTER identifier
| empty
"""
if len(p) == 2:
p[0] = ""
if len(symbols) == 2:
symbols[0] = ""
else:
p[0] = p[1] + p[2]
symbols[0] = symbols[1] + symbols[2]
def p_separator(self, p):
@staticmethod
def p_separator(symbols):
"""separator : SPACE
| empty
"""
p[0] = None
symbols[0] = None
def p_dictonary_next(self, p):
@staticmethod
def p_dictonary_next(symbols):
"""dictionary_next : separator COMMA separator dictionary
| empty
"""
if len(p) == 5:
p[0] = p[4]
if len(symbols) == 5:
symbols[0] = symbols[4]
else:
p[0] = {}
symbols[0] = {}
def p_titles(self, p):
@staticmethod
def p_titles(symbols):
"""titles : title titles_next"""
p[0] = [p[1]] + p[2]
symbols[0] = [symbols[1]] + symbols[2]
def p_titles_next(self, p):
@staticmethod
def p_titles_next(symbols):
"""titles_next : NEWLINE title titles_next
| empty
"""
if len(p) == 2:
p[0] = []
if len(symbols) == 2:
symbols[0] = []
else:
p[0] = [p[2]] + p[3]
symbols[0] = [symbols[2]] + symbols[3]
def p_title(self, p):
@staticmethod
def p_title(symbols):
"""title : brackets title
| braces title
| command title
@ -177,18 +215,30 @@ class Parser:
| SPACE title
| empty
"""
if len(p) == 2:
p[0] = None
if len(symbols) == 2:
symbols[0] = None
else:
if p[2] is None:
p[0] = ast.Expression(p[1])
if symbols[2] is None:
symbols[0] = ast.Expression(symbols[1])
else:
p[0] = p[2].prepend(p[1])
symbols[0] = symbols[2].prepend(symbols[1])
def tex2plain(string):
return detex(yacc.yacc(module = Parser()).parse(string, lexer = SimpleLexer().lexer))
"""Parse string and return its plain text version."""
return detex(
yacc.yacc(module=Parser()).parse(
string,
lexer=SimpleLexer().lexer,
)
)
def parsesong(string, filename=None):
return detex(yacc.yacc(module = Parser(filename)).parse(string, lexer = SongLexer().lexer).metadata)
"""Parse song and return its metadata."""
return detex(
yacc.yacc(module=Parser(filename)).parse(
string,
lexer=SongLexer().lexer,
).metadata
)

50
patacrep/latex/testing.py

@ -1,50 +0,0 @@
# Test it out
song = r"""
\selectlanguage{french}
plop = tag
% Un commentaire
\columns{3}
\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre}[album={Tagada tsoin \"itsoin}, cov={pouf.png}] % un autre
%\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre}[album={Tagada tsoin \"itsoin}] % un autre
%\beginsong{Titre un \\Titre deux \\ Tître t{ro}ïs \\ T\^itre quatre}
Dans [Dm6]cette ruedots [E7]
"""
isong = r"""
\selectlanguage{french}
\songcolumns{2}
\beginsong{Tous les bateaux, tous les oiseaux}
[by={Michel Polnareff},cov={passe-present},album={Passé Présent}]
Dans \[Dm6]cette ruedots [E7]
"""
tex = "D\\^iacritiqu\\'Es"
# Give the lexer some input
#if 0:
# from syntax import parser
# print(parser.parse(data, debug=0))
# print(parser.parse(data).song_data())
#else:
# from lexer import SimpleLexer
# lexer.input(data)
# for tok in lexer:
# print(tok)
from patacrep.latex import tex2plain
from patacrep.latex.syntax import parsesong
from patacrep.latex.ast import AST
print(tex2plain(tex) == "DîacritiquÉs")
print(parsesong(song, AST))
print({
"@titles": ["Titre un", "Titre deux", "Tître trois", "Tpitre quatre"],
"@languages": set(["french"]),
"@path": "TODO",
"album": "Tagada tsoin ïtsoin",
"cov": "pouf.png",
}
)
Loading…
Cancel
Save