mirror of https://github.com/patacrep/patacrep.git
Engine for LaTeX songbooks
http://www.patacrep.com
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
150 lines
3.5 KiB
150 lines
3.5 KiB
"""Very simple LaTeX lexer."""
|
|
|
|
import logging
|
|
import ply.lex as lex
|
|
|
|
LOGGER = logging.getLogger()
|
|
|
|
#pylint: disable=invalid-name
|
|
tokens = (
|
|
'LBRACKET',
|
|
'RBRACKET',
|
|
'LBRACE',
|
|
'RBRACE',
|
|
'COMMAND',
|
|
'ENDOFLINE',
|
|
'COMMA',
|
|
'EQUAL',
|
|
'CHARACTER',
|
|
'SPACE',
|
|
'BEGINSONG',
|
|
'SONG_LTITLE',
|
|
'SONG_RTITLE',
|
|
'SONG_LOPTIONS',
|
|
'SONG_ROPTIONS',
|
|
)
|
|
|
|
class SimpleLexer:
|
|
"""Very simple LaTeX lexer."""
|
|
|
|
tokens = tokens
|
|
|
|
t_LBRACKET = r'\['
|
|
t_RBRACKET = r'\]'
|
|
t_LBRACE = r'{'
|
|
t_RBRACE = r'}'
|
|
t_COMMAND = r'\\([@a-zA-Z]+|[^\\])'
|
|
t_ENDOFLINE = r'\\\\'
|
|
SPECIAL_CHARACTERS = (
|
|
t_LBRACKET +
|
|
t_RBRACKET +
|
|
t_RBRACE +
|
|
t_LBRACE +
|
|
r"\\" +
|
|
r" " +
|
|
r"\n" +
|
|
r"\r" +
|
|
r"%" +
|
|
r"=" +
|
|
r","
|
|
)
|
|
t_CHARACTER = r'[^{}]'.format(SPECIAL_CHARACTERS)
|
|
t_EQUAL = r'='
|
|
t_COMMA = r','
|
|
|
|
t_SPACE = r'[ \t\n\r]+'
|
|
|
|
def __init__(self):
|
|
self.__class__.lexer = lex.lex(module=self)
|
|
|
|
# Define a rule so we can track line numbers
|
|
@staticmethod
|
|
def t_endofline(token):
|
|
r'\n+'
|
|
token.lexer.lineno += len(token.value)
|
|
|
|
@staticmethod
|
|
def t_comment(token):
|
|
r'%.*'
|
|
pass
|
|
|
|
# Error handling rule
|
|
@staticmethod
|
|
def t_error(token):
|
|
"""Manage errors"""
|
|
LOGGER.error("Illegal character '{}'".format(token.value[0]))
|
|
token.lexer.skip(1)
|
|
|
|
class SongLexer(SimpleLexer):
|
|
r"""Very simple song lexer.
|
|
|
|
In the context of this class, a "song" is some LaTeX code containing the
|
|
``\beginsong`` (or ``\sortassong``) command.
|
|
"""
|
|
|
|
states = (
|
|
('beginsong', 'inclusive'),
|
|
)
|
|
|
|
# State beginsong
|
|
@staticmethod
|
|
def t_INITIAL_BEGINSONG(token):
|
|
r'(\\beginsong|\\sortassong)'
|
|
token.lexer.push_state('beginsong')
|
|
token.lexer.open_brackets = 0
|
|
token.lexer.open_braces = 0
|
|
return token
|
|
|
|
@staticmethod
|
|
def t_beginsong_LBRACKET(token):
|
|
r'\['
|
|
if token.lexer.open_brackets == 0:
|
|
token.type = 'SONG_LOPTIONS'
|
|
|
|
# Count opening and closing braces to know when to leave the
|
|
# `beginsong` state.
|
|
token.lexer.open_braces += 1
|
|
token.lexer.open_brackets += 1
|
|
return token
|
|
|
|
@staticmethod
|
|
def t_beginsong_RBRACKET(token):
|
|
r'\]'
|
|
token.lexer.open_brackets -= 1
|
|
if token.lexer.open_brackets == 0:
|
|
token.type = 'SONG_ROPTIONS'
|
|
token.lexer.open_braces -= 1
|
|
token.lexer.pop_state()
|
|
for __ignored in token.lexer:
|
|
# In this parser, we only want to read metadata. So, after the
|
|
# first ``\beginsong`` command, we can stop parsing.
|
|
pass
|
|
return token
|
|
|
|
@staticmethod
|
|
def t_beginsong_LBRACE(token):
|
|
r'{'
|
|
if token.lexer.open_braces == 0:
|
|
token.type = 'SONG_LTITLE'
|
|
token.lexer.open_braces += 1
|
|
return token
|
|
|
|
@staticmethod
|
|
def t_beginsong_RBRACE1(token):
|
|
r'}(?![ \t\r\n]*\[)'
|
|
token.lexer.open_braces -= 1
|
|
token.type = 'RBRACE'
|
|
if token.lexer.open_braces == 0:
|
|
token.lexer.pop_state()
|
|
token.type = 'SONG_RTITLE'
|
|
return token
|
|
|
|
@staticmethod
|
|
def t_beginsong_RBRACE2(token):
|
|
r'}(?=[ \t\r\n]*\[)'
|
|
token.lexer.open_braces -= 1
|
|
token.type = 'RBRACE'
|
|
if token.lexer.open_braces == 0:
|
|
token.type = 'SONG_RTITLE'
|
|
return token
|
|
|
|
|