# Provenance: patch a0c19d3b358dc5c681af73115b37eb371a9a1463 by Luthaf
# (Mon, 6 Oct 2014), "First minimalist chordpro lexer". The patch creates an
# empty patacrep/chordpro/__init__.py and this module,
# patacrep/chordpro/lexer.py.
"""ChordPro lexer"""

import logging

import ply.lex as lex

# Module-scoped logger (instead of the root logger) so that records carry
# this module's name and can be filtered independently.
LOGGER = logging.getLogger(__name__)

#pylint: disable=invalid-name
# Names of every token type the lexer can emit; ply reads this tuple.
tokens = (
    'LBRACKET',
    'RBRACKET',
    'LBRACE',
    'RBRACE',
    'NEWLINE',
    'COLON',
    'WORD',
    'SPACE',
    'NUMBER',
)


class ChordProLexer:
    """ChordPro Lexer class

    Groups the ply token rules (the ``t_*`` attributes and methods) and
    builds a ply lexer from them on instantiation.

    NOTE: for the ``t_*`` functions handling a token, the docstring *is* the
    regular expression used by ply. Do not replace those docstrings with
    prose; document these rules with ``#`` comments instead.
    """

    tokens = tokens

    # Simple tokens, defined by their regular expression only.
    t_LBRACKET = r'\['
    t_RBRACKET = r'\]'
    t_LBRACE = r'{'
    t_RBRACE = r'}'
    t_SPACE = r'[ \t]+'
    t_COLON = r':'
    t_WORD = r'[a-zA-Z_]+'  # TODO: handle unicode

    def __init__(self):
        # The lexer is stored on the class, not on the instance: it is
        # therefore shared by all instances and rebuilt (overwritten) each
        # time a new ChordProLexer is created.
        self.__class__.lexer = lex.lex(module=self)

    # Track line numbers. A Windows line ending (CR LF) is matched as a
    # single NEWLINE token so that it advances the line counter only once.
    @staticmethod
    def t_NEWLINE(token):
        r'\r\n|\n|\r'
        token.lexer.lineno += 1
        return token

    # Comments run from '#' to the end of the line. Nothing is returned, so
    # ply discards the matched text.
    @staticmethod
    def t_comment(token):
        r'\#.*'

    # Integers: the matched text is converted to an int.
    @staticmethod
    def t_NUMBER(token):
        r'[0-9]+'
        token.value = int(token.value)
        return token

    @staticmethod
    def t_error(token):
        """Log the illegal character and skip it, so that lexing goes on."""
        LOGGER.error("Illegal character '%s'", token.value[0])
        token.lexer.skip(1)