diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..bca3a82b --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include LICENSE NEWS readme.md Requirements.txt stdeb.cfg +recursive-include patacrep/data * diff --git a/Requirements.txt b/Requirements.txt index 9678473f..4aa4a442 100644 --- a/Requirements.txt +++ b/Requirements.txt @@ -2,4 +2,3 @@ Jinja2==2.7.3 argparse==1.2.1 chardet==2.2.1 unidecode>=0.04.16 -https://github.com/tiarno/plastex/archive/master.zip \ No newline at end of file diff --git a/patacrep/build.py b/patacrep/build.py index c3a414d4..be9b48f4 100644 --- a/patacrep/build.py +++ b/patacrep/build.py @@ -9,7 +9,7 @@ import logging import os.path from subprocess import Popen, PIPE, call -from patacrep import __DATADIR__, authors, content, errors +from patacrep import __DATADIR__, authors, content, errors, files from patacrep.index import process_sxd from patacrep.templates import TexRenderer from patacrep.songs import DataSubpath @@ -50,14 +50,13 @@ class Songbook(object): super(Songbook, self).__init__() self.config = raw_songbook self.basename = basename - self.contentlist = [] # Some special keys have their value processed. self._set_datadir() def _set_datadir(self): """Set the default values for datadir""" try: - if isinstance(self.config['datadir'], basestring): + if isinstance(self.config['datadir'], str): self.config['datadir'] = [self.config['datadir']] except KeyError: # No datadir in the raw_songbook self.config['datadir'] = [os.path.abspath('.')] @@ -86,7 +85,7 @@ class Songbook(object): - output: a file object, in which the file will be written. """ # Updating configuration - config = DEFAULT_CONFIG + config = DEFAULT_CONFIG.copy() config.update(self.config) renderer = TexRenderer( config['template'], @@ -100,18 +99,33 @@ class Songbook(object): copy.deepcopy(config['authwords']) ) - self.config = config - # Configuration set + # Loading custom plugins + config['_content_plugins'] = files.load_plugins( + datadirs=config.get('datadir', []), + subdir=['content'], + variable='CONTENT_PLUGINS', + error=( + "File {filename}: Keyword '{keyword}' is already used. Ignored." + ), + ) + config['_file_plugins'] = files.load_plugins( + datadirs=config.get('datadir', []), + subdir=['songs'], + variable='FILE_PLUGINS', + error=( + "File {filename}: Keyword '{keyword}' is already used. Ignored." + ), + ) - self.contentlist = content.process_content( - self.config.get('content', []), - self.config, + # Configuration set + config['render_content'] = content.render_content + config['content'] = content.process_content( + config.get('content', []), + config, ) - self.config['render_content'] = content.render_content - self.config['content'] = self.contentlist - self.config['filename'] = output.name[:-4] + config['filename'] = output.name[:-4] - renderer.render_tex(output, self.config) + renderer.render_tex(output, config) class SongbookBuilder(object): @@ -213,7 +227,7 @@ class SongbookBuilder(object): log = '' line = process.stdout.readline() while line: - log += line + log += str(line) line = process.stdout.readline() LOGGER.debug(log) diff --git a/patacrep/content/__init__.py b/patacrep/content/__init__.py index 2f4fea0e..5214a55b 100755 --- a/patacrep/content/__init__.py +++ b/patacrep/content/__init__.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """Content plugin management. @@ -69,7 +69,6 @@ More documentation in the docstring of Content. """ import glob -import importlib import jinja2 import logging import os @@ -134,53 +133,6 @@ class ContentError(SongbookError): def __str__(self): return "Content: {}: {}".format(self.keyword, self.message) -def load_plugins(config): - """Load all content plugins, and return a dictionary of those plugins. - - Return value: a dictionary where: - - keys are the keywords ; - - values are functions triggered when this keyword is met. - """ - plugins = {} - directory_list = ( - [ - os.path.join(datadir, "python", "content") - for datadir in config.get('datadir', []) - ] - + [os.path.dirname(__file__)] - ) - for directory in directory_list: - if not os.path.exists(directory): - LOGGER.debug( - "Ignoring non-existent directory '%s'.", - directory - ) - continue - sys.path.append(directory) - for name in glob.glob(os.path.join(directory, '*.py')): - if name.endswith(".py") and os.path.basename(name) != "__init__.py": - if directory == os.path.dirname(__file__): - plugin = importlib.import_module( - 'patacrep.content.{}'.format( - os.path.basename(name[:-len('.py')]) - ) - ) - else: - plugin = importlib.import_module( - os.path.basename(name[:-len('.py')]) - ) - for (key, value) in plugin.CONTENT_PLUGINS.items(): - if key in plugins: - LOGGER.warning( - "File %s: Keyword '%s' is already used. Ignored.", - files.relpath(name), - key, - ) - continue - plugins[key] = value - del sys.path[-1] - return plugins - @jinja2.contextfunction def render_content(context, content): """Render the content of the songbook as a LaTeX code. @@ -224,12 +176,13 @@ def process_content(content, config=None): included in the .tex file. """ contentlist = [] - plugins = load_plugins(config) - keyword_re = re.compile(ur'^ *(?P\w*) *(\((?P.*)\))? *$') + plugins = config.get('_content_plugins', {}) + + keyword_re = re.compile(r'^ *(?P\w*) *(\((?P.*)\))? *$') if not content: content = [["song"]] for elem in content: - if isinstance(elem, basestring): + if isinstance(elem, str): elem = ["song", elem] if len(content) == 0: content = ["song"] diff --git a/patacrep/content/cwd.py b/patacrep/content/cwd.py index 338adb76..5e55d68d 100755 --- a/patacrep/content/cwd.py +++ b/patacrep/content/cwd.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """Change base directory before importing songs.""" diff --git a/patacrep/content/section.py b/patacrep/content/section.py index 2bde2eb2..96215f68 100755 --- a/patacrep/content/section.py +++ b/patacrep/content/section.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """Allow LaTeX sections (starred or not) as content of a songbook.""" @@ -26,9 +26,9 @@ class Section(Content): def render(self, __context): if self.short is None: - return ur'\{}{{{}}}'.format(self.keyword, self.name) + return r'\{}{{{}}}'.format(self.keyword, self.name) else: - return ur'\{}[{}]{{{}}}'.format(self.keyword, self.short, self.name) + return r'\{}[{}]{{{}}}'.format(self.keyword, self.short, self.name) #pylint: disable=unused-argument def parse(keyword, argument, contentlist, config): diff --git a/patacrep/content/song.py b/patacrep/content/song.py index b83fd134..50cb7349 100755 --- a/patacrep/content/song.py +++ b/patacrep/content/song.py @@ -1,45 +1,18 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """Plugin to include songs to the songbook.""" import glob -import jinja2 import logging import os -from patacrep.content import Content, process_content, ContentError +from patacrep.content import process_content, ContentError from patacrep import files, errors from patacrep.songs import Song LOGGER = logging.getLogger(__name__) -class SongRenderer(Content, Song): - """Render a song in the .tex file.""" - - def begin_new_block(self, previous, __context): - """Return a boolean stating if a new block is to be created.""" - return not isinstance(previous, SongRenderer) - - def begin_block(self, context): - """Return the string to begin a block.""" - indexes = context.resolve("indexes") - if isinstance(indexes, jinja2.runtime.Undefined): - indexes = "" - return ur'\begin{songs}{%s}' % indexes - - def end_block(self, __context): - """Return the string to end a block.""" - return ur'\end{songs}' - - def render(self, context): - """Return the string that will render the song.""" - return ur'\input{{{}}}'.format(files.path2posix( - files.relpath( - self.fullpath, - os.path.dirname(context['filename']) - ))) - #pylint: disable=unused-argument def parse(keyword, argument, contentlist, config): """Parse data associated with keyword 'song'. @@ -51,22 +24,17 @@ def parse(keyword, argument, contentlist, config): expressions (interpreted using the glob module), referring to songs. - config: the current songbook configuration dictionary. - Return a list of SongRenderer() instances. + Return a list of Song() instances. """ if '_languages' not in config: config['_languages'] = set() songlist = [] + plugins = config.get('_file_plugins', {}) for songdir in config['_songdir']: if contentlist: break - contentlist = [ - filename - for filename - in ( - files.recursive_find(songdir.fullpath, "*.sg") - + files.recursive_find(songdir.fullpath, "*.is") - ) - ] + contentlist = files.recursive_find(songdir.fullpath, plugins.keys()) + for elem in contentlist: before = len(songlist) for songdir in config['_songdir']: @@ -74,21 +42,16 @@ def parse(keyword, argument, contentlist, config): continue with files.chdir(songdir.datadir): for filename in glob.iglob(os.path.join(songdir.subpath, elem)): - if not ( - filename.endswith('.sg') or - filename.endswith('.is') - ): + LOGGER.debug('Parsing file "{}"…'.format(filename)) + try: + renderer = plugins[filename.split('.')[-1]] + except KeyError: LOGGER.warning(( - 'File "{}" is not a ".sg" or ".is" file. Ignored.' + 'I do not know how to parse file "{}". Ignored.' ).format(os.path.join(songdir.datadir, filename)) ) continue - LOGGER.debug('Parsing file "{}"…'.format(filename)) - song = SongRenderer( - songdir.datadir, - filename, - config, - ) + song = renderer(songdir.datadir, filename, config) songlist.append(song) config["_languages"].update(song.languages) if len(songlist) > before: @@ -129,7 +92,7 @@ def process_songs(content, config=None): item for item in contentlist - if not isinstance(item, SongRenderer) + if not isinstance(item, Song) ] if not_songs: raise OnlySongsError(not_songs) diff --git a/patacrep/content/songsection.py b/patacrep/content/songsection.py index b4c9d446..07153591 100755 --- a/patacrep/content/songsection.py +++ b/patacrep/content/songsection.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """Allow 'songchapter' and 'songsection' as content of a songbook.""" @@ -19,7 +19,7 @@ class SongSection(Content): def render(self, __context): """Render this section or chapter.""" - return ur'\{}{{{}}}'.format(self.keyword, self.name) + return r'\{}{{{}}}'.format(self.keyword, self.name) #pylint: disable=unused-argument def parse(keyword, argument, contentlist, config): diff --git a/patacrep/content/sorted.py b/patacrep/content/sorted.py index e9a5e677..f95065d6 100755 --- a/patacrep/content/sorted.py +++ b/patacrep/content/sorted.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """Sorted list of songs. @@ -9,9 +9,9 @@ to a songbook. import locale import logging +import unidecode from patacrep import files -from patacrep import encoding from patacrep.content import ContentError from patacrep.content.song import OnlySongsError, process_songs @@ -27,11 +27,11 @@ def normalize_string(string): - lower case; - passed through locale.strxfrm(). """ - return locale.strxfrm(encoding.unidecode(string.lower().strip())) + return locale.strxfrm(unidecode.unidecode(string.lower().strip())) def normalize_field(field): """Return a normalized field, it being a string or a list of strings.""" - if isinstance(field, basestring): + if isinstance(field, str): return normalize_string(field) elif isinstance(field, list) or isinstance(field, tuple): return [normalize_field(string) for string in field] @@ -55,7 +55,7 @@ def key_generator(sort): field = song.authors else: try: - field = song.args[key] + field = song.data[key] except KeyError: LOGGER.debug( "Ignoring unknown key '{}' for song {}.".format( @@ -63,7 +63,7 @@ def key_generator(sort): files.relpath(song.fullpath), ) ) - field = u"" + field = "" songkey.append(normalize_field(field)) return songkey return ordered_song_keys diff --git a/patacrep/content/tex.py b/patacrep/content/tex.py index 5f80fcfc..38593f38 100755 --- a/patacrep/content/tex.py +++ b/patacrep/content/tex.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """Include LaTeX raw code in the songbook.""" @@ -18,7 +18,7 @@ class LaTeX(Content): self.filename = filename def render(self, context): - return ur'\input{{{}}}'.format(files.relpath( + return r'\input{{{}}}'.format(files.relpath( self.filename, os.path.dirname(context['filename']), )) diff --git a/patacrep/data/examples/example-all.sb b/patacrep/data/examples/example-all.sb index 327ed801..0b1252aa 100644 --- a/patacrep/data/examples/example-all.sb +++ b/patacrep/data/examples/example-all.sb @@ -10,5 +10,7 @@ "authwords" : { "sep" : ["and", "et"] }, - "datadir" : "." + "datadir" : ".", + "content": [["sorted"]] + } diff --git a/patacrep/data/examples/songs/intersong.is b/patacrep/data/examples/songs/intersong.is new file mode 100644 index 00000000..dbeeb547 --- /dev/null +++ b/patacrep/data/examples/songs/intersong.is @@ -0,0 +1,6 @@ +\selectlanguage{french} +\sortassong{}[by={QQ}] +\begin{intersong} + +Lorem ipsum +\end{intersong} diff --git a/patacrep/encoding.py b/patacrep/encoding.py index 8ba7de61..ca917295 100644 --- a/patacrep/encoding.py +++ b/patacrep/encoding.py @@ -5,7 +5,6 @@ import codecs import chardet import logging -from unidecode import unidecode as unidecode_orig LOGGER = logging.getLogger(__name__) @@ -17,32 +16,6 @@ def open_read(filename, mode='r'): return codecs.open( filename, mode=mode, - encoding=chardet.detect(open(filename, "r").read())['encoding'], + encoding=chardet.detect(open(filename, 'rb').read())['encoding'], errors='replace', ) - -def basestring2unicode(arg): - """Return the unicode version of the argument, guessing original encoding. - """ - if isinstance(arg, unicode): - return arg - elif isinstance(arg, basestring): - return arg.decode( - encoding=chardet.detect(arg)['encoding'], - errors='replace', - ) - else: - LOGGER.warning("Cannot decode string {}. Ignored.".format(str(arg))) - return "" - -def list2unicode(arg): - """Return the unicode version of the argument, guessing original encoding. - - Argument is a list of strings. If an item is of another type, it is - silently ignored (an empty string is returned). - """ - return [basestring2unicode(item) for item in arg] - -def unidecode(arg): - """Return a unicode version of a unidecoded string.""" - return unicode(unidecode_orig(arg)) diff --git a/patacrep/errors.py b/patacrep/errors.py index ff3d210d..a065c322 100644 --- a/patacrep/errors.py +++ b/patacrep/errors.py @@ -17,10 +17,7 @@ class SBFileError(SongbookError): self.message = message def __str__(self): - if self.message is None: - return str(self.original) - else: - return self.message + return self.message class TemplateError(SongbookError): """Error during template generation""" diff --git a/patacrep/files.py b/patacrep/files.py index 48928a00..5c84e0b0 100644 --- a/patacrep/files.py +++ b/patacrep/files.py @@ -2,23 +2,35 @@ """File system utilities.""" from contextlib import contextmanager -import fnmatch +import glob +import importlib +import logging import os import posixpath +import re +import sys -def recursive_find(root_directory, pattern): - """Recursively find files matching a pattern, from a root_directory. +LOGGER = logging.getLogger(__name__) - Return a list of files matching the pattern. +def recursive_find(root_directory, extensions): + """Recursively find files with some extension, from a root_directory. + + Return a list of files matching those conditions. + + Arguments: + - `extensions`: list of accepted extensions. + - `root_directory`: root directory of the search. """ if not os.path.isdir(root_directory): return [] matches = [] + pattern = re.compile(r'.*\.({})$'.format('|'.join(extensions))) with chdir(root_directory): - for root, _, filenames in os.walk(os.curdir): - for filename in fnmatch.filter(filenames, pattern): - matches.append(os.path.join(root, filename)) + for root, __ignored, filenames in os.walk(os.curdir): + for filename in filenames: + if pattern.match(filename): + matches.append(os.path.join(root, filename)) return matches def relpath(path, start=None): @@ -59,3 +71,63 @@ def chdir(path): os.chdir(olddir) else: yield + +def load_plugins(datadirs, subdir, variable, error): + """Load all content plugins, and return a dictionary of those plugins. + + A plugin is a .py file, submodule of `subdir`, located in one of the + directories of `datadirs`. It contains a dictionary `variable`. The return + value is the union of the dictionaries of the loaded plugins. + + Arguments: + - datadirs: list of directories (as strings) in which files has to be + searched. + - subdir: modules (as a list of strings) files has to be submodules of + (e.g. if `subdir` is `['first', 'second']`, search files are of the form + `first/second/*.py`. + - variable: Name of the variable holding the dictionary. + - error: Error message raised if a key appears several times. + """ + plugins = {} + directory_list = ( + [ + os.path.join(datadir, "python", *subdir) #pylint: disable=star-args + for datadir in datadirs + ] + + [os.path.dirname(__file__)] + ) + for directory in directory_list: + if not os.path.exists(directory): + LOGGER.debug( + "Ignoring non-existent directory '%s'.", + directory + ) + continue + sys.path.append(directory) + for name in glob.glob(os.path.join(directory, *(subdir + ['*.py']))): + if name.endswith(".py") and os.path.basename(name) != "__init__.py": + if directory == os.path.dirname(__file__): + plugin = importlib.import_module( + 'patacrep.{}.{}'.format( + ".".join(subdir), + os.path.basename(name[:-len('.py')]) + ) + ) + else: + plugin = importlib.import_module( + os.path.basename(name[:-len('.py')]) + ) + for (key, value) in getattr(plugin, variable, {}).items(): + if key in plugins: + LOGGER.warning( + error.format( + filename=relpath(name), + key=key, + ) + ) + continue + plugins[key] = value + del sys.path[-1] + return plugins + + diff --git a/patacrep/index.py b/patacrep/index.py index 470bb67e..c715918a 100644 --- a/patacrep/index.py +++ b/patacrep/index.py @@ -8,17 +8,18 @@ from a file generated by the latex compilation of the songbook (.sxd). """ import locale +import unidecode import re from patacrep import authors from patacrep import encoding -from patacrep.plastex import simpleparse +from patacrep.latex import tex2plain -EOL = u"\n" +EOL = "\n" # Pattern set to ignore latex command in title prefix -KEYWORD_PATTERN = re.compile(ur"^%(\w+)\s?(.*)$", re.LOCALE) -FIRST_LETTER_PATTERN = re.compile(ur"^(?:\{?\\\w+\}?)*[^\w]*(\w)", re.LOCALE) +KEYWORD_PATTERN = re.compile(r"^%(\w+)\s?(.*)$", re.LOCALE) +FIRST_LETTER_PATTERN = re.compile(r"^(?:\{?\\\w+\}?)*[^\w]*(\w)", re.LOCALE) def process_sxd(filename): @@ -77,13 +78,13 @@ class Index(object): except AttributeError: # classify as number all the non letter characters letter = "0" - if re.match(ur'\d', letter): + if re.match(r'\d', letter): letter = '0-9' return letter.upper() def add_keyword(self, key, word): """Add 'word' to self.keywords[key].""" - if not key in self.keywords.keys(): + if not key in self.keywords: self.keywords[key] = [] self.keywords[key].append(word) @@ -93,7 +94,7 @@ class Index(object): if 'prefix' in self.keywords: for prefix in self.keywords['prefix']: self.prefix_patterns.append(re.compile( - ur"^({prefix})(\b|\\)(\s*.*)$".format(prefix=prefix), + r"^({prefix})(\b|\\)(\s*.*)$".format(prefix=prefix), re.LOCALE )) @@ -107,12 +108,12 @@ class Index(object): similar method with processing. """ first = self.get_first_letter(key[0]) - if not first in self.data.keys(): + if not first in self.data: self.data[first] = dict() - if not key in self.data[first].keys(): + if not key in self.data[first]: self.data[first][key] = { 'sortingkey': [ - encoding.unidecode(simpleparse(item)).lower() + unidecode.unidecode(tex2plain(item)).lower() for item in key ], 'entries': [], @@ -150,26 +151,26 @@ class Index(object): @staticmethod def ref_to_str(ref): """Return the LaTeX code corresponding to the reference.""" - return ur'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref) + return r'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref) def key_to_str(self, key): """Convert the key (title or author) to the LaTeX command rendering it. """ if self.indextype == "AUTHOR": - return ur"\indexauthor{{{first}}}{{{last}}}".format( + return r"\indexauthor{{{first}}}{{{last}}}".format( first=key[1], last=key[0], ) if self.indextype == "TITLE": - return ur"\indextitle{{{0[1]}}}{{{0[0]}}}".format(key) + return r"\indextitle{{{0[1]}}}{{{0[0]}}}".format(key) def entry_to_str(self, key, entry): """Return the LaTeX code corresponding to the entry.""" - return unicode(ur'\idxentry{{{0}}}{{{1}}}' + EOL).format( + return (r'\idxentry{{{0}}}{{{1}}}' + EOL).format( self.key_to_str(key), - ur'\\'.join([self.ref_to_str(ref) for ref in entry]), + r'\\'.join([self.ref_to_str(ref) for ref in entry]), ) def idxblock_to_str(self, letter, entries): @@ -185,10 +186,10 @@ class Index(object): for item in entries[key]['sortingkey'] ] - string = ur'\begin{idxblock}{' + letter + '}' + EOL + string = r'\begin{idxblock}{' + letter + '}' + EOL for key in sorted(entries, key=sortkey): string += self.entry_to_str(key, entries[key]['entries']) - string += ur'\end{idxblock}' + EOL + string += r'\end{idxblock}' + EOL return string def entries_to_str(self): diff --git a/patacrep/latex/__init__.py b/patacrep/latex/__init__.py new file mode 100644 index 00000000..fce5470f --- /dev/null +++ b/patacrep/latex/__init__.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- + +"""Very simple LaTeX parser + +This module uses an LALR parser to try to parse LaTeX code. LaTeX language +*cannot* be parsed by an LALR parser, so this is a very simple attemps, which +will work on simple cases, but not on complex ones. +""" + +from patacrep.latex.syntax import tex2plain +from patacrep.latex.syntax import parsesong as syntax_parsesong +from patacrep import encoding + +def parsesong(path): + """Return a dictonary of data read from the latex file `path`. + + """ + data = syntax_parsesong(encoding.open_read(path).read(), path) + data['@path'] = path + return data diff --git a/patacrep/latex/ast.py b/patacrep/latex/ast.py new file mode 100644 index 00000000..f8112525 --- /dev/null +++ b/patacrep/latex/ast.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +"""Abstract Syntax Tree for LaTeX code.""" + +# pylint: disable=too-few-public-methods + +class AST: + """Base class for the tree.""" + # pylint: disable=no-init + + metadata = None + + @classmethod + def init_metadata(cls): + """Clear metadata + + As this attribute is a class attribute, it as to be reset at each new + parsing. + """ + cls.metadata = { + '@languages': set(), + } + +class Expression(AST): + """LaTeX expression""" + + def __init__(self, value): + super().__init__() + self.content = [value] + + def prepend(self, value): + """Add a value at the beginning of the content list.""" + if value is not None: + self.content.insert(0, value) + return self + + def __str__(self): + return "".join([str(item) for item in self.content]) + +class Command(AST): + """LaTeX command""" + + def __init__(self, name, optional, mandatory): + self.name = name + self.mandatory = mandatory + self.optional = optional + + if name == r'\selectlanguage': + self.metadata['@languages'] |= set(self.mandatory) + + def __str__(self): + if self.name in [r'\emph']: + return str(self.mandatory[0]) + return "{}{}{}".format( + self.name, + "".join(["[{}]".format(item) for item in self.optional]), + "".join(["{{{}}}".format(item) for item in self.mandatory]), + ) + + +class BeginSong(AST): + """Beginsong command""" + + def __init__(self, titles, arguments): + self.titles = titles + self.arguments = arguments diff --git a/patacrep/latex/detex.py b/patacrep/latex/detex.py new file mode 100644 index 00000000..d99c3467 --- /dev/null +++ b/patacrep/latex/detex.py @@ -0,0 +1,123 @@ +# -*- coding: utf-8 -*- +"""Render `very simple` TeX commands in a simple TeX code.""" + +import logging + +LOGGER = logging.getLogger() + +MATCH = [ + # Diacritics: a + (r"\'a", "á"), + (r"\'A", "Á"), + (r"\`a", "à"), + (r"\`A", "À"), + (r"\^a", "â"), + (r"\^A", "Â"), + (r"\"a", "ä"), + (r"\"A", "Ä"), + + # Diacritics: e + (r"\'e", "é"), + (r"\'E", "É"), + (r"\`e", "è"), + (r"\`E", "È"), + (r"\^e", "ê"), + (r"\^E", "Ê"), + (r"\"e", "ë"), + (r"\"E", "Ë"), + + # Diacritics: i + (r"\'i", "í"), + (r"\'I", "Í"), + (r"\`i", "ì"), + (r"\`I", "Ì"), + (r"\^i", "î"), + (r"\^I", "Î"), + (r"\"i", "ï"), + (r"\"I", "Ï"), + (r"\'\i", "í"), + (r"\'\I", "Í"), + (r"\`\i", "ì"), + (r"\`\I", "Ì"), + (r"\^\i", "î"), + (r"\^\I", "Î"), + (r"\"\i", "ï"), + (r"\"\I", "Ï"), + + # Diacritics: o + (r"\'o", "ó"), + (r"\'O", "Ó"), + (r"\`o", "ò"), + (r"\`O", "Ò"), + (r"\^o", "ô"), + (r"\^O", "Ô"), + (r"\"o", "ö"), + (r"\"O", "Ö"), + + # Diacritics: u + (r"\'u", "ú"), + (r"\'U", "Ú"), + (r"\`u", "ù"), + (r"\`U", "Ù"), + (r"\^u", "û"), + (r"\^U", "Û"), + (r"\"u", "ü"), + (r"\"U", "Ü"), + + # Cedille + (r"\c c", "ç"), + (r"\c C", "Ç"), + + # œ, æ + (r"\oe", "œ"), + (r"\OE", "Œ"), + (r"\ae", "æ"), + (r"\AE", "Æ"), + + # Spaces + (r"\ ", " "), + (r"\,", " "), + (r"\~", " "), + + # IeC + (r"\IeC ", ""), + + # Miscallenous + (r"\dots", "…"), + (r"\%", "%"), + (r"\&", "&"), + (r"\_", "_"), + + ] + + +def detex(arg): + """Render very simple TeX commands from argument. + + Argument can be: + - a string: it is processed; + - a list, dict or set: its values are processed. + """ + if isinstance(arg, dict): + return dict([ + (key, detex(value)) + for (key, value) + in arg.items() + ]) + elif isinstance(arg, list): + return [ + detex(item) + for item + in arg + ] + elif isinstance(arg, set): + return set(detex(list(arg))) + elif isinstance(arg, str): + string = arg + for (latex, plain) in MATCH: + string = string.replace(latex, plain) + if '\\' in string: + LOGGER.warning("Remaining command in string '{}'.".format(string)) + return string.strip() + else: + return detex(str(arg)) diff --git a/patacrep/latex/lexer.py b/patacrep/latex/lexer.py new file mode 100644 index 00000000..c90303da --- /dev/null +++ b/patacrep/latex/lexer.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- +"""Very simple LaTeX lexer.""" + +import logging +import ply.lex as lex + +LOGGER = logging.getLogger() + +#pylint: disable=invalid-name +tokens = ( + 'LBRACKET', + 'RBRACKET', + 'LBRACE', + 'RBRACE', + 'COMMAND', + 'NEWLINE', + 'COMMA', + 'EQUAL', + 'CHARACTER', + 'SPACE', + 'BEGINSONG', + 'SONG_LTITLE', + 'SONG_RTITLE', + 'SONG_LOPTIONS', + 'SONG_ROPTIONS', +) + +class SimpleLexer: + """Very simple LaTeX lexer.""" + + tokens = tokens + + t_LBRACKET = r'\[' + t_RBRACKET = r'\]' + t_LBRACE = r'{' + t_RBRACE = r'}' + t_COMMAND = r'\\([@a-zA-Z]+|[^\\])' + t_NEWLINE = r'\\\\' + SPECIAL_CHARACTERS = ( + t_LBRACKET + + t_RBRACKET + + t_RBRACE + + t_LBRACE + + r"\\" + + r" " + + r"\n" + + r"\r" + + r"%" + + r"=" + + r"," + ) + t_CHARACTER = r'[^{}]'.format(SPECIAL_CHARACTERS) + t_EQUAL = r'=' + t_COMMA = r',' + + t_SPACE = r'[ \t\n\r]+' + + def __init__(self): + self.__class__.lexer = lex.lex(module=self) + + # Define a rule so we can track line numbers + @staticmethod + def t_newline(token): + r'\n+' + token.lexer.lineno += len(token.value) + + @staticmethod + def t_comment(token): + r'%.*' + pass + + # Error handling rule + @staticmethod + def t_error(token): + """Manage errors""" + LOGGER.error("Illegal character '{}'".format(token.value[0])) + token.lexer.skip(1) + +class SongLexer(SimpleLexer): + r"""Very simple song lexer. + + In the context of this class, a "song" is some LaTeX code containing the + ``\beginsong`` (or ``\sortassong``) command. + """ + + states = ( + ('beginsong', 'inclusive'), + ) + + # State beginsong + @staticmethod + def t_INITIAL_BEGINSONG(token): + r'(\\beginsong|\\sortassong)' + token.lexer.push_state('beginsong') + token.lexer.open_brackets = 0 + token.lexer.open_braces = 0 + return token + + @staticmethod + def t_beginsong_LBRACKET(token): + r'\[' + if token.lexer.open_brackets == 0: + token.type = 'SONG_LOPTIONS' + + # Count opening and closing braces to know when to leave the + # `beginsong` state. + token.lexer.open_braces += 1 + token.lexer.open_brackets += 1 + return token + + @staticmethod + def t_beginsong_RBRACKET(token): + r'\]' + token.lexer.open_brackets -= 1 + if token.lexer.open_brackets == 0: + token.type = 'SONG_ROPTIONS' + token.lexer.open_braces -= 1 + token.lexer.pop_state() + for __ignored in token.lexer: + # In this parser, we only want to read metadata. So, after the + # first ``\beginsong`` command, we can stop parsing. + pass + return token + + @staticmethod + def t_beginsong_LBRACE(token): + r'{' + if token.lexer.open_braces == 0: + token.type = 'SONG_LTITLE' + token.lexer.open_braces += 1 + return token + + @staticmethod + def t_beginsong_RBRACE1(token): + r'}(?![ \t\r\n]*\[)' + token.lexer.open_braces -= 1 + token.type = 'RBRACE' + if token.lexer.open_braces == 0: + token.lexer.pop_state() + token.type = 'SONG_RTITLE' + return token + + @staticmethod + def t_beginsong_RBRACE2(token): + r'}(?=[ \t\r\n]*\[)' + token.lexer.open_braces -= 1 + token.type = 'RBRACE' + if token.lexer.open_braces == 0: + token.type = 'SONG_RTITLE' + return token + diff --git a/patacrep/latex/syntax.py b/patacrep/latex/syntax.py new file mode 100644 index 00000000..896d8442 --- /dev/null +++ b/patacrep/latex/syntax.py @@ -0,0 +1,256 @@ +"""Very simple LaTeX parser""" + +import logging +import ply.yacc as yacc + +from patacrep.latex.lexer import tokens, SimpleLexer, SongLexer +from patacrep.latex import ast +from patacrep.errors import SongbookError +from patacrep.latex.detex import detex + +LOGGER = logging.getLogger() + +class ParsingError(SongbookError): + """Parsing error.""" + + def __init__(self, message): + super().__init__(self) + self.message = message + + def __str__(self): + return self.message + +# pylint: disable=line-too-long +class Parser: + """LaTeX parser.""" + + def __init__(self, filename=None): + self.tokens = tokens + self.ast = ast.AST + self.ast.init_metadata() + self.filename = filename + + @staticmethod + def __find_column(token): + """Return the column of ``token``.""" + last_cr = token.lexer.lexdata.rfind('\n', 0, token.lexpos) + if last_cr < 0: + last_cr = 0 + column = (token.lexpos - last_cr) + 1 + return column + + def p_error(self, token): + """Manage parsing errors.""" + LOGGER.error( + "Error in file {}, line {} at position {}.".format( + str(self.filename), + token.lineno, + self.__find_column(token), + ) + ) + + @staticmethod + def p_expression(symbols): + """expression : brackets expression + | braces expression + | command expression + | NEWLINE expression + | beginsong expression + | word expression + | SPACE expression + | empty + """ + if len(symbols) == 3: + if symbols[2] is None: + symbols[0] = ast.Expression(symbols[1]) + else: + symbols[0] = symbols[2].prepend(symbols[1]) + else: + symbols[0] = None + + @staticmethod + def p_empty(__symbols): + """empty :""" + return None + + @staticmethod + def p_brackets(symbols): + """brackets : LBRACKET expression RBRACKET""" + symbols[0] = symbols[2] + + @staticmethod + def p_braces(symbols): + """braces : LBRACE expression RBRACE""" + symbols[0] = symbols[2] + + @staticmethod + def p_command(symbols): + """command : COMMAND brackets_list braces_list""" + symbols[0] = ast.Command(symbols[1], symbols[2], symbols[3]) + + @staticmethod + def p_brackets_list(symbols): + """brackets_list : brackets brackets_list + | empty + """ + if len(symbols) == 3: + symbols[0] = symbols[2] + symbols[0].insert(0, symbols[1]) + else: + symbols[0] = [] + + @staticmethod + def p_braces_list(symbols): + """braces_list : braces braces_list + | empty + """ + if len(symbols) == 3: + symbols[0] = symbols[2] + symbols[0].insert(0, symbols[1]) + else: + symbols[0] = [] + + @staticmethod + def p_word(symbols): + """word : CHARACTER word_next + | COMMA word_next + | EQUAL word_next + """ + symbols[0] = symbols[1] + symbols[2] + + @staticmethod + def p_word_next(symbols): + """word_next : CHARACTER word_next + | empty + """ + if len(symbols) == 2: + symbols[0] = "" + else: + symbols[0] = symbols[1] + symbols[2] + + def p_beginsong(self, symbols): + """beginsong : BEGINSONG separator songbraces separator songbrackets""" + self.ast.metadata["@titles"] = symbols[3] + self.ast.metadata.update(symbols[5]) + + @staticmethod + def p_songbrackets(symbols): + """songbrackets : SONG_LOPTIONS separator dictionary separator SONG_ROPTIONS + | empty + """ + if len(symbols) == 6: + symbols[0] = symbols[3] + else: + symbols[0] = {} + + @staticmethod + def p_songbraces(symbols): + """songbraces : SONG_LTITLE separator titles separator SONG_RTITLE + | empty + """ + if len(symbols) == 6: + symbols[0] = symbols[3] + else: + symbols[0] = [] + + @staticmethod + def p_dictionary(symbols): + """dictionary : identifier EQUAL braces dictionary_next + | identifier EQUAL error dictionary_next + """ + if isinstance(symbols[3], ast.Expression): + symbols[0] = {} + symbols[0][symbols[1]] = symbols[3] + symbols[0].update(symbols[4]) + else: + raise ParsingError("Do enclose arguments between braces.") + + @staticmethod + def p_identifier(symbols): + """identifier : CHARACTER identifier + | empty + """ + if len(symbols) == 2: + symbols[0] = "" + else: + symbols[0] = symbols[1] + symbols[2] + + @staticmethod + def p_separator(symbols): + """separator : SPACE + | empty + """ + symbols[0] = None + + @staticmethod + def p_dictonary_next(symbols): + """dictionary_next : separator COMMA separator dictionary + | empty + """ + if len(symbols) == 5: + symbols[0] = symbols[4] + else: + symbols[0] = {} + + @staticmethod + def p_titles(symbols): + """titles : title titles_next""" + symbols[0] = [symbols[1]] + symbols[2] + + @staticmethod + def p_titles_next(symbols): + """titles_next : NEWLINE title titles_next + | empty + """ + if len(symbols) == 2: + symbols[0] = [] + else: + symbols[0] = [symbols[2]] + symbols[3] + + @staticmethod + def p_title(symbols): + """title : brackets title + | braces title + | command title + | word title + | SPACE title + | empty + """ + if len(symbols) == 2: + symbols[0] = None + else: + if symbols[2] is None: + symbols[0] = ast.Expression(symbols[1]) + else: + symbols[0] = symbols[2].prepend(symbols[1]) + +def silent_yacc(*args, **kwargs): + """Call yacc, suppressing (as far as possible) output and generated files. + """ + return yacc.yacc( + write_tables=0, + debug=0, + *args, + **kwargs + ) + +def tex2plain(string): + """Parse string and return its plain text version.""" + return detex( + silent_yacc( + module=Parser(), + ).parse( + string, + lexer=SimpleLexer().lexer, + ) + ) + +def parsesong(string, filename=None): + """Parse song and return its metadata.""" + return detex( + silent_yacc(module=Parser(filename)).parse( + string, + lexer=SongLexer().lexer, + ).metadata + ) + diff --git a/patacrep/plastex.py b/patacrep/plastex.py deleted file mode 100644 index cdaa3a64..00000000 --- a/patacrep/plastex.py +++ /dev/null @@ -1,117 +0,0 @@ -# -*- coding: utf-8 -*- - -"""PlasTeX module to process song files.""" - -from plasTeX.TeX import TeX -from plasTeX.Base.LaTeX import Sentences - -import locale -import os -import sys - -from patacrep import encoding - -def process_unbr_spaces(node): - #pylint: disable=line-too-long - r"""Replace '~' and '\ ' in node by nodes that - will be rendered as unbreakable space. - - Return node object for convenience. - - This function is a workaround to a bug that has been solved since: - - https://github.com/tiarno/plastex/commit/76bb78d5fbaac48e68025a3545286cc63cb4e7ad - - https://github.com/tiarno/plastex/commit/682a0d223b99d6b949bacf1c974d24dc9bb1d18e - - It can be deleted once this bug has been merged in production version of - PlasTeX. - """ - if (type(node) == Sentences.InterWordSpace or - (type(node) == Sentences.NoLineBreak and node.source == '~ ')): - node.unicode = unichr(160) - for child in node.childNodes: - process_unbr_spaces(child) - - return node - - -def simpleparse(text): - """Parse a simple LaTeX string. - """ - tex = TeX() - tex.disableLogging() - tex.input(text) - doc = tex.parse() - return process_unbr_spaces(doc.textContent) - - -class SongParser(object): - """Analyseur syntaxique de fichiers .sg""" - - @staticmethod - def create_tex(): - """Create a TeX object, ready to parse a tex file.""" - tex = TeX() - tex.disableLogging() - tex.ownerDocument.context.loadBaseMacros() - sys.path.append(os.path.dirname(__file__)) - tex.ownerDocument.context.loadPackage(tex, "plastex_patchedbabel") - tex.ownerDocument.context.loadPackage(tex, "plastex_chord") - tex.ownerDocument.context.loadPackage(tex, "plastex_songs") - tex.ownerDocument.context.loadPackage(tex, "plastex_misc_commands") - sys.path.pop() - return tex - - @classmethod - def parse(cls, filename): - """Parse a TeX file, and return its plasTeX representation.""" - tex = cls.create_tex() - tex.input(encoding.open_read(filename, 'r')) - return tex.parse() - - -def parsetex(filename): - r"""Analyse syntaxique d'un fichier .sg - - Renvoie un dictionnaire contenant les métadonnées lues dans le fichier. Les - clefs sont : - - languages: l'ensemble des langages utilisés (recherche des - \selectlanguages{}) ; - - titles: la liste des titres ; - - args: le dictionnaire des paramètres passés à \beginsong. - """ - # /* BEGIN plasTeX patch - # The following lines, and another line a few lines later, are used to - # circumvent a plasTeX bug. It has been reported and corrected : - # https://github.com/tiarno/plastex/commit/8f4e5a385f3cb6a04d5863f731ce24a7e856f2a4 - # To see if you can delete those lines, set your LC_TIME locale to French, - # during a month containing diacritics (e.g. Février), and run songbook. If - # no plasTeX bug appears, it is safe to remove those lines. - oldlocale = locale.getlocale(locale.LC_TIME) - locale.setlocale(locale.LC_TIME, 'C') - # plasTeX patch END */ - - # Analyse syntaxique - doc = SongParser.parse(filename) - - # /* BEGIN plasTeX patch - if oldlocale[0] and oldlocale[1]: - try: - locale.setlocale(locale.LC_TIME, "%s.%s" % oldlocale) - except locale.Error: - pass # Workaround a bug on windows - # plasTeX patch END */ - - # Extraction des données - data = { - "languages": set(), - "_doc": doc, - "_filename": filename, - } - for node in doc.allChildNodes: - if node.nodeName == "selectlanguage": - data["languages"].add(node.attributes['lang']) - if node.nodeName in ["beginsong", "sortassong"]: - data["titles"] = node.attributes["titles"] - data["args"] = node.attributes["args"] - - return data diff --git a/patacrep/plastex_chord.py b/patacrep/plastex_chord.py deleted file mode 100644 index dba9f36c..00000000 --- a/patacrep/plastex_chord.py +++ /dev/null @@ -1,181 +0,0 @@ -# -*- coding: utf-8 -*- - -r"""PlasTeX module to deal with chords commands of the songs LaTeX package - -Chords are set using commands like \[C]. This package parses those commands. -""" - -import logging - -import plasTeX -from plasTeX import Command, Environment, Macro -from plasTeX.Base.LaTeX.Math import BeginDisplayMath - -LOGGER = logging.getLogger(__name__) - -# Count the number of levels of 'verse' environment: IN_VERSE==1 means that we -# are in a 'verse' environment; IN_VERSE==2 means that we are in two included -# 'verse' environment, and so on. -IN_VERSE = 0 - -def wrap_displaymath(cls): - """Decorator to store the depth of 'verse' environment - - In the invoke() method classes, global variable IN_VERSE indicates the - number of 'verse' (or 'chorus' or 'verse*') environment we are in. - """ - - # pylint: disable=no-init,too-few-public-methods - class WrappedClass(cls): - """Wrapper to LaTeX environment updating IN_VERSE""" - blockType = True - # pylint: disable=super-on-old-class,global-statement,no-member - def invoke(self, tex): - """Wrapper to invoke() to update global variable IN_VERSE.""" - global IN_VERSE - if self.macroMode == Macro.MODE_BEGIN: - self.ownerDocument.context.push() - self.ownerDocument.context.catcode("\n", 13) - IN_VERSE += 1 - - # Removing spaces and line breaks at the beginning of verse - token = None - for token in tex: - if not match_space(token): - break - if token is not None: - tex.pushToken(token) - - else: - self.ownerDocument.context.pop() - IN_VERSE -= 1 - return super(WrappedClass, self).invoke(tex) - return WrappedClass - -# pylint: disable=too-many-public-methods -@wrap_displaymath -class Verse(Environment): - """LaTeX 'verse' environment""" - macroName = 'verse' - -# pylint: disable=too-many-public-methods -@wrap_displaymath -class VerseStar(Environment): - """LaTeX 'verse*' environment""" - macroName = 'verse*' - -# pylint: disable=too-many-public-methods -@wrap_displaymath -class Chorus(Environment): - """LaTeX 'chorus' environment""" - macroName = 'chorus' - -def match_space(token): - """Return True if token is a space or newline character.""" - return ( - isinstance(token, plasTeX.Tokenizer.Space) - or token.nodeName == 'active::\n' - ) - -def match_closing_square_bracket(token): - """Return True if token is character ']'.""" - return token.nodeType == token.TEXT_NODE and token.nodeValue == ']' - -def match_egroup(token): - """Return True if token is of type `egroup` (end of group).""" - return isinstance(token, plasTeX.Base.Text.egroup) #pylint: disable=no-member - -def match_space_or_chord(token): - """Return True if token is a space or a chord.""" - return match_space(token) or isinstance(token, Chord) - -def parse_until(tex, end=lambda x: False): - """Parse `tex` until condition `end`, or `egroup` is met. - - Arguments: - - tex: object to parse - - end: function taking a token in argument, and returning a boolean. - Parsing stops when this function returns True, or an `egroup` is met. - - Return: a tuple of two items (the list of parsed tokens, last token). This - is done so that caller can decide whether they want to discard it or not. - Last token can be None if everything has been parsed without the end - condition being met. - """ - parsed = [] - last = None - for token in tex: - if end(token) or match_egroup(token): - last = token - break - elif isinstance(token, plasTeX.Base.Text.bgroup): #pylint: disable=no-member - # pylint: disable=expression-not-assigned - [token.appendChild(item) for item in parse_until(tex, match_egroup)[0]] - parsed.append(token) - return (parsed, last) - - -class Chord(Command): - """Beginning of a chord notation""" - macroName = 'chord' - macroMode = Command.MODE_NONE - -class BeginChordOrDisplayMath(BeginDisplayMath): - r"""Wrapper to BeginDisplayMath - - In a 'verse' (or 'verse*' or 'chorus') environment, the '\[' macro - displays a chord. Otherwise, it corresponds to the usual LaTeX math mode. - This class calls the right method, depending on the inclusion of this - macro in a verse environment. - """ - macroName = '[' - - def invoke(self, tex): - """Process this macro""" - if IN_VERSE: - chord = Chord() - - self.ownerDocument.context.push() #pylint: disable=no-member - self.ownerDocument.context.catcode("&", 13) #pylint: disable=no-member - chord.setAttribute( - 'name', - parse_until(tex, match_closing_square_bracket)[0], - ) - self.ownerDocument.context.pop() #pylint: disable=no-member - - token = next(iter(tex), None) - if token is None: - return [chord] - elif match_space(token): - return [chord, token] - elif ( - isinstance(token, Verse) - or isinstance(token, VerseStar) - or isinstance(token, Chorus) - ): - LOGGER.warning(( - "{} L{}: '\\end{{verse}}' (or 'verse*' or 'chorus') not " - "allowed directly after '\\['." - ).format(tex.filename, tex.lineNumber) - ) - return [chord] - elif isinstance(token, Chord): - token.attributes['name'] = ( - chord.attributes['name'] - + token.attributes['name'] - ) - chord = token - return [chord] - elif isinstance(token, plasTeX.Base.Text.bgroup): #pylint: disable=no-member - # pylint: disable=expression-not-assigned - [chord.appendChild(item) for item in parse_until(tex)[0]] - return [chord] - else: - chord.appendChild(token) - (parsed, last) = parse_until(tex, match_space_or_chord) - # pylint: disable=expression-not-assigned - [chord.appendChild(item) for item in parsed] - return [chord, last] - else: - return super(BeginChordOrDisplayMath, self).invoke(tex) - diff --git a/patacrep/plastex_misc_commands.py b/patacrep/plastex_misc_commands.py deleted file mode 100644 index 4b4b2602..00000000 --- a/patacrep/plastex_misc_commands.py +++ /dev/null @@ -1,15 +0,0 @@ -# -*- coding: utf-8 -*- - -"""Quick management of random LaTeX commands.""" - -from plasTeX import Command - -# pylint: disable=invalid-name,too-many-public-methods -class songcolumns(Command): - r"""Manage `\songcolumns` command""" - args = '{num:int}' - -# pylint: disable=invalid-name,too-many-public-methods -class gtab(Command): - r"""Manage `\gta` command""" - args = '{chord:str}{diagram:str}' diff --git a/patacrep/plastex_patchedbabel.py b/patacrep/plastex_patchedbabel.py deleted file mode 100644 index e20d3086..00000000 --- a/patacrep/plastex_patchedbabel.py +++ /dev/null @@ -1,58 +0,0 @@ -# -*- coding: utf-8 -*- - -r"""Patch pour le paquet Babel de PlasTeX - -Un bug dans PlasTeX intervient lorsqu'on essaye d'analyser une commande LaTeX -\selectlanguage{}, que nous voulons utiliser ici. Un patch a été proposé aux -développeurs de plasTeX, et accepté. Mais il faut que cette correction arrive -en production. En attendant, nous utilisons cette version modifiée. - -Dés que la correction sera entrée en production, il faudra supprimer ce -fichier, et remplater l'occurence à "patchedbabel" par "babel" dans le fichier -"plastex.py". -La correction à suveiller est la révision -41a48c0c229dd46b69fb0e3720595000a71b17d8 du fichier babel.py : -https://github.com/tiarno/plastex/commit/41a48c0c229dd46b69fb0e3720595000a71b17d8 - -# Comment vérifier si on peut supprimer ce fichier ? - -1) Remplacer l'occurence à patchedbabel par babel dans le fichier plastex.py. - -2) Générer un fichier .tex à partir d'un fichier .sb, ce dernier faisant -intervenir des chansons dans lesquelles \selectlanguage est utilisé (par -exemple, "make -B matteo.tex" ou "make -B naheulbeuk.tex" pour des fichiers pas -trop gros. - -3) Si l'erreur suivante apparaît, c'est qu'il faut encore attendre. - -> Traceback (most recent call last): -> [...] -> File "/usr/lib/pymodules/python2.7/plasTeX/Packages/babel.py", line 18, in -> invoke context.loadLanguage(self.attributes['lang'], self.ownerDocument) -> NameError: global name 'context' is not defined - -3 bis) Si elle n'apparait pas : youpi ! Supprimez ce fichier ! - -# Contact et commentaires - -Mercredi 27 mars 2013 -Louis - -""" - -from plasTeX import Command - -# pylint: disable=invalid-name,too-many-public-methods -class selectlanguage(Command): - """Patch of vanilla selectlanguage class. - - See module docstring for more information.""" - args = 'lang:str' - - def invoke(self, tex): - res = Command.invoke(self, tex) - self.ownerDocument.context.loadLanguage( # pylint: disable=no-member - self.attributes['lang'], - self.ownerDocument - ) - return res diff --git a/patacrep/plastex_songs.py b/patacrep/plastex_songs.py deleted file mode 100644 index 5bf5041a..00000000 --- a/patacrep/plastex_songs.py +++ /dev/null @@ -1,70 +0,0 @@ -# -*- coding: utf-8 -*- - -"""Module to process song LaTeX environment. -""" - -import plasTeX - -from patacrep import encoding -from patacrep.plastex import process_unbr_spaces - - -def split_linebreak(texlist): - """Return a list of alternative title. - - A title can be defined with alternative names : - - A real name\\ - Alternative name\\ - Another alternative name - - This function takes the object representation of a list of titles, and - return a list of titles. - """ - return_list = [] - current = [] - for token in texlist: - if token.nodeName == '\\': - return_list.append(current) - current = [] - else: - current.append(encoding.basestring2unicode( - process_unbr_spaces(token).textContent - )) - if current: - return_list.append(current) - return return_list - - -class beginsong(plasTeX.Command): # pylint: disable=invalid-name,too-many-public-methods - """Class parsing the LaTeX song environment.""" - - args = '{titles}[args:dict]' - - def invoke(self, tex): - """Parse an occurence of song environment.""" - - plasTeX.Command.invoke(self, tex) - - # Parsing title - titles = [] - for tokens in split_linebreak(self.attributes['titles'].allChildNodes): - titles.append("".join(tokens)) - self.attributes['titles'] = encoding.list2unicode(titles) - - # Parsing keyval arguments - args = {} - for (key, val) in self.attributes['args'].iteritems(): - if isinstance(val, plasTeX.DOM.Element): - args[key] = encoding.basestring2unicode( - process_unbr_spaces(val).textContent - ) - elif isinstance(val, basestring): - args[key] = encoding.basestring2unicode(val) - else: - args[key] = unicode(val) - self.attributes['args'] = args - -class sortassong(beginsong): # pylint: disable=invalid-name,too-many-public-methods - r"""Treat '\sortassong' exactly as if it were a '\beginsong'.""" - pass diff --git a/patacrep/songbook.py b/patacrep/songbook.py new file mode 100755 index 00000000..e3bcf555 --- /dev/null +++ b/patacrep/songbook.py @@ -0,0 +1,155 @@ +#! /usr/bin/env python3 +# -*- coding: utf-8 -*- + +"""Command line tool to compile songbooks using the songbook library.""" + +import argparse +import json +import locale +import logging +import os.path +import textwrap +import sys + +from patacrep.build import SongbookBuilder, DEFAULT_STEPS +from patacrep import __version__ +from patacrep import errors +from patacrep import encoding + +# Logging configuration +logging.basicConfig(level=logging.INFO) +LOGGER = logging.getLogger() + +# pylint: disable=too-few-public-methods +class ParseStepsAction(argparse.Action): + """Argparse action to split a string into a list.""" + def __call__(self, __parser, namespace, values, __option_string=None): + if not getattr(namespace, self.dest): + setattr(namespace, self.dest, []) + setattr( + namespace, + self.dest, + ( + getattr(namespace, self.dest) + + [value.strip() for value in values[0].split(',')] + ), + ) + +class VerboseAction(argparse.Action): + """Set verbosity level with option --verbose.""" + def __call__(self, *_args, **_kwargs): + LOGGER.setLevel(logging.DEBUG) + +def argument_parser(args): + """Parse arguments""" + parser = argparse.ArgumentParser(description="A song book compiler") + + parser.add_argument('--version', help='Show version', action='version', + version='%(prog)s ' + __version__) + + parser.add_argument('book', nargs=1, help=textwrap.dedent("""\ + Book to compile. + """)) + + parser.add_argument('--datadir', '-d', nargs='+', type=str, action='append', + help=textwrap.dedent("""\ + Data location. Expected (not necessarily required) + subdirectories are 'songs', 'img', 'latex', 'templates'. + """)) + + parser.add_argument('--verbose', '-v', nargs=0, action=VerboseAction, + help=textwrap.dedent("""\ + Show details about the compilation process. + """)) + + parser.add_argument('--steps', '-s', nargs=1, type=str, + action=ParseStepsAction, + help=textwrap.dedent("""\ + Steps to run. Default is "{steps}". + Available steps are: + "tex" produce .tex file from templates; + "pdf" compile .tex file; + "sbx" compile index files; + "clean" remove temporary files; + any string beginning with '%%' (in this case, it will be run + in a shell). Several steps (excepted the custom shell + command) can be combinend in one --steps argument, as a + comma separated string. + """.format(steps=','.join(DEFAULT_STEPS))), + default=None, + ) + + options = parser.parse_args(args) + + return options + + +def main(): + """Main function:""" + + # set script locale to match user's + try: + locale.setlocale(locale.LC_ALL, '') + except locale.Error as error: + # Locale is not installed on user's system, or wrongly configured. + LOGGER.error("Locale error: {}\n".format(str(error))) + + options = argument_parser(sys.argv[1:]) + + songbook_path = options.book[0] + + basename = os.path.basename(songbook_path)[:-3] + + songbook_file = None + try: + songbook_file = encoding.open_read(songbook_path) + songbook = json.load(songbook_file) + except Exception as error: # pylint: disable=broad-except + LOGGER.error(error) + LOGGER.error("Error while loading file '{}'.".format(songbook_path)) + sys.exit(1) + finally: + if songbook_file: + songbook_file.close() + + # Gathering datadirs + datadirs = [] + if options.datadir: + # Command line options + datadirs += [item[0] for item in options.datadir] + if 'datadir' in songbook: + # .sg file + if isinstance(songbook['datadir'], str): + songbook['datadir'] = [songbook['datadir']] + datadirs += [ + os.path.join( + os.path.dirname(os.path.abspath(songbook_path)), + path + ) + for path in songbook['datadir'] + ] + # Default value + datadirs.append(os.path.dirname(os.path.abspath(songbook_path))) + + songbook['datadir'] = datadirs + + try: + sb_builder = SongbookBuilder(songbook, basename) + sb_builder.unsafe = True + + sb_builder.build_steps(options.steps) + except errors.SongbookError as error: + LOGGER.error(error) + if LOGGER.level >= logging.INFO: + LOGGER.error( + "Running again with option '-v' may give more information." + ) + sys.exit(1) + except KeyboardInterrupt: + LOGGER.warning("Aborted by user.") + sys.exit(1) + + sys.exit(0) + +if __name__ == '__main__': + main() diff --git a/patacrep/songs.py b/patacrep/songs/__init__.py similarity index 58% rename from patacrep/songs.py rename to patacrep/songs/__init__.py index 961d9f37..a4a6b089 100644 --- a/patacrep/songs.py +++ b/patacrep/songs/__init__.py @@ -4,17 +4,14 @@ import errno import hashlib +import jinja2 import logging import os +import pickle import re -try: - import cPickle as pickle -except ImportError: - import pickle - from patacrep.authors import processauthors -from patacrep.plastex import parsetex +from patacrep.content import Content LOGGER = logging.getLogger(__name__) @@ -66,19 +63,33 @@ class DataSubpath(object): self.subpath = os.path.join(self.subpath, path) return self -# pylint: disable=too-few-public-methods, too-many-instance-attributes -class Song(object): - """Song management""" +# pylint: disable=too-many-instance-attributes +class Song(Content): + """Song (or song metadata) + + This class represents a song, bound to a file. + + - It can parse the file given in arguments. + - It can render the song as some LaTeX code. + - Its content is cached, so that if the file has not been changed, the + file is not parsed again. + + This class is inherited by classes implementing song management for + several file formats. Those subclasses must implement: + - `parse()` to parse the file; + - `render()` to render the song as LaTeX code. + """ # Version format of cached song. Increment this number if we update # information stored in cache. - CACHE_VERSION = 0 + CACHE_VERSION = 1 # List of attributes to cache cached_attributes = [ "titles", "unprefixed_titles", - "args", + "cached", + "data", "datadir", "fullpath", "subpath", @@ -113,9 +124,14 @@ class Song(object): self.fullpath )) - # Data extraction from the song with plastex - data = parsetex(self.fullpath) - self.titles = data['titles'] + # Default values + self.data = {} + self.titles = [] + self.languages = [] + self.authors = [] + + # Parsing and data processing + self.parse() self.datadir = datadir self.unprefixed_titles = [ unprefixed_title( @@ -125,16 +141,16 @@ class Song(object): for title in self.titles ] - self.args = data['args'] self.subpath = subpath - self.languages = data['languages'] - if "by" in self.args.keys(): - self.authors = processauthors( - self.args["by"], - **config["_compiled_authwords"] - ) - else: - self.authors = [] + self.authors = processauthors( + self.authors, + **config["_compiled_authwords"] + ) + + # Cache management + + #: Special attribute to allow plugins to store cached data + self.cached = None self._version = self.CACHE_VERSION self._write_cache() @@ -144,14 +160,7 @@ class Song(object): if self.datadir: cached = {} for attribute in self.cached_attributes: - if attribute == "args": - cached[attribute] = dict([ - (key, u"{}".format(value)) # Force conversion to unicode - for (key, value) - in self.args.iteritems() - ]) - else: - cached[attribute] = getattr(self, attribute) + cached[attribute] = getattr(self, attribute) pickle.dump( cached, open(cached_name(self.datadir, self.subpath), 'wb'), @@ -159,15 +168,58 @@ class Song(object): ) def __repr__(self): - return repr((self.titles, self.args, self.fullpath)) + return repr((self.titles, self.data, self.fullpath)) + + def begin_new_block(self, previous, __context): + """Return a boolean stating if a new block is to be created.""" + return not isinstance(previous, Song) + + def begin_block(self, context): + """Return the string to begin a block.""" + indexes = context.resolve("indexes") + if isinstance(indexes, jinja2.runtime.Undefined): + indexes = "" + return r'\begin{songs}{%s}' % indexes + + def end_block(self, __context): + """Return the string to end a block.""" + return r'\end{songs}' + + def render(self, __context): + """Returns the TeX code rendering the song. + + This function is to be defined by subclasses. + """ + return '' + + def parse(self): + """Parse file `self.fullpath`. + + This function is to be defined by subclasses. + + It set the following attributes: + + - titles: the list of (raw) titles. This list will be processed to + remove prefixes. + - languages: the list of languages used in the song, as languages + recognized by the LaTeX babel package. + - authors: the list of (raw) authors. This list will be processed to + 'clean' it (see function :func:`patacrep.authors.processauthors`). + - data: song metadata. Used (among others) to sort the songs. + - cached: additional data that will be cached. Thus, data stored in + this attribute must be picklable. + """ + self.data = {} + self.titles = [] + self.languages = [] + self.authors = [] def unprefixed_title(title, prefixes): """Remove the first prefix of the list in the beginning of title (if any). """ for prefix in prefixes: - match = re.compile(ur"^(%s)\b\s*(.*)$" % prefix, re.LOCALE).match(title) + match = re.compile(r"^(%s)\b\s*(.*)$" % prefix, re.LOCALE).match(title) if match: return match.group(2) return title - diff --git a/patacrep/songs/tex.py b/patacrep/songs/tex.py new file mode 100644 index 00000000..b5710346 --- /dev/null +++ b/patacrep/songs/tex.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- + +"""Very simple LaTeX parsing.""" + +import os + +from patacrep import files +from patacrep.latex import parsesong +from patacrep.songs import Song + +class TexRenderer(Song): + """Renderer for song and intersong files.""" + + def parse(self): + """Parse song and set metadata.""" + self.data = parsesong(self.fullpath) + self.titles = self.data['@titles'] + self.languages = self.data['@languages'] + self.authors = self.data['by'] + + def render(self, context): + """Return the string that will render the song.""" + return r'\input{{{}}}'.format(files.path2posix( + files.relpath( + self.fullpath, + os.path.dirname(context['filename']) + ))) + +FILE_PLUGINS = { + 'sg': TexRenderer, + 'is': TexRenderer, + } diff --git a/patacrep/templates.py b/patacrep/templates.py index ec8ad99c..d1e9a616 100644 --- a/patacrep/templates.py +++ b/patacrep/templates.py @@ -12,15 +12,15 @@ import json from patacrep import encoding, errors, files _LATEX_SUBS = ( - (re.compile(ur'\\'), ur'\\textbackslash'), - (re.compile(ur'([{}_#%&$])'), ur'\\\1'), - (re.compile(ur'~'), ur'\~{}'), - (re.compile(ur'\^'), ur'\^{}'), - (re.compile(ur'"'), ur"''"), - (re.compile(ur'\.\.\.+'), ur'\\ldots'), + (re.compile(r'\\'), r'\\textbackslash'), + (re.compile(r'([{}_#%&$])'), r'\\\1'), + (re.compile(r'~'), r'\~{}'), + (re.compile(r'\^'), r'\^{}'), + (re.compile(r'"'), r"''"), + (re.compile(r'\.\.\.+'), r'\\ldots'), ) -_VARIABLE_REGEXP = re.compile(ur""" +_VARIABLE_REGEXP = re.compile(r""" \(\*\ *variables\ *\*\) # Match (* variables *) ( # Match and capture the following: (?: # Start of non-capturing group, used to match a single character @@ -48,7 +48,7 @@ class VariablesExtension(Extension): tags = set(['variables']) def parse(self, parser): - parser.stream.next() + next(parser.stream) parser.parse_statements( end_tokens=['name:endvariables'], drop_needle=True, diff --git a/readme.md b/readme.md index c1b7fd8e..c97fef12 100644 --- a/readme.md +++ b/readme.md @@ -12,8 +12,7 @@ is precised in the header. # Python version -Patacrep is compatible with Python 2.7 (no Python3 since [one of the -library](http://plastex.sourceforge.net/) we are using is not). +Patacrep is compatible with Python 3. # Download @@ -27,7 +26,7 @@ Clone Patacrep repos: Make sure you have [pip](https://pip.pypa.io/en/latest/) installed, and then run > pip install -r Requirements.txt -> python setup.py install +> python3 setup.py install # Run @@ -39,13 +38,6 @@ Look for existing songbook files in `/books/`. For example: > /songbook /books/songbook_en.sb > songbook_en.pdf -# Quick and dirty deb packages - -Install `python-stdeb`, then: - -> python setup.py --command-packages=stdeb.command bdist_deb -> sudo dpkg -i deb_dist/python-patacrep_-1_all.deb - # Documentation - Compiled, but may be outdated: http://www.patacrep.com/data/documents/doc_en.pdf diff --git a/setup.py b/setup.py index a71f9dff..34705187 100755 --- a/setup.py +++ b/setup.py @@ -6,55 +6,42 @@ $ python setup.py install """ from patacrep import __version__ -from setuptools import setup - -import sys -import os -import site - - -SETUP = {"name": 'patacrep', - "version": __version__, - "description": 'Songbook compilation chain', - "author": 'The Songbook team', - "author_email": 'crep@team-on-fire.com', - "url": 'https://github.com/patacrep/patacrep', - "packages": ['patacrep', 'patacrep.content'], - "license": "GPLv2 or any later version", - "scripts": ['songbook'], - "requires": [ - "argparse", "codecs", "distutils", "fnmatch", "glob", "json", - "locale", "logging", "os", "plasTeX", "re", "subprocess", "sys", - "textwrap", "unidecode", "jinja2", "chardet" - ], - "install_requires": [ - "argparse", "plasTeX", "unidecode", "jinja2", "chardet" +from setuptools import setup, find_packages + +setup( + name='patacrep', + version=__version__, + description='Songbook compilation chain', + author='The Songbook team', + author_email='crep@team-on-fire.com', + url='https://github.com/patacrep/patacrep', + packages=find_packages(), + license="GPLv2 or any later version", + requires=[ + "argparse", "codecs", "distutils", "fnmatch", "glob", "json", + "locale", "logging", "os", "re", "subprocess", "sys", + "textwrap", "unidecode", "jinja2", "chardet" + ], + install_requires=[ + "argparse", "unidecode", "jinja2", "chardet", "ply" + ], + include_package_data=True, + entry_points={ + 'console_scripts': [ + "songbook = patacrep.songbook:main", ], - "package_data": {'patacrep': [ 'data/latex/*', - 'data/templates/*', - 'data/examples/*.sb', - 'data/examples/*/*.sg', - 'data/examples/*/*.ly', - 'data/examples/*/*.jpg', - 'data/examples/*/*.png', - 'data/examples/*/*.png', - 'data/examples/*/*/header']}, - "classifiers": [ - "Environment :: Console", - "License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)", - "Natural Language :: English", - "Operating System :: POSIX :: Linux", - "Operating System :: Microsoft :: Windows", - "Operating System :: MacOS :: MacOS X", - "Programming Language :: Python :: 2.7", - "Topic :: Utilities", - ], - "platforms": ["GNU/Linux", "Windows", "MacOsX"] -} - -if sys.platform.startswith('win32'): - from shutil import copy - copy("songbook", "songbook.py") - SETUP["scripts"] = ['songbook.py'] - -setup(**SETUP) + }, + classifiers=[ + "Environment :: Console", + "License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)", + "Natural Language :: English", + "Operating System :: POSIX :: Linux", + "Operating System :: Microsoft :: Windows", + "Operating System :: MacOS :: MacOS X", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3.3", + "Programming Language :: Python :: 3.4", + "Topic :: Utilities", + ], + platforms=["GNU/Linux", "Windows", "MacOsX"] +) diff --git a/songbook b/songbook index bcf015ee..80e12f88 100755 --- a/songbook +++ b/songbook @@ -1,152 +1,9 @@ -#! /usr/bin/env python2 -# -*- coding: utf-8 -*- +#! /usr/bin/env python3 -"""Command line tool to compile songbooks using the songbook library.""" - -import argparse -import json -import locale -import logging -import os.path -import textwrap -import sys - -from patacrep.build import SongbookBuilder, DEFAULT_STEPS -from patacrep import __version__ -from patacrep import errors -from patacrep import encoding - -# Logging configuration -logging.basicConfig(level=logging.INFO) -LOGGER = logging.getLogger() - -# pylint: disable=too-few-public-methods -class ParseStepsAction(argparse.Action): - """Argparse action to split a string into a list.""" - def __call__(self, __parser, namespace, values, __option_string=None): - if not getattr(namespace, self.dest): - setattr(namespace, self.dest, []) - setattr( - namespace, - self.dest, - ( - getattr(namespace, self.dest) - + [value.strip() for value in values[0].split(',')] - ), - ) - -class VerboseAction(argparse.Action): - """Set verbosity level with option --verbose.""" - def __call__(self, *_args, **_kwargs): - LOGGER.setLevel(logging.DEBUG) - -def argument_parser(args): - """Parse arguments""" - parser = argparse.ArgumentParser(description="A song book compiler") - - parser.add_argument('--version', help='Show version', action='version', - version='%(prog)s ' + __version__) - - parser.add_argument('book', nargs=1, help=textwrap.dedent("""\ - Book to compile. - """)) - - parser.add_argument('--datadir', '-d', nargs='+', type=str, action='append', - help=textwrap.dedent("""\ - Data location. Expected (not necessarily required) - subdirectories are 'songs', 'img', 'latex', 'templates'. - """)) - - parser.add_argument('--verbose', '-v', nargs=0, action=VerboseAction, - help=textwrap.dedent("""\ - Show details about the compilation process. - """)) - - parser.add_argument('--steps', '-s', nargs=1, type=str, - action=ParseStepsAction, - help=textwrap.dedent("""\ - Steps to run. Default is "{steps}". - Available steps are: - "tex" produce .tex file from templates; - "pdf" compile .tex file; - "sbx" compile index files; - "clean" remove temporary files; - any string beginning with '%%' (in this case, it will be run - in a shell). Several steps (excepted the custom shell - command) can be combinend in one --steps argument, as a - comma separated string. - """.format(steps=','.join(DEFAULT_STEPS))), - default=None, - ) - - options = parser.parse_args(args) +# Do not edit this file. This file is just a helper file for development test. +# It is not part of the distributed software. - return options - - -def main(): - """Main function:""" - - # set script locale to match user's - try: - locale.setlocale(locale.LC_ALL, '') - except locale.Error as error: - # Locale is not installed on user's system, or wrongly configured. - sys.stderr.write("Locale error: {}\n".format(error.message)) - - options = argument_parser(sys.argv[1:]) - - songbook_path = options.book[0] - - basename = os.path.basename(songbook_path)[:-3] - - songbook_file = None - try: - songbook_file = encoding.open_read(songbook_path) - songbook = json.load(songbook_file) - except Exception as error: # pylint: disable=broad-except - LOGGER.error(error) - LOGGER.error("Error while loading file '{}'.".format(songbook_path)) - sys.exit(1) - finally: - if songbook_file: - songbook_file.close() - - # Gathering datadirs - datadirs = [] - if options.datadir: - # Command line options - datadirs += [item[0] for item in options.datadir] - if 'datadir' in songbook: - # .sg file - if isinstance(songbook['datadir'], basestring): - songbook['datadir'] = [songbook['datadir']] - datadirs += [ - os.path.join( - os.path.dirname(os.path.abspath(songbook_path)), - path - ) - for path in songbook['datadir'] - ] - # Default value - datadirs.append(os.path.dirname(os.path.abspath(songbook_path))) - - songbook['datadir'] = datadirs - - try: - sb_builder = SongbookBuilder(songbook, basename) - sb_builder.unsafe = True - - sb_builder.build_steps(options.steps) - except errors.SongbookError as error: - LOGGER.error(error) - if LOGGER.level >= logging.INFO: - LOGGER.error( - "Running again with option '-v' may give more information." - ) - sys.exit(1) - - sys.exit(0) +"""Command line tool to compile songbooks using the songbook library.""" -if __name__ == '__main__': - main() +from patacrep.songbook import main +main() diff --git a/stdeb.cfg b/stdeb.cfg deleted file mode 100644 index 8c33d4a4..00000000 --- a/stdeb.cfg +++ /dev/null @@ -1,6 +0,0 @@ -[DEFAULT] -Depends: python-jinja2, python-pkg-resources, python-plastex, python-chardet, python-unidecode, texlive-latex-base, texlive-latex-recommended, texlive-latex-extra, lilypond, texlive-fonts-recommended -Recommends: texlive-lang-english, texlive-lang-french, texlive-lang-portuguese, texlive-lang-spanish, texlive-fonts-extra -XS-Python-Version: >=2.7 -Section: tex -