diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..fe4c1ab7 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include LICENSE NEWS readme.md Requirements.txt +recursive-include patacrep/data * diff --git a/Requirements.txt b/Requirements.txt index 9678473f..4aa4a442 100644 --- a/Requirements.txt +++ b/Requirements.txt @@ -2,4 +2,3 @@ Jinja2==2.7.3 argparse==1.2.1 chardet==2.2.1 unidecode>=0.04.16 -https://github.com/tiarno/plastex/archive/master.zip \ No newline at end of file diff --git a/patacrep/build.py b/patacrep/build.py index c3a414d4..1bc683a4 100644 --- a/patacrep/build.py +++ b/patacrep/build.py @@ -9,7 +9,7 @@ import logging import os.path from subprocess import Popen, PIPE, call -from patacrep import __DATADIR__, authors, content, errors +from patacrep import __DATADIR__, authors, content, errors, files from patacrep.index import process_sxd from patacrep.templates import TexRenderer from patacrep.songs import DataSubpath @@ -33,6 +33,7 @@ DEFAULT_CONFIG = { 'lang': 'english', 'content': [], 'titleprefixwords': [], + 'encoding': None, } @@ -50,14 +51,13 @@ class Songbook(object): super(Songbook, self).__init__() self.config = raw_songbook self.basename = basename - self.contentlist = [] # Some special keys have their value processed. self._set_datadir() def _set_datadir(self): """Set the default values for datadir""" try: - if isinstance(self.config['datadir'], basestring): + if isinstance(self.config['datadir'], str): self.config['datadir'] = [self.config['datadir']] except KeyError: # No datadir in the raw_songbook self.config['datadir'] = [os.path.abspath('.')] @@ -86,12 +86,13 @@ class Songbook(object): - output: a file object, in which the file will be written. """ # Updating configuration - config = DEFAULT_CONFIG + config = DEFAULT_CONFIG.copy() config.update(self.config) renderer = TexRenderer( config['template'], config['datadir'], config['lang'], + config['encoding'], ) config.update(renderer.get_variables()) config.update(self.config) @@ -100,18 +101,33 @@ class Songbook(object): copy.deepcopy(config['authwords']) ) - self.config = config - # Configuration set + # Loading custom plugins + config['_content_plugins'] = files.load_plugins( + datadirs=config.get('datadir', []), + subdir=['content'], + variable='CONTENT_PLUGINS', + error=( + "File {filename}: Keyword '{keyword}' is already used. Ignored." + ), + ) + config['_file_plugins'] = files.load_plugins( + datadirs=config.get('datadir', []), + subdir=['songs'], + variable='FILE_PLUGINS', + error=( + "File {filename}: Keyword '{keyword}' is already used. Ignored." + ), + ) - self.contentlist = content.process_content( - self.config.get('content', []), - self.config, + # Configuration set + config['render_content'] = content.render_content + config['content'] = content.process_content( + config.get('content', []), + config, ) - self.config['render_content'] = content.render_content - self.config['content'] = self.contentlist - self.config['filename'] = output.name[:-4] + config['filename'] = output.name[:-4] - renderer.render_tex(output, self.config) + renderer.render_tex(output, config) class SongbookBuilder(object): @@ -213,7 +229,7 @@ class SongbookBuilder(object): log = '' line = process.stdout.readline() while line: - log += line + log += str(line) line = process.stdout.readline() LOGGER.debug(log) diff --git a/patacrep/content/__init__.py b/patacrep/content/__init__.py index 2f4fea0e..5214a55b 100755 --- a/patacrep/content/__init__.py +++ b/patacrep/content/__init__.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """Content plugin management. @@ -69,7 +69,6 @@ More documentation in the docstring of Content. """ import glob -import importlib import jinja2 import logging import os @@ -134,53 +133,6 @@ class ContentError(SongbookError): def __str__(self): return "Content: {}: {}".format(self.keyword, self.message) -def load_plugins(config): - """Load all content plugins, and return a dictionary of those plugins. - - Return value: a dictionary where: - - keys are the keywords ; - - values are functions triggered when this keyword is met. - """ - plugins = {} - directory_list = ( - [ - os.path.join(datadir, "python", "content") - for datadir in config.get('datadir', []) - ] - + [os.path.dirname(__file__)] - ) - for directory in directory_list: - if not os.path.exists(directory): - LOGGER.debug( - "Ignoring non-existent directory '%s'.", - directory - ) - continue - sys.path.append(directory) - for name in glob.glob(os.path.join(directory, '*.py')): - if name.endswith(".py") and os.path.basename(name) != "__init__.py": - if directory == os.path.dirname(__file__): - plugin = importlib.import_module( - 'patacrep.content.{}'.format( - os.path.basename(name[:-len('.py')]) - ) - ) - else: - plugin = importlib.import_module( - os.path.basename(name[:-len('.py')]) - ) - for (key, value) in plugin.CONTENT_PLUGINS.items(): - if key in plugins: - LOGGER.warning( - "File %s: Keyword '%s' is already used. Ignored.", - files.relpath(name), - key, - ) - continue - plugins[key] = value - del sys.path[-1] - return plugins - @jinja2.contextfunction def render_content(context, content): """Render the content of the songbook as a LaTeX code. @@ -224,12 +176,13 @@ def process_content(content, config=None): included in the .tex file. """ contentlist = [] - plugins = load_plugins(config) - keyword_re = re.compile(ur'^ *(?P\w*) *(\((?P.*)\))? *$') + plugins = config.get('_content_plugins', {}) + + keyword_re = re.compile(r'^ *(?P\w*) *(\((?P.*)\))? *$') if not content: content = [["song"]] for elem in content: - if isinstance(elem, basestring): + if isinstance(elem, str): elem = ["song", elem] if len(content) == 0: content = ["song"] diff --git a/patacrep/content/cwd.py b/patacrep/content/cwd.py index 338adb76..5e55d68d 100755 --- a/patacrep/content/cwd.py +++ b/patacrep/content/cwd.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """Change base directory before importing songs.""" diff --git a/patacrep/content/include.py b/patacrep/content/include.py index dcc969a9..03492544 100644 --- a/patacrep/content/include.py +++ b/patacrep/content/include.py @@ -48,15 +48,15 @@ def parse(keyword, config, argument, contentlist): filepath = load_from_datadirs(path, config) content_file = None try: - content_file = encoding.open_read(filepath, 'r') - new_content = json.load(content_file) + with encoding.open_read( + filepath, + encoding=config['encoding'] + ) as content_file: + new_content = json.load(content_file) except Exception as error: # pylint: disable=broad-except LOGGER.error(error) LOGGER.error("Error while loading file '{}'.".format(filepath)) sys.exit(1) - finally: - if content_file: - content_file.close() config["datadir"].append(os.path.abspath(os.path.dirname(filepath))) new_contentlist += process_content(new_content, config) diff --git a/patacrep/content/section.py b/patacrep/content/section.py index 2bde2eb2..96215f68 100755 --- a/patacrep/content/section.py +++ b/patacrep/content/section.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """Allow LaTeX sections (starred or not) as content of a songbook.""" @@ -26,9 +26,9 @@ class Section(Content): def render(self, __context): if self.short is None: - return ur'\{}{{{}}}'.format(self.keyword, self.name) + return r'\{}{{{}}}'.format(self.keyword, self.name) else: - return ur'\{}[{}]{{{}}}'.format(self.keyword, self.short, self.name) + return r'\{}[{}]{{{}}}'.format(self.keyword, self.short, self.name) #pylint: disable=unused-argument def parse(keyword, argument, contentlist, config): diff --git a/patacrep/content/song.py b/patacrep/content/song.py index b83fd134..50cb7349 100755 --- a/patacrep/content/song.py +++ b/patacrep/content/song.py @@ -1,45 +1,18 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """Plugin to include songs to the songbook.""" import glob -import jinja2 import logging import os -from patacrep.content import Content, process_content, ContentError +from patacrep.content import process_content, ContentError from patacrep import files, errors from patacrep.songs import Song LOGGER = logging.getLogger(__name__) -class SongRenderer(Content, Song): - """Render a song in the .tex file.""" - - def begin_new_block(self, previous, __context): - """Return a boolean stating if a new block is to be created.""" - return not isinstance(previous, SongRenderer) - - def begin_block(self, context): - """Return the string to begin a block.""" - indexes = context.resolve("indexes") - if isinstance(indexes, jinja2.runtime.Undefined): - indexes = "" - return ur'\begin{songs}{%s}' % indexes - - def end_block(self, __context): - """Return the string to end a block.""" - return ur'\end{songs}' - - def render(self, context): - """Return the string that will render the song.""" - return ur'\input{{{}}}'.format(files.path2posix( - files.relpath( - self.fullpath, - os.path.dirname(context['filename']) - ))) - #pylint: disable=unused-argument def parse(keyword, argument, contentlist, config): """Parse data associated with keyword 'song'. @@ -51,22 +24,17 @@ def parse(keyword, argument, contentlist, config): expressions (interpreted using the glob module), referring to songs. - config: the current songbook configuration dictionary. - Return a list of SongRenderer() instances. + Return a list of Song() instances. """ if '_languages' not in config: config['_languages'] = set() songlist = [] + plugins = config.get('_file_plugins', {}) for songdir in config['_songdir']: if contentlist: break - contentlist = [ - filename - for filename - in ( - files.recursive_find(songdir.fullpath, "*.sg") - + files.recursive_find(songdir.fullpath, "*.is") - ) - ] + contentlist = files.recursive_find(songdir.fullpath, plugins.keys()) + for elem in contentlist: before = len(songlist) for songdir in config['_songdir']: @@ -74,21 +42,16 @@ def parse(keyword, argument, contentlist, config): continue with files.chdir(songdir.datadir): for filename in glob.iglob(os.path.join(songdir.subpath, elem)): - if not ( - filename.endswith('.sg') or - filename.endswith('.is') - ): + LOGGER.debug('Parsing file "{}"…'.format(filename)) + try: + renderer = plugins[filename.split('.')[-1]] + except KeyError: LOGGER.warning(( - 'File "{}" is not a ".sg" or ".is" file. Ignored.' + 'I do not know how to parse file "{}". Ignored.' ).format(os.path.join(songdir.datadir, filename)) ) continue - LOGGER.debug('Parsing file "{}"…'.format(filename)) - song = SongRenderer( - songdir.datadir, - filename, - config, - ) + song = renderer(songdir.datadir, filename, config) songlist.append(song) config["_languages"].update(song.languages) if len(songlist) > before: @@ -129,7 +92,7 @@ def process_songs(content, config=None): item for item in contentlist - if not isinstance(item, SongRenderer) + if not isinstance(item, Song) ] if not_songs: raise OnlySongsError(not_songs) diff --git a/patacrep/content/songsection.py b/patacrep/content/songsection.py index b4c9d446..07153591 100755 --- a/patacrep/content/songsection.py +++ b/patacrep/content/songsection.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """Allow 'songchapter' and 'songsection' as content of a songbook.""" @@ -19,7 +19,7 @@ class SongSection(Content): def render(self, __context): """Render this section or chapter.""" - return ur'\{}{{{}}}'.format(self.keyword, self.name) + return r'\{}{{{}}}'.format(self.keyword, self.name) #pylint: disable=unused-argument def parse(keyword, argument, contentlist, config): diff --git a/patacrep/content/sorted.py b/patacrep/content/sorted.py index e9a5e677..f95065d6 100755 --- a/patacrep/content/sorted.py +++ b/patacrep/content/sorted.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """Sorted list of songs. @@ -9,9 +9,9 @@ to a songbook. import locale import logging +import unidecode from patacrep import files -from patacrep import encoding from patacrep.content import ContentError from patacrep.content.song import OnlySongsError, process_songs @@ -27,11 +27,11 @@ def normalize_string(string): - lower case; - passed through locale.strxfrm(). """ - return locale.strxfrm(encoding.unidecode(string.lower().strip())) + return locale.strxfrm(unidecode.unidecode(string.lower().strip())) def normalize_field(field): """Return a normalized field, it being a string or a list of strings.""" - if isinstance(field, basestring): + if isinstance(field, str): return normalize_string(field) elif isinstance(field, list) or isinstance(field, tuple): return [normalize_field(string) for string in field] @@ -55,7 +55,7 @@ def key_generator(sort): field = song.authors else: try: - field = song.args[key] + field = song.data[key] except KeyError: LOGGER.debug( "Ignoring unknown key '{}' for song {}.".format( @@ -63,7 +63,7 @@ def key_generator(sort): files.relpath(song.fullpath), ) ) - field = u"" + field = "" songkey.append(normalize_field(field)) return songkey return ordered_song_keys diff --git a/patacrep/content/tex.py b/patacrep/content/tex.py index 5f80fcfc..38593f38 100755 --- a/patacrep/content/tex.py +++ b/patacrep/content/tex.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """Include LaTeX raw code in the songbook.""" @@ -18,7 +18,7 @@ class LaTeX(Content): self.filename = filename def render(self, context): - return ur'\input{{{}}}'.format(files.relpath( + return r'\input{{{}}}'.format(files.relpath( self.filename, os.path.dirname(context['filename']), )) diff --git a/patacrep/data/examples/example-all.sb b/patacrep/data/examples/example-all.sb index 327ed801..0b1252aa 100644 --- a/patacrep/data/examples/example-all.sb +++ b/patacrep/data/examples/example-all.sb @@ -10,5 +10,7 @@ "authwords" : { "sep" : ["and", "et"] }, - "datadir" : "." + "datadir" : ".", + "content": [["sorted"]] + } diff --git a/patacrep/data/examples/songs/intersong.is b/patacrep/data/examples/songs/intersong.is new file mode 100644 index 00000000..dbeeb547 --- /dev/null +++ b/patacrep/data/examples/songs/intersong.is @@ -0,0 +1,6 @@ +\selectlanguage{french} +\sortassong{}[by={QQ}] +\begin{intersong} + +Lorem ipsum +\end{intersong} diff --git a/patacrep/encoding.py b/patacrep/encoding.py index 8ba7de61..a0501e4d 100644 --- a/patacrep/encoding.py +++ b/patacrep/encoding.py @@ -5,44 +5,27 @@ import codecs import chardet import logging -from unidecode import unidecode as unidecode_orig +import contextlib LOGGER = logging.getLogger(__name__) -def open_read(filename, mode='r'): + +@contextlib.contextmanager +def open_read(filename, mode='r', encoding=None): """Open a file for reading, guessing the right encoding. Return a fileobject, reading unicode strings. + If `encoding` is set, use it as the encoding (do not guess). """ - return codecs.open( + if encoding is None: + fileencoding = chardet.detect(open(filename, 'rb').read())['encoding'] + else: + fileencoding = encoding + + with codecs.open( filename, mode=mode, - encoding=chardet.detect(open(filename, "r").read())['encoding'], + encoding=fileencoding, errors='replace', - ) - -def basestring2unicode(arg): - """Return the unicode version of the argument, guessing original encoding. - """ - if isinstance(arg, unicode): - return arg - elif isinstance(arg, basestring): - return arg.decode( - encoding=chardet.detect(arg)['encoding'], - errors='replace', - ) - else: - LOGGER.warning("Cannot decode string {}. Ignored.".format(str(arg))) - return "" - -def list2unicode(arg): - """Return the unicode version of the argument, guessing original encoding. - - Argument is a list of strings. If an item is of another type, it is - silently ignored (an empty string is returned). - """ - return [basestring2unicode(item) for item in arg] - -def unidecode(arg): - """Return a unicode version of a unidecoded string.""" - return unicode(unidecode_orig(arg)) + ) as fileobject: + yield fileobject diff --git a/patacrep/errors.py b/patacrep/errors.py index ff3d210d..a065c322 100644 --- a/patacrep/errors.py +++ b/patacrep/errors.py @@ -17,10 +17,7 @@ class SBFileError(SongbookError): self.message = message def __str__(self): - if self.message is None: - return str(self.original) - else: - return self.message + return self.message class TemplateError(SongbookError): """Error during template generation""" diff --git a/patacrep/files.py b/patacrep/files.py index 48928a00..5c84e0b0 100644 --- a/patacrep/files.py +++ b/patacrep/files.py @@ -2,23 +2,35 @@ """File system utilities.""" from contextlib import contextmanager -import fnmatch +import glob +import importlib +import logging import os import posixpath +import re +import sys -def recursive_find(root_directory, pattern): - """Recursively find files matching a pattern, from a root_directory. +LOGGER = logging.getLogger(__name__) - Return a list of files matching the pattern. +def recursive_find(root_directory, extensions): + """Recursively find files with some extension, from a root_directory. + + Return a list of files matching those conditions. + + Arguments: + - `extensions`: list of accepted extensions. + - `root_directory`: root directory of the search. """ if not os.path.isdir(root_directory): return [] matches = [] + pattern = re.compile(r'.*\.({})$'.format('|'.join(extensions))) with chdir(root_directory): - for root, _, filenames in os.walk(os.curdir): - for filename in fnmatch.filter(filenames, pattern): - matches.append(os.path.join(root, filename)) + for root, __ignored, filenames in os.walk(os.curdir): + for filename in filenames: + if pattern.match(filename): + matches.append(os.path.join(root, filename)) return matches def relpath(path, start=None): @@ -59,3 +71,63 @@ def chdir(path): os.chdir(olddir) else: yield + +def load_plugins(datadirs, subdir, variable, error): + """Load all content plugins, and return a dictionary of those plugins. + + A plugin is a .py file, submodule of `subdir`, located in one of the + directories of `datadirs`. It contains a dictionary `variable`. The return + value is the union of the dictionaries of the loaded plugins. + + Arguments: + - datadirs: list of directories (as strings) in which files has to be + searched. + - subdir: modules (as a list of strings) files has to be submodules of + (e.g. if `subdir` is `['first', 'second']`, search files are of the form + `first/second/*.py`. + - variable: Name of the variable holding the dictionary. + - error: Error message raised if a key appears several times. + """ + plugins = {} + directory_list = ( + [ + os.path.join(datadir, "python", *subdir) #pylint: disable=star-args + for datadir in datadirs + ] + + [os.path.dirname(__file__)] + ) + for directory in directory_list: + if not os.path.exists(directory): + LOGGER.debug( + "Ignoring non-existent directory '%s'.", + directory + ) + continue + sys.path.append(directory) + for name in glob.glob(os.path.join(directory, *(subdir + ['*.py']))): + if name.endswith(".py") and os.path.basename(name) != "__init__.py": + if directory == os.path.dirname(__file__): + plugin = importlib.import_module( + 'patacrep.{}.{}'.format( + ".".join(subdir), + os.path.basename(name[:-len('.py')]) + ) + ) + else: + plugin = importlib.import_module( + os.path.basename(name[:-len('.py')]) + ) + for (key, value) in getattr(plugin, variable, {}).items(): + if key in plugins: + LOGGER.warning( + error.format( + filename=relpath(name), + key=key, + ) + ) + continue + plugins[key] = value + del sys.path[-1] + return plugins + + diff --git a/patacrep/index.py b/patacrep/index.py index 470bb67e..351e155e 100644 --- a/patacrep/index.py +++ b/patacrep/index.py @@ -8,17 +8,18 @@ from a file generated by the latex compilation of the songbook (.sxd). """ import locale +import unidecode import re from patacrep import authors from patacrep import encoding -from patacrep.plastex import simpleparse +from patacrep.latex import tex2plain -EOL = u"\n" +EOL = "\n" # Pattern set to ignore latex command in title prefix -KEYWORD_PATTERN = re.compile(ur"^%(\w+)\s?(.*)$", re.LOCALE) -FIRST_LETTER_PATTERN = re.compile(ur"^(?:\{?\\\w+\}?)*[^\w]*(\w)", re.LOCALE) +KEYWORD_PATTERN = re.compile(r"^%(\w+)\s?(.*)$", re.LOCALE) +FIRST_LETTER_PATTERN = re.compile(r"^(?:\{?\\\w+\}?)*[^\w]*(\w)", re.LOCALE) def process_sxd(filename): @@ -28,13 +29,9 @@ def process_sxd(filename): """ data = [] index_file = None - try: - index_file = encoding.open_read(filename, 'r') + with encoding.open_read(filename) as index_file: for line in index_file: data.append(line.strip()) - finally: - if index_file: - index_file.close() i = 1 idx = Index(data[0]) @@ -77,13 +74,13 @@ class Index(object): except AttributeError: # classify as number all the non letter characters letter = "0" - if re.match(ur'\d', letter): + if re.match(r'\d', letter): letter = '0-9' return letter.upper() def add_keyword(self, key, word): """Add 'word' to self.keywords[key].""" - if not key in self.keywords.keys(): + if not key in self.keywords: self.keywords[key] = [] self.keywords[key].append(word) @@ -93,7 +90,7 @@ class Index(object): if 'prefix' in self.keywords: for prefix in self.keywords['prefix']: self.prefix_patterns.append(re.compile( - ur"^({prefix})(\b|\\)(\s*.*)$".format(prefix=prefix), + r"^({prefix})(\b|\\)(\s*.*)$".format(prefix=prefix), re.LOCALE )) @@ -107,12 +104,12 @@ class Index(object): similar method with processing. """ first = self.get_first_letter(key[0]) - if not first in self.data.keys(): + if not first in self.data: self.data[first] = dict() - if not key in self.data[first].keys(): + if not key in self.data[first]: self.data[first][key] = { 'sortingkey': [ - encoding.unidecode(simpleparse(item)).lower() + unidecode.unidecode(tex2plain(item)).lower() for item in key ], 'entries': [], @@ -150,26 +147,26 @@ class Index(object): @staticmethod def ref_to_str(ref): """Return the LaTeX code corresponding to the reference.""" - return ur'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref) + return r'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref) def key_to_str(self, key): """Convert the key (title or author) to the LaTeX command rendering it. """ if self.indextype == "AUTHOR": - return ur"\indexauthor{{{first}}}{{{last}}}".format( + return r"\indexauthor{{{first}}}{{{last}}}".format( first=key[1], last=key[0], ) if self.indextype == "TITLE": - return ur"\indextitle{{{0[1]}}}{{{0[0]}}}".format(key) + return r"\indextitle{{{0[1]}}}{{{0[0]}}}".format(key) def entry_to_str(self, key, entry): """Return the LaTeX code corresponding to the entry.""" - return unicode(ur'\idxentry{{{0}}}{{{1}}}' + EOL).format( + return (r'\idxentry{{{0}}}{{{1}}}' + EOL).format( self.key_to_str(key), - ur'\\'.join([self.ref_to_str(ref) for ref in entry]), + r'\\'.join([self.ref_to_str(ref) for ref in entry]), ) def idxblock_to_str(self, letter, entries): @@ -185,10 +182,10 @@ class Index(object): for item in entries[key]['sortingkey'] ] - string = ur'\begin{idxblock}{' + letter + '}' + EOL + string = r'\begin{idxblock}{' + letter + '}' + EOL for key in sorted(entries, key=sortkey): string += self.entry_to_str(key, entries[key]['entries']) - string += ur'\end{idxblock}' + EOL + string += r'\end{idxblock}' + EOL return string def entries_to_str(self): diff --git a/patacrep/latex/__init__.py b/patacrep/latex/__init__.py new file mode 100644 index 00000000..8d0efed9 --- /dev/null +++ b/patacrep/latex/__init__.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- + +"""Very simple LaTeX parser + +This module uses an LALR parser to try to parse LaTeX code. LaTeX language +*cannot* be parsed by an LALR parser, so this is a very simple attemps, which +will work on simple cases, but not on complex ones. +""" + +from patacrep.latex.syntax import tex2plain +from patacrep.latex.syntax import parsesong as syntax_parsesong +from patacrep import encoding + +def parsesong(path, fileencoding=None): + """Return a dictonary of data read from the latex file `path`. + + """ + with encoding.open_read(path, encoding=fileencoding) as songfile: + data = syntax_parsesong(songfile.read(), path) + data['@path'] = path + return data diff --git a/patacrep/latex/ast.py b/patacrep/latex/ast.py new file mode 100644 index 00000000..f8112525 --- /dev/null +++ b/patacrep/latex/ast.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +"""Abstract Syntax Tree for LaTeX code.""" + +# pylint: disable=too-few-public-methods + +class AST: + """Base class for the tree.""" + # pylint: disable=no-init + + metadata = None + + @classmethod + def init_metadata(cls): + """Clear metadata + + As this attribute is a class attribute, it as to be reset at each new + parsing. + """ + cls.metadata = { + '@languages': set(), + } + +class Expression(AST): + """LaTeX expression""" + + def __init__(self, value): + super().__init__() + self.content = [value] + + def prepend(self, value): + """Add a value at the beginning of the content list.""" + if value is not None: + self.content.insert(0, value) + return self + + def __str__(self): + return "".join([str(item) for item in self.content]) + +class Command(AST): + """LaTeX command""" + + def __init__(self, name, optional, mandatory): + self.name = name + self.mandatory = mandatory + self.optional = optional + + if name == r'\selectlanguage': + self.metadata['@languages'] |= set(self.mandatory) + + def __str__(self): + if self.name in [r'\emph']: + return str(self.mandatory[0]) + return "{}{}{}".format( + self.name, + "".join(["[{}]".format(item) for item in self.optional]), + "".join(["{{{}}}".format(item) for item in self.mandatory]), + ) + + +class BeginSong(AST): + """Beginsong command""" + + def __init__(self, titles, arguments): + self.titles = titles + self.arguments = arguments diff --git a/patacrep/latex/detex.py b/patacrep/latex/detex.py new file mode 100644 index 00000000..d99c3467 --- /dev/null +++ b/patacrep/latex/detex.py @@ -0,0 +1,123 @@ +# -*- coding: utf-8 -*- +"""Render `very simple` TeX commands in a simple TeX code.""" + +import logging + +LOGGER = logging.getLogger() + +MATCH = [ + # Diacritics: a + (r"\'a", "á"), + (r"\'A", "Á"), + (r"\`a", "à"), + (r"\`A", "À"), + (r"\^a", "â"), + (r"\^A", "Â"), + (r"\"a", "ä"), + (r"\"A", "Ä"), + + # Diacritics: e + (r"\'e", "é"), + (r"\'E", "É"), + (r"\`e", "è"), + (r"\`E", "È"), + (r"\^e", "ê"), + (r"\^E", "Ê"), + (r"\"e", "ë"), + (r"\"E", "Ë"), + + # Diacritics: i + (r"\'i", "í"), + (r"\'I", "Í"), + (r"\`i", "ì"), + (r"\`I", "Ì"), + (r"\^i", "î"), + (r"\^I", "Î"), + (r"\"i", "ï"), + (r"\"I", "Ï"), + (r"\'\i", "í"), + (r"\'\I", "Í"), + (r"\`\i", "ì"), + (r"\`\I", "Ì"), + (r"\^\i", "î"), + (r"\^\I", "Î"), + (r"\"\i", "ï"), + (r"\"\I", "Ï"), + + # Diacritics: o + (r"\'o", "ó"), + (r"\'O", "Ó"), + (r"\`o", "ò"), + (r"\`O", "Ò"), + (r"\^o", "ô"), + (r"\^O", "Ô"), + (r"\"o", "ö"), + (r"\"O", "Ö"), + + # Diacritics: u + (r"\'u", "ú"), + (r"\'U", "Ú"), + (r"\`u", "ù"), + (r"\`U", "Ù"), + (r"\^u", "û"), + (r"\^U", "Û"), + (r"\"u", "ü"), + (r"\"U", "Ü"), + + # Cedille + (r"\c c", "ç"), + (r"\c C", "Ç"), + + # œ, æ + (r"\oe", "œ"), + (r"\OE", "Œ"), + (r"\ae", "æ"), + (r"\AE", "Æ"), + + # Spaces + (r"\ ", " "), + (r"\,", " "), + (r"\~", " "), + + # IeC + (r"\IeC ", ""), + + # Miscallenous + (r"\dots", "…"), + (r"\%", "%"), + (r"\&", "&"), + (r"\_", "_"), + + ] + + +def detex(arg): + """Render very simple TeX commands from argument. + + Argument can be: + - a string: it is processed; + - a list, dict or set: its values are processed. + """ + if isinstance(arg, dict): + return dict([ + (key, detex(value)) + for (key, value) + in arg.items() + ]) + elif isinstance(arg, list): + return [ + detex(item) + for item + in arg + ] + elif isinstance(arg, set): + return set(detex(list(arg))) + elif isinstance(arg, str): + string = arg + for (latex, plain) in MATCH: + string = string.replace(latex, plain) + if '\\' in string: + LOGGER.warning("Remaining command in string '{}'.".format(string)) + return string.strip() + else: + return detex(str(arg)) diff --git a/patacrep/latex/lexer.py b/patacrep/latex/lexer.py new file mode 100644 index 00000000..c90303da --- /dev/null +++ b/patacrep/latex/lexer.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- +"""Very simple LaTeX lexer.""" + +import logging +import ply.lex as lex + +LOGGER = logging.getLogger() + +#pylint: disable=invalid-name +tokens = ( + 'LBRACKET', + 'RBRACKET', + 'LBRACE', + 'RBRACE', + 'COMMAND', + 'NEWLINE', + 'COMMA', + 'EQUAL', + 'CHARACTER', + 'SPACE', + 'BEGINSONG', + 'SONG_LTITLE', + 'SONG_RTITLE', + 'SONG_LOPTIONS', + 'SONG_ROPTIONS', +) + +class SimpleLexer: + """Very simple LaTeX lexer.""" + + tokens = tokens + + t_LBRACKET = r'\[' + t_RBRACKET = r'\]' + t_LBRACE = r'{' + t_RBRACE = r'}' + t_COMMAND = r'\\([@a-zA-Z]+|[^\\])' + t_NEWLINE = r'\\\\' + SPECIAL_CHARACTERS = ( + t_LBRACKET + + t_RBRACKET + + t_RBRACE + + t_LBRACE + + r"\\" + + r" " + + r"\n" + + r"\r" + + r"%" + + r"=" + + r"," + ) + t_CHARACTER = r'[^{}]'.format(SPECIAL_CHARACTERS) + t_EQUAL = r'=' + t_COMMA = r',' + + t_SPACE = r'[ \t\n\r]+' + + def __init__(self): + self.__class__.lexer = lex.lex(module=self) + + # Define a rule so we can track line numbers + @staticmethod + def t_newline(token): + r'\n+' + token.lexer.lineno += len(token.value) + + @staticmethod + def t_comment(token): + r'%.*' + pass + + # Error handling rule + @staticmethod + def t_error(token): + """Manage errors""" + LOGGER.error("Illegal character '{}'".format(token.value[0])) + token.lexer.skip(1) + +class SongLexer(SimpleLexer): + r"""Very simple song lexer. + + In the context of this class, a "song" is some LaTeX code containing the + ``\beginsong`` (or ``\sortassong``) command. + """ + + states = ( + ('beginsong', 'inclusive'), + ) + + # State beginsong + @staticmethod + def t_INITIAL_BEGINSONG(token): + r'(\\beginsong|\\sortassong)' + token.lexer.push_state('beginsong') + token.lexer.open_brackets = 0 + token.lexer.open_braces = 0 + return token + + @staticmethod + def t_beginsong_LBRACKET(token): + r'\[' + if token.lexer.open_brackets == 0: + token.type = 'SONG_LOPTIONS' + + # Count opening and closing braces to know when to leave the + # `beginsong` state. + token.lexer.open_braces += 1 + token.lexer.open_brackets += 1 + return token + + @staticmethod + def t_beginsong_RBRACKET(token): + r'\]' + token.lexer.open_brackets -= 1 + if token.lexer.open_brackets == 0: + token.type = 'SONG_ROPTIONS' + token.lexer.open_braces -= 1 + token.lexer.pop_state() + for __ignored in token.lexer: + # In this parser, we only want to read metadata. So, after the + # first ``\beginsong`` command, we can stop parsing. + pass + return token + + @staticmethod + def t_beginsong_LBRACE(token): + r'{' + if token.lexer.open_braces == 0: + token.type = 'SONG_LTITLE' + token.lexer.open_braces += 1 + return token + + @staticmethod + def t_beginsong_RBRACE1(token): + r'}(?![ \t\r\n]*\[)' + token.lexer.open_braces -= 1 + token.type = 'RBRACE' + if token.lexer.open_braces == 0: + token.lexer.pop_state() + token.type = 'SONG_RTITLE' + return token + + @staticmethod + def t_beginsong_RBRACE2(token): + r'}(?=[ \t\r\n]*\[)' + token.lexer.open_braces -= 1 + token.type = 'RBRACE' + if token.lexer.open_braces == 0: + token.type = 'SONG_RTITLE' + return token + diff --git a/patacrep/latex/syntax.py b/patacrep/latex/syntax.py new file mode 100644 index 00000000..896d8442 --- /dev/null +++ b/patacrep/latex/syntax.py @@ -0,0 +1,256 @@ +"""Very simple LaTeX parser""" + +import logging +import ply.yacc as yacc + +from patacrep.latex.lexer import tokens, SimpleLexer, SongLexer +from patacrep.latex import ast +from patacrep.errors import SongbookError +from patacrep.latex.detex import detex + +LOGGER = logging.getLogger() + +class ParsingError(SongbookError): + """Parsing error.""" + + def __init__(self, message): + super().__init__(self) + self.message = message + + def __str__(self): + return self.message + +# pylint: disable=line-too-long +class Parser: + """LaTeX parser.""" + + def __init__(self, filename=None): + self.tokens = tokens + self.ast = ast.AST + self.ast.init_metadata() + self.filename = filename + + @staticmethod + def __find_column(token): + """Return the column of ``token``.""" + last_cr = token.lexer.lexdata.rfind('\n', 0, token.lexpos) + if last_cr < 0: + last_cr = 0 + column = (token.lexpos - last_cr) + 1 + return column + + def p_error(self, token): + """Manage parsing errors.""" + LOGGER.error( + "Error in file {}, line {} at position {}.".format( + str(self.filename), + token.lineno, + self.__find_column(token), + ) + ) + + @staticmethod + def p_expression(symbols): + """expression : brackets expression + | braces expression + | command expression + | NEWLINE expression + | beginsong expression + | word expression + | SPACE expression + | empty + """ + if len(symbols) == 3: + if symbols[2] is None: + symbols[0] = ast.Expression(symbols[1]) + else: + symbols[0] = symbols[2].prepend(symbols[1]) + else: + symbols[0] = None + + @staticmethod + def p_empty(__symbols): + """empty :""" + return None + + @staticmethod + def p_brackets(symbols): + """brackets : LBRACKET expression RBRACKET""" + symbols[0] = symbols[2] + + @staticmethod + def p_braces(symbols): + """braces : LBRACE expression RBRACE""" + symbols[0] = symbols[2] + + @staticmethod + def p_command(symbols): + """command : COMMAND brackets_list braces_list""" + symbols[0] = ast.Command(symbols[1], symbols[2], symbols[3]) + + @staticmethod + def p_brackets_list(symbols): + """brackets_list : brackets brackets_list + | empty + """ + if len(symbols) == 3: + symbols[0] = symbols[2] + symbols[0].insert(0, symbols[1]) + else: + symbols[0] = [] + + @staticmethod + def p_braces_list(symbols): + """braces_list : braces braces_list + | empty + """ + if len(symbols) == 3: + symbols[0] = symbols[2] + symbols[0].insert(0, symbols[1]) + else: + symbols[0] = [] + + @staticmethod + def p_word(symbols): + """word : CHARACTER word_next + | COMMA word_next + | EQUAL word_next + """ + symbols[0] = symbols[1] + symbols[2] + + @staticmethod + def p_word_next(symbols): + """word_next : CHARACTER word_next + | empty + """ + if len(symbols) == 2: + symbols[0] = "" + else: + symbols[0] = symbols[1] + symbols[2] + + def p_beginsong(self, symbols): + """beginsong : BEGINSONG separator songbraces separator songbrackets""" + self.ast.metadata["@titles"] = symbols[3] + self.ast.metadata.update(symbols[5]) + + @staticmethod + def p_songbrackets(symbols): + """songbrackets : SONG_LOPTIONS separator dictionary separator SONG_ROPTIONS + | empty + """ + if len(symbols) == 6: + symbols[0] = symbols[3] + else: + symbols[0] = {} + + @staticmethod + def p_songbraces(symbols): + """songbraces : SONG_LTITLE separator titles separator SONG_RTITLE + | empty + """ + if len(symbols) == 6: + symbols[0] = symbols[3] + else: + symbols[0] = [] + + @staticmethod + def p_dictionary(symbols): + """dictionary : identifier EQUAL braces dictionary_next + | identifier EQUAL error dictionary_next + """ + if isinstance(symbols[3], ast.Expression): + symbols[0] = {} + symbols[0][symbols[1]] = symbols[3] + symbols[0].update(symbols[4]) + else: + raise ParsingError("Do enclose arguments between braces.") + + @staticmethod + def p_identifier(symbols): + """identifier : CHARACTER identifier + | empty + """ + if len(symbols) == 2: + symbols[0] = "" + else: + symbols[0] = symbols[1] + symbols[2] + + @staticmethod + def p_separator(symbols): + """separator : SPACE + | empty + """ + symbols[0] = None + + @staticmethod + def p_dictonary_next(symbols): + """dictionary_next : separator COMMA separator dictionary + | empty + """ + if len(symbols) == 5: + symbols[0] = symbols[4] + else: + symbols[0] = {} + + @staticmethod + def p_titles(symbols): + """titles : title titles_next""" + symbols[0] = [symbols[1]] + symbols[2] + + @staticmethod + def p_titles_next(symbols): + """titles_next : NEWLINE title titles_next + | empty + """ + if len(symbols) == 2: + symbols[0] = [] + else: + symbols[0] = [symbols[2]] + symbols[3] + + @staticmethod + def p_title(symbols): + """title : brackets title + | braces title + | command title + | word title + | SPACE title + | empty + """ + if len(symbols) == 2: + symbols[0] = None + else: + if symbols[2] is None: + symbols[0] = ast.Expression(symbols[1]) + else: + symbols[0] = symbols[2].prepend(symbols[1]) + +def silent_yacc(*args, **kwargs): + """Call yacc, suppressing (as far as possible) output and generated files. + """ + return yacc.yacc( + write_tables=0, + debug=0, + *args, + **kwargs + ) + +def tex2plain(string): + """Parse string and return its plain text version.""" + return detex( + silent_yacc( + module=Parser(), + ).parse( + string, + lexer=SimpleLexer().lexer, + ) + ) + +def parsesong(string, filename=None): + """Parse song and return its metadata.""" + return detex( + silent_yacc(module=Parser(filename)).parse( + string, + lexer=SongLexer().lexer, + ).metadata + ) + diff --git a/patacrep/plastex.py b/patacrep/plastex.py deleted file mode 100644 index cdaa3a64..00000000 --- a/patacrep/plastex.py +++ /dev/null @@ -1,117 +0,0 @@ -# -*- coding: utf-8 -*- - -"""PlasTeX module to process song files.""" - -from plasTeX.TeX import TeX -from plasTeX.Base.LaTeX import Sentences - -import locale -import os -import sys - -from patacrep import encoding - -def process_unbr_spaces(node): - #pylint: disable=line-too-long - r"""Replace '~' and '\ ' in node by nodes that - will be rendered as unbreakable space. - - Return node object for convenience. - - This function is a workaround to a bug that has been solved since: - - https://github.com/tiarno/plastex/commit/76bb78d5fbaac48e68025a3545286cc63cb4e7ad - - https://github.com/tiarno/plastex/commit/682a0d223b99d6b949bacf1c974d24dc9bb1d18e - - It can be deleted once this bug has been merged in production version of - PlasTeX. - """ - if (type(node) == Sentences.InterWordSpace or - (type(node) == Sentences.NoLineBreak and node.source == '~ ')): - node.unicode = unichr(160) - for child in node.childNodes: - process_unbr_spaces(child) - - return node - - -def simpleparse(text): - """Parse a simple LaTeX string. - """ - tex = TeX() - tex.disableLogging() - tex.input(text) - doc = tex.parse() - return process_unbr_spaces(doc.textContent) - - -class SongParser(object): - """Analyseur syntaxique de fichiers .sg""" - - @staticmethod - def create_tex(): - """Create a TeX object, ready to parse a tex file.""" - tex = TeX() - tex.disableLogging() - tex.ownerDocument.context.loadBaseMacros() - sys.path.append(os.path.dirname(__file__)) - tex.ownerDocument.context.loadPackage(tex, "plastex_patchedbabel") - tex.ownerDocument.context.loadPackage(tex, "plastex_chord") - tex.ownerDocument.context.loadPackage(tex, "plastex_songs") - tex.ownerDocument.context.loadPackage(tex, "plastex_misc_commands") - sys.path.pop() - return tex - - @classmethod - def parse(cls, filename): - """Parse a TeX file, and return its plasTeX representation.""" - tex = cls.create_tex() - tex.input(encoding.open_read(filename, 'r')) - return tex.parse() - - -def parsetex(filename): - r"""Analyse syntaxique d'un fichier .sg - - Renvoie un dictionnaire contenant les métadonnées lues dans le fichier. Les - clefs sont : - - languages: l'ensemble des langages utilisés (recherche des - \selectlanguages{}) ; - - titles: la liste des titres ; - - args: le dictionnaire des paramètres passés à \beginsong. - """ - # /* BEGIN plasTeX patch - # The following lines, and another line a few lines later, are used to - # circumvent a plasTeX bug. It has been reported and corrected : - # https://github.com/tiarno/plastex/commit/8f4e5a385f3cb6a04d5863f731ce24a7e856f2a4 - # To see if you can delete those lines, set your LC_TIME locale to French, - # during a month containing diacritics (e.g. Février), and run songbook. If - # no plasTeX bug appears, it is safe to remove those lines. - oldlocale = locale.getlocale(locale.LC_TIME) - locale.setlocale(locale.LC_TIME, 'C') - # plasTeX patch END */ - - # Analyse syntaxique - doc = SongParser.parse(filename) - - # /* BEGIN plasTeX patch - if oldlocale[0] and oldlocale[1]: - try: - locale.setlocale(locale.LC_TIME, "%s.%s" % oldlocale) - except locale.Error: - pass # Workaround a bug on windows - # plasTeX patch END */ - - # Extraction des données - data = { - "languages": set(), - "_doc": doc, - "_filename": filename, - } - for node in doc.allChildNodes: - if node.nodeName == "selectlanguage": - data["languages"].add(node.attributes['lang']) - if node.nodeName in ["beginsong", "sortassong"]: - data["titles"] = node.attributes["titles"] - data["args"] = node.attributes["args"] - - return data diff --git a/patacrep/plastex_chord.py b/patacrep/plastex_chord.py deleted file mode 100644 index dba9f36c..00000000 --- a/patacrep/plastex_chord.py +++ /dev/null @@ -1,181 +0,0 @@ -# -*- coding: utf-8 -*- - -r"""PlasTeX module to deal with chords commands of the songs LaTeX package - -Chords are set using commands like \[C]. This package parses those commands. -""" - -import logging - -import plasTeX -from plasTeX import Command, Environment, Macro -from plasTeX.Base.LaTeX.Math import BeginDisplayMath - -LOGGER = logging.getLogger(__name__) - -# Count the number of levels of 'verse' environment: IN_VERSE==1 means that we -# are in a 'verse' environment; IN_VERSE==2 means that we are in two included -# 'verse' environment, and so on. -IN_VERSE = 0 - -def wrap_displaymath(cls): - """Decorator to store the depth of 'verse' environment - - In the invoke() method classes, global variable IN_VERSE indicates the - number of 'verse' (or 'chorus' or 'verse*') environment we are in. - """ - - # pylint: disable=no-init,too-few-public-methods - class WrappedClass(cls): - """Wrapper to LaTeX environment updating IN_VERSE""" - blockType = True - # pylint: disable=super-on-old-class,global-statement,no-member - def invoke(self, tex): - """Wrapper to invoke() to update global variable IN_VERSE.""" - global IN_VERSE - if self.macroMode == Macro.MODE_BEGIN: - self.ownerDocument.context.push() - self.ownerDocument.context.catcode("\n", 13) - IN_VERSE += 1 - - # Removing spaces and line breaks at the beginning of verse - token = None - for token in tex: - if not match_space(token): - break - if token is not None: - tex.pushToken(token) - - else: - self.ownerDocument.context.pop() - IN_VERSE -= 1 - return super(WrappedClass, self).invoke(tex) - return WrappedClass - -# pylint: disable=too-many-public-methods -@wrap_displaymath -class Verse(Environment): - """LaTeX 'verse' environment""" - macroName = 'verse' - -# pylint: disable=too-many-public-methods -@wrap_displaymath -class VerseStar(Environment): - """LaTeX 'verse*' environment""" - macroName = 'verse*' - -# pylint: disable=too-many-public-methods -@wrap_displaymath -class Chorus(Environment): - """LaTeX 'chorus' environment""" - macroName = 'chorus' - -def match_space(token): - """Return True if token is a space or newline character.""" - return ( - isinstance(token, plasTeX.Tokenizer.Space) - or token.nodeName == 'active::\n' - ) - -def match_closing_square_bracket(token): - """Return True if token is character ']'.""" - return token.nodeType == token.TEXT_NODE and token.nodeValue == ']' - -def match_egroup(token): - """Return True if token is of type `egroup` (end of group).""" - return isinstance(token, plasTeX.Base.Text.egroup) #pylint: disable=no-member - -def match_space_or_chord(token): - """Return True if token is a space or a chord.""" - return match_space(token) or isinstance(token, Chord) - -def parse_until(tex, end=lambda x: False): - """Parse `tex` until condition `end`, or `egroup` is met. - - Arguments: - - tex: object to parse - - end: function taking a token in argument, and returning a boolean. - Parsing stops when this function returns True, or an `egroup` is met. - - Return: a tuple of two items (the list of parsed tokens, last token). This - is done so that caller can decide whether they want to discard it or not. - Last token can be None if everything has been parsed without the end - condition being met. - """ - parsed = [] - last = None - for token in tex: - if end(token) or match_egroup(token): - last = token - break - elif isinstance(token, plasTeX.Base.Text.bgroup): #pylint: disable=no-member - # pylint: disable=expression-not-assigned - [token.appendChild(item) for item in parse_until(tex, match_egroup)[0]] - parsed.append(token) - return (parsed, last) - - -class Chord(Command): - """Beginning of a chord notation""" - macroName = 'chord' - macroMode = Command.MODE_NONE - -class BeginChordOrDisplayMath(BeginDisplayMath): - r"""Wrapper to BeginDisplayMath - - In a 'verse' (or 'verse*' or 'chorus') environment, the '\[' macro - displays a chord. Otherwise, it corresponds to the usual LaTeX math mode. - This class calls the right method, depending on the inclusion of this - macro in a verse environment. - """ - macroName = '[' - - def invoke(self, tex): - """Process this macro""" - if IN_VERSE: - chord = Chord() - - self.ownerDocument.context.push() #pylint: disable=no-member - self.ownerDocument.context.catcode("&", 13) #pylint: disable=no-member - chord.setAttribute( - 'name', - parse_until(tex, match_closing_square_bracket)[0], - ) - self.ownerDocument.context.pop() #pylint: disable=no-member - - token = next(iter(tex), None) - if token is None: - return [chord] - elif match_space(token): - return [chord, token] - elif ( - isinstance(token, Verse) - or isinstance(token, VerseStar) - or isinstance(token, Chorus) - ): - LOGGER.warning(( - "{} L{}: '\\end{{verse}}' (or 'verse*' or 'chorus') not " - "allowed directly after '\\['." - ).format(tex.filename, tex.lineNumber) - ) - return [chord] - elif isinstance(token, Chord): - token.attributes['name'] = ( - chord.attributes['name'] - + token.attributes['name'] - ) - chord = token - return [chord] - elif isinstance(token, plasTeX.Base.Text.bgroup): #pylint: disable=no-member - # pylint: disable=expression-not-assigned - [chord.appendChild(item) for item in parse_until(tex)[0]] - return [chord] - else: - chord.appendChild(token) - (parsed, last) = parse_until(tex, match_space_or_chord) - # pylint: disable=expression-not-assigned - [chord.appendChild(item) for item in parsed] - return [chord, last] - else: - return super(BeginChordOrDisplayMath, self).invoke(tex) - diff --git a/patacrep/plastex_misc_commands.py b/patacrep/plastex_misc_commands.py deleted file mode 100644 index 4b4b2602..00000000 --- a/patacrep/plastex_misc_commands.py +++ /dev/null @@ -1,15 +0,0 @@ -# -*- coding: utf-8 -*- - -"""Quick management of random LaTeX commands.""" - -from plasTeX import Command - -# pylint: disable=invalid-name,too-many-public-methods -class songcolumns(Command): - r"""Manage `\songcolumns` command""" - args = '{num:int}' - -# pylint: disable=invalid-name,too-many-public-methods -class gtab(Command): - r"""Manage `\gta` command""" - args = '{chord:str}{diagram:str}' diff --git a/patacrep/plastex_patchedbabel.py b/patacrep/plastex_patchedbabel.py deleted file mode 100644 index e20d3086..00000000 --- a/patacrep/plastex_patchedbabel.py +++ /dev/null @@ -1,58 +0,0 @@ -# -*- coding: utf-8 -*- - -r"""Patch pour le paquet Babel de PlasTeX - -Un bug dans PlasTeX intervient lorsqu'on essaye d'analyser une commande LaTeX -\selectlanguage{}, que nous voulons utiliser ici. Un patch a été proposé aux -développeurs de plasTeX, et accepté. Mais il faut que cette correction arrive -en production. En attendant, nous utilisons cette version modifiée. - -Dés que la correction sera entrée en production, il faudra supprimer ce -fichier, et remplater l'occurence à "patchedbabel" par "babel" dans le fichier -"plastex.py". -La correction à suveiller est la révision -41a48c0c229dd46b69fb0e3720595000a71b17d8 du fichier babel.py : -https://github.com/tiarno/plastex/commit/41a48c0c229dd46b69fb0e3720595000a71b17d8 - -# Comment vérifier si on peut supprimer ce fichier ? - -1) Remplacer l'occurence à patchedbabel par babel dans le fichier plastex.py. - -2) Générer un fichier .tex à partir d'un fichier .sb, ce dernier faisant -intervenir des chansons dans lesquelles \selectlanguage est utilisé (par -exemple, "make -B matteo.tex" ou "make -B naheulbeuk.tex" pour des fichiers pas -trop gros. - -3) Si l'erreur suivante apparaît, c'est qu'il faut encore attendre. - -> Traceback (most recent call last): -> [...] -> File "/usr/lib/pymodules/python2.7/plasTeX/Packages/babel.py", line 18, in -> invoke context.loadLanguage(self.attributes['lang'], self.ownerDocument) -> NameError: global name 'context' is not defined - -3 bis) Si elle n'apparait pas : youpi ! Supprimez ce fichier ! - -# Contact et commentaires - -Mercredi 27 mars 2013 -Louis - -""" - -from plasTeX import Command - -# pylint: disable=invalid-name,too-many-public-methods -class selectlanguage(Command): - """Patch of vanilla selectlanguage class. - - See module docstring for more information.""" - args = 'lang:str' - - def invoke(self, tex): - res = Command.invoke(self, tex) - self.ownerDocument.context.loadLanguage( # pylint: disable=no-member - self.attributes['lang'], - self.ownerDocument - ) - return res diff --git a/patacrep/plastex_songs.py b/patacrep/plastex_songs.py deleted file mode 100644 index 5bf5041a..00000000 --- a/patacrep/plastex_songs.py +++ /dev/null @@ -1,70 +0,0 @@ -# -*- coding: utf-8 -*- - -"""Module to process song LaTeX environment. -""" - -import plasTeX - -from patacrep import encoding -from patacrep.plastex import process_unbr_spaces - - -def split_linebreak(texlist): - """Return a list of alternative title. - - A title can be defined with alternative names : - - A real name\\ - Alternative name\\ - Another alternative name - - This function takes the object representation of a list of titles, and - return a list of titles. - """ - return_list = [] - current = [] - for token in texlist: - if token.nodeName == '\\': - return_list.append(current) - current = [] - else: - current.append(encoding.basestring2unicode( - process_unbr_spaces(token).textContent - )) - if current: - return_list.append(current) - return return_list - - -class beginsong(plasTeX.Command): # pylint: disable=invalid-name,too-many-public-methods - """Class parsing the LaTeX song environment.""" - - args = '{titles}[args:dict]' - - def invoke(self, tex): - """Parse an occurence of song environment.""" - - plasTeX.Command.invoke(self, tex) - - # Parsing title - titles = [] - for tokens in split_linebreak(self.attributes['titles'].allChildNodes): - titles.append("".join(tokens)) - self.attributes['titles'] = encoding.list2unicode(titles) - - # Parsing keyval arguments - args = {} - for (key, val) in self.attributes['args'].iteritems(): - if isinstance(val, plasTeX.DOM.Element): - args[key] = encoding.basestring2unicode( - process_unbr_spaces(val).textContent - ) - elif isinstance(val, basestring): - args[key] = encoding.basestring2unicode(val) - else: - args[key] = unicode(val) - self.attributes['args'] = args - -class sortassong(beginsong): # pylint: disable=invalid-name,too-many-public-methods - r"""Treat '\sortassong' exactly as if it were a '\beginsong'.""" - pass diff --git a/patacrep/songbook.py b/patacrep/songbook.py new file mode 100755 index 00000000..8417be91 --- /dev/null +++ b/patacrep/songbook.py @@ -0,0 +1,157 @@ +#! /usr/bin/env python3 +# -*- coding: utf-8 -*- + +"""Command line tool to compile songbooks using the songbook library.""" + +import argparse +import json +import locale +import logging +import os.path +import textwrap +import sys + +from patacrep.build import SongbookBuilder, DEFAULT_STEPS +from patacrep import __version__ +from patacrep import errors +import patacrep.encoding + +# Logging configuration +logging.basicConfig(level=logging.INFO) +LOGGER = logging.getLogger() + +# pylint: disable=too-few-public-methods +class ParseStepsAction(argparse.Action): + """Argparse action to split a string into a list.""" + def __call__(self, __parser, namespace, values, __option_string=None): + if not getattr(namespace, self.dest): + setattr(namespace, self.dest, []) + setattr( + namespace, + self.dest, + ( + getattr(namespace, self.dest) + + [value.strip() for value in values[0].split(',')] + ), + ) + +class VerboseAction(argparse.Action): + """Set verbosity level with option --verbose.""" + def __call__(self, *_args, **_kwargs): + LOGGER.setLevel(logging.DEBUG) + +def argument_parser(args): + """Parse arguments""" + parser = argparse.ArgumentParser(description="A song book compiler") + + parser.add_argument('--version', help='Show version', action='version', + version='%(prog)s ' + __version__) + + parser.add_argument('book', nargs=1, help=textwrap.dedent("""\ + Book to compile. + """)) + + parser.add_argument('--datadir', '-d', nargs='+', type=str, action='append', + help=textwrap.dedent("""\ + Data location. Expected (not necessarily required) + subdirectories are 'songs', 'img', 'latex', 'templates'. + """)) + + parser.add_argument('--verbose', '-v', nargs=0, action=VerboseAction, + help=textwrap.dedent("""\ + Show details about the compilation process. + """)) + + parser.add_argument('--steps', '-s', nargs=1, type=str, + action=ParseStepsAction, + help=textwrap.dedent("""\ + Steps to run. Default is "{steps}". + Available steps are: + "tex" produce .tex file from templates; + "pdf" compile .tex file; + "sbx" compile index files; + "clean" remove temporary files; + any string beginning with '%%' (in this case, it will be run + in a shell). Several steps (excepted the custom shell + command) can be combinend in one --steps argument, as a + comma separated string. + """.format(steps=','.join(DEFAULT_STEPS))), + default=None, + ) + + options = parser.parse_args(args) + + return options + + +def main(): + """Main function:""" + + # set script locale to match user's + try: + locale.setlocale(locale.LC_ALL, '') + except locale.Error as error: + # Locale is not installed on user's system, or wrongly configured. + LOGGER.error("Locale error: {}\n".format(str(error))) + + options = argument_parser(sys.argv[1:]) + + songbook_path = options.book[0] + + basename = os.path.basename(songbook_path)[:-3] + + try: + with patacrep.encoding.open_read(songbook_path) as songbook_file: + songbook = json.load(songbook_file) + if 'encoding' in songbook: + with patacrep.encoding.open_read( + songbook_path, + encoding=songbook['encoding'] + ) as songbook_file: + songbook = json.load(songbook_file) + except Exception as error: # pylint: disable=broad-except + LOGGER.error(error) + LOGGER.error("Error while loading file '{}'.".format(songbook_path)) + sys.exit(1) + + # Gathering datadirs + datadirs = [] + if options.datadir: + # Command line options + datadirs += [item[0] for item in options.datadir] + if 'datadir' in songbook: + # .sg file + if isinstance(songbook['datadir'], str): + songbook['datadir'] = [songbook['datadir']] + datadirs += [ + os.path.join( + os.path.dirname(os.path.abspath(songbook_path)), + path + ) + for path in songbook['datadir'] + ] + # Default value + datadirs.append(os.path.dirname(os.path.abspath(songbook_path))) + + songbook['datadir'] = datadirs + + try: + sb_builder = SongbookBuilder(songbook, basename) + sb_builder.unsafe = True + + sb_builder.build_steps(options.steps) + except errors.SongbookError as error: + LOGGER.error(error) + if LOGGER.level >= logging.INFO: + LOGGER.error( + "Running again with option '-v' may give more information." + ) + sys.exit(1) + except KeyboardInterrupt: + LOGGER.warning("Aborted by user.") + sys.exit(1) + + sys.exit(0) + +if __name__ == '__main__': + main() diff --git a/patacrep/songs.py b/patacrep/songs/__init__.py similarity index 57% rename from patacrep/songs.py rename to patacrep/songs/__init__.py index 961d9f37..0a1072f9 100644 --- a/patacrep/songs.py +++ b/patacrep/songs/__init__.py @@ -4,17 +4,14 @@ import errno import hashlib +import jinja2 import logging import os +import pickle import re -try: - import cPickle as pickle -except ImportError: - import pickle - from patacrep.authors import processauthors -from patacrep.plastex import parsetex +from patacrep.content import Content LOGGER = logging.getLogger(__name__) @@ -66,21 +63,33 @@ class DataSubpath(object): self.subpath = os.path.join(self.subpath, path) return self -# pylint: disable=too-few-public-methods, too-many-instance-attributes -class Song(object): - """Song management""" +# pylint: disable=too-many-instance-attributes +class Song(Content): + """Song (or song metadata) + + This class represents a song, bound to a file. + + - It can parse the file given in arguments. + - It can render the song as some LaTeX code. + - Its content is cached, so that if the file has not been changed, the + file is not parsed again. + + This class is inherited by classes implementing song management for + several file formats. Those subclasses must implement: + - `parse()` to parse the file; + - `render()` to render the song as LaTeX code. + """ # Version format of cached song. Increment this number if we update # information stored in cache. - CACHE_VERSION = 0 + CACHE_VERSION = 1 # List of attributes to cache cached_attributes = [ "titles", "unprefixed_titles", - "args", - "datadir", - "fullpath", + "cached", + "data", "subpath", "languages", "authors", @@ -90,6 +99,9 @@ class Song(object): def __init__(self, datadir, subpath, config): self.fullpath = os.path.join(datadir, subpath) + self.datadir = datadir + self.encoding = config["encoding"] + if datadir: # Only songs in datadirs are cached self._filehash = hashlib.md5( @@ -113,9 +125,14 @@ class Song(object): self.fullpath )) - # Data extraction from the song with plastex - data = parsetex(self.fullpath) - self.titles = data['titles'] + # Default values + self.data = {} + self.titles = [] + self.languages = [] + self.authors = [] + + # Parsing and data processing + self.parse() self.datadir = datadir self.unprefixed_titles = [ unprefixed_title( @@ -125,16 +142,16 @@ class Song(object): for title in self.titles ] - self.args = data['args'] self.subpath = subpath - self.languages = data['languages'] - if "by" in self.args.keys(): - self.authors = processauthors( - self.args["by"], - **config["_compiled_authwords"] - ) - else: - self.authors = [] + self.authors = processauthors( + self.authors, + **config["_compiled_authwords"] + ) + + # Cache management + + #: Special attribute to allow plugins to store cached data + self.cached = None self._version = self.CACHE_VERSION self._write_cache() @@ -144,14 +161,7 @@ class Song(object): if self.datadir: cached = {} for attribute in self.cached_attributes: - if attribute == "args": - cached[attribute] = dict([ - (key, u"{}".format(value)) # Force conversion to unicode - for (key, value) - in self.args.iteritems() - ]) - else: - cached[attribute] = getattr(self, attribute) + cached[attribute] = getattr(self, attribute) pickle.dump( cached, open(cached_name(self.datadir, self.subpath), 'wb'), @@ -159,15 +169,58 @@ class Song(object): ) def __repr__(self): - return repr((self.titles, self.args, self.fullpath)) + return repr((self.titles, self.data, self.fullpath)) + + def begin_new_block(self, previous, __context): + """Return a boolean stating if a new block is to be created.""" + return not isinstance(previous, Song) + + def begin_block(self, context): + """Return the string to begin a block.""" + indexes = context.resolve("indexes") + if isinstance(indexes, jinja2.runtime.Undefined): + indexes = "" + return r'\begin{songs}{%s}' % indexes + + def end_block(self, __context): + """Return the string to end a block.""" + return r'\end{songs}' + + def render(self, __context): + """Returns the TeX code rendering the song. + + This function is to be defined by subclasses. + """ + return '' + + def parse(self): + """Parse file `self.fullpath`. + + This function is to be defined by subclasses. + + It set the following attributes: + + - titles: the list of (raw) titles. This list will be processed to + remove prefixes. + - languages: the list of languages used in the song, as languages + recognized by the LaTeX babel package. + - authors: the list of (raw) authors. This list will be processed to + 'clean' it (see function :func:`patacrep.authors.processauthors`). + - data: song metadata. Used (among others) to sort the songs. + - cached: additional data that will be cached. Thus, data stored in + this attribute must be picklable. + """ + self.data = {} + self.titles = [] + self.languages = [] + self.authors = [] def unprefixed_title(title, prefixes): """Remove the first prefix of the list in the beginning of title (if any). """ for prefix in prefixes: - match = re.compile(ur"^(%s)\b\s*(.*)$" % prefix, re.LOCALE).match(title) + match = re.compile(r"^(%s)\b\s*(.*)$" % prefix, re.LOCALE).match(title) if match: return match.group(2) return title - diff --git a/patacrep/songs/tex.py b/patacrep/songs/tex.py new file mode 100644 index 00000000..c2a51d62 --- /dev/null +++ b/patacrep/songs/tex.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- + +"""Very simple LaTeX parsing.""" + +import os + +from patacrep import files +from patacrep.latex import parsesong +from patacrep.songs import Song + +class TexRenderer(Song): + """Renderer for song and intersong files.""" + + def parse(self): + """Parse song and set metadata.""" + self.data = parsesong(self.fullpath, self.encoding) + self.titles = self.data['@titles'] + self.languages = self.data['@languages'] + self.authors = self.data['by'] + + def render(self, context): + """Return the string that will render the song.""" + return r'\input{{{}}}'.format(files.path2posix( + files.relpath( + self.fullpath, + os.path.dirname(context['filename']) + ))) + +FILE_PLUGINS = { + 'sg': TexRenderer, + 'is': TexRenderer, + } diff --git a/patacrep/templates.py b/patacrep/templates.py index ec8ad99c..be507a27 100644 --- a/patacrep/templates.py +++ b/patacrep/templates.py @@ -9,18 +9,19 @@ import os import re import json -from patacrep import encoding, errors, files +from patacrep import errors, files +import patacrep.encoding _LATEX_SUBS = ( - (re.compile(ur'\\'), ur'\\textbackslash'), - (re.compile(ur'([{}_#%&$])'), ur'\\\1'), - (re.compile(ur'~'), ur'\~{}'), - (re.compile(ur'\^'), ur'\^{}'), - (re.compile(ur'"'), ur"''"), - (re.compile(ur'\.\.\.+'), ur'\\ldots'), + (re.compile(r'\\'), r'\\textbackslash'), + (re.compile(r'([{}_#%&$])'), r'\\\1'), + (re.compile(r'~'), r'\~{}'), + (re.compile(r'\^'), r'\^{}'), + (re.compile(r'"'), r"''"), + (re.compile(r'\.\.\.+'), r'\\ldots'), ) -_VARIABLE_REGEXP = re.compile(ur""" +_VARIABLE_REGEXP = re.compile(r""" \(\*\ *variables\ *\*\) # Match (* variables *) ( # Match and capture the following: (?: # Start of non-capturing group, used to match a single character @@ -48,7 +49,7 @@ class VariablesExtension(Extension): tags = set(['variables']) def parse(self, parser): - parser.stream.next() + next(parser.stream) parser.parse_statements( end_tokens=['name:endvariables'], drop_needle=True, @@ -67,7 +68,7 @@ def _escape_tex(value): class TexRenderer(object): """Render a template to a LaTeX file.""" - def __init__(self, template, datadirs, lang): + def __init__(self, template, datadirs, lang, encoding=None): '''Start a new jinja2 environment for .tex creation. Arguments: @@ -75,8 +76,10 @@ class TexRenderer(object): - datadirs: list of locations of the data directory (which may contain file /templates/