Engine for LaTeX songbooks http://www.patacrep.com
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

222 lines
6.3 KiB

"""Authors string management."""
import logging
import re
LOGGER = logging.getLogger(__name__)
# Fallback author keywords: compile_authwords() uses each entry for any
# category ("after", "ignore", "sep") missing from the mapping it is given.
DEFAULT_AUTHWORDS = dict(
    after=["by"],
    ignore=["unknown"],
    sep=["and"],
)
def compile_authwords(authwords):
    """Convert strings of authwords to compiled regular expressions.

    The resulting patterns are later matched against authors strings.

    Arguments:
    - authwords: dictionary that may contain the keys "after", "ignore" and
      "sep", each mapping to a list of words. Missing keys are filled from
      DEFAULT_AUTHWORDS. The dictionary is modified in place.

    Returns the same dictionary, where:
    - "after" is a list of patterns whose group 1 is the text following the
      word;
    - "sep" is a list of patterns (one per separator word, plus one for the
      comma) whose groups 1 and 2 are the text before and after the
      separator;
    - "ignore" is left untouched (list of plain strings).

    Words are interpolated into the patterns verbatim, so any regular
    expression metacharacter they contain keeps its special meaning.
    """
    # Fill missing values. The default lists are copied so that later changes
    # to `authwords` cannot alter DEFAULT_AUTHWORDS.
    for key, value in DEFAULT_AUTHWORDS.items():
        if key not in authwords:
            authwords[key] = list(value)

    # Compilation. The re.LOCALE flag is deliberately not used: it is only
    # valid with bytes patterns and raises ValueError with str patterns on
    # Python 3.6+; str patterns already follow Unicode rules for \b.
    authwords['after'] = [
        re.compile(r"^.*\b%s\b(.*)$" % word)
        for word in authwords['after']
    ]
    # Separator words must be preceded by a space; the comma need not be.
    separators = [" %s" % word for word in authwords['sep']] + [',']
    authwords['sep'] = [
        re.compile(r"^(.*)%s +(.*)$" % separator)
        for separator in separators
    ]
    return authwords
def split_author_names(string):
    r"""Split author between first and last name.

    The last space separates first and last name, but spaces following a
    backslash or a command, and spaces enclosed in braces, are not
    separators. Whitespace surrounding the name is ignored.

    Arguments:
    - string: name of a single author.

    Returns a tuple (last name, first name). The separating space belongs to
    neither part; the first name is empty when there is no separator.

    Examples:
    - Edgar Allan Poe => Poe, Edgar Allan
    - Edgar Allan \emph {Poe} => \emph {Poe}, Edgar Allan
    - The Rolling\ Stones => Rolling\ Stones, The
    - The {Rolling Stones} => {Rolling Stones}, The
    """
    # Scan and slice the very same (stripped) string, so that the position
    # found below is always valid for the slices of the return statement.
    name = string.strip()
    after_backslash = False
    split_at = 0  # Position just after the separating space (0: none found)
    depth = 0  # Brace nesting level
    for position, char in enumerate(name, start=1):
        if depth == 0:
            if char == "\\":
                after_backslash = True
            elif after_backslash and not char.isalnum():
                # End of the command (or escaped character): this character
                # is not a separator, even if it is a space.
                after_backslash = False
            elif char == " ":
                split_at = position
        # Braces are counted after the test above, so that the opening brace
        # itself is still handled as a character of depth 0.
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
    if split_at == 0:
        return name, ""
    # Drop the separating space itself from the first name.
    return name[split_at:], name[:split_at - 1]
def split_sep_author(string, sep):
    """Split authors string according to separators.

    Arguments:
    - string: string containing authors names ;
    - sep: compiled regexp matching a separator; its group 1 is the text
      before the separator, and its group 2 the text after it.

    Returns the list of pieces, in the order they appear in `string`.

    >>> split_sep_author("Tintin and Milou", re.compile('^(.*) and (.*)$'))
    ['Tintin', 'Milou']
    """
    authors = []
    match = sep.match(string)
    while match:
        # Group 1 is greedy, so the rightmost separator is matched first:
        # pieces are collected from right to left.
        authors.append(match.group(2))
        string = match.group(1)
        match = sep.match(string)
    authors.append(string)
    # Put the pieces back in their original, left-to-right order.
    authors.reverse()
    return authors
################################################################################
### Process authors tools.
################################################################################
def processauthors_removeparen(authors_string):
    """Return `authors_string` without parentheses and their content.

    Nested parentheses are supported. A closing parenthesis that has no
    matching opening one is kept as is.

    See docstring of processauthors() for more information.
    """
    depth = 0
    kept = []
    for character in authors_string:
        if character == '(':
            depth += 1
        elif character == ')' and depth > 0:
            depth -= 1
        elif depth == 0:
            # Outside of any parenthesis: keep the character.
            kept.append(character)
    return "".join(kept)
def processauthors_split_string(authors_string, sep):
    """Split `authors_string` using each separator of `sep` in turn.

    Arguments:
    - authors_string: string containing authors names ;
    - sep: iterable of separators, each one being passed as second argument
      to split_sep_author().

    Every separator is applied to all the pieces produced by the previous
    ones. Returns the list of resulting pieces.

    See docstring of processauthors() for more information.
    """
    pieces = [authors_string]
    for separator in sep:
        pieces = [
            part
            for piece in pieces
            for part in split_sep_author(piece, separator)
        ]
    return pieces
def processauthors_remove_after(authors_list, after):
    """Keep only what follows the "after" words in each author.

    Arguments:
    - authors_list: list of authors strings ;
    - after: list of compiled regexps, tried in order; for the first one
      that matches an author, this author is replaced by group 1 of the
      match. Authors that match none of them are kept unchanged.

    See docstring of processauthors() for more information.
    """
    shortened = []
    for name in authors_list:
        # Lazily try each pattern, stopping at the first one that matches.
        attempts = (pattern.match(name) for pattern in after)
        found = next((attempt for attempt in attempts if attempt), None)
        shortened.append(found.group(1) if found else name)
    return shortened
def processauthors_ignore_authors(authors_list, ignore):
    """Drop the authors containing one of the words of `ignore`.

    Arguments:
    - authors_list: list of authors strings ;
    - ignore: list of strings; an author is dropped as soon as it contains
      one of them as a (case sensitive) substring.

    See docstring of processauthors() for more information.
    """
    return [
        name
        for name in authors_list
        if not any(word in name for word in ignore)
    ]
def processauthors_clean_authors(authors_list):
    """Clean: remove leading whitespace, and drop authors left empty.

    Trailing whitespace is not removed.

    See docstring of processauthors() for more information.
    """
    cleaned = []
    for name in authors_list:
        name = name.lstrip()
        if name:
            cleaned.append(name)
    return cleaned
def processauthors(authors_string, after=None, ignore=None, sep=None):
    r"""Return the list of authors found in `authors_string`.

    Arguments:
    - authors_string: string describing the authors ;
    - after: list of compiled regexps, as used by
      processauthors_remove_after() (default: none) ;
    - ignore: list of strings, as used by processauthors_ignore_authors()
      (default: none) ;
    - sep: list of compiled regexps, as used by
      processauthors_split_string() (default: none).

    Returns a list containing, for each author, the value returned by
    split_author_names(), that is a tuple (last name, first name).

    For example, let us process:

    # "Lyrics by William Blake (from Milton, 1808),
    #  music by Hubert Parry (1916),
    #  and sung by The Royal\ Choir~of~Nowhere
    #  (just here to show you how processing is done)"

    with "after" matching the word "by", "ignore" containing "anonymous",
    and "sep" matching the comma and the word "and". The string goes
    through the following steps (the order of the pieces is the one produced
    by split_sep_author(); whitespace details are omitted here).

    1) Parentheses (and their content) are removed.
    # "Lyrics by William Blake, music by Hubert Parry,
    #  and sung by The Royal\ Choir~of~Nowhere"
    2) The string is split according to the separators of "sep".
    # "Lyrics by William Blake", "music by Hubert Parry",
    # "sung by The Royal\ Choir~of~Nowhere"
    3) Everything up to the words of "after" is removed.
    # "William Blake", "Hubert Parry", "The Royal\ Choir~of~Nowhere"
    4) Strings containing words of "ignore" are dropped (none here).
    5) Leading whitespace and empty strings are removed.
    6) First and last names are split.
    # ("Blake", "William"), ("Parry", "Hubert"),
    # ("Royal\ Choir~of~Nowhere", "The")
    """
    sep = sep or []
    after = after or []
    ignore = ignore or []

    without_parentheses = processauthors_removeparen(authors_string)
    pieces = processauthors_split_string(without_parentheses, sep)
    names = processauthors_remove_after(pieces, after)
    kept = processauthors_ignore_authors(names, ignore)
    return [
        split_author_names(name)
        for name in processauthors_clean_authors(kept)
    ]