mirror of https://github.com/patacrep/patacrep.git
Engine for LaTeX songbooks
http://www.patacrep.com
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
151 lines
4.4 KiB
151 lines
4.4 KiB
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
|
|
"""Authors string management."""
|
|
|
|
def split_author_names(string):
    r"""Cut an author string in two at its last separating space.

    A space is a separator unless:
    - it sits inside a brace group (the numbers of opening and closing
      braces seen so far differ);
    - it is the first non-alphanumeric character after a backslash, i.e.
      it is an escaped space or it terminates a command name.

    Returns a ``(first, last)`` tuple whose concatenation is ``string``:
    ``first`` runs up to and including the separating space (it is empty
    when no separator is found), ``last`` is the remainder.

    Examples (first part => last part):
    - Edgar Allan Poe => "Edgar Allan " and "Poe"
    - Edgar Allan \emph {Poe} => "Edgar Allan " and "\emph {Poe}"
    - The Rolling\ Stones => "The " and "Rolling\ Stones"
    - The {Rolling Stones} => "The " and "{Rolling Stones}"
    """
    cut = 0              # slice position just after the last separating space
    depth = 0            # non-zero while inside a brace group
    in_command = False   # True between a backslash and the next non-alnum char
    for position, letter in enumerate(string, 1):
        # Spaces and backslashes only matter outside of brace groups.
        if depth == 0:
            if letter == "\\":
                in_command = True
            elif in_command and not letter.isalnum():
                # This character closes the backslash sequence; if it is a
                # space, it is swallowed here and never becomes a separator.
                in_command = False
            elif letter == " ":
                cut = position
        # Brace bookkeeping happens after the test above, so the brace
        # character itself is examined with the depth that preceded it.
        if letter == "{":
            depth += 1
        elif letter == "}":
            depth -= 1
    return string[:cut], string[cut:]
|
|
|
|
|
|
def split_sep_author(string, sep):
    """Split an authors string according to a separator.

    Arguments:
    - string: string containing authors names;
    - sep: compiled regexp (anything with a ``match`` method) matching a
      separator. Its group 1 must capture what precedes the separator and
      its group 2 what follows it.

    Returns the list of pieces, in the order they appear in ``string``.
    When ``sep`` does not match, the list contains ``string`` alone.

    The regexp is applied again and again to what group 1 captured, so
    group 1 must be strictly shorter than the text matched, otherwise the
    loop never ends.

    >>> import re
    >>> split_sep_author("Tintin and Milou", re.compile('^(.*) and (.*)$'))
    ['Tintin', 'Milou']
    """
    # Pieces are peeled off from the right end, so they are collected last
    # author first and put back in reading order before returning.
    authors = []
    match = sep.match(string)
    while match:
        authors.append(match.group(2))
        string = match.group(1)
        match = sep.match(string)
    authors.append(string)
    authors.reverse()
    return authors
|
|
|
|
|
|
def processauthors(authors_string, after=None, ignore=None, sep=None):
    r"""Return the list of authors found in ``authors_string``.

    Arguments:
    - authors_string: the raw string to process;
    - after: list of compiled regexps; for each author, the first one that
      matches replaces the author by its group 1 (used to drop leading
      words such as "Lyrics by");
    - ignore: list of words; an author containing ``str(word)`` for any of
      them is dropped;
    - sep: list of compiled regexps given, one after the other, to
      split_sep_author(). No separator is built in: a comma only splits
      authors if one of these regexps matches it.
    Each of the three lists defaults to "no rule".

    For example, we are processing:
    # processauthors(
    #   "Lyrics by William Blake (from Milton, 1808),
    #                   music by Hubert Parry (1916),
    #                   and sung by The Royal\ Choir~of~Nowhere
    #                   (just here to show you how processing is done)",
    #   after = [re.compile(r'^.*\bby\b(.*)$')],
    #   ignore = ["anonymous"],
    #   sep = [re.compile('^(.*),(.*)$'), re.compile('^(.*) and (.*)$')],
    #   )

    The "authors_string" string is processed as:

    1) First, parentheses (and their content) are removed.
    # "Lyrics by William Blake, music by Hubert Parry,
                    and sung by The Royal\ Choir~of~Nowhere"

    2) String is split by each regexp of "sep" in turn.
    # ["Lyrics by William Blake", "music by Hubert Parry",
                    "sung by The Royal\ Choir~of~Nowhere"]

    3) Everything before words in "after" is removed.
    # ["William Blake", "Hubert Parry", "The Royal\ Choir~of~Nowhere"]

    4) Strings containing words of "ignore" are dropped (none here).
    # ["William Blake", "Hubert Parry", "The Royal\ Choir~of~Nowhere"]

    5) Surrounding whitespace is removed, and empty strings are dropped.

    6) First names are moved after last names.
    # ["Blake, William", "Parry, Hubert", "Royal\ Choir~of~Nowhere, The"]

    The order of the returned list is the order in which
    split_sep_author() returns the pieces.
    """
    # None stands for "no rule"; this avoids mutable default arguments.
    after = [] if after is None else after
    ignore = [] if ignore is None else ignore
    sep = [] if sep is None else sep

    # Removing parentheses (nested ones included) and their content
    depth = 0
    kept = []
    for char in authors_string:
        if char == '(':
            depth += 1
        elif char == ')' and depth > 0:
            depth -= 1
        elif depth == 0:
            # Also reached by a ")" that closes nothing: it is kept as is.
            kept.append(char)
    authors_list = ["".join(kept)]

    # Splitting strings: every separator is applied to every piece
    # produced by the previous ones.
    for sepword in sep:
        authors_list = [
            piece
            for author in authors_list
            for piece in split_sep_author(author, sepword)
        ]

    # Removing stuff before "after": only the first matching rule applies
    trimmed = []
    for author in authors_list:
        for afterword in after:
            match = afterword.match(author)
            if match:
                author = match.group(1)
                break
        trimmed.append(author)

    # Ignoring ignored authors
    authors_list = [
        author
        for author in trimmed
        if not any(str(ignoreword) in author for ignoreword in ignore)
    ]

    # Cleaning: removing empty authors and unnecessary spaces.
    # Both ends are stripped: a trailing space (e.g. the one left before a
    # removed parenthesis) would otherwise be taken by split_author_names()
    # for the separator between first and last name, giving an empty last
    # name.
    authors_list = [
        author
        for author in (raw.strip() for raw in authors_list)
        if author
    ]

    # Moving first names after last names
    result = []
    for author in authors_list:
        first, last = split_author_names(author)
        # The first part comes back with its separating space attached.
        first = first.strip()
        last = last.strip()
        if first:
            result.append("%s, %s" % (last, first))
        else:
            result.append(last)

    return result
|
|
|