Browse Source

Use separators instead of sep for authors

pull/184/head
Oliverpool 9 years ago
parent
commit
c84be0895c
  1. patacrep/authors.py (43 lines changed)
  2. patacrep/data/templates/default_songbook.sb.yml (11 lines changed)
  3. patacrep/data/templates/songbook_schema.yml (2 lines changed)
  4. test/test_authors.py (2 lines changed)

43
patacrep/authors.py

@ -5,11 +5,8 @@ import re
LOGGER = logging.getLogger(__name__) LOGGER = logging.getLogger(__name__)
DEFAULT_AUTHWORDS = { AUTHWORDS_KEYS = ["after", "ignore", "separators"]
"after": ["by"],
"ignore": ["unknown"],
"sep": ["and"],
}
RE_AFTER = r"^.*\b{}\b(.*)$" RE_AFTER = r"^.*\b{}\b(.*)$"
RE_SEPARATOR = r"^(.*)\b *{} *(\b.*)?$" RE_SEPARATOR = r"^(.*)\b *{} *(\b.*)?$"
@ -19,18 +16,18 @@ def compile_authwords(authwords):
This regexp will later be used to match these words in authors strings. This regexp will later be used to match these words in authors strings.
""" """
# Fill missing values # Fill missing values
for (key, value) in DEFAULT_AUTHWORDS.items(): for key in AUTHWORDS_KEYS:
if key not in authwords: if key not in authwords:
authwords[key] = value authwords[key] = []
# Compilation # Compilation
authwords['after'] = [ authwords['after'] = [
re.compile(RE_AFTER.format(word), re.LOCALE) re.compile(RE_AFTER.format(word), re.LOCALE)
for word in authwords['after'] for word in authwords['after']
] ]
authwords['sep'] = [ authwords['separators'] = [
re.compile(RE_SEPARATOR.format(word), re.LOCALE) re.compile(RE_SEPARATOR.format(word), re.LOCALE)
for word in ([" %s" % word for word in authwords['sep']] + [',', ';']) for word in ([" %s" % word for word in authwords['separators']] + [',', ';'])
] ]
return authwords return authwords
@ -60,12 +57,12 @@ def split_author_names(string):
return (chunks[-1].strip(), " ".join(chunks[:-1]).strip()) return (chunks[-1].strip(), " ".join(chunks[:-1]).strip())
def split_sep_author(string, sep): def split_sep_author(string, separators):
"""Split authors string according to separators. """Split authors string according to separators.
Arguments: Arguments:
- string: string containing authors names ; - string: string containing authors names ;
- sep: regexp matching a separator. - separators: regexp matching a separator.
>>> split_sep_author("Tintin and Milou", re.compile(RE_SEPARATOR.format("and"))) >>> split_sep_author("Tintin and Milou", re.compile(RE_SEPARATOR.format("and")))
['Tintin', 'Milou'] ['Tintin', 'Milou']
@ -73,12 +70,12 @@ def split_sep_author(string, sep):
['Tintin'] ['Tintin']
""" """
authors = [] authors = []
match = sep.match(string) match = separators.match(string)
while match: while match:
if match.group(2) is not None: if match.group(2) is not None:
authors.append(match.group(2).strip()) authors.append(match.group(2).strip())
string = match.group(1) string = match.group(1)
match = sep.match(string) match = separators.match(string)
authors.insert(0, string.strip()) authors.insert(0, string.strip())
return authors return authors
@ -105,7 +102,7 @@ def processauthors_removeparen(authors_string):
dest += char dest += char
return dest return dest
def processauthors_split_string(authors_string, sep): def processauthors_split_string(authors_string, separators):
"""Split strings """Split strings
See docstring of processauthors() for more information. See docstring of processauthors() for more information.
@ -121,7 +118,7 @@ def processauthors_split_string(authors_string, sep):
['Tintin', 'Milou'] ['Tintin', 'Milou']
""" """
authors_list = [authors_string] authors_list = [authors_string]
for sepword in sep: for sepword in separators:
dest = [] dest = []
for author in authors_list: for author in authors_list:
dest.extend(split_sep_author(author, sepword)) dest.extend(split_sep_author(author, sepword))
@ -171,7 +168,7 @@ def processauthors_clean_authors(authors_list):
if author.lstrip() if author.lstrip()
] ]
def processauthors(authors_string, after=None, ignore=None, sep=None): def processauthors(authors_string, after=None, ignore=None, separators=None):
r"""Return an iterator of authors r"""Return an iterator of authors
For example, in the following call: For example, in the following call:
@ -186,7 +183,7 @@ def processauthors(authors_string, after=None, ignore=None, sep=None):
... **compile_authwords({ ... **compile_authwords({
... 'after': ["by"], ... 'after': ["by"],
... 'ignore': ["anonymous"], ... 'ignore': ["anonymous"],
... 'sep': ["and", ","], ... 'separators': ["and", ","],
... }) ... })
... )) == {("Blake", "William"), ("Parry", "Hubert"), ("Royal~Choir~of~FooBar", "The")} ... )) == {("Blake", "William"), ("Parry", "Hubert"), ("Royal~Choir~of~FooBar", "The")}
True True
@ -198,7 +195,7 @@ def processauthors(authors_string, after=None, ignore=None, sep=None):
# "Lyrics by William Blake, music by Hubert Parry, # "Lyrics by William Blake, music by Hubert Parry,
and sung by The Royal~Choir~of~FooBar" and sung by The Royal~Choir~of~FooBar"
2) String is split, separators being comma and words from "sep". 2) String is split, separators being comma and words from "separators".
# ["Lyrics by William Blake", "music by Hubert Parry", # ["Lyrics by William Blake", "music by Hubert Parry",
"sung by The Royal~Choir~of~FooBar"] "sung by The Royal~Choir~of~FooBar"]
@ -216,8 +213,8 @@ def processauthors(authors_string, after=None, ignore=None, sep=None):
# ] # ]
""" """
if not sep: if not separators:
sep = [] separators = []
if not after: if not after:
after = [] after = []
if not ignore: if not ignore:
@ -230,17 +227,17 @@ def processauthors(authors_string, after=None, ignore=None, sep=None):
processauthors_removeparen( processauthors_removeparen(
authors_string authors_string
), ),
sep), separators),
after), after),
ignore) ignore)
): ):
yield split_author_names(author) yield split_author_names(author)
def process_listauthors(authors_list, after=None, ignore=None, sep=None): def process_listauthors(authors_list, after=None, ignore=None, separators=None):
"""Process a list of authors, and return the list of resulting authors.""" """Process a list of authors, and return the list of resulting authors."""
authors = [] authors = []
for sublist in [ for sublist in [
processauthors(string, after, ignore, sep) processauthors(string, after, ignore, separators)
for string in authors_list for string in authors_list
]: ]:
authors.extend(sublist) authors.extend(sublist)

11
patacrep/data/templates/default_songbook.sb.yml

@ -15,14 +15,11 @@ chords: # Options relatives aux accords
authors: # Comment sont analysés les auteurs authors: # Comment sont analysés les auteurs
separators: separators:
- To - and
- Do
ignore: ignore:
- To - unknown
- Do after:
by: - by
- To
- Do
titles: # Comment sont analysés les titres titles: # Comment sont analysés les titres
prefix: prefix:

2
patacrep/data/templates/songbook_schema.yml

@ -59,7 +59,7 @@ required:
- type: //arr - type: //arr
contents: //str contents: //str
- type: //nil - type: //nil
by: after:
type: //any type: //any
of: of:
- type: //arr - type: //arr

2
test/test_authors.py

@ -49,7 +49,7 @@ PROCESS_AUTHORS_DATA = [
AUTHWORDS = authors.compile_authwords({ AUTHWORDS = authors.compile_authwords({
"after": ["by"], "after": ["by"],
"ignore": ["anonymous", "Anonyme", "anonyme"], "ignore": ["anonymous", "Anonyme", "anonyme"],
"sep": ['and', 'et'], "separators": ['and', 'et'],
}) })
class TestAutors(unittest.TestCase): class TestAutors(unittest.TestCase):

Loading…
Cancel
Save