|
@ -5,8 +5,6 @@ import re |
|
|
|
|
|
|
|
|
LOGGER = logging.getLogger(__name__) |
|
|
LOGGER = logging.getLogger(__name__) |
|
|
|
|
|
|
|
|
AUTHWORDS_KEYS = ["after", "ignore", "separators"] |
|
|
|
|
|
|
|
|
|
|
|
RE_AFTER = r"^.*\b{}\b(.*)$" |
|
|
RE_AFTER = r"^.*\b{}\b(.*)$" |
|
|
RE_SEPARATOR = r"^(.*)\b *{} *(\b.*)?$" |
|
|
RE_SEPARATOR = r"^(.*)\b *{} *(\b.*)?$" |
|
|
|
|
|
|
|
@ -15,23 +13,17 @@ def compile_authwords(authwords): |
|
|
|
|
|
|
|
|
This regexp will later be used to match these words in authors strings. |
|
|
This regexp will later be used to match these words in authors strings. |
|
|
""" |
|
|
""" |
|
|
# Fill missing values |
|
|
return { |
|
|
for key in AUTHWORDS_KEYS: |
|
|
'ignore': authwords.get('ignore', []), |
|
|
if key not in authwords: |
|
|
'after': [ |
|
|
authwords[key] = [] |
|
|
|
|
|
|
|
|
|
|
|
# Compilation |
|
|
|
|
|
authwords['after'] = [ |
|
|
|
|
|
re.compile(RE_AFTER.format(word), re.LOCALE) |
|
|
re.compile(RE_AFTER.format(word), re.LOCALE) |
|
|
for word in authwords['after'] |
|
|
for word in authwords['after'] |
|
|
] |
|
|
], |
|
|
authwords['separators'] = [ |
|
|
'separators': [ |
|
|
re.compile(RE_SEPARATOR.format(word), re.LOCALE) |
|
|
re.compile(RE_SEPARATOR.format(word), re.LOCALE) |
|
|
for word in ([" %s" % word for word in authwords['separators']] + [',', ';']) |
|
|
for word in ([" %s" % word for word in authwords['separators']] + [',', ';']) |
|
|
] |
|
|
], |
|
|
|
|
|
} |
|
|
return authwords |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def split_author_names(string): |
|
|
def split_author_names(string): |
|
|
r"""Split author between first and last name. |
|
|
r"""Split author between first and last name. |
|
|