#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""Authors string management."""

import re

# Default authwords, used by compile_authwords() for every key the caller
# leaves out:
# - "after": words after which the actual author name starts;
# - "ignore": authors containing one of these words are dropped;
# - "sep": words separating two authors (the comma is always a separator).
DEFAULT_AUTHWORDS = {
        "after": ["by"],
        "ignore": ["unknown"],
        "sep": ["and"],
        }

def compile_authwords(authwords):
    """Convert strings of authwords to compiled regular expressions.

    These regexps will later be used to match these words in authors strings.

    Argument:
    - authwords: dictionary which may have keys "after", "ignore" and "sep",
      each one mapping to a list of words. Missing keys are filled with the
      values of DEFAULT_AUTHWORDS.

    The dictionary is modified in place, and returned:
    - 'after' becomes a list of compiled regexps, whose group 1 is what
      follows the word;
    - 'sep' becomes a list of compiled regexps, whose groups 1 and 2 are what
      precedes and follows the separator; one more regexp, matching the
      comma, is appended to it;
    - 'ignore' is left as a list of words.

    Words are inserted as is into the regexps (they are not escaped).
    """
    # Fill missing values. Lists are copied, so that later changes to the
    # returned dictionary cannot alter DEFAULT_AUTHWORDS.
    for (key, value) in DEFAULT_AUTHWORDS.items():
        if key not in authwords:
            authwords[key] = list(value)

    # Compilation
    # The word must be a whole word: the leading \b prevents "by" from
    # matching the end of "Bobby".
    authwords['after'] = [
            re.compile(r"^.*\b%s\b(.*)" % word)
            for word in authwords['after']
            ]
    # Separating words must be surrounded by spaces; the comma only needs to
    # be followed by one.
    separators = [" %s" % word for word in authwords['sep']] + [',']
    authwords['sep'] = [
            re.compile(r"^(.*)%s (.*)$" % separator)
            for separator in separators
            ]

    return authwords


def split_author_names(string):
    r"""Split author between first and last name.

    The last space separates first and last name, but spaces following a
    backslash or a command, or located inside braces, are not separators.
    Whitespace surrounding the string is ignored.
    Examples:
    - Edgar Allan Poe => Poe, Edgar Allan
    - Edgar Allan \emph {Poe} => \emph {Poe}, Edgar Allan
    - The Rolling\ Stones => Rolling\ Stones, The
    - The {Rolling Stones} => {Rolling Stones}, The

    Return a tuple (first, last). "first" still ends with the separating
    space, and is empty if no separator was found.
    """
    # Without this, a trailing space would be taken as the separator, and the
    # whole name would end up in the first part.
    string = string.strip()

    # True from a backslash up to the next non alphanumeric character (that
    # is: while reading a command name, or right before an escaped character).
    after_backslash = False
    # Number of characters belonging to the first part.
    split_at = 0
    brace_depth = 0
    for position, char in enumerate(string, 1):
        if brace_depth == 0:
            if char == "\\":
                after_backslash = True
            elif after_backslash and not char.isalnum():
                # End of the command name, or escaped character: even if it is
                # a space, it does not separate names.
                after_backslash = False
            elif char == " ":
                split_at = position
        # Depth is updated after the test above, so an opening brace is still
        # seen as being outside braces (and ends a command name).
        if char == "{":
            brace_depth += 1
        elif char == "}":
            brace_depth -= 1
    return string[:split_at], string[split_at:]


def split_sep_author(string, sep):
    """Split authors string according to separators.

    Arguments:
    - string: string containing authors names ;
    - sep: compiled regexp matching a separator. Its group 1 must capture
      what precedes the separator, and its group 2 what follows it.

    Return the list of authors, in the order they appear in "string". If the
    separator is not found, the list only contains "string".

    >>> split_sep_author("Tintin and Milou", re.compile('^(.*) and (.*)$'))
    ['Tintin', 'Milou']
    >>> split_sep_author("Tintin", re.compile('^(.*) and (.*)$'))
    ['Tintin']
    """
    authors = []
    match = sep.match(string)
    while match:
        # What follows the separator is an author; what precedes it may still
        # contain separators, and is processed by the next iteration.
        authors.append(match.group(2))
        string = match.group(1)
        match = sep.match(string)
    authors.append(string)
    # Authors have been collected from the end of the string to its
    # beginning: put them back in reading order.
    authors.reverse()
    return authors

################################################################################
### Process authors tools.
################################################################################

def processauthors_removeparen(authors_string):
    """Return the string without its parentheses, and without their content.

    Nested parentheses are supported. A closing parenthesis which closes
    nothing is kept; everything following an unclosed opening parenthesis is
    removed.

    See docstring of processauthors() for more information.
    """
    depth = 0
    kept = []
    for char in authors_string:
        if char == '(':
            depth += 1
            continue
        if depth == 0:
            # Outside parentheses (this includes unmatched closing ones).
            kept.append(char)
        elif char == ')':
            depth -= 1
    return "".join(kept)

def processauthors_split_string(authors_string, sep):
    """Split the authors string, using each separator in turn.

    Arguments:
    - authors_string: string to split;
    - sep: list of compiled regexps, as expected by split_sep_author().

    Each separator is applied to every piece produced by the previous ones.
    Return the list of pieces (only "authors_string" if "sep" is empty).

    See docstring of processauthors() for more information.
    """
    pieces = [authors_string]
    for separator in sep:
        pieces = [
                part
                for piece in pieces
                for part in split_sep_author(piece, separator)
                ]
    return pieces

def processauthors_remove_after(authors_list, after):
    """Only keep, in each author, what follows an "after" word.

    Arguments:
    - authors_list: list of strings;
    - after: list of compiled regexps, whose group 1 is the part to keep.

    For each author, only the first matching regexp of "after" is applied;
    authors matching none of them are left unchanged.

    See docstring of processauthors() for more information.
    """
    def keep_tail(name):
        """Return group 1 of the first regexp matching name, or name itself."""
        for pattern in after:
            found = pattern.match(name)
            if found:
                return found.group(1)
        return name

    return [keep_tail(name) for name in authors_list]

def processauthors_ignore_authors(authors_list, ignore):
    """Drop authors containing one of the ignored words.

    Arguments:
    - authors_list: list of strings;
    - ignore: list of words (they are converted to strings before use).

    An author is dropped as soon as one of the words appears anywhere in it
    (case sensitive substring search). The order of remaining authors is
    preserved.

    See docstring of processauthors() for more information.
    """
    return [
            name
            for name in authors_list
            if not any(str(word) in name for word in ignore)
            ]

def processauthors_clean_authors(authors_list):
    """Clean: remove empty authors and unnecessary spaces

    Leading and trailing whitespace is removed from each author (trailing
    spaces typically come from a removed parenthesis, or precede a
    separator); authors which are then empty are dropped.

    See docstring of processauthors() for more information.
    """
    stripped = (author.strip() for author in authors_list)
    return [author for author in stripped if author]

def processauthors_invert_names(authors_list):
    """Move first names after last names

    Each author is split using split_author_names(), and turned into
    "last, first". Authors having no first name are returned as their last
    name alone. No leading or trailing whitespace is left in the result.

    See docstring of processauthors() for more information.
    """
    dest = []
    for author in authors_list:
        # Surrounding whitespace is removed first, so that a trailing space
        # cannot be taken as the separator between first and last name.
        first, last = split_author_names(author.strip())
        # The first part comes with the separating space: remove it, so that
        # it neither ends the result nor makes an empty first name look
        # non-empty.
        first = first.strip()
        last = last.strip()
        if first:
            dest.append("%(last)s, %(first)s" % {
                'first': first,
                'last': last,
                })
        else:
            dest.append(last)
    return dest

def processauthors(authors_string, after=None, ignore=None, sep=None):
    r"""Return the list of authors found in an authors string.

    Arguments:
    - authors_string: string to process;
    - after: list of compiled regexps, whose group 1 is what has to be kept
      of an author (typically: what follows "by");
    - ignore: list of words; authors containing one of them are dropped;
    - sep: list of compiled regexps, used to split the string into authors.
    When one of the last three arguments is omitted (or empty), the
    corresponding step leaves its input unchanged.

    For example, we are processing:
    # processauthors(
    #   "Lyrics by William Blake (from Milton, 1808),
    #               music by Hubert Parry (1916),
    #               and sung by The Royal\ Choir~of~Nowhere
    #               (just here to show you how processing is done)",
    #   after = <regexps built from "by">,
    #   ignore = ["anonymous"],
    #   sep = <regexps built from "and" and the comma>,
    #   )

    The "authors_string" string is processed as follows (results are shown
    up to surrounding spaces and order of the list, which depend on the
    functions implementing each step):

    1) First, parentheses (and their content) are removed.
    # "Lyrics by William Blake, music by Hubert Parry,
    #             and sung by The Royal\ Choir~of~Nowhere"

    2) String is split, using the regexps of "sep".
    # ["Lyrics by William Blake", "music by Hubert Parry",
    #             "sung by The Royal\ Choir~of~Nowhere"]

    3) Everything before words in "after" is removed.
    # ["William Blake", "Hubert Parry", "The Royal\ Choir~of~Nowhere"]

    4) Strings containing words of "ignore" are dropped.
    # ["William Blake", "Hubert Parry", "The Royal\ Choir~of~Nowhere"]

    5) Empty strings are dropped, and unnecessary spaces are removed.

    6) First names are moved after last names.
    # ["Blake, William", "Parry, Hubert", "Royal\ Choir~of~Nowhere, The"]
    """
    separators = sep or []
    prefixes = after or []
    ignored = ignore or []

    # Each step consumes the result of the previous one.
    without_paren = processauthors_removeparen(authors_string)
    authors = processauthors_split_string(without_paren, separators)
    authors = processauthors_remove_after(authors, prefixes)
    authors = processauthors_ignore_authors(authors, ignored)
    authors = processauthors_clean_authors(authors)
    return processauthors_invert_names(authors)