#!/usr/bin/python
# -*- coding: utf-8 -*-
#
import fnmatch
import os
def recursiveFind(root_directory, pattern):
    """Return the paths of all files under ``root_directory`` matching ``pattern``.

    The tree is traversed with ``os.walk`` and, in every visited directory,
    file names are filtered with ``fnmatch.filter``; ``pattern`` is therefore
    a shell-style wildcard applied to the bare file name, not to the path.

    Each returned path is ``os.path.join(root, filename)`` where ``root`` is
    the directory as reported by ``os.walk``.  Paths are returned in
    traversal order; an empty list is returned when nothing matches.
    """
    matches = []
    for root, _dirnames, filenames in os.walk(root_directory):
        matches.extend(
            os.path.join(root, filename)
            for filename in fnmatch.filter(filenames, pattern)
        )
    return matches
def split_author_names(string):
    r"""Split an author string between first and last name.

    The last separating space marks the boundary between first and last
    name.  A space is NOT a separator when:
    - it is inside braces;
    - it directly follows a backslash or a command (a backslash followed
      by alphanumeric characters).

    Whitespace surrounding ``string`` is discarded before splitting, so a
    trailing space cannot be mistaken for the boundary.

    Returns a ``(first, last)`` tuple.  ``first`` still ends with the
    separating space (callers are expected to strip it); it is the empty
    string when no separating space is found.

    Examples (return value, then the reordering a caller would build):
    - Edgar Allan Poe
      => ("Edgar Allan ", "Poe")             => Poe, Edgar Allan
    - Edgar Allan \emph{Poe}
      => ("Edgar Allan ", "\emph{Poe}")      => \emph{Poe}, Edgar Allan
    - The Rolling\ Stones
      => ("The ", "Rolling\ Stones")         => Rolling\ Stones, The
    - The {Rolling Stones}
      => ("The ", "{Rolling Stones}")        => {Rolling Stones}, The
    """
    string = string.strip()
    after_backslash = False
    split_at = 0
    depth = 0
    for position, char in enumerate(string, 1):
        # Only characters outside of any brace group can be separators.
        if depth == 0:
            if char == "\\":
                after_backslash = True
            elif after_backslash and not char.isalnum():
                # First non-alphanumeric character after a backslash ends
                # the command; if it is a space, it is deliberately not
                # recorded as a separator.
                after_backslash = False
            elif char == " ":
                split_at = position
        # The depth is updated after the test above, so the braces
        # themselves are examined with the depth of their surroundings.
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
    return string[:split_at], string[split_at:]
def split_sep_author(string, sep):
    """Split ``string`` into chunks using the compiled pattern ``sep``.

    ``sep`` must provide a ``match()`` method and define two groups:
    group 1 is the text on the left of a separator, group 2 the text on
    its right.  The pattern is applied again to group 1 until it no longer
    matches, so a string holding several separators is fully split.

    The chunks are returned in the order in which they appear in
    ``string``.  They are neither stripped nor filtered: empty chunks are
    kept.

    Example, with ``sep = re.compile(r"^(.*) and (.*)$")``:
        "A and B and C"  =>  ["A", "B", "C"]
    """
    authors = []
    match = sep.match(string)
    while match:
        remainder = match.group(1)
        if remainder == string:
            # The pattern matched without consuming anything: matching
            # again would never terminate.
            break
        authors.append(match.group(2))
        string = remainder
        match = sep.match(string)
    authors.append(string)
    # Chunks were collected from right to left; restore source order.
    authors.reverse()
    return authors
def processauthors(authors_string, after=None, ignore=None, sep=None):
    r"""Return the list of authors found in ``authors_string``.

    Arguments:
    - authors_string: the raw string describing the authors.
    - after: iterable of compiled patterns (objects with a ``match()``
      method).  When one matches a chunk, the chunk is replaced by group 1
      of the match; only the first matching pattern is applied.
    - ignore: iterable of words; a chunk containing ``str(word)`` as a
      substring is dropped.
    - sep: iterable of compiled patterns, each handed in turn to
      ``split_sep_author`` to split the chunks further.
    Each of the three defaults to "nothing".

    For example, we are processing:
    # "Lyrics by William Blake (from Milton, 1808), music by Hubert Parry (1916), and sung by The Royal\ Choir~of~Nowhere (just here to show you how processing is done)"
    with patterns in "sep" splitting on commas and on the word "and", a
    pattern in "after" keeping what follows the word "by", and
    ignore = ["anonymous"].

    The "authors_string" string is processed as:

    1) First, parentheses (and their content) are removed.
    # "Lyrics by William Blake , music by Hubert Parry , and sung by The Royal\ Choir~of~Nowhere "

    2) The string is split using every pattern of "sep" (commas are only
       separators if one of these patterns matches them).
    # "Lyrics by William Blake", "music by Hubert Parry", "sung by The Royal\ Choir~of~Nowhere"

    3) Everything matched before group 1 of a pattern of "after" is removed.
    # "William Blake", "Hubert Parry", "The Royal\ Choir~of~Nowhere"

    4) Strings containing words of "ignore" are dropped.
    # "William Blake", "Hubert Parry", "The Royal\ Choir~of~Nowhere"

    5) Surrounding whitespace and empty strings are removed, then first
       names are moved after last names.
    # "Blake, William", "Parry, Hubert", "Royal\ Choir~of~Nowhere, The"

    The order of the returned list is the order of the chunks returned by
    ``split_sep_author``.
    """
    # Materialise the optional iterables once: they are traversed for
    # every author chunk below.
    after = [] if after is None else list(after)
    ignore = [] if ignore is None else list(ignore)
    sep = [] if sep is None else list(sep)

    # Removing parentheses (nested ones included).  A closing parenthesis
    # without a matching opening one is kept as ordinary text.
    depth = 0
    kept = []
    for char in authors_string:
        if char == '(':
            depth += 1
        elif char == ')' and depth > 0:
            depth -= 1
        elif depth == 0:
            kept.append(char)
    authors_string = "".join(kept)

    # Splitting strings
    authors_list = [authors_string]
    for sepword in sep:
        pieces = []
        for author in authors_list:
            pieces.extend(split_sep_author(author, sepword))
        authors_list = pieces

    # Removing stuff before "after"
    trimmed = []
    for author in authors_list:
        for afterword in after:
            match = afterword.match(author)
            if match:
                author = match.group(1)
                break
        trimmed.append(author)
    authors_list = trimmed

    # Ignoring ignored authors
    ignored_words = [str(ignoreword) for ignoreword in ignore]
    authors_list = [
        author
        for author in authors_list
        if not any(word in author for word in ignored_words)
    ]

    # Cleaning: removing empty authors and unnecessary spaces.  Both ends
    # are stripped: a trailing space (e.g. left behind by a removed
    # parenthesis) would otherwise be taken for the first/last name
    # boundary.
    authors_list = [author.strip() for author in authors_list]
    authors_list = [author for author in authors_list if author]

    # Moving first names after last names
    reordered = []
    for author in authors_list:
        first, last = split_author_names(author)
        first = first.strip()
        last = last.strip()
        if first:
            reordered.append("%(last)s, %(first)s" % {
                'first': first,
                'last': last,
            })
        else:
            reordered.append(last)
    return reordered