From 443783be2c284bed8f2ec686493fe357d3c7b06a Mon Sep 17 00:00:00 2001 From: Louis Date: Sun, 6 Jul 2014 12:00:45 +0200 Subject: [PATCH] Solved sorting problem --- patacrep/authors.py | 38 ++++++++---------------- patacrep/content/sorted.py | 4 +-- patacrep/index.py | 61 ++++++++++++++++++++++++-------------- patacrep/plastex.py | 1 + 4 files changed, 54 insertions(+), 50 deletions(-) diff --git a/patacrep/authors.py b/patacrep/authors.py index 73f3fc7a..a1feda2e 100644 --- a/patacrep/authors.py +++ b/patacrep/authors.py @@ -64,7 +64,7 @@ def split_author_names(string): brace_count += 1 if char == "{": brace_count -= 1 - return string[:last_space], string[last_space:] + return string[last_space:], string[:last_space] def split_sep_author(string, sep): @@ -162,23 +162,6 @@ def processauthors_clean_authors(authors_list): if author.lstrip() ] -def processauthors_invert_names(authors_list): - """Move first names after last names - - See docstring of processauthors() for more information. - """ - dest = [] - for author in authors_list: - first, last = split_author_names(author) - if first: - dest.append(ur"\indexauthor{{{first}}}{{{last}}}".format( - first=first.strip(), - last=last.strip(), - )) - else: - dest.append(last.lstrip()) - return dest - def processauthors(authors_string, after=None, ignore=None, sep=None): r"""Return a list of authors @@ -210,10 +193,12 @@ def processauthors(authors_string, after=None, ignore=None, sep=None): 4) Strings containing words of "ignore" are dropped. # ["William Blake", "Hubert Parry", The Royal\ Choir~of~Nowhere"] - 5) First and last names are processed through LaTeX command \indexauthor - (which will, by default, invert first and last names). - # ["\indexauthor{William}{Blake}", "\indexauthor{Hubert}{Parry}", - # \indexthaor{The}{Royal\ Choir~of~Nowhere}"] + 5) First and last names are splitted + # [ + # ("Blake", "William"), + # ("Parry", "Hubert"), + # ("Royal\ Choir~of~Nowhere", "The"), + # ] """ if not sep: @@ -223,8 +208,10 @@ def processauthors(authors_string, after=None, ignore=None, sep=None): if not ignore: ignore = [] - return processauthors_invert_names( - processauthors_clean_authors( + return [ + split_author_names(author) + for author + in processauthors_clean_authors( processauthors_ignore_authors( processauthors_remove_after( processauthors_split_string( @@ -235,5 +222,4 @@ def processauthors(authors_string, after=None, ignore=None, sep=None): after), ignore) ) - ) - + ] diff --git a/patacrep/content/sorted.py b/patacrep/content/sorted.py index 0a72614a..e9a5e677 100755 --- a/patacrep/content/sorted.py +++ b/patacrep/content/sorted.py @@ -33,8 +33,8 @@ def normalize_field(field): """Return a normalized field, it being a string or a list of strings.""" if isinstance(field, basestring): return normalize_string(field) - elif isinstance(field, list): - return [normalize_string(string) for string in field] + elif isinstance(field, list) or isinstance(field, tuple): + return [normalize_field(string) for string in field] def key_generator(sort): """Return a function that returns the list of values used to sort the song. diff --git a/patacrep/index.py b/patacrep/index.py index 27fa1eb1..63f3058d 100755 --- a/patacrep/index.py +++ b/patacrep/index.py @@ -22,18 +22,6 @@ KEYWORD_PATTERN = re.compile(ur"^%(\w+)\s?(.*)$", re.LOCALE) FIRST_LETTER_PATTERN = re.compile(ur"^(?:\{?\\\w+\}?)*[^\w]*(\w)", re.LOCALE) -def sortkey(value): - """From a title, return something usable for sorting. - - It handles locale (but - don't forget to call locale.setlocale(locale.LC_ALL, '')). It also handles - the sort with latex escape sequences. - """ - return locale.strxfrm( - encoding.unidecode(simpleparse(value).replace(' ', 'A')).lower() - ) - - def process_sxd(filename): """Parse sxd file. @@ -115,12 +103,18 @@ class Index(object): No processing is done on data. It is added raw. See add() for a similar method with processing. """ - first = self.get_first_letter(key) + first = self.get_first_letter(key[0]) if not first in self.data.keys(): self.data[first] = dict() if not key in self.data[first].keys(): - self.data[first][key] = [] - self.data[first][key].append({'num': number, 'link': link}) + self.data[first][key] = { + 'sortingkey': [ + encoding.unidecode(simpleparse(item)).lower() + for item in key + ], + 'entries': [], + } + self.data[first][key]['entries'].append({'num': number, 'link': link}) def add(self, key, number, link): """Add a song to the list. @@ -133,15 +127,15 @@ class Index(object): match = pattern.match(key) if match: self._raw_add( - ur"\indextitle{{{}}}{{{}}}".format( + ( match.group(1).strip(), - (match.group(2) + match.group(3)).strip(), - ), + (match.group(2) + match.group(3)).strip() + ), number, link ) return - self._raw_add(key, number, link) + self._raw_add((key, ""), number, link) if self.indextype == "AUTHOR": # Processing authors @@ -155,10 +149,26 @@ class Index(object): """Return the LaTeX code corresponding to the reference.""" return ur'\hyperlink{{{0[link]}}}{{{0[num]}}}'.format(ref) + def key_to_str(self, key): + """Convert the key (title or author) to the LaTeX command rendering it. + + """ + if self.indextype == "AUTHOR": + if key[1]: + return ur"\indexauthor{{{first}}}{{{last}}}".format( + first=key[1], + last=key[0], + ) + else: + return key[0] + + if self.indextype == "TITLE": + return ur"\indextitle{{{0[0]}}}{{{0[1]}}}".format(key) + def entry_to_str(self, key, entry): """Return the LaTeX code corresponding to the entry.""" return unicode(ur'\idxentry{{{0}}}{{{1}}}' + EOL).format( - key, + self.key_to_str(key), ur'\\'.join([self.ref_to_str(ref) for ref in entry]), ) @@ -168,9 +178,16 @@ class Index(object): Here, an index block is a letter, and all data beginning with this letter. """ + def sortkey(key): + """Return something sortable for `entries[key]`.""" + return [ + locale.strxfrm(item) + for item + in entries[key]['sortingkey'] + ] string = ur'\begin{idxblock}{' + letter + '}' + EOL - for key in sorted(entries.keys(), key=sortkey): - string += self.entry_to_str(key, entries[key]) + for key in sorted(entries, key=sortkey): + string += self.entry_to_str(key, entries[key]['entries']) string += ur'\end{idxblock}' + EOL return string diff --git a/patacrep/plastex.py b/patacrep/plastex.py index b1c906b2..ecfa2d00 100644 --- a/patacrep/plastex.py +++ b/patacrep/plastex.py @@ -39,6 +39,7 @@ def simpleparse(text): """Parse a simple LaTeX string. """ tex = TeX() + tex.disableLogging() tex.input(text) doc = tex.parse() return process_unbr_spaces(doc.textContent)