From 788655f709436030e7b778d2370cc209f7501274 Mon Sep 17 00:00:00 2001 From: Oliverpool Date: Sun, 26 Jun 2016 17:10:55 +0200 Subject: [PATCH] Use unidecode to get the first letter for the index --- patacrep/content/sort.py | 12 +----------- patacrep/index.py | 6 +++--- patacrep/utils.py | 11 +++++++++++ 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/patacrep/content/sort.py b/patacrep/content/sort.py index cb10d157..e4bde299 100755 --- a/patacrep/content/sort.py +++ b/patacrep/content/sort.py @@ -5,27 +5,17 @@ to a songbook. """ import logging -import unidecode from patacrep import files from patacrep.content import ContentError from patacrep.content import process_content, validate_parser_argument from patacrep.content.song import OnlySongsError +from patacrep.utils import normalize_string LOGGER = logging.getLogger(__name__) DEFAULT_SORT = ['by', 'album', 'title'] -def normalize_string(string): - """Return a normalized string. - - Normalized means: - - no surrounding spaces; - - lower case; - - passed through unidecode.unidecode(). - """ - return unidecode.unidecode(string.lower().strip()) - def normalize_field(field): """Return a normalized field, it being a string or a list of strings.""" if isinstance(field, str): diff --git a/patacrep/index.py b/patacrep/index.py index e520e713..75d55107 100644 --- a/patacrep/index.py +++ b/patacrep/index.py @@ -7,11 +7,11 @@ from a file generated by the latex compilation of the songbook (.sxd). import locale import re -import unidecode from patacrep import authors from patacrep import encoding from patacrep.latex import tex2plain +from patacrep.utils import normalize_string EOL = "\n" @@ -67,7 +67,7 @@ class Index: def get_first_letter(key): """Return the uppercase first letter of key.""" try: - letter = FIRST_LETTER_PATTERN.match(key).group(1) + letter = FIRST_LETTER_PATTERN.match(normalize_string(key)).group(1) except AttributeError: # classify as number all the non letter characters letter = "0" @@ -108,7 +108,7 @@ class Index: if key not in self.data[first]: self.data[first][key] = { 'sortingkey': [ - unidecode.unidecode(tex2plain(item)).lower() + normalize_string(tex2plain(item)) for item in key ], 'entries': [], diff --git a/patacrep/utils.py b/patacrep/utils.py index 5051a592..1d139f6a 100644 --- a/patacrep/utils.py +++ b/patacrep/utils.py @@ -1,6 +1,7 @@ """Some utility functions""" from collections import UserDict +import unidecode from patacrep import errors, Rx @@ -92,3 +93,13 @@ def validate_yaml_schema(data, schema): schema.validate(data) except Rx.SchemaMismatch as exception: raise errors.SchemaError(rx_exception=exception) + +def normalize_string(string): + """Return a normalized string. + + Normalized means: + - no surrounding spaces; + - lower case; + - passed through unidecode.unidecode(). + """ + return unidecode.unidecode(string.lower().strip())