Browse Source

Use unidecode to get the first letter for the index

pull/231/head
Oliverpool 9 years ago
parent
commit
788655f709
  1. 12
      patacrep/content/sort.py
  2. 6
      patacrep/index.py
  3. 11
      patacrep/utils.py

12
patacrep/content/sort.py

@ -5,27 +5,17 @@ to a songbook.
""" """
import logging import logging
import unidecode
from patacrep import files from patacrep import files
from patacrep.content import ContentError from patacrep.content import ContentError
from patacrep.content import process_content, validate_parser_argument from patacrep.content import process_content, validate_parser_argument
from patacrep.content.song import OnlySongsError from patacrep.content.song import OnlySongsError
from patacrep.utils import normalize_string
LOGGER = logging.getLogger(__name__) LOGGER = logging.getLogger(__name__)
DEFAULT_SORT = ['by', 'album', 'title'] DEFAULT_SORT = ['by', 'album', 'title']
def normalize_string(string):
    """Normalize a string so it can be compared and sorted.

    The input is lower-cased, then stripped of leading and trailing
    whitespace, and the result is finally transliterated with
    unidecode.unidecode().
    """
    lowered = string.lower()
    trimmed = lowered.strip()
    return unidecode.unidecode(trimmed)
def normalize_field(field): def normalize_field(field):
"""Return a normalized field, it being a string or a list of strings.""" """Return a normalized field, it being a string or a list of strings."""
if isinstance(field, str): if isinstance(field, str):

6
patacrep/index.py

@ -7,11 +7,11 @@ from a file generated by the latex compilation of the songbook (.sxd).
import locale import locale
import re import re
import unidecode
from patacrep import authors from patacrep import authors
from patacrep import encoding from patacrep import encoding
from patacrep.latex import tex2plain from patacrep.latex import tex2plain
from patacrep.utils import normalize_string
EOL = "\n" EOL = "\n"
@ -67,7 +67,7 @@ class Index:
def get_first_letter(key): def get_first_letter(key):
"""Return the uppercase first letter of key.""" """Return the uppercase first letter of key."""
try: try:
letter = FIRST_LETTER_PATTERN.match(key).group(1) letter = FIRST_LETTER_PATTERN.match(normalize_string(key)).group(1)
except AttributeError: except AttributeError:
# classify as number all the non letter characters # classify as number all the non letter characters
letter = "0" letter = "0"
@ -108,7 +108,7 @@ class Index:
if key not in self.data[first]: if key not in self.data[first]:
self.data[first][key] = { self.data[first][key] = {
'sortingkey': [ 'sortingkey': [
unidecode.unidecode(tex2plain(item)).lower() normalize_string(tex2plain(item))
for item in key for item in key
], ],
'entries': [], 'entries': [],

11
patacrep/utils.py

@ -1,6 +1,7 @@
"""Some utility functions""" """Some utility functions"""
from collections import UserDict from collections import UserDict
import unidecode
from patacrep import errors, Rx from patacrep import errors, Rx
@ -92,3 +93,13 @@ def validate_yaml_schema(data, schema):
schema.validate(data) schema.validate(data)
except Rx.SchemaMismatch as exception: except Rx.SchemaMismatch as exception:
raise errors.SchemaError(rx_exception=exception) raise errors.SchemaError(rx_exception=exception)
def normalize_string(string):
    """Return a normalized string.

    Normalized means:
    - passed through unidecode.unidecode();
    - lower case;
    - no surrounding spaces.

    The transliteration runs first because unidecode can itself emit
    upper-case letters and padding spaces (for instance on CJK input).
    Lower-casing and stripping afterwards makes the guarantees above hold
    for the returned value, not merely for the input.
    """
    return unidecode.unidecode(string).lower().strip()

Loading…
Cancel
Save