Browse Source

Use unidecode to get the first letter for the index

pull/231/head
Oliverpool 8 years ago
parent
commit
788655f709
  1. 12
      patacrep/content/sort.py
  2. 6
      patacrep/index.py
  3. 11
      patacrep/utils.py

12
patacrep/content/sort.py

@ -5,27 +5,17 @@ to a songbook.
"""
import logging
import unidecode
from patacrep import files
from patacrep.content import ContentError
from patacrep.content import process_content, validate_parser_argument
from patacrep.content.song import OnlySongsError
from patacrep.utils import normalize_string
LOGGER = logging.getLogger(__name__)
DEFAULT_SORT = ['by', 'album', 'title']
def normalize_string(string):
    """Return a normalized string.

    Normalized means:
    - passed through unidecode.unidecode();
    - lower case;
    - no surrounding spaces.

    The transliteration is applied first, because its ASCII output may
    itself contain upper case letters or surrounding spaces (this is the
    case for some non-Latin scripts). Lower-casing and stripping afterwards
    guarantees that the returned value really is lower case and trimmed.
    """
    return unidecode.unidecode(string).lower().strip()
def normalize_field(field):
"""Return a normalized field, it being a string or a list of strings."""
if isinstance(field, str):

6
patacrep/index.py

@ -7,11 +7,11 @@ from a file generated by the latex compilation of the songbook (.sxd).
import locale
import re
import unidecode
from patacrep import authors
from patacrep import encoding
from patacrep.latex import tex2plain
from patacrep.utils import normalize_string
EOL = "\n"
@ -67,7 +67,7 @@ class Index:
def get_first_letter(key):
"""Return the uppercase first letter of key."""
try:
letter = FIRST_LETTER_PATTERN.match(key).group(1)
letter = FIRST_LETTER_PATTERN.match(normalize_string(key)).group(1)
except AttributeError:
# classify as number all the non letter characters
letter = "0"
@ -108,7 +108,7 @@ class Index:
if key not in self.data[first]:
self.data[first][key] = {
'sortingkey': [
unidecode.unidecode(tex2plain(item)).lower()
normalize_string(tex2plain(item))
for item in key
],
'entries': [],

11
patacrep/utils.py

@ -1,6 +1,7 @@
"""Some utility functions"""
from collections import UserDict
import unidecode
from patacrep import errors, Rx
@ -92,3 +93,13 @@ def validate_yaml_schema(data, schema):
schema.validate(data)
except Rx.SchemaMismatch as exception:
raise errors.SchemaError(rx_exception=exception)
def normalize_string(string):
    """Return a normalized string.

    Normalized means:
    - passed through unidecode.unidecode();
    - lower case;
    - no surrounding spaces.

    The transliteration is applied first, because its ASCII output may
    itself contain upper case letters or surrounding spaces (this is the
    case for some non-Latin scripts). Lower-casing and stripping afterwards
    guarantees that the returned value really is lower case and trimmed.
    """
    return unidecode.unidecode(string).lower().strip()

Loading…
Cancel
Save