Browse Source

Merge pull request #231 from patacrep/index_unicode_compliant

Use unidecode to get the first letter for the index
pull/239/head
oliverpool 9 years ago
committed by GitHub
parent
commit
9f3c323b82
  1. 2
      .gitignore
  2. 1
      NEWS.md
  3. 4
      patacrep/build.py
  4. 12
      patacrep/content/sort.py
  5. 6
      patacrep/index.py
  6. 11
      patacrep/utils.py
  7. 23
      setup.py
  8. 2
      test/test_content/test_content.py
  9. 2
      test/test_index/.gitignore
  10. 0
      test/test_index/__init__.py
  11. 26
      test/test_index/idx_auth.sbx
  12. 19
      test/test_index/idx_auth.sxd
  13. 22
      test/test_index/idx_title.sbx
  14. 13
      test/test_index/idx_title.sxd
  15. 46
      test/test_index/test_index.py

2
.gitignore

@ -1,7 +1,7 @@
deb_dist deb_dist
build build
dist dist
.gitignore #.gitignore
*~ *~
*.aux *.aux
*.sbd *.sbd

1
NEWS.md

@ -9,6 +9,7 @@
* Fix the import when the content folder is zipped [#235](https://github.com/patacrep/patacrep/pull/235) * Fix the import when the content folder is zipped [#235](https://github.com/patacrep/patacrep/pull/235)
* PDF generation * PDF generation
* The bookmarks correctly use the unicode encoding [#225](https://github.com/patacrep/patacrep/pull/225) * The bookmarks correctly use the unicode encoding [#225](https://github.com/patacrep/patacrep/pull/225)
* The first letters of the index are "unidecoded" [#231](https://github.com/patacrep/patacrep/pull/231)
* Enhancements * Enhancements
* Error management * Error management
* The 'error' option is considered for contentlist errors (incorrect syntax for instance) [#226](https://github.com/patacrep/patacrep/pull/226) * The 'error' option is considered for contentlist errors (incorrect syntax for instance) [#226](https://github.com/patacrep/patacrep/pull/226)

4
patacrep/build.py

@ -307,8 +307,8 @@ class SongbookBuilder:
raise errors.LatexCompilationError(self.basename) raise errors.LatexCompilationError(self.basename)
def build_sbx(self): def build_sbx(self):
"""Make index""" """Make .sbx indexes from .sxd files"""
LOGGER.info("Building indexes…") LOGGER.info("Building .sbx indexes…")
sxd_files = glob.glob("%s_*.sxd" % self.basename) sxd_files = glob.glob("%s_*.sxd" % self.basename)
for sxd_file in sxd_files: for sxd_file in sxd_files:
LOGGER.debug("Processing " + sxd_file) LOGGER.debug("Processing " + sxd_file)

12
patacrep/content/sort.py

@ -5,27 +5,17 @@ to a songbook.
""" """
import logging import logging
import unidecode
from patacrep import files from patacrep import files
from patacrep.content import ContentError from patacrep.content import ContentError
from patacrep.content import process_content, validate_parser_argument from patacrep.content import process_content, validate_parser_argument
from patacrep.content.song import OnlySongsError from patacrep.content.song import OnlySongsError
from patacrep.utils import normalize_string
LOGGER = logging.getLogger(__name__) LOGGER = logging.getLogger(__name__)
DEFAULT_SORT = ['by', 'album', 'title'] DEFAULT_SORT = ['by', 'album', 'title']
def normalize_string(string):
    """Return a normalized version of `string`.

    Normalized means:
    - passed through unidecode.unidecode() (transliterated to ASCII);
    - lower case;
    - no surrounding spaces.

    The transliteration is applied first: it may itself produce upper case
    letters or surrounding spaces (this is the case for some non-latin
    scripts), which would otherwise survive in the returned value.
    """
    return unidecode.unidecode(string).lower().strip()
def normalize_field(field): def normalize_field(field):
"""Return a normalized field, it being a string or a list of strings.""" """Return a normalized field, it being a string or a list of strings."""
if isinstance(field, str): if isinstance(field, str):

6
patacrep/index.py

@ -7,11 +7,11 @@ from a file generated by the latex compilation of the songbook (.sxd).
import locale import locale
import re import re
import unidecode
from patacrep import authors from patacrep import authors
from patacrep import encoding from patacrep import encoding
from patacrep.latex import tex2plain from patacrep.latex import tex2plain
from patacrep.utils import normalize_string
EOL = "\n" EOL = "\n"
@ -67,7 +67,7 @@ class Index:
def get_first_letter(key): def get_first_letter(key):
"""Return the uppercase first letter of key.""" """Return the uppercase first letter of key."""
try: try:
letter = FIRST_LETTER_PATTERN.match(key).group(1) letter = FIRST_LETTER_PATTERN.match(normalize_string(key)).group(1)
except AttributeError: except AttributeError:
# classify as number all the non letter characters # classify as number all the non letter characters
letter = "0" letter = "0"
@ -108,7 +108,7 @@ class Index:
if key not in self.data[first]: if key not in self.data[first]:
self.data[first][key] = { self.data[first][key] = {
'sortingkey': [ 'sortingkey': [
unidecode.unidecode(tex2plain(item)).lower() normalize_string(tex2plain(item))
for item in key for item in key
], ],
'entries': [], 'entries': [],

11
patacrep/utils.py

@ -1,6 +1,7 @@
"""Some utility functions""" """Some utility functions"""
from collections import UserDict from collections import UserDict
import unidecode
from patacrep import errors, Rx from patacrep import errors, Rx
@ -92,3 +93,13 @@ def validate_yaml_schema(data, schema):
schema.validate(data) schema.validate(data)
except Rx.SchemaMismatch as exception: except Rx.SchemaMismatch as exception:
raise errors.SchemaError(rx_exception=exception) raise errors.SchemaError(rx_exception=exception)
def normalize_string(string):
    """Return a normalized version of `string`.

    Normalized means:
    - passed through unidecode.unidecode() (transliterated to ASCII);
    - lower case;
    - no surrounding spaces.

    The transliteration is applied first: it may itself produce upper case
    letters or surrounding spaces (this is the case for some non-latin
    scripts), which would otherwise survive in the returned value.
    """
    return unidecode.unidecode(string).lower().strip()

23
setup.py

@ -7,6 +7,7 @@ $ python setup.py install
from patacrep import __version__ from patacrep import __version__
from setuptools import setup, find_packages from setuptools import setup, find_packages
import os
import sys import sys
setup_kwargs = { setup_kwargs = {
@ -14,10 +15,28 @@ setup_kwargs = {
} }
if sys.platform[0:3] == 'win': if sys.platform[0:3] == 'win':
from patacrep import __DATADIR__, files from patacrep import __DATADIR__
def recursive_find(root_directory):
    """Recursively find the files located under `root_directory`.

    Yield the path of every file found, relative to `root_directory` and
    prefixed with `os.curdir` (for instance `./sub/file.txt` on POSIX).
    Nothing is yielded if `root_directory` is not a directory.

    Arguments:
    - `root_directory`: root directory of the search.
    """
    if not os.path.isdir(root_directory):
        return

    # Walk `root_directory` itself instead of os.chdir()-ing into it: a
    # generator that changes the working directory only restores it once
    # fully consumed, so an early stop or an error would leave the whole
    # process in the wrong directory.
    for root, __ignored, filenames in os.walk(root_directory):
        for filename in filenames:
            relative_path = os.path.relpath(
                os.path.join(root, filename),
                root_directory,
            )
            # Keep the historical `./`-prefixed form of the yielded paths.
            yield os.path.join(os.curdir, relative_path)
# List the data files # List the data files
data_files = files.recursive_find(__DATADIR__) data_files = recursive_find(__DATADIR__)
data_files = ["data/" + d for d in data_files] data_files = ["data/" + d for d in data_files]
setup_kwargs['package_data'] = {'patacrep': data_files} setup_kwargs['package_data'] = {'patacrep': data_files}
else: else:

2
test/test_content/test_content.py

@ -42,7 +42,7 @@ class FileTest(unittest.TestCase, metaclass=dynamic.DynamicTest):
@classmethod @classmethod
def _create_content_test(cls, base): def _create_content_test(cls, base):
"""Return a function that `base.source` produces the correct file list""" """Return a function that tests that `base.source` produces the correct file list"""
def test_content(self): def test_content(self):
"""Test that `base.source` produces the correct file list""" """Test that `base.source` produces the correct file list"""

2
test/test_index/.gitignore

@ -0,0 +1,2 @@
!*.sxd
!*.sbx

0
test/test_index/__init__.py

26
test/test_index/idx_auth.sbx

@ -0,0 +1,26 @@
\begin{idxblock}{A}
\idxentry{
\indexauthor{}{Aba}
}{
\hyperlink{song1-4.2}{4}
} \idxentry{
\indexauthor{}{Äbc}
}{
\hyperlink{song1-2.2}{2}
} \idxentry{
\indexauthor{}{Abd}
}{
\hyperlink{song1-3.2}{3}
}
\end{idxblock}
\begin{idxblock}{J}
\idxentry{
\indexauthor{}{Jack}
}{
\hyperlink{song1-1.2}{1}
} \idxentry{
\indexauthor{}{Johns}
}{
\hyperlink{song1-1.2}{1}
}
\end{idxblock}

19
test/test_index/idx_auth.sxd

@ -0,0 +1,19 @@
AUTHOR INDEX DATA FILE
%ignore unknown
%after by
%sep and
Jack and Johns
1
song1-1.2
Äbc
2
song1-2.2
Abd
3
song1-3.2
Aba
4
song1-4.2
unknown
5
song1-5.2

22
test/test_index/idx_title.sbx

@ -0,0 +1,22 @@
\begin{idxblock}{C}
\idxentry{
\indextitle{}{Caa}
}{
\hyperlink{song1-3.2}{3}
} \idxentry{
\indextitle{}{Çab}
}{
\hyperlink{song1-2.2}{2}
} \idxentry{
\indextitle{}{Cac}
}{
\hyperlink{song1-4.2}{4}
}
\end{idxblock}
\begin{idxblock}{M}
\idxentry{
\indextitle{}{My song}
}{
\hyperlink{song1-1.2}{1}
}
\end{idxblock}

13
test/test_index/idx_title.sxd

@ -0,0 +1,13 @@
TITLE INDEX DATA FILE
My song
1
song1-1.2
Çab
2
song1-2.2
Caa
3
song1-3.2
Cac
4
song1-4.2

46
test/test_index/test_index.py

@ -0,0 +1,46 @@
"""Tests for the index generation."""
import codecs
import glob
import os
import unittest
from patacrep.index import process_sxd
from .. import dynamic # pylint: disable=unused-import
class FileTest(unittest.TestCase, metaclass=dynamic.DynamicTest):
    """Test of the index generation.

    For every `<name>.sxd` file located next to this module, the index is
    generated, and the result is checked to be equal to the content of the
    matching `<name>.sbx` control file.
    """

    @classmethod
    def _iter_testmethods(cls):
        """Iterate over dynamically generated test methods.

        Yield a `(method name, test function)` pair for each `.sxd` file of
        this directory, in sorted order.
        """
        sxd_pattern = os.path.join(os.path.dirname(__file__), '*.sxd')
        for source in sorted(glob.glob(sxd_pattern)):
            # Strip the extension: `base` is shared by the .sxd/.sbx pair.
            base = source[:-len(".sxd")]
            yield (
                "test_index_{}".format(os.path.basename(base)),
                cls._create_index_test(base),
            )

    @classmethod
    def _create_index_test(cls, base):
        """Return a function testing that `base`.sxd produces `base`.sbx.

        Arguments:
        - `base`: path of the index files, without extension.
        """

        def test_index(self):
            """Test that the .sxd file produces the correct sbx file"""
            generated_index = process_sxd(base + ".sxd").entries_to_str()
            with codecs.open(base + ".sbx", "r", "utf-8") as control_index:
                self.assertEqual(control_index.read(), generated_index)

        # Give each generated test a docstring naming the file it checks.
        test_index.__doc__ = (
            "Test that '{base}.sxd' produces the correct sbx file"
        ).format(base=os.path.basename(base))
        return test_index
Loading…
Cancel
Save