From 788655f709436030e7b778d2370cc209f7501274 Mon Sep 17 00:00:00 2001 From: Oliverpool Date: Sun, 26 Jun 2016 17:10:55 +0200 Subject: [PATCH 1/9] Use unidecode to get the first letter for the index --- patacrep/content/sort.py | 12 +----------- patacrep/index.py | 6 +++--- patacrep/utils.py | 11 +++++++++++ 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/patacrep/content/sort.py b/patacrep/content/sort.py index cb10d157..e4bde299 100755 --- a/patacrep/content/sort.py +++ b/patacrep/content/sort.py @@ -5,27 +5,17 @@ to a songbook. """ import logging -import unidecode from patacrep import files from patacrep.content import ContentError from patacrep.content import process_content, validate_parser_argument from patacrep.content.song import OnlySongsError +from patacrep.utils import normalize_string LOGGER = logging.getLogger(__name__) DEFAULT_SORT = ['by', 'album', 'title'] -def normalize_string(string): - """Return a normalized string. - - Normalized means: - - no surrounding spaces; - - lower case; - - passed through unidecode.unidecode(). - """ - return unidecode.unidecode(string.lower().strip()) - def normalize_field(field): """Return a normalized field, it being a string or a list of strings.""" if isinstance(field, str): diff --git a/patacrep/index.py b/patacrep/index.py index e520e713..75d55107 100644 --- a/patacrep/index.py +++ b/patacrep/index.py @@ -7,11 +7,11 @@ from a file generated by the latex compilation of the songbook (.sxd). 
import locale import re -import unidecode from patacrep import authors from patacrep import encoding from patacrep.latex import tex2plain +from patacrep.utils import normalize_string EOL = "\n" @@ -67,7 +67,7 @@ class Index: def get_first_letter(key): """Return the uppercase first letter of key.""" try: - letter = FIRST_LETTER_PATTERN.match(key).group(1) + letter = FIRST_LETTER_PATTERN.match(normalize_string(key)).group(1) except AttributeError: # classify as number all the non letter characters letter = "0" @@ -108,7 +108,7 @@ class Index: if key not in self.data[first]: self.data[first][key] = { 'sortingkey': [ - unidecode.unidecode(tex2plain(item)).lower() + normalize_string(tex2plain(item)) for item in key ], 'entries': [], diff --git a/patacrep/utils.py b/patacrep/utils.py index 5051a592..1d139f6a 100644 --- a/patacrep/utils.py +++ b/patacrep/utils.py @@ -1,6 +1,7 @@ """Some utility functions""" from collections import UserDict +import unidecode from patacrep import errors, Rx @@ -92,3 +93,13 @@ def validate_yaml_schema(data, schema): schema.validate(data) except Rx.SchemaMismatch as exception: raise errors.SchemaError(rx_exception=exception) + +def normalize_string(string): + """Return a normalized string. + + Normalized means: + - no surrounding spaces; + - lower case; + - passed through unidecode.unidecode(). 
+ """ + return unidecode.unidecode(string.lower().strip()) From 608626654dc01134f0ce9b4fc9acd4fdbc0e8708 Mon Sep 17 00:00:00 2001 From: Oliverpool Date: Sun, 26 Jun 2016 17:12:30 +0200 Subject: [PATCH 2/9] update news --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index f959491d..b73d1cbf 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,7 @@ * The `tex` keyword correctly includes all files of the list [#228](https://github.com/patacrep/patacrep/pull/228) * PDF generation * The bookmarks correctly use the unicode encoding [#225](https://github.com/patacrep/patacrep/pull/225) + * The first letters of the index are "unidecoded" [#231](https://github.com/patacrep/patacrep/pull/231) * Enhancements * Error management * The 'error' option is considered for contentlist errors (incorrect syntax for instance) [#226](https://github.com/patacrep/patacrep/pull/226) From 5a9c48e5515315243892b8a167e10dc59864990b Mon Sep 17 00:00:00 2001 From: Oliverpool Date: Sun, 26 Jun 2016 19:02:30 +0200 Subject: [PATCH 3/9] Prevent 'files' import (fails windows build) --- setup.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index aacfe910..8449c37a 100755 --- a/setup.py +++ b/setup.py @@ -14,10 +14,26 @@ setup_kwargs = { } if sys.platform[0:3] == 'win': - from patacrep import __DATADIR__, files + from patacrep import __DATADIR__ + + def recursive_find(root_directory): + """Recursively find files from a root_directory. + + Yield the path of every file found below it. + + Arguments: + - `root_directory`: root directory of the search. 
+ """ + if not os.path.isdir(root_directory): + return + + with chdir(root_directory): + for root, __ignored, filenames in os.walk(os.curdir): + for filename in filenames: + yield os.path.join(root, filename) # List the data files - data_files = files.recursive_find(__DATADIR__) + data_files = recursive_find(__DATADIR__) data_files = ["data/" + d for d in data_files] setup_kwargs['package_data'] = {'patacrep': data_files} else: From 10ad14079ba76079ed9d2f57ebc113f197825c78 Mon Sep 17 00:00:00 2001 From: Oliverpool Date: Sun, 26 Jun 2016 19:14:51 +0200 Subject: [PATCH 4/9] missing import --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 8449c37a..6cbb5c2d 100755 --- a/setup.py +++ b/setup.py @@ -7,6 +7,7 @@ $ python setup.py install from patacrep import __version__ from setuptools import setup, find_packages +import os import sys setup_kwargs = { From 43178d5bbf9bceaea04f151bbe94185f62c64086 Mon Sep 17 00:00:00 2001 From: Oliverpool Date: Sun, 26 Jun 2016 20:13:29 +0200 Subject: [PATCH 5/9] prevent the use of chdir --- setup.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 6cbb5c2d..6ffe091f 100755 --- a/setup.py +++ b/setup.py @@ -28,10 +28,9 @@ if sys.platform[0:3] == 'win': if not os.path.isdir(root_directory): return - with chdir(root_directory): - for root, __ignored, filenames in os.walk(os.curdir): - for filename in filenames: - yield os.path.join(root, filename) + for root, __ignored, filenames in os.walk(root_directory): + for filename in filenames: + yield os.path.join(root, filename) # List the data files data_files = recursive_find(__DATADIR__) From 8fd4fbabfcfc014d05f3eb086b665dcd75a7393d Mon Sep 17 00:00:00 2001 From: Oliverpool Date: Mon, 27 Jun 2016 07:20:37 +0200 Subject: [PATCH 6/9] data must be relative to patacrep install --- setup.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6ffe091f..aedd2009 100755 
--- a/setup.py +++ b/setup.py @@ -28,9 +28,12 @@ if sys.platform[0:3] == 'win': if not os.path.isdir(root_directory): return - for root, __ignored, filenames in os.walk(root_directory): + olddir = os.getcwd() + os.chdir(root_directory) + for root, __ignored, filenames in os.walk(os.curdir): for filename in filenames: yield os.path.join(root, filename) + os.chdir(olddir) # List the data files data_files = recursive_find(__DATADIR__) From d49c4c0f7693fb1b96c28704e024f6889da7b881 Mon Sep 17 00:00:00 2001 From: Oliverpool Date: Mon, 27 Jun 2016 08:00:50 +0200 Subject: [PATCH 7/9] improve index building comments --- patacrep/build.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/patacrep/build.py b/patacrep/build.py index a66619d7..15a4ca78 100644 --- a/patacrep/build.py +++ b/patacrep/build.py @@ -307,8 +307,8 @@ class SongbookBuilder: raise errors.LatexCompilationError(self.basename) def build_sbx(self): - """Make index""" - LOGGER.info("Building indexes…") + """Make .sbx indexes from .sxd files""" + LOGGER.info("Building .sbx indexes…") sxd_files = glob.glob("%s_*.sxd" % self.basename) for sxd_file in sxd_files: LOGGER.debug("Processing " + sxd_file) From 7e5f81e2e5c9c1c9716cb1c495f0085fab374190 Mon Sep 17 00:00:00 2001 From: Oliverpool Date: Mon, 4 Jul 2016 10:45:19 +0200 Subject: [PATCH 8/9] Test index generation --- test/test_content/test_content.py | 2 +- test/test_index/__init__.py | 0 test/test_index/test_index.py | 46 +++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 test/test_index/__init__.py create mode 100644 test/test_index/test_index.py diff --git a/test/test_content/test_content.py b/test/test_content/test_content.py index 770d4e1a..e3bac8d6 100644 --- a/test/test_content/test_content.py +++ b/test/test_content/test_content.py @@ -42,7 +42,7 @@ class FileTest(unittest.TestCase, metaclass=dynamic.DynamicTest): @classmethod def _create_content_test(cls, base): - """Return a function 
that `base.source` produces the correct file list""" + """Return a function that tests that `base.source` produces the correct file list""" def test_content(self): """Test that `base.source` produces the correct file list""" diff --git a/test/test_index/__init__.py b/test/test_index/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/test_index/test_index.py b/test/test_index/test_index.py new file mode 100644 index 00000000..94748fc4 --- /dev/null +++ b/test/test_index/test_index.py @@ -0,0 +1,46 @@ +"""Tests for the index generation.""" + +import codecs +import glob +import os +import unittest + +from patacrep.index import process_sxd + +from .. import dynamic # pylint: disable=unused-import + + +class FileTest(unittest.TestCase, metaclass=dynamic.DynamicTest): + """Test of the index generation. + + For any given `foo.sxd`, it generates the index. + It checks that the generated file is equal to the one in `foo.sbx`. + """ + + @classmethod + def _iter_testmethods(cls): + """Iterate over dynamically generated test methods""" + for source in sorted(glob.glob(os.path.join( + os.path.dirname(__file__), + '*.sxd', + ))): + base = source[:-len(".sxd")] + yield ( + "test_index_{}".format(os.path.basename(base)), + cls._create_index_test(base), + ) + + @classmethod + def _create_index_test(cls, base): + """Return a function that tests that `foo.sxd` produces the sbx file""" + + def test_index(self): + """Test that `foo.sxd` produces the correct sbx file""" + generated_index = process_sxd(base + ".sxd").entries_to_str() + with codecs.open(base + ".sbx", "r", "utf-8") as control_index: + self.assertEqual(control_index.read(), generated_index, ) + + test_index.__doc__ = ( + "Test that '{base}.sxd' produces the correct sbx file""" + ).format(base=os.path.basename(base)) + return test_index From aaa2a4c2c0fa4023015dc075ec2ba9f3e18daa56 Mon Sep 17 00:00:00 2001 From: Oliverpool Date: Mon, 4 Jul 2016 11:07:09 +0200 Subject: [PATCH 9/9] Improve gitignore 
and add missing test index files --- .gitignore | 2 +- test/test_index/.gitignore | 2 ++ test/test_index/idx_auth.sbx | 26 ++++++++++++++++++++++++++ test/test_index/idx_auth.sxd | 19 +++++++++++++++++++ test/test_index/idx_title.sbx | 22 ++++++++++++++++++++++ test/test_index/idx_title.sxd | 13 +++++++++++++ 6 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 test/test_index/.gitignore create mode 100644 test/test_index/idx_auth.sbx create mode 100644 test/test_index/idx_auth.sxd create mode 100644 test/test_index/idx_title.sbx create mode 100644 test/test_index/idx_title.sxd diff --git a/.gitignore b/.gitignore index 69298645..a8e9e8cb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ deb_dist build dist -.gitignore +#.gitignore *~ *.aux *.sbd diff --git a/test/test_index/.gitignore b/test/test_index/.gitignore new file mode 100644 index 00000000..371a68d1 --- /dev/null +++ b/test/test_index/.gitignore @@ -0,0 +1,2 @@ +!*.sxd +!*.sbx diff --git a/test/test_index/idx_auth.sbx b/test/test_index/idx_auth.sbx new file mode 100644 index 00000000..1063af69 --- /dev/null +++ b/test/test_index/idx_auth.sbx @@ -0,0 +1,26 @@ +\begin{idxblock}{A} + \idxentry{ + \indexauthor{}{Aba} + }{ + \hyperlink{song1-4.2}{4} + } \idxentry{ + \indexauthor{}{Äbc} + }{ + \hyperlink{song1-2.2}{2} + } \idxentry{ + \indexauthor{}{Abd} + }{ + \hyperlink{song1-3.2}{3} + } +\end{idxblock} +\begin{idxblock}{J} + \idxentry{ + \indexauthor{}{Jack} + }{ + \hyperlink{song1-1.2}{1} + } \idxentry{ + \indexauthor{}{Johns} + }{ + \hyperlink{song1-1.2}{1} + } +\end{idxblock} diff --git a/test/test_index/idx_auth.sxd b/test/test_index/idx_auth.sxd new file mode 100644 index 00000000..5d2f091b --- /dev/null +++ b/test/test_index/idx_auth.sxd @@ -0,0 +1,19 @@ +AUTHOR INDEX DATA FILE +%ignore unknown +%after by +%sep and +Jack and Johns +1 +song1-1.2 +Äbc +2 +song1-2.2 +Abd +3 +song1-3.2 +Aba +4 +song1-4.2 +unknown +5 +song1-5.2 diff --git a/test/test_index/idx_title.sbx 
b/test/test_index/idx_title.sbx new file mode 100644 index 00000000..5cc83540 --- /dev/null +++ b/test/test_index/idx_title.sbx @@ -0,0 +1,22 @@ +\begin{idxblock}{C} + \idxentry{ + \indextitle{}{Caa} + }{ + \hyperlink{song1-3.2}{3} + } \idxentry{ + \indextitle{}{Çab} + }{ + \hyperlink{song1-2.2}{2} + } \idxentry{ + \indextitle{}{Cac} + }{ + \hyperlink{song1-4.2}{4} + } +\end{idxblock} +\begin{idxblock}{M} + \idxentry{ + \indextitle{}{My song} + }{ + \hyperlink{song1-1.2}{1} + } +\end{idxblock} diff --git a/test/test_index/idx_title.sxd b/test/test_index/idx_title.sxd new file mode 100644 index 00000000..f6ba287f --- /dev/null +++ b/test/test_index/idx_title.sxd @@ -0,0 +1,13 @@ +TITLE INDEX DATA FILE +My song +1 +song1-1.2 +Çab +2 +song1-2.2 +Caa +3 +song1-3.2 +Cac +4 +song1-4.2