From 48aa302491f2db66c9362388d5a511be18129e3d Mon Sep 17 00:00:00 2001
From: Romain Goffe
Date: Tue, 10 Jan 2012 19:51:47 +0100
Subject: [PATCH] Utils: merge typo.sh into rules.py

---
 utils/release.sh |    1 -
 utils/rules.py   |   51 +++++++++++++++++++++++++++++++++++++++++++++++-
 utils/typo.sh    |   41 --------------------------------------
 3 files changed, 50 insertions(+), 43 deletions(-)
 delete mode 100755 utils/typo.sh

diff --git a/utils/release.sh b/utils/release.sh
index e5147df1..adcdbdaa 100755
--- a/utils/release.sh
+++ b/utils/release.sh
@@ -48,7 +48,6 @@ fi;
 #./utils/indent.sh 2> /dev/null
 #echo "emacs batch indentation done !"
 ./utils/rules.py
-./utils/typo.sh ./songs/*/*.sg
 ./utils/resize-cover.py
 
 #build all songbooks
diff --git a/utils/rules.py b/utils/rules.py
index bd193a95..0a7f8847 100755
--- a/utils/rules.py
+++ b/utils/rules.py
@@ -2,6 +2,10 @@
 # -*- coding: utf-8 -*-
 
 import glob
+import sys
+import fileinput
+import re
+re.LOCALE
 
 # the dictionary has target_word:replacement_word pairs
 word_dic = {
@@ -110,10 +114,55 @@ word_dic = {
 songfiles = glob.glob('songs/*/*.sg')
 for filename in songfiles:
     with open(filename, 'r+') as songfile:
+
         data = songfile.read()
+#replace words
         for search, replace in word_dic.items():
             data = data.replace(search, replace)
+
+#no dots for acronyms
+# data = re.sub("(?P<capital>[A-Z])\.","\g<capital>", data)
+
+#language based typographical rules
+        if (re.compile("selectlanguage{french}").search(data)):
+            #ensure non-breaking spaces before symbols ? ! ; :
+            data = re.sub("(?P<last>\S)(?P<symbol>[!?;:])","\g<last> \g<symbol>", data)
+            # ... except for gtabs macros with capos
+            data = re.sub("(?P<gtab>gtab.*)\s:","\g<gtab>:", data)
+# and apply a second time for cases like \gtab{Gm}{10:X02210:}
+            data = re.sub("(?P<gtab>gtab.*)\s:","\g<gtab>:", data)
+            #ensure no spaces after symbols (
+            data = re.sub("(?P<symbol>[\(])\s(?P<next>\S)","\g<symbol>\g<next>", data)
+            #convert inverted commas
+            data = re.sub("``","{\\og}", data)
+            data = re.sub("''","{\\\\fg}", data)
+        elif (re.compile("selectlanguage{english}").search(data)):
+            #print "english song"
+            #ensure no spaces before symbols ? ! ; : )
+            data = re.sub("(?P<last>\S)(?P<symbol>[!?;:\)])","\g<last>\g<symbol>", data)
+            #ensure no spaces after symbols (
+            data = re.sub("(?P<symbol>[\(])\s(?P<next>\S)","\g<symbol>\g<next>", data)
+        elif (re.compile("selectlanguage{spanish}").search(data)):
+            #print "spanish song"
+            #ensure no spaces before symbols ? ! ; : )
+            data = re.sub("(?P<last>\S)(?P<symbol>[!?;:\)])","\g<last>\g<symbol>", data)
+            #ensure no spaces after symbols ¿ ¡ (
+            data = re.sub("(?P<symbol>[¿¡\(])\s(?P<next>\S)","\g<symbol>\g<next>", data)
+        elif (re.compile("selectlanguage{portuguese}").search(data)):
+            #convert inverted commas
+            data = re.sub("``","{\\og}", data)
+            data = re.sub("''","{\\\\fg}", data)
+        else :
+            print "Warning: language is not defined for song : " + filename
+
         songfile.seek(0)
         songfile.write(data)
         songfile.truncate()
-
+
+for i, line in enumerate(fileinput.input(songfiles, inplace = 1)):
+#remove trailing spaces and punctuation
+    line = line.rstrip().rstrip(',.;').rstrip()
+#remove multi-spaces within lines
+    line = re.sub("(?P<last>\S)\s{2,}","\g<last> ", line)
+#write correct line
+    sys.stdout.write(line+'\n')
diff --git a/utils/typo.sh b/utils/typo.sh
deleted file mode 100755
index dd841e52..00000000
--- a/utils/typo.sh
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/bin/sh
-#
-#Author: Romain Goffe and Alexandre Dupas
-#Date: 27/10/2010
-#Description: fix typographic mistakes, some depending on language
-
-FILES=songs/*/*.sg
-
-if [ $# -gt 0 ] ; then
-    FILES=$@
-fi
-
-# remove trailing space and double space
-sed -i \
-    -e 's/\s*$//g' \
-    -e 's/[,\.]$//g' \
-    -e '/\s*%/! s/\([^ ]\)\s\+/\1 /g' \
-    $FILES
-
-# formating rules depending on language
-for song in $FILES;
-do
-    if grep -q "selectlanguage{english}" $song
-    then
-        sed -i \
-            -e 's/\s*?/?/g' \
-            -e 's/\s*!/!/g' \
-            -e 's/\s*:/:/g' \
-            $song
-    elif grep -q "selectlanguage{french}" $song
-    then
-        sed -i \
-            -e 's/\([^ ]\)?/\1 ?/g' \
-            -e 's/\([^ ]\)!/\1 !/g' \
-            -e 's/\([^ ]\)!/\1 !/g' \
-            -e 's/``/{\\og}/g' \
-            -e "s/''/{\\\\fg}/g" \
-            -e '/\\gtab.*/ ! s/\([^ ]\):/\1 :/g' \
-            $song
-    fi
-done