Browse Source

Utils: merge typo.sh into rules.py

remotes/origin/cmake
Romain Goffe 13 years ago
parent
commit
48aa302491
  1. 1
      utils/release.sh
  2. 51
      utils/rules.py
  3. 41
      utils/typo.sh

1
utils/release.sh

@ -48,7 +48,6 @@ fi;
#./utils/indent.sh 2> /dev/null
#echo "emacs batch indentation done !"
./utils/rules.py
./utils/typo.sh ./songs/*/*.sg
./utils/resize-cover.py
#build all songbooks

51
utils/rules.py

@ -2,6 +2,10 @@
# -*- coding: utf-8 -*-
import glob
import sys
import fileinput
import re
re.LOCALE
# the dictionary has target_word:replacement_word pairs
word_dic = {
@ -110,10 +114,55 @@ word_dic = {
# Typographic clean-up of every song file, done in two passes:
#   1. whole-file pass: word replacements from word_dic, then punctuation
#      rules that depend on the language selected in the song;
#   2. line pass: trailing whitespace/punctuation and repeated spaces.

# Languages recognised through the "selectlanguage{...}" marker, listed in
# the order in which they are tested (the first one found wins).
SUPPORTED_LANGUAGES = ('french', 'english', 'spanish', 'portuguese')


def detect_language(data):
    """Return the first supported language selected in *data*, or None."""
    for language in SUPPORTED_LANGUAGES:
        if "selectlanguage{%s}" % language in data:
            return language
    return None


def apply_language_rules(data, language):
    """Return *data* with the typographic rules of *language* applied.

    *language* is one of SUPPORTED_LANGUAGES; any other value (including
    None) leaves the text untouched.
    """
    if language == 'french':
        # ensure a space before symbols ? ! ; :
        # NOTE(review): the original comment says "non-breaking", but the
        # replacement inserts an ordinary space -- confirm which is wanted.
        data = re.sub(r"(?P<last_char>\S)(?P<symbol>[!?;:])",
                      r"\g<last_char> \g<symbol>", data)
        # ... except for gtabs macros with capos; the rule only removes one
        # " :" per gtab per pass, so it is applied twice for cases like
        # \gtab{Gm}{10:X02210:}
        for _ in range(2):
            data = re.sub(r"(?P<gtab>gtab.*)\s:", r"\g<gtab>:", data)
        # ensure no spaces after symbols (
        data = re.sub(r"(?P<symbol>[\(])\s(?P<next_char>\S)",
                      r"\g<symbol>\g<next_char>", data)
        # convert inverted commas; plain str.replace avoids depending on how
        # re.sub treats the unknown "\o" escape in a replacement template
        data = data.replace("``", r"{\og}").replace("''", r"{\fg}")
    elif language in ('english', 'spanish'):
        # ensure no spaces before symbols ? ! ; : )
        # The pattern must consume the blanks between the two groups,
        # otherwise the substitution rewrites the text unchanged. Only
        # spaces and tabs are removed so that lines are never joined.
        data = re.sub(r"(?P<last_char>\S)[ \t]+(?P<symbol>[!?;:\)])",
                      r"\g<last_char>\g<symbol>", data)
        # ensure no spaces after opening symbols: ( and, in Spanish, ¿ ¡
        if language == 'spanish':
            opening = r"(?P<symbol>[¿¡\(])\s(?P<next_char>\S)"
        else:
            opening = r"(?P<symbol>[\(])\s(?P<next_char>\S)"
        data = re.sub(opening, r"\g<symbol>\g<next_char>", data)
    elif language == 'portuguese':
        # convert inverted commas
        data = data.replace("``", r"{\og}").replace("''", r"{\fg}")
    return data


def clean_line(line):
    """Return *line* without trailing spaces/punctuation or repeated spaces.

    The result carries no line terminator. Leading indentation is kept:
    only runs of whitespace that follow a non-space character are collapsed.
    """
    # remove trailing spaces and punctuation
    line = line.rstrip().rstrip(',.;').rstrip()
    # remove multi-spaces within lines
    return re.sub(r"(?P<last_char>\S)\s{2,}", r"\g<last_char> ", line)


songfiles = glob.glob('songs/*/*.sg')

# First pass: rewrite each song as a whole.
for filename in songfiles:
    with open(filename, 'r+') as songfile:
        data = songfile.read()
        #replace words
        for search, replace in word_dic.items():
            data = data.replace(search, replace)
        #no dots for acronyms (rule currently disabled)
        # data = re.sub("(?P<capital_letter>[A-Z])\.","\g<capital_letter>", data)
        #language based typographical rules
        language = detect_language(data)
        if language is None:
            print("Warning: language is not defined for song : " + filename)
        data = apply_language_rules(data, language)
        # overwrite the file in place and drop any leftover tail
        songfile.seek(0)
        songfile.write(data)
        songfile.truncate()

# Second pass: line-level clean-up, edited in place through fileinput (which
# redirects stdout to the file being processed). The guard matters: given an
# empty list, fileinput would read from standard input instead.
if songfiles:
    for line in fileinput.input(songfiles, inplace=1):
        #write correct line
        sys.stdout.write(clean_line(line) + '\n')

41
utils/typo.sh

@ -1,41 +0,0 @@
#!/bin/sh
#
#Author: Romain Goffe and Alexandre Dupas
#Date: 27/10/2010
#Description: fix typographic mistakes, some depending on language
#
#Usage: typo.sh [FILE...]
#Without arguments, every file matching songs/*/*.sg is processed.
#All files are modified in place (sed -i).
#NOTE(review): -i, \s and \+ look like GNU sed extensions -- confirm the
#target sed supports them.
FILES=songs/*/*.sg
# command-line arguments, when given, replace the default file list
if [ $# -gt 0 ] ; then
FILES=$@
fi
# remove trailing space and double space
# Expressions, in order:
#   1. strip trailing whitespace;
#   2. drop one trailing comma or period;
#   3. on lines that do not match \s*% (in practice: lines without a %),
#      collapse each whitespace run following a non-space character into
#      a single space.
# $FILES is expanded unquoted on purpose so that the glob and the argument
# list split into separate paths; paths containing whitespace are therefore
# split as well.
sed -i \
-e 's/\s*$//g' \
-e 's/[,\.]$//g' \
-e '/\s*%/! s/\([^ ]\)\s\+/\1 /g' \
$FILES
# formatting rules depending on language
# The language is detected from the selectlanguage{...} marker in each song;
# songs selecting neither english nor french are left as they are.
for song in $FILES;
do
if grep -q "selectlanguage{english}" $song
then
# english: remove any whitespace before ? ! :
sed -i \
-e 's/\s*?/?/g' \
-e 's/\s*!/!/g' \
-e 's/\s*:/:/g' \
$song
elif grep -q "selectlanguage{french}" $song
then
# french: insert a space before ? and ! when the previous character is not
# a space, convert `` and '' to {\og} and {\fg}, and insert a space before :
# except on lines containing \gtab.
# NOTE(review): the ! expression appears twice in a row; possibly one of
# them was meant to handle ; instead -- confirm.
sed -i \
-e 's/\([^ ]\)?/\1 ?/g' \
-e 's/\([^ ]\)!/\1 !/g' \
-e 's/\([^ ]\)!/\1 !/g' \
-e 's/``/{\\og}/g' \
-e "s/''/{\\\\fg}/g" \
-e '/\\gtab.*/ ! s/\([^ ]\):/\1 :/g' \
$song
fi
done
Loading…
Cancel
Save