Utils: merge typo.sh into rules.py

14 years ago · 48aa302491
3 changed files with 50 additions and 43 deletions
--- a/utils/release.sh
+++ b/utils/release.sh
@@ -48,7 +48,6 @@ fi;
 #./utils/indent.sh 2> /dev/null 
 #echo "emacs batch indentation done !"
 ./utils/rules.py 
-./utils/typo.sh ./songs/*/*.sg 
 ./utils/resize-cover.py 

 #build all songbooks
--- a/utils/rules.py
+++ b/utils/rules.py
@@ -2,6 +2,10 @@
 # -*- coding: utf-8 -*-

 import glob
+import sys
+import fileinput
+import re
+re.LOCALE

 # the dictionary has target_word:replacement_word pairs
 word_dic = {
@@ -110,10 +114,55 @@ word_dic = {
 songfiles = glob.glob('songs/*/*.sg')
 for filename in songfiles:
   with open(filename, 'r+') as songfile:
+
       data = songfile.read()
+#replace words
       for search, replace in word_dic.items():
             data = data.replace(search, replace)
+
+#no dots for acronyms
+#       data = re.sub("(?P<capital_letter>[A-Z])\.","\g<capital_letter>", data)
+
+#language based typographical rules
+       if (re.compile("selectlanguage{french}").search(data)):
+          #ensure non-breaking spaces before symbols ? ! ; :
+          data = re.sub("(?P<last_char>\S)(?P<symbol>[!?;:])","\g<last_char> \g<symbol>", data)
+          # ... except for gtabs macros with capos
+          data = re.sub("(?P<gtab>gtab.*)\s:","\g<gtab>:", data)
+# and apply a second time for cases like \gtab{Gm}{10:X02210:}
+          data = re.sub("(?P<gtab>gtab.*)\s:","\g<gtab>:", data)
+          #ensure no spaces after symbols (
+          data = re.sub("(?P<symbol>[\(])\s(?P<next_char>\S)","\g<symbol>\g<next_char>", data)
+          #convert inverted commas
+          data = re.sub("``","{\\og}", data)
+          data = re.sub("''","{\\\\fg}", data)
+       elif (re.compile("selectlanguage{english}").search(data)):
+          #print "english song"
+          #ensure no spaces before symbols ? ! ; : )
+          data = re.sub("(?P<last_char>\S)(?P<symbol>[!?;:\)])","\g<last_char>\g<symbol>", data)
+          #ensure no spaces after symbols (
+          data = re.sub("(?P<symbol>[\(])\s(?P<next_char>\S)","\g<symbol>\g<next_char>", data)
+       elif (re.compile("selectlanguage{spanish}").search(data)):
+          #print "spanish song"
+          #ensure no spaces before symbols ? ! ; : )
+          data = re.sub("(?P<last_char>\S)(?P<symbol>[!?;:\)])","\g<last_char>\g<symbol>", data)
+          #ensure no spaces after symbols ¿ ¡ (
+          data = re.sub("(?P<symbol>[¿¡\(])\s(?P<next_char>\S)","\g<symbol>\g<next_char>", data)
+       elif (re.compile("selectlanguage{portuguese}").search(data)):
+          #convert inverted commas
+          data = re.sub("``","{\\og}", data)
+          data = re.sub("''","{\\\\fg}", data)         
+       else :
+          print "Warning: language is not defined for song : " + filename
+         
       songfile.seek(0)
       songfile.write(data)
       songfile.truncate()
-
+       
+for i, line in enumerate(fileinput.input(songfiles, inplace = 1)):
+#remove trailing spaces and punctuation
+   line = line.rstrip().rstrip(',.;').rstrip()
+#remove multi-spaces within lines
+   line = re.sub("(?P<last_char>\S)\s{2,}","\g<last_char> ", line)
+#write correct line
+   sys.stdout.write(line+'\n')
--- a/utils/typo.sh
+++ b/utils/typo.sh
@@ -1,41 +0,0 @@
-#!/bin/sh
-#
-#Author: Romain Goffe and Alexandre Dupas
-#Date: 27/10/2010
-#Description: fix typographic mistakes, some depending on language
-
-FILES=songs/*/*.sg
-
-if [ $# -gt 0 ] ; then
-    FILES=$@
-fi
-
-# remove trailing space and double space
-sed -i \
-    -e 's/\s*$//g' \
-    -e 's/[,\.]$//g' \
-    -e '/\s*%/! s/\([^ ]\)\s\+/\1 /g' \
-    $FILES
-
-# formating rules depending on language
-for song in $FILES;
-do
-    if grep -q "selectlanguage{english}" $song
-    then
-	sed -i \
-            -e 's/\s*?/?/g' \
-            -e 's/\s*!/!/g' \
-            -e 's/\s*:/:/g' \
-            $song
-    elif grep -q "selectlanguage{french}" $song
-    then
-	sed -i \
-            -e 's/\([^ ]\)?/\1 ?/g' \
-            -e 's/\([^ ]\)!/\1 !/g' \
-            -e 's/\([^ ]\)!/\1 !/g' \
-            -e 's/``/{\\og}/g' \
-            -e "s/''/{\\\\fg}/g" \
-            -e '/\\gtab.*/ ! s/\([^ ]\):/\1 :/g' \
-            $song
-    fi
-done