From 48aa302491f2db66c9362388d5a511be18129e3d Mon Sep 17 00:00:00 2001
From: Romain Goffe <romain.goffe@gmail.com>
Date: Tue, 10 Jan 2012 19:51:47 +0100
Subject: [PATCH] Utils: merge typo.sh into rules.py

---
 utils/release.sh |  1 -
 utils/rules.py   | 51 +++++++++++++++++++++++++++++++++++++++++++++++-
 utils/typo.sh    | 41 --------------------------------------
 3 files changed, 50 insertions(+), 43 deletions(-)
 delete mode 100755 utils/typo.sh
diff --git a/utils/release.sh b/utils/release.sh
index e5147df1..adcdbdaa 100755
--- a/utils/release.sh
+++ b/utils/release.sh
@@ -48,7 +48,6 @@ fi;
 #./utils/indent.sh 2> /dev/null 
 #echo "emacs batch indentation done !"
 ./utils/rules.py 
-./utils/typo.sh ./songs/*/*.sg 
 ./utils/resize-cover.py 
 
 #build all songbooks
diff --git a/utils/rules.py b/utils/rules.py
index bd193a95..0a7f8847 100755
--- a/utils/rules.py
+++ b/utils/rules.py
@@ -2,6 +2,10 @@
 # -*- coding: utf-8 -*-
 
 import glob
+import sys
+import fileinput
+import re
+re.LOCALE
 
 # the dictionary has target_word:replacement_word pairs
 word_dic = {
@@ -110,10 +114,55 @@ word_dic = {
 songfiles = glob.glob('songs/*/*.sg')
 for filename in songfiles:
    with open(filename, 'r+') as songfile:
+
        data = songfile.read()
+#replace words
        for search, replace in word_dic.items():
              data = data.replace(search, replace)
+
+#no dots for acronyms
+#       data = re.sub("(?P<capital_letter>[A-Z])\.","\g<capital_letter>", data)
+
+#language based typographical rules, chosen from the selectlanguage{...} tag found in the song
+       if "selectlanguage{french}" in data:
+          #ensure non-breaking spaces before symbols ? ! ; :
+          data = re.sub(r"(?P<last_char>\S)(?P<symbol>[!?;:])", r"\g<last_char> \g<symbol>", data)
+          # ... except for gtab macros with capos
+          data = re.sub(r"(?P<gtab>gtab.*)\s:", r"\g<gtab>:", data)
+          #greedy .* only fixes the last " :" of a line: run again for \gtab{Gm}{10:X02210:}
+          data = re.sub(r"(?P<gtab>gtab.*)\s:", r"\g<gtab>:", data)
+          #ensure no spaces after symbols (
+          data = re.sub(r"(?P<symbol>[\(])\s(?P<next_char>\S)", r"\g<symbol>\g<next_char>", data)
+          #convert inverted commas (literal replacement: no regex template escaping involved)
+          data = data.replace("``", r"{\og}")
+          data = data.replace("''", r"{\fg}")
+       elif "selectlanguage{english}" in data:
+          #the blanks must be matched explicitly, otherwise the substitution is a no-op
+          #ensure no spaces before symbols ? ! ; : )
+          data = re.sub(r"(?P<last_char>\S)[ \t]+(?P<symbol>[!?;:\)])", r"\g<last_char>\g<symbol>", data)
+          #ensure no spaces after symbols (
+          data = re.sub(r"(?P<symbol>[\(])\s(?P<next_char>\S)", r"\g<symbol>\g<next_char>", data)
+       elif "selectlanguage{spanish}" in data:
+          #the blanks must be matched explicitly, otherwise the substitution is a no-op
+          #ensure no spaces before symbols ? ! ; : )
+          data = re.sub(r"(?P<last_char>\S)[ \t]+(?P<symbol>[!?;:\)])", r"\g<last_char>\g<symbol>", data)
+          #ensure no spaces after symbols ¿ ¡ (
+          data = re.sub(r"(?P<symbol>[¿¡\(])\s(?P<next_char>\S)", r"\g<symbol>\g<next_char>", data)
+       elif "selectlanguage{portuguese}" in data:
+          #convert inverted commas (literal replacement: no regex template escaping involved)
+          data = data.replace("``", r"{\og}")
+          data = data.replace("''", r"{\fg}")
+       else:
+          print("Warning: language is not defined for song : " + filename)
+
        songfile.seek(0)
        songfile.write(data)
        songfile.truncate()
-
+       
+#fileinput falls back to sys.argv/stdin when given an empty list: skip the pass if no song was found
+for line in (fileinput.input(songfiles, inplace=1) if songfiles else ()):
+#remove trailing spaces and punctuation
+   line = line.rstrip().rstrip(',.;').rstrip()
+#remove multi-spaces within lines
+   line = re.sub(r"(?P<last_char>\S)\s{2,}", r"\g<last_char> ", line)
+   sys.stdout.write(line + '\n')   #inplace mode redirects stdout into the file being read
diff --git a/utils/typo.sh b/utils/typo.sh
deleted file mode 100755
index dd841e52..00000000
--- a/utils/typo.sh
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/bin/sh
-#
-#Author: Romain Goffe and Alexandre Dupas
-#Date: 27/10/2010
-#Description: fix typographic mistakes, some depending on language
-
-FILES=songs/*/*.sg
-
-if [ $# -gt 0 ] ; then
-    FILES=$@
-fi
-
-# remove trailing space and double space
-sed -i \
-    -e 's/\s*$//g' \
-    -e 's/[,\.]$//g' \
-    -e '/\s*%/! s/\([^ ]\)\s\+/\1 /g' \
-    $FILES
-
-# formating rules depending on language
-for song in $FILES;
-do
-    if grep -q "selectlanguage{english}" $song
-    then
-	sed -i \
-            -e 's/\s*?/?/g' \
-            -e 's/\s*!/!/g' \
-            -e 's/\s*:/:/g' \
-            $song
-    elif grep -q "selectlanguage{french}" $song
-    then
-	sed -i \
-            -e 's/\([^ ]\)?/\1 ?/g' \
-            -e 's/\([^ ]\)!/\1 !/g' \
-            -e 's/\([^ ]\)!/\1 !/g' \
-            -e 's/``/{\\og}/g' \
-            -e "s/''/{\\\\fg}/g" \
-            -e '/\\gtab.*/ ! s/\([^ ]\):/\1 :/g' \
-            $song
-    fi
-done