patacrep/utils/rules.py

#!/usr/bin/python
# -*- coding: utf-8 -*-

import glob
import sys
import fileinput
import re
re.LOCALE

# Literal search/replace table: each key is a substring to look for in a song
# file and the associated value is the text that takes its place.
# The entries are kept in their historical order.
word_dic = {
    # --- "oe" ligature ---
    "coeur":  "cœur",
    "boeuf":  "bœuf",
    "oeuvre": "œuvre",
    "soeur":  "sœur",
    "noeud":  "nœud",
    "oeil":   "œil",
    "voeu":   "vœu",
    "oeuf":   "œuf",
    "oe{}":   "œ",

    # --- contractions ---
    "ptit": "p'tit",

    # --- punctuation ---
    "’":   "'",
    "Ca ": "Ça ",
    "\\musicnote{Intro": "\\musicnote{intro",
    "\\musicnote{Outro": "\\musicnote{outro",
    "...": "{\\dots}",
    "…":   "{\\dots}",
    "say: ``":  "say, ``",
    "says: ``": "says, ``",
    "said: ``": "said, ``",

    # --- a tab character becomes two spaces ---
    "\t": "  ",

    # --- typos ---
    "New-York":   "New York",
    " i ":        " I ",
    "avant hier": "avant-hier",

    # --- latin chord names -> anglo-saxon chord names ---
    "Lam ":   "Am ",
    "La7":    "A7",
    "Lasus2": "Asus2",
    "Sim ":   "Bm ",
    "Sim}":   "Bm}",
    "Sim]":   "Bm]",
    "Si7":    "B7",
    "Dom ":   "Cm ",
    "Do7":    "C7",
    "Do9":    "C9",
    "Ré ":    "D ",
    "Rém ":   "Dm ",
    "Rém]":   "Dm]",
    "Ré7":    "D7",
    "Ré#":    "D#",
    "Mim ":   "Em ",
    "Mim]":   "Em]",
    "Mim7":   "Em7",
    "Mim}":   "Em}",
    "Mi7":    "E7",
    "Mi7sus4": "E7sus4",
    "Fa ":    "F ",
    "Fa}":    "F}",
    "Fa\\":   "F\\",
    "Fam ":   "Fm ",
    "Fa7":    "F7",
    "Sol ":   "G ",
    "Sol]":   "G]",
    "Solm ":  "Gm ",
    "Solm]":  "Gm]",
    "Sol7":   "G7",
    # bass notes after a slash
    "/La":  "/A",
    "/Si":  "/B",
    "/Do":  "/C",
    "/Ré":  "/D",
    "/Mi":  "/E",
    "/Fa":  "/F",
    "/Sol": "/G",
    # first argument of the gtab macro
    "gtab{La":  "gtab{A",
    "gtab{Si":  "gtab{B",
    "gtab{Do":  "gtab{C",
    "gtab{Ré":  "gtab{D",
    "gtab{Mi":  "gtab{E",
    "gtab{Fa":  "gtab{F",
    "gtab{Sol": "gtab{G",
    # chords opened with \[
    "\\[La":  "\\[A",
    "\\[Si":  "\\[B",
    "\\[Do":  "\\[C",
    "\\[Ré":  "\\[D",
    "\\[Mi":  "\\[E",
    "\\[Fa":  "\\[F",
    "\\[Sol": "\\[G",
    "\\[Re":  "\\[D",
    # a trailing "b" / "b7" before "]" is rewritten with "&"
    "b]":  "&]",
    "b7]": "&7]",

    # --- six-character fret strings: leading 0 rewritten as X ---
    # C
    "032010": "X32010",
    # A
    "002220": "X02220",
    "002020": "X02020",
    "002210": "X02210",
    # D
    "000232": "XX0232",
    "X00232": "XX0232",
    "000212": "XX0212",
    "000231": "XX0231",
    "X00231": "XX0231",
    # B
    "021202": "X21202",
    # --- end of rules ---
}
 
# Process song files
#
# Every file matching songs/*/*.sg is rewritten in place after three passes:
#   1. the literal substitutions listed in word_dic,
#   2. typographical rules selected by the \selectlanguage{...} found in the
#      song,
#   3. per-line clean-up of trailing punctuation and repeated spaces.

# A non-space character immediately followed by one of ! ? ; :
_FRENCH_TIGHT_SYMBOL = re.compile(r"(?P<last_char>\S)(?P<symbol>[!?;:])")
# "gtab", the rest of its line (greedy), then a whitespace character and ':'
_GTAB_SPACED_COLON = re.compile(r"(?P<gtab>gtab.*)\s:")
# One whitespace character between a non-space character and ! ? ; : )
_SPACE_BEFORE_SYMBOL = re.compile(r"(?P<last_char>\S)\s(?P<symbol>[!?;:\)])")
# One whitespace character between '(' and a non-space character
_SPACE_AFTER_PAREN = re.compile(r"(?P<symbol>[\(])\s(?P<next_char>\S)")
# Same as above, also accepting the Spanish opening marks.  An alternation is
# used instead of a character class: when the data is a UTF-8 byte string
# (Python 2), a class would hold the individual bytes of the two marks and
# would also match the last byte of unrelated characters such as "á".
_SPACE_AFTER_OPENING = re.compile(r"(?P<symbol>¿|¡|\()\s(?P<next_char>\S)")
# A non-space character followed by two or more whitespace characters
_REPEATED_SPACES = re.compile(r"(?P<last_char>\S)\s{2,}")


def convert_quotes(data):
    """Return *data* with `` replaced by {\\og} and '' replaced by {\\fg}.

    Plain string replacement is used on purpose: as an ``re.sub`` replacement
    template, ``{\\og}`` contains the unknown escape ``\\o``, which Python 3.7+
    rejects.
    """
    return data.replace("``", "{\\og}").replace("''", "{\\fg}")


def apply_language_rules(data, filename):
    """Apply the typographical rules of the language declared in *data*.

    The language is detected by looking for ``selectlanguage{<name>}`` in the
    text; french, english, spanish and portuguese are tested in that order
    and the first one found wins.  When none is present a warning naming
    *filename* is printed and *data* is returned unchanged.

    Returns the transformed text.
    """
    if "selectlanguage{french}" in data:
        # Insert a plain space before ? ! ; : when the symbol directly follows
        # a non-space character.
        data = _FRENCH_TIGHT_SYMBOL.sub(r"\g<last_char> \g<symbol>", data)
        # ... except inside gtab macros with capos.  Each pass removes only
        # the last " :" of a gtab line, so it is run twice to handle cases
        # like \gtab{Gm}{10:X02210:}.
        data = _GTAB_SPACED_COLON.sub(r"\g<gtab>:", data)
        data = _GTAB_SPACED_COLON.sub(r"\g<gtab>:", data)
        # No space after (
        data = _SPACE_AFTER_PAREN.sub(r"\g<symbol>\g<next_char>", data)
        data = convert_quotes(data)
    elif "selectlanguage{english}" in data:
        # No space before ? ! ; : )
        data = _SPACE_BEFORE_SYMBOL.sub(r"\g<last_char>\g<symbol>", data)
        # No space after (
        data = _SPACE_AFTER_PAREN.sub(r"\g<symbol>\g<next_char>", data)
    elif "selectlanguage{spanish}" in data:
        # No space before ? ! ; : )
        data = _SPACE_BEFORE_SYMBOL.sub(r"\g<last_char>\g<symbol>", data)
        # No space after ¿ ¡ (
        data = _SPACE_AFTER_OPENING.sub(r"\g<symbol>\g<next_char>", data)
    elif "selectlanguage{portuguese}" in data:
        data = convert_quotes(data)
    else:
        # Single-argument call form: valid on both Python 2 and Python 3.
        print("Warning: language is not defined for song : " + filename)
    return data


def clean_lines(data):
    """Return *data* with every line tidied up.

    For each line (split on ``"\\n"``): trailing whitespace is removed, then
    any trailing run of ``,`` ``.`` ``;``, then trailing whitespace again;
    finally each run of two or more whitespace characters that follows a
    non-space character is collapsed to one space (leading indentation is
    therefore left alone).
    """
    cleaned = []
    for line in data.split('\n'):
        line = line.rstrip().rstrip(',.;').rstrip()
        cleaned.append(_REPEATED_SPACES.sub(r"\g<last_char> ", line))
    return "\n".join(cleaned)


def process_song(filename, rules):
    """Rewrite the song file *filename* in place.

    *rules* is a mapping of search string to replacement string; every pair is
    applied with ``str.replace`` in the mapping's iteration order before the
    language rules and the line clean-up run.  The file is opened once in
    ``r+`` mode, overwritten from the start and truncated to the new length.
    """
    with open(filename, 'r+') as songfile:
        data = songfile.read()

        # Literal word replacements
        for search, replace in rules.items():
            data = data.replace(search, replace)

        # Disabled rule, kept for reference: no dots for acronyms
        # data = re.sub(r"(?P<capital_letter>[A-Z])\.", r"\g<capital_letter>", data)

        data = apply_language_rules(data, filename)
        data = clean_lines(data)

        songfile.seek(0)
        songfile.write(data)
        songfile.truncate()


songfiles = glob.glob('songs/*/*.sg')
for filename in songfiles:
    process_song(filename, word_dic)
Utils: rewrite latex-preprocessing in python 14 years ago			`#!/usr/bin/python`
			`# -- coding: utf-8 --`

			`import glob`
Utils: merge typo.sh into rules.py 13 years ago			`import sys`
			`import fileinput`
			`import re`
			`re.LOCALE`
Utils: latex-preprocessing twice faster, half the code 14 years ago
Utils: rewrite latex-preprocessing in python 14 years ago			`# the dictionary has target_word:replacement_word pairs`
			`word_dic = {`
Utils: add rules to latex-preprocessing 14 years ago			`##: oe inclusion`
Utils: rewrite latex-preprocessing in python 14 years ago			`"coeur": "cœur",`
			`"boeuf": "bœuf",`
			`"oeuvre": "œuvre",`
			`"soeur": "sœur",`
			`"noeud": "nœud",`
			`"oeil": "œil",`
Utils: add rules to latex-preprocessing 14 years ago			`"voeu": "vœu",`
Utils: add rule Utils: add rule 13 years ago			`"oeuf": "œuf",`
Utils: rewrite latex-preprocessing in python 14 years ago			`"oe{}": "œ",`
Utils: add rule Utils: add rule 13 years ago			`##: Contractions`
			`"ptit": "p'tit",`
Utils: add rules to latex-preprocessing 14 years ago			`##: Punctuation`
Utils: rewrite latex-preprocessing in python 14 years ago			`"’": "'",`
			`"Ca ": "Ça ",`
do not capitalize intro/outro in musicnote environments 14 years ago			`"\\musicnote{Intro": "\\musicnote{intro",`
			`"\\musicnote{Outro": "\\musicnote{outro",`
ensure range of dots command 13 years ago			`"...": "{\\dots}",`
			`"…": "{\\dots}",`
english quotes are introduced by commas, not colons 13 years ago			"say: ``":"say, ``",
			"says: ``":"says, ``",
			"said: ``":"said, ``",
Utils: add rules 14 years ago			`#replace tabs with two spaces`
			`" ": " ",`
Utils: add spelling rule for New York 14 years ago			`##: Typo`
			`"New-York": "New York",`
Utils: add rules 14 years ago			`" i ": " I ",`
Utils: add rule to latex-preprocessing 14 years ago			`"avant hier": "avant-hier",`
write songs with us chords convention to enable transposition macros 13 years ago			`##: Conversion to anglo-saxon chords`
			`"Lam ": "Am ",`
			`"La7": "A7",`
			`"Lasus2": "Asus2",`
			`"Sim ": "Bm ",`
			`"Sim}": "Bm}",`
			`"Sim]": "Bm]",`
			`"Si7": "B7",`
			`"Dom ": "Cm ",`
			`"Do7": "C7",`
			`"Do9": "C9",`
			`"Ré ": "D ",`
			`"Rém ": "Dm ",`
			`"Rém]": "Dm]",`
			`"Ré7": "D7",`
			`"Ré#": "D#",`
			`"Mim ": "Em ",`
			`"Mim]": "Em]",`
			`"Mim7": "Em7",`
			`"Mim}": "Em}",`
			`"Mi7": "E7",`
			`"Mi7sus4": "E7sus4",`
			`"Fa ": "F ",`
			`"Fa}": "F}",`
			`"Fa\\": "F\\",`
			`"Fam ": "Fm ",`
			`"Fa7": "F7",`
			`"Sol ": "G ",`
			`"Sol]": "G]",`
			`"Solm ": "Gm ",`
			`"Solm]": "Gm]",`
			`"Sol7": "G7",`
			`"/La": "/A",`
			`"/Si": "/B",`
			`"/Do": "/C",`
			`"/Ré": "/D",`
			`"/Mi": "/E",`
			`"/Fa": "/F",`
			`"/Sol": "/G",`
			`"gtab{La": "gtab{A",`
			`"gtab{Si": "gtab{B",`
			`"gtab{Do": "gtab{C",`
			`"gtab{Ré": "gtab{D",`
			`"gtab{Mi": "gtab{E",`
			`"gtab{Fa": "gtab{F",`
			`"gtab{Sol": "gtab{G",`
			`"\\[La": "\\[A",`
			`"\\[Si": "\\[B",`
			`"\\[Do": "\\[C",`
			`"\\[Ré": "\\[D",`
			`"\\[Mi": "\\[E",`
			`"\\[Fa": "\\[F",`
			`"\\[Sol": "\\[G",`
			`"\\[Re": "\\[D",`
Utils: rewrite latex-preprocessing in python 14 years ago			`"b]": "&]",`
write songs with us chords convention to enable transposition macros 13 years ago			`"b7]": "&7]",`
			`#C`
Utils: rewrite latex-preprocessing in python 14 years ago			`"032010": "X32010",`
write songs with us chords convention to enable transposition macros 13 years ago			`#A`
Utils: rewrite latex-preprocessing in python 14 years ago			`"002220": "X02220",`
			`"002020": "X02020",`
			`"002210": "X02210",`
write songs with us chords convention to enable transposition macros 13 years ago			`#D`
Utils: rewrite latex-preprocessing in python 14 years ago			`"000232": "XX0232",`
			`"X00232": "XX0232",`
			`"000212": "XX0212",`
			`"000231": "XX0231",`
			`"X00231": "XX0231",`
write songs with us chords convention to enable transposition macros 13 years ago			`#B`
Utils: rewrite latex-preprocessing in python 14 years ago			`"021202": "X21202",`
synchro with songbook-client 14 years ago			`### end of rules`
Utils: rewrite latex-preprocessing in python 14 years ago			`}`

			`# Process song files`
			`songfiles = glob.glob('songs//.sg')`
Utils: latex-preprocessing twice faster, half the code 14 years ago			`for filename in songfiles:`
			`with open(filename, 'r+') as songfile:`
Utils: merge typo.sh into rules.py 13 years ago
Utils: latex-preprocessing twice faster, half the code 14 years ago			`data = songfile.read()`
Utils: merge typo.sh into rules.py 13 years ago			`#replace words`
Utils: latex-preprocessing twice faster, half the code 14 years ago			`for search, replace in word_dic.items():`
			`data = data.replace(search, replace)`
Utils: merge typo.sh into rules.py 13 years ago
			`#no dots for acronyms`
			`# data = re.sub("(?P<capital_letter>[A-Z])\.","\g<capital_letter>", data)`

			`#language based typographical rules`
			`if (re.compile("selectlanguage{french}").search(data)):`
			`#ensure non-breaking spaces before symbols ? ! ; :`
			`data = re.sub("(?P<last_char>\S)(?P<symbol>[!?;:])","\g<last_char> \g<symbol>", data)`
			`# ... except for gtabs macros with capos`
			`data = re.sub("(?P<gtab>gtab.*)\s:","\g<gtab>:", data)`
			`# and apply a second time for cases like \gtab{Gm}{10:X02210:}`
			`data = re.sub("(?P<gtab>gtab.*)\s:","\g<gtab>:", data)`
			`#ensure no spaces after symbols (`
			`data = re.sub("(?P<symbol>[\(])\s(?P<next_char>\S)","\g<symbol>\g<next_char>", data)`
			`#convert inverted commas`
			data = re.sub("``","{\\og}", data)
			`data = re.sub("''","{\\\\fg}", data)`
			`elif (re.compile("selectlanguage{english}").search(data)):`
			`#print "english song"`
			`#ensure no spaces before symbols ? ! ; : )`
Utils: correct regexp for removing spaces before punctuation symbols 13 years ago			`data = re.sub("(?P<last_char>\S)\s(?P<symbol>[!?;:\)])","\g<last_char>\g<symbol>", data)`
Utils: merge typo.sh into rules.py 13 years ago			`#ensure no spaces after symbols (`
			`data = re.sub("(?P<symbol>[\(])\s(?P<next_char>\S)","\g<symbol>\g<next_char>", data)`
			`elif (re.compile("selectlanguage{spanish}").search(data)):`
			`#print "spanish song"`
			`#ensure no spaces before symbols ? ! ; : )`
Utils: correct regexp for removing spaces before punctuation symbols 13 years ago			`data = re.sub("(?P<last_char>\S)\s(?P<symbol>[!?;:\)])","\g<last_char>\g<symbol>", data)`
Utils: merge typo.sh into rules.py 13 years ago			`#ensure no spaces after symbols ¿ ¡ (`
			`data = re.sub("(?P<symbol>[¿¡\(])\s(?P<next_char>\S)","\g<symbol>\g<next_char>", data)`
			`elif (re.compile("selectlanguage{portuguese}").search(data)):`
			`#convert inverted commas`
			data = re.sub("``","{\\og}", data)
			`data = re.sub("''","{\\\\fg}", data)`
			`else :`
			`print "Warning: language is not defined for song : " + filename`
Utils: improve performance by opening files only once (thanks Guibou!) 13 years ago
			`lines = data.split('\n')`
			`for index, line in enumerate(lines):`
			`#remove trailing spaces and punctuation`
			`line = line.rstrip().rstrip(',.;').rstrip()`
			`#remove multi-spaces within lines`
			`line = re.sub("(?P<last_char>\S)\s{2,}","\g<last_char> ", line)`
			`lines[index] = line`

			`data = "\n".join(lines)`
Utils: latex-preprocessing twice faster, half the code 14 years ago			`songfile.seek(0)`
			`songfile.write(data)`
			`songfile.truncate()`
Utils: improve performance by opening files only once (thanks Guibou!) 13 years ago