patacrep/utils/rules.py

#!/usr/bin/python
# -*- coding: utf-8 -*-

import glob
import getopt, sys
import re
import logging
import locale
re.LOCALE

# Literal search-and-replace rules applied to every song file.
# Each entry maps a target substring to its replacement (target: replacement).
# Matching is plain substring matching, not regular expressions.
word_dic = {
##: "oe" ligature
"coeur": "cœur",
"boeuf": "bœuf",
"oeuvre": "œuvre",
"soeur": "sœur",
"noeud": "nœud",
"oeil": "œil",
"voeu": "vœu",
"oeuf": "œuf",
"oe{}": "œ",
"\\œ": "œ",
##: Contractions
"ptit": "p'tit",
##: Punctuation
"’": "'",
"Ca ": "Ça ",
"...": "{\\dots}",
"…": "{\\dots}",
"say: ``":"say, ``",
"says: ``":"says, ``",
"said: ``":"said, ``",
#replace tabs with two spaces (the key is written as an escape so that
#editors and whitespace normalisers cannot silently turn it into a space)
"\t": "  ",
##: Typo
"New-York": "New York",
" i ": " I ",
"avant hier": "avant-hier",
##: Conversion to anglo-saxon chords
"Lam ": "Am ",
"La7": "A7",
"Lasus2": "Asus2",
"Sim ": "Bm ",
"Sim}": "Bm}",
"Sim]": "Bm]",
"Si7": "B7",
"Dom ": "Cm ",
"Do7": "C7",
"Do9": "C9",
"Ré ": "D ",
"Rém ": "Dm ",
"Rém]": "Dm]",
"Ré7": "D7",
"Ré#": "D#",
"Mim ": "Em ",
"Mim]": "Em]",
"Mim7": "Em7",
"Mim}": "Em}",
"Mi7": "E7",
"Mi7sus4": "E7sus4",
"Fa ": "F ",
"Fa}": "F}",
"Fa\\": "F\\",
"Fam ": "Fm ",
"Fa7": "F7",
"Sol ": "G ",
"Sol]": "G]",
"Solm ": "Gm ",
"Solm]": "Gm]",
"Sol7": "G7",
"/La": "/A",
"/Si": "/B",
"/Do": "/C",
"/Ré": "/D",
"/Mi": "/E",
"/Fa": "/F",
"/Sol": "/G",
"tab{La": "tab{A",
"tab{Si": "tab{B",
"tab{Do": "tab{C",
"tab{Ré": "tab{D",
"tab{Mi": "tab{E",
"tab{Fa": "tab{F",
"tab{Sol": "tab{G",
"\\[La": "\\[A",
"\\[Si": "\\[B",
"\\[Do": "\\[C",
"\\[Ré": "\\[D",
"\\[Mi": "\\[E",
"\\[Fa": "\\[F",
"\\[Sol": "\\[G",
"\\[Re": "\\[D",
"b]": "&]",
"b7]": "&7]",
##: Six-character diagram strings, grouped by chord letter
##: (presumably fret positions where a leading 0 is replaced by X -- TODO confirm)
#C
"032010": "X32010",
#A
"002220": "X02220",
"002020": "X02020",
"002210": "X02210",
#D
"000232": "XX0232",
"X00232": "XX0232",
"000212": "XX0212",
"000231": "XX0231",
"X00231": "XX0231",
#B
"021202": "X21202",
## LaTeX
"beginchorus": "begin{chorus}",
"endchorus": "end{chorus}",
"beginverse*": "begin{verse*}",
"beginverse": "begin{verse}",
"endverse": "end{verse}",
"}[by=": "}\n  [by=",
### end of rules
}


def usage():
   '''
   Print the command-line help text to standard output.
   '''
   #print(...) with a single argument emits the same text on Python 2
   #(parenthesised expression) and Python 3 (function call), whereas the
   #bare print statement is a syntax error on Python 3.
   print('''
Usage: rules.py [OPTION]

OPTIONS
    -h, --help
      display this help and exit

    -f, --files=FILES
      apply the set of rules on FILES
      default is songs/*/*.sg

    -l, --log=LEVEL
      set the logging level to LEVEL
      outputs logging information to rules.log
      possible LEVEL values are : debug, info, warning, error and critical
''')

def replace_words(string, rules=None):
   '''
   Apply literal search-and-replace rules to the song data.

   string: the song data to fix.
   rules:  optional mapping of target substring to replacement; when
           omitted, the module-level word_dic is used.

   Returns the data with every rule applied. Rules are applied longest
   target first (ties broken alphabetically) so that the result does not
   depend on dictionary iteration order: for instance "beginverse*" must
   be rewritten before its prefix "beginverse", otherwise the output
   would be "begin{verse}*".
   '''
   logging.info("replace_words: search and replace words from dictionary into song data")
   if rules is None:
      rules = word_dic
   ordered_rules = sorted(rules.items(), key=lambda rule: (-len(rule[0]), rule[0]))
   for search, replace in ordered_rules:
      string = string.replace(search, replace)
   return string

#language based typographical rules
def language_rules(string, filename=None):
   '''
   Fix common typographical mistakes according to the song language.

   The language is detected by looking for the text
   "selectlanguage{<lang>}" in the data; french, english, spanish and
   portuguese are recognised, tested in that order.

   string:   the song data to fix.
   filename: optional name of the song file, only used in the warning
             printed when no known language is found.

   Returns the fixed data; it is returned unchanged (after printing a
   warning) when no known language is declared.
   '''
   logging.info("language_rules: looking for common typographical mistakes")
   if "selectlanguage{french}" in string:
      logging.info("  song language is set to : french")
      #ensure a space before symbols ? ! ; :
      string = re.sub(r"(?P<last_char>\S)(?P<symbol>[!?;:])", r"\g<last_char> \g<symbol>", string)
      #... except for gtabs macros with capos; the greedy match only fixes
      #the last colon of a line per pass, so apply it twice for cases like
      #\gtab{Gm}{10:X02210:}
      for _ in range(2):
         string = re.sub(r"(?P<gtab>tab.?\{.*)\s:", r"\g<gtab>:", string)
      #ensure no spaces after symbols (
      string = re.sub(r"(?P<symbol>[\(])\s(?P<next_char>\S)", r"\g<symbol>\g<next_char>", string)
      #convert inverted commas (plain replacement: no regex escapes involved)
      string = string.replace("``", "{\\og}")
      string = string.replace("''", "{\\fg}")
   elif "selectlanguage{english}" in string:
      logging.info("  song language is set to : english")
      #ensure no spaces before symbols ? ! ; : )
      string = re.sub(r"(?P<last_char>\S)\s(?P<symbol>[!?;:\)])", r"\g<last_char>\g<symbol>", string)
      #ensure no spaces after symbols (
      string = re.sub(r"(?P<symbol>[\(])\s(?P<next_char>\S)", r"\g<symbol>\g<next_char>", string)
   elif "selectlanguage{spanish}" in string:
      logging.info("  song language is set to : spanish")
      #ensure no spaces before symbols ? ! ; : )
      string = re.sub(r"(?P<last_char>\S)\s(?P<symbol>[!?;:\)])", r"\g<last_char>\g<symbol>", string)
      #ensure no spaces after symbols ¿ ¡ (
      string = re.sub(r"(?P<symbol>[¿¡\(])\s(?P<next_char>\S)", r"\g<symbol>\g<next_char>", string)
   elif "selectlanguage{portuguese}" in string:
      logging.info("  song language is set to : portuguese")
      #convert inverted commas
      string = string.replace("``", "{\\og}")
      string = string.replace("''", "{\\fg}")
   else:
      #the file name is not in scope here unless the caller passes it
      if filename is None:
         filename = "(unknown file)"
      print("Warning: language is not defined for song : " + filename)
   return string

def process_lines(lines):
   '''
   Clean up the song data one line at a time.

   For every line: closes a starred verse properly (an "end{verse}"
   found while a "begin{verse*}" is still open becomes "end{verse*}"),
   drops trailing whitespace and trailing , . ; characters, and squeezes
   runs of whitespace that follow a non-blank character into a single
   space. Leading whitespace (indentation) is left alone.

   The list is modified in place and also returned.
   '''
   logging.info("process_lines: handling song data line by line")
   starred_begin = re.compile("begin{verse\*}")
   starred_end = re.compile("end{verse\*}")
   plain_end = re.compile("end{verse}")

   inside_starred_verse = False
   for position, text in enumerate(lines):
      if starred_begin.search(text):
         inside_starred_verse = True

      if starred_end.search(text):
         inside_starred_verse = False

      if inside_starred_verse and plain_end.search(text):
         #a starred verse was closed with the unstarred environment name
         text = text.replace("verse", "verse*")
         inside_starred_verse = False

      #trailing blanks first, then trailing punctuation, then any blanks
      #that were sitting in front of that punctuation
      text = text.rstrip()
      text = text.rstrip(',.;')
      text = text.rstrip()
      #collapse multi-spaces that follow a non-blank character
      lines[position] = re.sub("(?P<last_char>\S)\s{2,}","\g<last_char> ", text)
   return lines


def main():
   '''
   Command-line entry point.

   Parses the -h/--help, -f/--files and -l/--log options, then rewrites
   every selected song file in place after running replace_words,
   language_rules and process_lines on its content. Without -f, the
   files matching songs/*/*.sg are processed.

   Exits with status 2 on an unknown option and raises ValueError on an
   invalid log level.
   '''
   locale.setlocale(locale.LC_ALL, '')
   try:
      parsed_options, _positional = getopt.getopt(
         sys.argv[1:], "hf:l:", ["help", "files=", "log="])
   except getopt.GetoptError:
      usage()
      sys.exit(2)

   targets = glob.glob('songs/*/*.sg')
   loglevel = "warning"

   for flag, value in parsed_options:
      if flag == "-h" or flag == "--help":
         usage()
         sys.exit()
      if flag == "-f" or flag == "--files":
         targets = glob.glob(value)
         continue
      if flag == "-l" or flag == "--log":
         loglevel = value
         numeric_level = getattr(logging, loglevel.upper(), None)
         if not isinstance(numeric_level, int):
            raise ValueError('Invalid log level: %s' % loglevel)
         #logging is only configured when a level is explicitly requested
         logging.basicConfig(level=numeric_level, filename='rules.log', filemode='w')
         continue
      assert False, "unhandled option"

   for path in targets:
      #each file is opened once in read/write mode and overwritten in place
      with open(path, 'r+') as handle:
         logging.info("checking file: "+path)
         content = handle.read()
         #no dots for acronyms (rule currently disabled)
         #content = re.sub("(?P<capital_letter>[A-Z])\.","\g<capital_letter>", content)
         content = language_rules(replace_words(content))
         cleaned_lines = process_lines(content.split('\n'))
         content = "\n".join(cleaned_lines)
         handle.seek(0)
         handle.write(content)
         #drop leftover bytes when the new content is shorter than the old
         handle.truncate()
      
#run the command-line entry point only when executed as a script
if __name__ == "__main__":
   main()
Utils: rewrite latex-preprocessing in python 15 years ago			`#!/usr/bin/python`
			`# -- coding: utf-8 --`

			`import glob`
Utils: add comments, options, and methods to rules.py 13 years ago			`import getopt, sys`
Utils: merge typo.sh into rules.py 13 years ago			`import re`
Utils: add comments, options, and methods to rules.py 13 years ago			`import logging`
			`import locale`
Utils: merge typo.sh into rules.py 13 years ago			`re.LOCALE`
Utils: latex-preprocessing twice faster, half the code 15 years ago
Utils: rewrite latex-preprocessing in python 15 years ago			`# the dictionary has target_word:replacement_word pairs`
			`word_dic = {`
Utils: add rules to latex-preprocessing 15 years ago			`##: oe inclusion`
Utils: rewrite latex-preprocessing in python 15 years ago			`"coeur": "cœur",`
			`"boeuf": "bœuf",`
			`"oeuvre": "œuvre",`
			`"soeur": "sœur",`
			`"noeud": "nœud",`
			`"oeil": "œil",`
Utils: add rules to latex-preprocessing 15 years ago			`"voeu": "vœu",`
Utils: add rule Utils: add rule 14 years ago			`"oeuf": "œuf",`
Utils: rewrite latex-preprocessing in python 15 years ago			`"oe{}": "œ",`
fix badly escaped character in rules.py 13 years ago			`"\\œ": "œ",`
Utils: add rule Utils: add rule 14 years ago			`##: Contractions`
			`"ptit": "p'tit",`
Utils: add rules to latex-preprocessing 15 years ago			`##: Punctuation`
Utils: rewrite latex-preprocessing in python 15 years ago			`"’": "'",`
			`"Ca ": "Ça ",`
ensure range of dots command 13 years ago			`"...": "{\\dots}",`
			`"…": "{\\dots}",`
english quotes are introduced by commas, not colons 13 years ago			"say: ``":"say, ``",
			"says: ``":"says, ``",
			"said: ``":"said, ``",
Utils: add rules 14 years ago			`#replace tabs with two spaces`
			`" ": " ",`
Utils: add spelling rule for New York 15 years ago			`##: Typo`
			`"New-York": "New York",`
Utils: add rules 14 years ago			`" i ": " I ",`
Utils: add rule to latex-preprocessing 14 years ago			`"avant hier": "avant-hier",`
write songs with us chords convention to enable transposition macros 13 years ago			`##: Conversion to anglo-saxon chords`
			`"Lam ": "Am ",`
			`"La7": "A7",`
			`"Lasus2": "Asus2",`
			`"Sim ": "Bm ",`
			`"Sim}": "Bm}",`
			`"Sim]": "Bm]",`
			`"Si7": "B7",`
			`"Dom ": "Cm ",`
			`"Do7": "C7",`
			`"Do9": "C9",`
			`"Ré ": "D ",`
			`"Rém ": "Dm ",`
			`"Rém]": "Dm]",`
			`"Ré7": "D7",`
			`"Ré#": "D#",`
			`"Mim ": "Em ",`
			`"Mim]": "Em]",`
			`"Mim7": "Em7",`
			`"Mim}": "Em}",`
			`"Mi7": "E7",`
			`"Mi7sus4": "E7sus4",`
			`"Fa ": "F ",`
			`"Fa}": "F}",`
			`"Fa\\": "F\\",`
			`"Fam ": "Fm ",`
			`"Fa7": "F7",`
			`"Sol ": "G ",`
			`"Sol]": "G]",`
			`"Solm ": "Gm ",`
			`"Solm]": "Gm]",`
			`"Sol7": "G7",`
			`"/La": "/A",`
			`"/Si": "/B",`
			`"/Do": "/C",`
			`"/Ré": "/D",`
			`"/Mi": "/E",`
			`"/Fa": "/F",`
			`"/Sol": "/G",`
support for utabs and important gtabs in rules.py 13 years ago			`"tab{La": "tab{A",`
			`"tab{Si": "tab{B",`
			`"tab{Do": "tab{C",`
			`"tab{Ré": "tab{D",`
			`"tab{Mi": "tab{E",`
			`"tab{Fa": "tab{F",`
			`"tab{Sol": "tab{G",`
write songs with us chords convention to enable transposition macros 13 years ago			`"\\[La": "\\[A",`
			`"\\[Si": "\\[B",`
			`"\\[Do": "\\[C",`
			`"\\[Ré": "\\[D",`
			`"\\[Mi": "\\[E",`
			`"\\[Fa": "\\[F",`
			`"\\[Sol": "\\[G",`
			`"\\[Re": "\\[D",`
Utils: rewrite latex-preprocessing in python 15 years ago			`"b]": "&]",`
write songs with us chords convention to enable transposition macros 13 years ago			`"b7]": "&7]",`
			`#C`
Utils: rewrite latex-preprocessing in python 15 years ago			`"032010": "X32010",`
write songs with us chords convention to enable transposition macros 13 years ago			`#A`
Utils: rewrite latex-preprocessing in python 15 years ago			`"002220": "X02220",`
			`"002020": "X02020",`
			`"002210": "X02210",`
write songs with us chords convention to enable transposition macros 13 years ago			`#D`
Utils: rewrite latex-preprocessing in python 15 years ago			`"000232": "XX0232",`
			`"X00232": "XX0232",`
			`"000212": "XX0212",`
			`"000231": "XX0231",`
			`"X00231": "XX0231",`
write songs with us chords convention to enable transposition macros 13 years ago			`#B`
Utils: rewrite latex-preprocessing in python 15 years ago			`"021202": "X21202",`
use LaTeX instead of TeX for verse and chorus environments 13 years ago			`## LaTeX`
			`"beginchorus": "begin{chorus}",`
			`"endchorus": "end{chorus}",`
			`"beginverse": "begin{verse}",`
			`"beginverse": "begin{verse}",`
			`"endverse": "end{verse}",`
			`"}[by=": "}\n [by=",`
synchro with songbook-client 15 years ago			`### end of rules`
Utils: rewrite latex-preprocessing in python 15 years ago			`}`
Utils: merge typo.sh into rules.py 13 years ago

Utils: add comments, options, and methods to rules.py 13 years ago			`def usage():`
			`print '''`
			`Usage: rules.py [OPTION]`

			`OPTIONS`
			`-h, --help`
			`display this help and exit`

			`-f, --files=FILES`
			`apply the set of rules on FILES`
			`default is songs//.sg`

			`-l, --log=LEVEL`
			`set the logging level to LEVEL`
Utils: add log info for rules.py 13 years ago			`outputs logging information to rules.log`
Utils: add comments, options, and methods to rules.py 13 years ago			`possible LEVEL values are : debug, info, warning, error and critical`
			`'''`

			`def replace_words(string):`
			`'''`
			`Search the data string for words defined in the dictionary and`
			`replace them. This method avoids usual spelling and typos mistakes`
			`when writing a song.`
			`'''`
			`logging.info("replace_words: search and replace words from dictionary into song data")`
			`for search, replace in word_dic.items():`
			`string = string.replace(search, replace)`
			`return string`
Utils: merge typo.sh into rules.py 13 years ago
			`#language based typographical rules`
Utils: add comments, options, and methods to rules.py 13 years ago			`def language_rules(string):`
			`'''`
			`Search the data string for common typographical mistakes.`
			`Implemented rules depend on the current song language that is`
			`defined by babel for every .sg file through the macro`
			`\selectlanguage{<lang>}`
			`'''`
			`logging.info("language_rules: looking for common typographical mistakes")`
			`if (re.compile("selectlanguage{french}").search(string)):`
			`logging.info(" song language is set to : french")`
			`#ensure non-breaking spaces before symbols ? ! ; :`
			`string = re.sub("(?P<last_char>\S)(?P<symbol>[!?;:])","\g<last_char> \g<symbol>", string)`
			`#... except for gtabs macros with capos`
support for utabs and important gtabs in rules.py 13 years ago			`string = re.sub("(?P<gtab>tab.?{.*)\s:","\g<gtab>:", string)`
Utils: add comments, options, and methods to rules.py 13 years ago			`#and apply a second time for cases like \gtab{Gm}{10:X02210:}`
support for utabs and important gtabs in rules.py 13 years ago			`string = re.sub("(?P<gtab>tab.?{.*)\s:","\g<gtab>:", string)`
Utils: add comments, options, and methods to rules.py 13 years ago			`#ensure no spaces after symbols (`
			`string = re.sub("(?P<symbol>[\(])\s(?P<next_char>\S)","\g<symbol>\g<next_char>", string)`
			`#convert inverted commas`
			string = re.sub("``","{\\og}", string)
			`string = re.sub("''","{\\\\fg}", string)`
			`elif (re.compile("selectlanguage{english}").search(string)):`
			`logging.info(" song language is set to : english")`
			`#ensure no spaces before symbols ? ! ; : )`
			`string = re.sub("(?P<last_char>\S)\s(?P<symbol>[!?;:\)])","\g<last_char>\g<symbol>", string)`
			`#ensure no spaces after symbols (`
			`string = re.sub("(?P<symbol>[\(])\s(?P<next_char>\S)","\g<symbol>\g<next_char>", string)`
			`elif (re.compile("selectlanguage{spanish}").search(string)):`
			`logging.info(" song language is set to : spanish")`
			`#ensure no spaces before symbols ? ! ; : )`
			`string = re.sub("(?P<last_char>\S)\s(?P<symbol>[!?;:\)])","\g<last_char>\g<symbol>", string)`
			`#ensure no spaces after symbols ¿ ¡ (`
			`string = re.sub("(?P<symbol>[¿¡\(])\s(?P<next_char>\S)","\g<symbol>\g<next_char>", string)`
			`elif (re.compile("selectlanguage{portuguese}").search(string)):`
			`logging.info(" song language is set to : portuguese")`
			`#convert inverted commas`
			string = re.sub("``","{\\og}", string)
			`string = re.sub("''","{\\\\fg}", string)`
			`else :`
			`print "Warning: language is not defined for song : " + filename`
			`return string`

			`def process_lines(lines):`
			`'''`
use LaTeX instead of TeX for verse and chorus environments 13 years ago			`Removes trailing punctuation and multi-spaces from lines. Note`
Utils: add comments, options, and methods to rules.py 13 years ago			`that it preserves whitespaces at the beginning of lines that`
			`correspond to indentation.`
			`'''`
			`logging.info("process_lines: handling song data line by line")`
use LaTeX instead of TeX for verse and chorus environments 13 years ago			`star = False`
Utils: add comments, options, and methods to rules.py 13 years ago			`for index, line in enumerate(lines):`
use LaTeX instead of TeX for verse and chorus environments 13 years ago			`if re.compile("begin{verse\*}").search(line):`
			`star = True`

			`if re.compile("end{verse\*}").search(line):`
			`star = False`

			`if star == True and re.compile("end{verse}").search(line):`
			`line = line.replace("verse", "verse*")`
			`star = False`

Utils: add comments, options, and methods to rules.py 13 years ago			`#remove trailing spaces and punctuation`
			`line = line.rstrip().rstrip(',.;').rstrip()`
			`#remove multi-spaces within lines`
			`line = re.sub("(?P<last_char>\S)\s{2,}","\g<last_char> ", line)`
			`lines[index] = line`
			`return lines`


			`def main():`
			`locale.setlocale(locale.LC_ALL, '')`
			`try:`
			`opts, args = getopt.getopt(sys.argv[1:],`
			`"hf:l:",`
			`["help", "files=", "log="])`
			`except getopt.GetoptError:`
			`usage()`
			`sys.exit(2)`

			`songfiles = glob.glob('songs//.sg')`
			`loglevel = "warning"`

			`for option, arg in opts:`
			`if option in ("-h", "--help"):`
			`usage()`
			`sys.exit()`
			`elif option in ("-f", "--files"):`
			`songfiles = glob.glob(arg)`
			`elif option in ("-l", "--log"):`
			`loglevel = arg`
			`numeric_level = getattr(logging, loglevel.upper(), None)`
			`if not isinstance(numeric_level, int):`
			`raise ValueError('Invalid log level: %s' % loglevel)`
			`logging.basicConfig(level=numeric_level, filename='rules.log', filemode='w')`
			`else:`
			`assert False, "unhandled option"`

			`for filename in songfiles:`
			`with open(filename, 'r+') as songfile:`
Utils: add log info for rules.py 13 years ago			`logging.info("checking file: "+filename)`
Utils: add comments, options, and methods to rules.py 13 years ago			`data = songfile.read()`
			`#no dots for acronyms`
			`#data = re.sub("(?P<capital_letter>[A-Z])\.","\g<capital_letter>", data)`
			`data = replace_words(data)`
			`data = language_rules(data)`
			`lines = process_lines(data.split('\n'))`
			`data = "\n".join(lines)`
			`songfile.seek(0)`
			`songfile.write(data)`
			`songfile.truncate()`
Utils: improve performance by opening files only once (thanks Guibou!) 13 years ago
Utils: add comments, options, and methods to rules.py 13 years ago			`if __name__ == '__main__':`
			`main()`