#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Normalise song sources so that they respect some LaTeX rules.

Python rewrite of the former sed-based ``utils/checker.sh``: every file
matching ``songs/*/*.sg`` is rewritten in place after applying the literal
substitutions listed in ``word_dic``.
"""

import re
import glob
import codecs


def replace_words(text, word_dic):
    """Return *text* with every occurrence of a key of *word_dic* replaced.

    Keys are matched literally (they are escaped before being compiled into
    a regular expression) in a single left-to-right pass, so the output of
    one substitution is never scanned again by another one.

    text     -- the string to process
    word_dic -- mapping ``target: replacement``; may be empty
    """
    if not word_dic:
        # An empty alternation matches the empty string at every position
        # and the lookup in translate() would then raise KeyError('').
        return text

    # Alternation picks the first alternative that matches, so try the
    # longest keys first: the result no longer depends on dictionary order
    # when one key is a prefix of another.
    keys = sorted(word_dic, key=len, reverse=True)
    pattern = re.compile('|'.join(map(re.escape, keys)))

    def translate(match):
        # A function replacement is inserted verbatim: backslashes in the
        # values (e.g. "\dots") are not interpreted as regex escapes.
        return word_dic[match.group(0)]

    return pattern.sub(translate, text)


# The dictionary has target_word: replacement_word pairs.
# Unicode literals throughout: files are decoded on read, and str/bytes must
# not be mixed when matching.
word_dic = {
    # oe inclusion
    u"coeur": u"cœur",
    u"boeuf": u"bœuf",
    u"oeuvre": u"œuvre",
    u"soeur": u"sœur",
    u"noeud": u"nœud",
    u"oeil": u"œil",
    # The backslash is part of the target: matching only "oe{}" would turn
    # "\oe{}" into "\œ" and leave a broken control sequence behind.
    u"\\oe{}": u"œ",
    # punctuation
    u"’": u"'",
    u"Ca ": u"Ça ",
    u"...": u"\\dots ",
    # chords
    u"\\[Re]": u"\\[Ré]",
    u"b]": u"&]",
    # Do
    u"032010": u"X32010",
    # La
    u"002220": u"X02220",
    u"002020": u"X02020",
    # Lam
    u"002210": u"X02210",
    # Ré
    u"000232": u"XX0232",
    u"X00232": u"XX0232",
    # Ré7
    u"000212": u"XX0212",
    # Rém
    u"000231": u"XX0231",
    u"X00231": u"XX0231",
    # Si7
    u"021202": u"X21202",
}


def process_file(path):
    """Apply ``word_dic`` to the UTF-8 file at *path*, rewriting it in place.

    The file is only written back when a substitution actually changed its
    content.
    """
    with codecs.open(path, "r", "utf-8") as songfile:
        original = songfile.read()

    fixed = replace_words(original, word_dic)
    if fixed == original:
        return

    with codecs.open(path, "w", "utf-8") as songfile:
        songfile.write(fixed)


def main():
    """Process every song file found under ``songs/*/``."""
    for path in glob.glob('songs/*/*.sg'):
        process_file(path)


if __name__ == "__main__":
    main()