Browse Source

Utils: rewrite latex-preprocessing in python

remotes/origin/split-songs
crep 15 years ago
parent
commit
9e835f0338
  1. 46
      utils/checker.sh
  2. 63
      utils/latex-preprocessing.py

46
utils/checker.sh

@ -1,46 +0,0 @@
#!/bin/sh
# Utility that corrects the songs input to respect some LaTeX rules.
# Every file matching songs/*/*.sg is rewritten in place, one sed
# substitution at a time, in the order listed below.
for i in songs/*/*.sg
do
    # When the glob matches nothing the shell leaves the pattern itself in
    # $i; skip anything that is not an existing regular file instead of
    # letting every sed call fail on it.
    [ -f "$i" ] || continue

    # "$i" is quoted everywhere so paths containing whitespace or glob
    # characters are passed to sed as a single argument.

    # oe inclusion
    sed -i "s/coeur/cœur/g" "$i"
    sed -i "s/boeuf/bœuf/g" "$i"
    sed -i "s/oeuvre/œuvre/g" "$i"
    sed -i "s/oeuf/œuf/g" "$i"
    sed -i "s/soeur/sœur/g" "$i"
    sed -i "s/noeud/nœud/g" "$i"
    sed -i "s/oeil/œil/g" "$i"
    # literal \oe{} macro -> œ
    sed -i "s/\\\oe{}/œ/g" "$i"

    # punctuation
    sed -i "s/’/'/g" "$i"
    sed -i "s/Ca /Ça /g" "$i"

    # dots rules: three literal dots -> \dots
    sed -i "s/\\.\\.\\./\\\dots/g" "$i"

    # chords
    sed -i "s/\\[Re\\]/\\[Ré\\]/g" "$i"
    # a "b" right before a closing bracket becomes "&"
    sed -i "s/b\\]/\\&\\]/g" "$i"

    # Do
    sed -i "s/032010}/X32010}/g" "$i"
    # La
    sed -i "s/002220}/X02220}/g" "$i"
    sed -i "s/002020}/X02020}/g" "$i"
    # Lam
    sed -i "s/002210}/X02210}/g" "$i"
    # Ré
    sed -i "s/000232}/XX0232}/g" "$i"
    sed -i "s/X00232}/XX0232}/g" "$i"
    # Ré7
    sed -i "s/000212}/XX0212}/g" "$i"
    # Rém
    sed -i "s/000231}/XX0231}/g" "$i"
    sed -i "s/X00231}/XX0231}/g" "$i"
    # Si7
    sed -i "s/021202}/X21202}/g" "$i"
done

63
utils/latex-preprocessing.py

@ -0,0 +1,63 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import re
import glob
import codecs
def replace_words(text, word_dic):
    """
    Replace every occurrence of a key of word_dic found in text with the
    associated value and return the changed text.

    Keys are matched literally (they are regex-escaped), in a single
    left-to-right pass, so a replacement is never itself rescanned.
    When several keys could match at the same position the longest one
    wins. Empty keys are ignored: an empty alternative would match at
    every position and hide the keys listed after it. If no usable key
    remains, text is returned unchanged.

    text and the keys/values of word_dic must be of the same string type.
    """
    # Longest first so that a key that is a prefix of another key cannot
    # pre-empt it; this also makes the result independent of dict order.
    keys = sorted((key for key in word_dic if key), key=len, reverse=True)
    if not keys:
        return text

    rc = re.compile('|'.join(map(re.escape, keys)))

    def translate(match):
        return word_dic[match.group(0)]

    return rc.sub(translate, text)
# the dictionary has target_word:replacement_word pairs
# Keys are matched literally in the song text.
word_dic = {
#oe inclusion
"coeur": "cœur",
"boeuf": "bœuf",
"oeuvre": "œuvre",
"oeuf": "œuf",
"soeur": "sœur",
"noeud": "nœud",
"oeil": "œil",
# the key includes the backslash of the \oe{} macro so that the whole
# macro is replaced and no stray backslash is left in front of the œ
"\\oe{}": "œ",
#punctuation
# typographic apostrophe -> plain apostrophe
"’": "'",
"Ca ": "Ça ",
"...": "\\dots ",
#Chords
"\\[Re]": "\\[Ré]",
"b]": "&]",
#Do
"032010": "X32010",
#La
"002220": "X02220",
"002020": "X02020",
#Lam
"002210": "X02210",
#Ré
"000232": "XX0232",
"X00232": "XX0232",
#Ré7
"000212": "XX0212",
#Rém
"000231": "XX0231",
"X00231": "XX0231",
#Si7
"021202": "X21202",
}
# Process song files: every songs/*/*.sg file is read as UTF-8, passed
# through replace_words with word_dic, and written back in place as UTF-8.

# The plain string literals of this module are byte strings on Python 2
# and text strings on Python 3. The text handed to replace_words must be
# of the same type as the word_dic keys, so the decoded file content is
# converted to UTF-8 bytes only when the literals are bytes.
_literals_are_bytes = isinstance("", bytes)

songfiles = glob.glob('songs/*/*.sg')
for path in songfiles:
    # "with" closes the handle even if reading or replacing raises.
    with codecs.open(path, "r", "utf-8") as songfile:
        data = songfile.read()
    if _literals_are_bytes:
        data = data.encode("utf-8")
    data = replace_words(data, word_dic)
    if _literals_are_bytes:
        data = data.decode("utf-8")
    # Written through codecs as well so reading and writing use the same
    # encoding and the same (untranslated) newline handling.
    with codecs.open(path, "w", "utf-8") as songfile:
        songfile.write(data)
Loading…
Cancel
Save