@ -9,13 +9,15 @@
# src is the .sxd file generated by latex
# src is the .sxd file generated by latex
#
#
from plasTeX . TeX import TeX
from unidecode import unidecode
from unidecode import unidecode
import sys
import sys
import re
import re
import locale
import locale
import warnings
import warnings
from tools import processauthors
from utils . plastex import simpleparse
# Pattern set to ignore latex command in title prefix
# Pattern set to ignore latex command in title prefix
keywordPattern = re . compile ( r " ^ % ( \ w+) \ s?(.*)$ " )
keywordPattern = re . compile ( r " ^ % ( \ w+) \ s?(.*)$ " )
firstLetterPattern = re . compile ( r " ^(?: \ { ? \\ \ w+ \ }?)*[^ \ w]*( \ w) " )
firstLetterPattern = re . compile ( r " ^(?: \ { ? \\ \ w+ \ }?)*[^ \ w]*( \ w) " )
@ -26,9 +28,7 @@ def sortkey(value):
don ' t forget to call locale.setlocale(locale.LC_ALL, ' ' )). It also handles
don ' t forget to call locale.setlocale(locale.LC_ALL, ' ' )). It also handles
the sort with latex escape sequences .
the sort with latex escape sequences .
'''
'''
tex = TeX ( )
return locale . strxfrm ( unidecode ( simpleparse ( value ) . replace ( ' ' , ' A ' ) ) )
tex . input ( value )
return locale . strxfrm ( unidecode ( tex . parse ( ) . textContent . replace ( ' ' , ' A ' ) ) )
def processSXDEntry ( tab ) :
def processSXDEntry ( tab ) :
return ( tab [ 0 ] , tab [ 1 ] , tab [ 2 ] )
return ( tab [ 0 ] , tab [ 1 ] , tab [ 2 ] )
@ -40,9 +40,8 @@ def processSXD(filename):
data . append ( line . strip ( ) )
data . append ( line . strip ( ) )
file . close ( )
file . close ( )
type = data [ 0 ]
i = 1
i = 1
idx = index ( )
idx = index ( data [ 0 ] )
if len ( data ) > 1 :
if len ( data ) > 1 :
while data [ i ] . startswith ( ' % ' ) :
while data [ i ] . startswith ( ' % ' ) :
@ -54,12 +53,21 @@ def processSXD(filename):
for i in range ( i , len ( data ) , 3 ) :
for i in range ( i , len ( data ) , 3 ) :
entry = processSXDEntry ( data [ i : i + 3 ] )
entry = processSXDEntry ( data [ i : i + 3 ] )
idx . add ( entry [ 0 ] , entry [ 1 ] , entry [ 2 ] )
idx . add ( entry [ 0 ] , entry [ 1 ] , entry [ 2 ] )
return idx
return idx
class index :
class index :
def __init__ ( self ) :
def __init__ ( self , indextype ) :
self . data = dict ( )
self . data = dict ( )
self . keywords = dict ( )
self . keywords = dict ( )
if indextype == " TITLE INDEX DATA FILE " :
self . indextype = " TITLE "
elif indextype == " SCRIPTURE INDEX DATA FILE " :
self . indextype = " SCRIPTURE "
elif indextype == " AUTHOR INDEX DATA FILE " :
self . indextype = " AUTHOR "
else :
self . indextype = " "
def filter ( self , key ) :
def filter ( self , key ) :
letter = firstLetterPattern . match ( key ) . group ( 1 )
letter = firstLetterPattern . match ( key ) . group ( 1 )
@ -74,16 +82,27 @@ class index:
def compileKeywords ( self ) :
def compileKeywords ( self ) :
self . prefix_patterns = [ ]
self . prefix_patterns = [ ]
if ' prefix ' in self . keywords :
if self . indextype == " TITLE " :
for prefix in self . keywords [ ' prefix ' ] :
if ' prefix ' in self . keywords :
self . prefix_patterns . append ( re . compile ( r " ^( %s )( \ b| \\ )( \ s*.*)$ " % prefix ) )
for prefix in self . keywords [ ' prefix ' ] :
self . prefix_patterns . append ( re . compile ( r " ^( %s )( \ b| \\ )( \ s*.*)$ " % prefix ) )
def add ( self , key , number , link ) :
for pattern in self . prefix_patterns :
self . authwords = { " after " : [ ] , " ignore " : [ ] , " sep " : [ ] }
match = pattern . match ( key )
if self . indextype == " AUTHOR " :
if match :
for key in self . keywords :
key = " %s ( %s ) " % ( match . group ( 2 ) + match . group ( 3 ) , match . group ( 1 ) )
if key in self . authwords :
break # Only one match per key
self . authwords [ key ] = self . keywords [ key ]
for word in self . authwords . keys ( ) :
if word in self . keywords :
if word == " after " :
self . authwords [ word ] = [ re . compile ( r " ^.* %s \ b(.*) " % after ) for after in self . keywords [ word ] ]
elif word == " sep " :
self . authwords [ word ] = [ " %s " % sep for sep in self . authwords [ word ] ] + [ " , " ]
self . authwords [ word ] = [ re . compile ( r " ^(.*) %s (.*)$ " % sep ) for sep in self . authwords [ word ] ]
else :
self . authwords [ word ] = self . keywords [ word ]
def _raw_add ( self , key , number , link ) :
( first , key ) = self . filter ( key )
( first , key ) = self . filter ( key )
if not self . data . has_key ( first ) :
if not self . data . has_key ( first ) :
self . data [ first ] = dict ( )
self . data [ first ] = dict ( )
@ -91,6 +110,25 @@ class index:
self . data [ first ] [ key ] = [ ]
self . data [ first ] [ key ] = [ ]
self . data [ first ] [ key ] . append ( { ' num ' : number , ' link ' : link } )
self . data [ first ] [ key ] . append ( { ' num ' : number , ' link ' : link } )
def add ( self , key , number , link ) :
if self . indextype == " TITLE " :
# Removing prefixes before titles
for pattern in self . prefix_patterns :
match = pattern . match ( key )
if match :
self . _raw_add (
" %s ( %s ) " % ( match . group ( 2 ) + match . group ( 3 ) , match . group ( 1 ) ) ,
number , link )
return
self . _raw_add ( key , number , link )
if self . indextype == " AUTHOR " :
# Processing authors
for author in processauthors (
key ,
* * self . authwords ) :
self . _raw_add ( author , number , link )
def refToStr ( self , ref ) :
def refToStr ( self , ref ) :
if sys . version_info > = ( 2 , 6 ) :
if sys . version_info > = ( 2 , 6 ) :
return ' \\ hyperlink {{ {0[link]} }} {{ {0[num]} }} ' . format ( ref )
return ' \\ hyperlink {{ {0[link]} }} {{ {0[num]} }} ' . format ( ref )
@ -99,9 +137,9 @@ class index:
def entryToStr ( self , key , entry ) :
def entryToStr ( self , key , entry ) :
if sys . version_info > = ( 2 , 6 ) :
if sys . version_info > = ( 2 , 6 ) :
return ' \\ idxentry {{ {0} }} {{ {1} }} \n ' . format ( key , ' \\ \\ ' . join ( map ( self . refToStr , entry ) ) )
return unicode ( ' \\ idxentry {{ {0} }} {{ {1} }} \n ' ) . format ( key , ' \\ \\ ' . join ( map ( self . refToStr , entry ) ) )
else :
else :
return ' \\ idxentry { %s } { %s } \n ' % ( key , ' \\ \\ ' . join ( map ( self . refToStr , entry ) ) )
return unicode ( ' \\ idxentry { %s } { %s } \n ' ) % ( key , ' \\ \\ ' . join ( map ( self . refToStr , entry ) ) )
def idxBlockToStr ( self , letter , entries ) :
def idxBlockToStr ( self , letter , entries ) :
str = ' \\ begin {idxblock} { ' + letter + ' } ' + ' \n '
str = ' \\ begin {idxblock} { ' + letter + ' } ' + ' \n '