[sword-svn] r396 - trunk/modules/python

chrislit at crosswire.org chrislit at crosswire.org
Sun Aug 26 02:28:04 MST 2012


Author: chrislit
Date: 2012-08-26 02:28:04 -0700 (Sun, 26 Aug 2012)
New Revision: 396

Modified:
   trunk/modules/python/usfm2osis.py
Log:
implemented sorting key functions for canonical & usfm-numeric orders


Modified: trunk/modules/python/usfm2osis.py
===================================================================
--- trunk/modules/python/usfm2osis.py	2012-08-26 01:39:07 UTC (rev 395)
+++ trunk/modules/python/usfm2osis.py	2012-08-26 09:28:04 UTC (rev 396)
@@ -209,7 +209,7 @@
     'INDEX', 'GAZETTEER', 'X-OTHER'
     ]
 
-sfmNumericOrder = [
+usfmNumericOrder = [
     # Front Matter
     'FRONT', 'INTRODUCTION', 
 
@@ -282,6 +282,7 @@
 
 osis2locBk = dict()
 loc2osisBk = dict()
+filename2osis = dict()
 verbose = bool()
 ucs4 = (sys.maxunicode > 0xFFFF)
 
@@ -317,6 +318,18 @@
 END PSF-licened segment
 """
 
+def keycanon(filename):
+    if filename2osis:
+        return canonicalOrder.index(filename2osis[filename])
+    else:
+        return keynat(filename)
+
+def keyusfm(filename):
+    if filename2osis:
+        return usfmNumericOrder.index(filename2osis[filename])
+    else:
+        return keynat(filename)
+
 def convertToOSIS(sFile):
     global encoding
     global relaxedConformance
@@ -375,18 +388,19 @@
         return osis
 
 
-    def cvtIdentification(osis, relaxedConformance):
+    def cvtIdentification(osis, relaxedConformance, filename):
         """
         Identification
         supported: \id, \ide, \sts, \rem, \h, \toc1, \toc2, \toc3
         """
-        global loc2osisBk, osis2locBk
+        global loc2osisBk, osis2locBk, filename2osis
         # \id_<CODE>_(Name of file, Book name, Language, Last edited, Date etc.)
         osis = re.sub(r'\\id\s+([A-Z0-9]{3})\b\s*([^\\'+'\n'+']*?)'+'\n'+r'(.*)(?=\\id|$)', lambda m: u'﷐<div type="book" osisID="' + bookDict[m.group(1)] + '">\n' + (('<!-- id comment - ' + m.group(2) + ' -->\n') if m.group(2) else '') +  m.group(3) + u'</div type="book">﷐\n' , osis, flags=re.DOTALL)
         # keep a copy of the OSIS book abbreviation for below (\toc3 processing) to store for mapping localized book names to/from OSIS
         osisBook = re.search(r'\\id\s+([A-Z0-9]{3})', osis)
         if osisBook:
             osisBook = bookDict[osisBook.group(1)]
+            filename2osis[filename] = osisBook
 
         # \ide_<ENCODING>
         osis = re.sub(r'\\ide\b.*'+'\n', '', osis) # delete, since this was handled above
@@ -1128,7 +1142,7 @@
     # call individual conversion processors in series
     osis = cvtPreprocess(osis, relaxedConformance)
     osis = cvtRelaxedConformanceRemaps(osis, relaxedConformance)
-    osis = cvtIdentification(osis, relaxedConformance)
+    osis = cvtIdentification(osis, relaxedConformance, sFile)
     osis = cvtIntroductions(osis, relaxedConformance)
     osis = cvtTitles(osis, relaxedConformance)
     osis = cvtChaptersAndVerses(osis, relaxedConformance)
@@ -1175,7 +1189,7 @@
     print('  -h, --help       print this usage information')
     print('  -o FILENAME      output filename (default is: <osisWork>.osis.xml)')
     print('  -r               enable relaxed markup processing (for non-standard USFM)')
-    print('  -s mode          set book sorting mode: natural (default), alpha, canonical, none')
+    print('  -s mode          set book sorting mode: natural (default), alpha, canonical, usfm, none')
     print('  -v               verbose feedback')
     print('  -x               disable XML validation')
     print('')
@@ -1280,24 +1294,22 @@
                 printUsage()
             if sys.argv[i].startswith('a'):
                 sortKey = None
-                sortCmp = None
                 print('Sorting book files alphanumerically.')
             elif sys.argv[i].startswith('na'):
                 sortKey = keynat
-                sortCmp = None
                 print('Sorting book files naturally.')
             elif sys.argv[i].startswith('c'):
-                sortKey = keynat # TODO: write appropriate helpers
-                sortCmp = None
+                sortKey = keycanon
                 print('Sorting book files canonically.')
+            elif sys.argv[i].startswith('u'):
+                sortKey = keyusfm
+                print('Sorting book files by USFM book number.')
             else:
                 sortKey = None # TODO: write appropriate helpers
-                sortCmp = None
                 print('Leaving book files unsorted.')
             inputFilesIdx += 2 # increment 2, reflecting 2 args for -s
         else:
             sortKey = keynat
-            sortCmp = None
             print('Sorting book files naturally.')
 
 




More information about the sword-cvs mailing list