[sword-svn] r364 - trunk/modules/python

chrislit at crosswire.org chrislit at crosswire.org
Sat Aug 4 17:03:38 MST 2012


Author: chrislit
Date: 2012-08-04 17:03:38 -0700 (Sat, 04 Aug 2012)
New Revision: 364

Modified:
   trunk/modules/python/usfm2osis.py
Log:
Updated USFM book codes and separated out the non-standard codes, which are now excluded unless the -r switch is specified.


Modified: trunk/modules/python/usfm2osis.py
===================================================================
--- trunk/modules/python/usfm2osis.py	2012-08-04 23:37:28 UTC (rev 363)
+++ trunk/modules/python/usfm2osis.py	2012-08-05 00:03:38 UTC (rev 364)
@@ -58,6 +58,7 @@
 
 bookDict = {
     ### Known USFM Book codes from Paratext
+    ### Cf. http://ubs-icap.org/chm/usfm/2.35/index.html?book_codes.htm
     # OT
     'GEN':'Gen', 'EXO':'Exod', 'LEV':'Lev', 'NUM':'Num', 'DEU':'Deut', 'JOS':'Josh', 'JDG':'Judg', 'RUT':'Ruth',
     '1SA':'1Sam', '2SA':'2Sam', '1KI':'1Kgs', '2KI':'2Kgs', '1CH':'1Chr', '2CH':'2Chr', 'EZR':'Ezra', 'NEH':'Neh',
@@ -75,41 +76,49 @@
     # DC - Eastern Orthodox
     '3MA':'3Macc', '4MA':'4Macc', '1ES':'1Esd', '2ES':'2Esd', 'MAN':'PrMan', 'PS2':'Ps151',
     # Rahlfs' LXX
-    'ODA':'Odes', 'PSS':'PssSol', 'JSA':'JoshA', 'JDB':'JudgB', 'TBS':'TobS', 'SST':'SusTh', 'DNT':'DanTh',
-    'BLT':'BelTh',
+    'ODA':'Odes', 'PSS':'PssSol', 
     # Esdrae
-    '4ES':'4Ezra', '5ES':'5Ezra', '6ES':'6Ezra',
+    'EZA':'4Ezra', '5EZ':'5Ezra', '6EZ':'6Ezra',
+    # Inconsistency with Esther
+    'DAG':'DanGr',
+    # Syriac
+    'PS3':'5ApocSyrPss', '2BA':'2Bar', 'LBA':'EpBar',
+    # Ethiopic
+    'JUB':'Jub', 'ENO':'1En', '1MQ':'1Meq', '2MQ':'2Meq', '3MQ':'3Meq', 'REP':'Reproof', '4BA':'4Bar',
+    # Vulgate
+    'LAO':'EpLao',
+
     # Additional non-biblical books
     'XXA':'XXA', 'XXB':'XXB', 'XXC':'XXC', 'XXD':'XXD', 'XXE':'XXE', 'XXF':'XXF', 'XXG':'XXG',
-    ###
 
+    # Peripheral books
+    'FRT':'FRONT', 'INT':'INTRODUCTION', 'BAK':'BACK', 'CNC':'CONCORDANCE', 'GLO':'GLOSSARY',
+    'TDX':'INDEX', 'NDX':'GAZETTEER', 'OTH':'X-OTHER'
+    }
+
+addBookDict = {
+    ### Deprecated
+    # Rahlfs
+    'JSA':'JoshA', 'JDB':'JudgB', 'TBS':'TobS', 'SST':'SusTh', 'DNT':'DanTh', 'BLT':'BelTh',
+    # Esdrae
+    '4ES':'4Ezra', '5ES':'5Ezra', '6ES':'6Ezra',
+
+
     ### Proposed Additions <http://lc.bfbs.org.uk/e107_files/downloads/canonicalissuesinparatext.pdf>
-    # Inconsistency with Esther
-    'DAG':'DanGr',
     # Alternate Psalms
     'PSB':'Ps',
-    # Ethiopic
-    'JUB':'Jub', 'ENO':'1En', 'REP':'Reproof', # == Tegsas
-    '1MQ':'1Meq', '2MQ':'2Meq', '3MQ':'3Meq', '4BA':'4Bar',
-    # Syriac
-    '2BA':'2Bar', 'LBA':'EpBar', 'PS3':'5ApocSyrPss',
     # Vulgate
-    'LAO':'EpLao', 'PSO':'PrSol', 'PJE':'PrJer',
+    'PSO':'PrSol', 'PJE':'PrJer',
     # Armenian
     'WSI':'WSir', 'COP':'CorCorr', '3CO':'3Cor', 'EUT':'PrEut', 'DOJ':'DJohn',
     # Apostolic Fathers
     '1CL':'1Clem', '2CL':'2Clem', 'SHE':'Herm', 'LBA':'Barn', 'DID':'Did',
     ###
-
     # Proposed replacements <http://lc.bfbs.org.uk/e107_files/downloads/canonicalissuesinparatext.pdf>
-    'ODE':'Odes', 'EZA':'4Ezra', '5EZ':'5Ezra', '6EZ':'6Ezra',
-
+    'ODE':'Odes', 
+    
     # Additional biblical books
-    'ADE':'AddEsth',
-
-    # Peripheral books
-    'FRT':'FRONT', 'INT':'INTRODUCTION', 'BAK':'BACK', 'CNC':'CONCORDANCE', 'GLO':'GLOSSARY',
-    'TDX':'INDEX', 'NDX':'GAZETTEER', 'OTH':'X-OTHER'
+    'ADE':'AddEsth'
     }
 
 specialBooks = ['FRONT', 'INTRODUCTION', 'BACK', 'CONCORDANCE', 'GLOSSARY', 'INDEX', 'GAZETTEER', 'X-OTHER']
@@ -195,7 +204,7 @@
         """
         global loc2osisBk, osis2locBk
         # \id_<CODE>_(Name of file, Book name, Language, Last edited, Date etc.)  ###TESTED###
-        osis = re.sub(r'\\id\s+([A-Z0-9]{3})\b\s*([^\\\n]*?)\n(.*)(?=\\id|$)', lambda m: u'󠁂<div type="book" osisID="' + bookDict[m.group(1)] + '">\n' + (('<!-- id comment - ' + m.group(2) + ' -->\n') if m.group(2) else '') +  m.group(3) + u'</div type="book">󠁂\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\id\s+([A-Z0-9]{3})\b\s*([^\\\n]*?)\n(.*)(?=\\id|$)', lambda m: u'󠁂<div type="book" osisID="' + bookDict[m.group(1)] + '">\n' + (('<!-- id comment - ' + m.group(2) + ' -->\n') if m.group(2) else '') +  m.group(3) + u'</div type="book">󠁂\n' , osis, flags=re.DOTALL)
         # keep a copy of the OSIS book abbreviation for below (\toc3 processing) to store for mapping localized book names to/from OSIS
         osisBook = re.search(r'\\id\s+([A-Z0-9]{3})', osis)
         if osisBook:
@@ -906,8 +915,8 @@
     print('    python usfm2osis.py Bible.KJV ./KJV/*.usfm')
     verbosePrint('')
     verbosePrint('Supported encodings: ' + ', '.join(aliases))
-    exit()
 
+
 class Worker(multiprocessing.Process):
     def __init__(self, work_queue, result_queue):
 
@@ -976,6 +985,7 @@
 
         if '-r' in sys.argv:
             relaxedConformance = True
+            bookDict = dict(bookDict.items() + addBookDict.items())
             inputFilesIdx += 1
 
         usfmDocList = sys.argv[inputFilesIdx:]




More information about the sword-cvs mailing list