[sword-svn] r373 - trunk/modules/python

chrislit at crosswire.org chrislit at crosswire.org
Fri Aug 10 04:30:42 MST 2012


Author: chrislit
Date: 2012-08-10 04:30:42 -0700 (Fri, 10 Aug 2012)
New Revision: 373

Modified:
   trunk/modules/python/usfm2osis.py
Log:
started organizing & implementing deprecated/obsolete/private-use USFM from stylesheet


Modified: trunk/modules/python/usfm2osis.py
===================================================================
--- trunk/modules/python/usfm2osis.py	2012-08-10 10:46:41 UTC (rev 372)
+++ trunk/modules/python/usfm2osis.py	2012-08-10 11:30:42 UTC (rev 373)
@@ -268,6 +268,41 @@
         return osis
 
 
+    def cvtRelaxedConformanceRemaps(osis, relaxedConformance):
+        """
+        Relaxed conformance remaps
+        Rewrites deprecated USFM markers: numbered table rows lose their
+        number, 2.0 peripheral markers become periph lines and 2.0 book
+        markers become id lines. Returns osis unchanged unless
+        relaxedConformance is truthy.
+        Replacement templates are raw strings so that re.sub() itself decodes
+        the backslash and newline escapes; Python 3.7+ raises re.error on the
+        unknown template escapes the non-raw literals would produce.
+        """
+        if not relaxedConformance:
+            return osis
+
+        # \tr#: DEP: map to \tr
+        osis = re.sub(r'\\tr\d\b', r'\\tr', osis)
+
+        # remapped 2.0 periphs: \pub, \toc, \pref, \maps, \cov, \spine, \pubinfo
+        osis = re.sub(r'\\pub\b\s', r'\\periph Publication Data\n', osis)
+        osis = re.sub(r'\\toc\b\s', r'\\periph Table of Contents\n', osis)
+        osis = re.sub(r'\\pref\b\s', r'\\periph Preface\n', osis)
+        osis = re.sub(r'\\maps\b\s', r'\\periph Map Index\n', osis)
+        osis = re.sub(r'\\cov\b\s', r'\\periph Cover\n', osis)
+        osis = re.sub(r'\\spine\b\s', r'\\periph Spine\n', osis)
+        osis = re.sub(r'\\pubinfo\b\s', r'\\periph Publication Information\n', osis)
+
+        # 2.0 markers remapped to \id book codes: \intro, \conc, \glo, \idx
+        osis = re.sub(r'\\intro\b\s', r'\\id INT\n', osis)
+        osis = re.sub(r'\\conc\b\s', r'\\id CNC\n', osis)
+        osis = re.sub(r'\\glo\b\s', r'\\id GLO\n', osis)
+        osis = re.sub(r'\\idx\b\s', r'\\id TDX\n', osis)
+
+        return osis
+
+
     def cvtIdentification(osis, relaxedConformance):
         """
         Identification
@@ -289,6 +324,10 @@
 
         # \rem_text...
         osis = re.sub(r'\\rem\b\s+(.+)', r'<!-- rem - \1 -->', osis)
+        
+        # \restore: unpublished, seek example
+        if relaxedConformance:
+            osis = re.sub(r'\\restore\b\s+(.+)', r'<!-- restore - \1 -->', osis)
 
         # \h#_text...
         osis = re.sub(r'\\h\b\s+(.+)\s*\n', r'<title type="runningHead">\1</title>\n', osis)
@@ -533,6 +572,13 @@
         # \b
         osis = re.sub(r'\\b\b\s?', r'<lb type="x-p"/>', osis)
 
+        if relaxedConformance:
+            # TODO: \phi: DEP: Paragraph text, indented with hanging indent
+            # TODO: \ps: DEP: Paragraph text, no break with next paragraph text at chapter boundary
+            # TODO: \psi: DEP: Paragraph text, indented, with no break with next paragraph text (at chapter boundary)
+            # TODO: \p#: Front or back matter text paragraph, level # (if multiple levels)
+            pass
+
         return osis
 
 
@@ -669,6 +715,12 @@
         # \xt_
         note = re.sub(r'\\xt\s', r'', note)
 
+        if relaxedConformance:
+            # TODO: \xtSee...\xtSee*: Concordance and Names Index markup for an alternate entry target reference.
+            # TODO: \xtSeeAlso...\xtSeeAlso*: Concordance and Names Index markup for an additional entry target reference.
+            pass
+
+
         # \xo_##SEP##
         note = re.sub(r'\\xo\b\s(.+?)(?=(\\x|'+u'﷟))', u'﷟'+r'<reference>\1</reference>', note)
 
@@ -745,6 +797,11 @@
         # \sls_...\sls*
         osis = re.sub(r'\\sls\b\s*(.+?)\\sls\*', r'<foreign>/1</foreign>', osis, flags=re.DOTALL)  # find a better mapping than <foreign>?
 
+        if relaxedConformance:
+            # TODO: \addpn...\addpn*: For Chinese words to be dot underline & underline
+            # TODO: \k#: Concordance main entry text or keyword, level #
+            pass
+
         return osis
 
 
@@ -838,6 +895,10 @@
         # \wh_...\wh*
         osis = re.sub(r'\\wh\s+(.+?)(\s*)\\wh\*', r'\1<index index="Hebrew" level1="\1"/>\2', osis, flags=re.DOTALL)
 
+        if relaxedConformance:
+            # TODO: \wr...\wr*: OBS:  Auxiliary - Wordlist/Glossary Reference
+            pass
+
         return osis
 
 
@@ -858,6 +919,7 @@
                 periph += 'x-unknown'
             periph += '">\n' +  contents + '</div>\n'
             return periph
+
         osis = re.sub(r'\\periph\s+([^\n]+)\s*\n(.+?)(?=(</div type="book">|\\periph\s+))', tagPeriph, osis, flags=re.DOTALL)
 
         return osis
@@ -891,6 +953,14 @@
         supported: \z<Extension>
         We can't really know what these mean, but will preserve them as <milestone/> elements.
         """
+        if relaxedConformance:
+            # publishing assistant markers
+            # \zpa-xb...\zpa-xb* : \periph Book
+            # \zpa-xc...\zpa-xc* : \periph Chapter
+            # \zpa-xv...\zpa-xv* : \periph Verse
+            # \zpa-xd...\zpa-xd* : \periph Description
+            pass
+
         # \z
         osis = re.sub(r'\\z([^\s]+)', r'<milestone type="x-usfm-z-\1"/>', osis)
 
@@ -983,6 +1053,7 @@
 
     # call individual conversion processors in series
     osis = cvtPreprocess(osis, relaxedConformance)
+    osis = cvtRelaxedConformanceRemaps(osis, relaxedConformance)
     osis = cvtIdentification(osis, relaxedConformance)
     osis = cvtIntroductions(osis, relaxedConformance)
     osis = cvtTitles(osis, relaxedConformance)
@@ -1010,7 +1081,6 @@
     return osis
 
 
-
 def writeOSISHeader(oFile, workID, lang='en'):
     oFile.write('<?xml version="1.0" encoding="UTF-8"?>\n<osis xmlns="http://www.bibletechnologies.net/2003/OSIS/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.'+OSISversion+'.xsd">\n<osisText osisRefWork="Bible" xml:lang="' + lang + '" osisIDWork="' + workID + '">\n<header>\n<work osisWork="' + workID + '"/>\n</header>\n')
 
@@ -1187,39 +1257,3 @@
             print('Unhandled USFM tags: ' + ', '.join(sorted(unhandledTags)) + ' (' + str(len(unhandledTags)) + ' total)')
             if not relaxedConformance:
                 print('Consider using the -r option for relaxed markup processing.')
-
-
-# TOOD: relaxed tags to add:
-# \restore: unpublished, seek example
-# \addpn...\addpn*: For chinese words to be dot underline & underline
-# \p#: Front or back matter text paragraph, level # (if multiple levels)
-# \k#: Concordance main entry text or keyword, level #
-# \xtSee..\xtSee*: Concordance and Names Index markup for an alternate entry target reference.
-# \xtSeeAlso...\xtSeeAlso: Concordance and Names Index markup for an additional entry target reference.
-# \tr#: DEP: map to \tr
-# \phi: DEP: Paragraph text, indented with hanging indent
-# \ps: DEP: Paragraph text, no break with next paragraph text at chapter boundary
-# \psi: DEP: Paragraph text, indented, with no break with next paragraph text (at chapter boundary)
-# \wr...\wr*: OBS:  Auxiliary - Wordlist/Glossary Reference
-
-# 2.0 periphs to remap
-# FRONT MATTER
-# \pub : \periph Publication Data
-# \toc : \periph Table of Contents
-# \pref : \periph Preface
-# \intro : \periph Introduction
-# BACK MATTER
-# \conc : \periph Concordance
-# \glo : \periph Glossary
-# \idx : \periph Index
-# \maps : \periph Map Index
-# OTHER
-# \cov : \periph Cover
-# \spine : \periph Spine
-# \pubinfo: \periph Publication Information
-
-# publishing assistant markers
-# \zpa-xb...\zpa-xb* : \periph Book
-# \zpa-xc...\zpa-xc* : \periph Chapter
-# \zpa-xv...\zpa-xv* : \periph Verse
-# \zpa-xd...\zpa-xd* : \periph Description




More information about the sword-cvs mailing list