[sword-svn] r363 - trunk/modules/python

chrislit at crosswire.org chrislit at crosswire.org
Sat Aug 4 16:37:28 MST 2012


Author: chrislit
Date: 2012-08-04 16:37:28 -0700 (Sat, 04 Aug 2012)
New Revision: 363

Modified:
   trunk/modules/python/usfm2osis.py
Log:
Converted the lowercase 'p' Unicode tag character (the invisible marker, not the <p> element) to an uppercase 'P' for consistency.


Modified: trunk/modules/python/usfm2osis.py
===================================================================
--- trunk/modules/python/usfm2osis.py	2012-08-04 11:24:44 UTC (rev 362)
+++ trunk/modules/python/usfm2osis.py	2012-08-04 23:37:28 UTC (rev 363)
@@ -318,7 +318,7 @@
         supported: \c, \ca...\ca*, \cl, \cp, \cd, \v, \va...\va*, \vp...\vp*
         """
         # \c_#  ###TESTED###
-        osis = re.sub(r'\\c\s+([^\s]+)\b(.+?)(?=(\\c\s+|</div type="book"))', lambda m: u'󠁃<chapter osisID="$BOOK$.' + m.group(1) + r'" sID="$BOOK$.' + m.group(1) + '"/>' + m.group(2) +  u'<chapter eID="$BOOK$.' + m.group(1) + u'"/>󠁰\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\c\s+([^\s]+)\b(.+?)(?=(\\c\s+|</div type="book"))', lambda m: u'󠁃<chapter osisID="$BOOK$.' + m.group(1) + r'" sID="$BOOK$.' + m.group(1) + '"/>' + m.group(2) +  u'<chapter eID="$BOOK$.' + m.group(1) + u'"/>󠁐\n', osis, flags=re.DOTALL)
 
         # \cp_#
         # \ca_#\ca*
@@ -372,7 +372,7 @@
         supported: \p, \m, \pmo, \pm, \pmc, \pmr, \pi#, \mi, \nb, \cls, \li#, \pc, \pr, \ph#, \b
         """
         # \p(_text...)  ###TESTED###
-        osis = re.sub(r'\\p\s+(.*?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'󠁰<p>\n' + m.group(1) + u'󠁰</p>\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\p\s+(.*?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'󠁐<p>\n' + m.group(1) + u'󠁐</p>\n', osis, flags=re.DOTALL)
 
         # \pc(_text...)
         # \pr(_text...)
@@ -385,19 +385,19 @@
         # \mi(_text...)
         # \nb  ###TESTED###
         pType = {'pc':'x-center', 'pr':'x-right', 'm':'x-noindent', 'pmo':'x-embedded-opening', 'pm':'x-embedded', 'pmc':'x-embedded-closing', 'pmr':'x-right', 'pi':'x-indented-1', 'pi1':'x-indented-1', 'pi2':'x-indented-2', 'pi3':'x-indented-3', 'pi4':'x-indented-4', 'pi5':'x-indented-5', 'mi':'x-noindent-indented', 'nb':'x-nobreak'}
-        osis = re.sub(r'\\(pc|pr|m|pmo|pm|pmc|pmr|pi|pi1|pi2|pi3|pi4|pi5|mi|nb)\s+(.*?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'󠁰<p type="' + pType[m.group(1)]  + '">\n' + m.group(2) + u'󠁰</p>\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\(pc|pr|m|pmo|pm|pmc|pmr|pi|pi1|pi2|pi3|pi4|pi5|mi|nb)\s+(.*?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'󠁐<p type="' + pType[m.group(1)]  + '">\n' + m.group(2) + u'󠁐</p>\n', osis, flags=re.DOTALL)
 
         # \cls_text...
-        osis = re.sub(r'\\m\s+(.+?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'󠁰<closer>' + m.group(1) + u'󠁰</closer>\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\m\s+(.+?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'󠁐<closer>' + m.group(1) + u'󠁐</closer>\n', osis, flags=re.DOTALL)
 
         # \ph#(_text...)
         # \li#(_text...)  ###TESTED###
         osis = re.sub(r'\\ph\b\s*', r'\\li ', osis)
         osis = re.sub(r'\\ph(\d+)\b\s*', r'\\li\1 ', osis)
-        osis = re.sub(r'\\li\b\s*(.*?)(?=(['+u'󠁂󠁃󠁰󠁄'+r']|\\li[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-1">\1</item>', osis, flags=re.DOTALL)
-        osis = re.sub(r'\\li(\d+)\b\s*(.*?)(?=(['+u'󠁂󠁃󠁰󠁄'+r']|\\li[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-\1">\2</item>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\li\b\s*(.*?)(?=(['+u'󠁂󠁃󠁐󠁄'+r']|\\li[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-1">\1</item>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\li(\d+)\b\s*(.*?)(?=(['+u'󠁂󠁃󠁐󠁄'+r']|\\li[\d\s]|<lb\b|<title\b))', r'<item type="x-indent-\1">\2</item>', osis, flags=re.DOTALL)
         osis = osis.replace('\n</item>', '</item>\n')
-        osis = re.sub(u'(<item [^󠁂󠁃󠁰󠁄]+</item>)', r'<list>\1</list>', osis, flags=re.DOTALL)
+        osis = re.sub(u'(<item [^󠁂󠁃󠁐󠁄]+</item>)', r'<list>\1</list>', osis, flags=re.DOTALL)
 
         # \b  ###TESTED###
         osis = re.sub(r'\\b\b\s?', r'<lb type="p"/>', osis)
@@ -414,17 +414,17 @@
         osis = re.sub(r'\\qs\b\s(.+?)\\qs\*', r'<l type="selah">\1</l>', osis, flags=re.DOTALL)
 
         # \q#(_text...)  ###TESTED###
-        osis = re.sub(r'\\q\b\s*(.*?)(?=(['+u'󠁂󠁃󠁰󠁄'+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="1">\1</l>', osis, flags=re.DOTALL)
-        osis = re.sub(r'\\q(\d+)\b\s*(.*?)(?=(['+u'󠁂󠁃󠁰󠁄'+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="\1">\2</l>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\q\b\s*(.*?)(?=(['+u'󠁂󠁃󠁐󠁄'+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="1">\1</l>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\q(\d+)\b\s*(.*?)(?=(['+u'󠁂󠁃󠁐󠁄'+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', r'<l level="\1">\2</l>', osis, flags=re.DOTALL)
 
         # \qr_text...
         # \qc_text...
         # \qm#(_text...)
         qType = {'qr':'x-right', 'qc':'x-center', 'qm':'x-embedded" level="1', 'qm1':'x-embedded" level="1', 'qm2':'x-embedded" level="2', 'qm3':'x-embedded" level="3', 'qm4':'x-embedded" level="4', 'qm5':'x-embedded" level="5'}
-        osis = re.sub(r'\\(qr|qc|qm\d+)\b\s*(.*?)(?=(['+u'󠁂󠁃󠁰󠁄'+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', lambda m: r'<l type="' + qType[m.group(1)] + '">' + m.group(2) + '</l>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\(qr|qc|qm\d+)\b\s*(.*?)(?=(['+u'󠁂󠁃󠁐󠁄'+r']|\\q[\d\s]|<l\b|<lb\b|<title\b))', lambda m: r'<l type="' + qType[m.group(1)] + '">' + m.group(2) + '</l>', osis, flags=re.DOTALL)
 
         osis = osis.replace('\n</l>', '</l>\n')
-        osis = re.sub(u'(<l [^󠁂󠁃󠁰󠁄]+</l>)', r'<lg>\1</lg>', osis, flags=re.DOTALL)
+        osis = re.sub(u'(<l [^󠁂󠁃󠁐󠁄]+</l>)', r'<lg>\1</lg>', osis, flags=re.DOTALL)
 
         # \b  ###TESTED###
         osis = re.sub('(<lg>.+?</lg>)', lambda m: m.group(1).replace(r'<lb type="p"/>', r'</lg><lg>'), osis, flags=re.DOTALL) # re-handle \b that occurs within <lg>
@@ -444,7 +444,7 @@
         supported: \tr, \th#, \thr#, \tc#, \tcr#
         """
         # \tr_
-        osis = re.sub(r'\\tr\b\s*(.*?)(?=(['+u'󠁂󠁃󠁰󠁄'+r']|\\tr\s|<lb\b|<title\b))', r'<row>\1</row>', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\tr\b\s*(.*?)(?=(['+u'󠁂󠁃󠁐󠁄'+r']|\\tr\s|<lb\b|<title\b))', r'<row>\1</row>', osis, flags=re.DOTALL)
 
         # \th#_text...
         # \thr#_text...
@@ -599,7 +599,7 @@
         osis = re.sub(r'\\k\s+(.+?)\\k\*', r'<seg type="keyword">\1</seg>', osis, flags=re.DOTALL)
 
         # \lit
-        osis = re.sub(r'\\lit\s+(.*?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'󠁰<p type="x-liturgical">\n' + m.group(1) + u'󠁰</p>\n', osis, flags=re.DOTALL)
+        osis = re.sub(r'\\lit\s+(.*?)(?=(\\(m|p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: u'󠁐<p type="x-liturgical">\n' + m.group(1) + u'󠁐</p>\n', osis, flags=re.DOTALL)
 
         # \dc_...\dc*  #### TODO: Find an example---should this really be transChange?
         osis = re.sub(r'\\dc\b\s*(.+?)\\dc\*', r'<transChange type="added" editions="dc">\1</transChange>', osis, flags=re.DOTALL)
@@ -803,9 +803,9 @@
 
     def osisReorderAndCleanup(osis):
         # assorted re-orderings
-        osis = re.sub(u'(󠁰<chapter eID=.+?\n)(<verse eID=.+?>󠁖)\n?', r'\2\n\1', osis)
+        osis = re.sub(u'(󠁐<chapter eID=.+?\n)(<verse eID=.+?>󠁖)\n?', r'\2\n\1', osis)
         osis = re.sub(u'([󠀰󠀱󠀲]</div>)([^󠀰󠀱󠀲]*<chapter eID.+?>)', r'\2\1', osis)
-        osis = re.sub(u'(󠁰</p>\n?󠁰<p>)\n?(<verse eID=.+?>󠁖)\n?', r'\2\n\1\n', osis)
+        osis = re.sub(u'(󠁐</p>\n?󠁐<p>)\n?(<verse eID=.+?>󠁖)\n?', r'\2\n\1\n', osis)
         osis = re.sub(u'\n(<verse eID=.+?>󠁖)', r'\1\n', osis)
         osis = re.sub(u'\n*(<l.+?>)(<verse eID=.+?>[󠁖\n]*<verse osisID=.+?>)', r'\2\1', osis)
 
@@ -813,7 +813,7 @@
         osis = re.sub(r'(</[^\s>]+) [^>]*>', r'\1>', osis)
         osis = osis.replace(r'<lb type="p"/>', r'<lb/>')
         # delete Unicode tags
-        for c in u'󠁂󠁃󠁖󠁰󠁄󠀰󠀱󠀲󠀳󠀴󠀵':
+        for c in u'󠁂󠁃󠁖󠁐󠁄󠀰󠀱󠀲󠀳󠀴󠀵':
             osis = osis.replace(c, '')
 
         for endBlock in ['p', 'div', 'note', 'l', 'lg', 'chapter', 'verse']:




More information about the sword-cvs mailing list