[sword-svn] r209 - trunk/modules/perlconverters

Thu Jun 18 15:37:02 MST 2009

Author: chrislit
Date: 2009-06-18 15:37:02 -0700 (Thu, 18 Jun 2009)
New Revision: 209

Modified:
   trunk/modules/perlconverters/usfm2osis.pl
Log:
removed trailing whitespace from file

Modified: trunk/modules/perlconverters/usfm2osis.pl
===================================================================

--- trunk/modules/perlconverters/usfm2osis.pl	2009-06-18 22:31:19 UTC (rev 208)
+++ trunk/modules/perlconverters/usfm2osis.pl	2009-06-18 22:37:02 UTC (rev 209)
@@ -6,11 +6,11 @@
 
 # Copyright (c) 2002-2008 CrossWire Bible Society <http://www.crosswire.org/>
 # All rights reserved.
-# 
+#
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are
 # met:
-# 
+#
 #     * Redistributions of source code must retain the above copyright
 #        notice, this list of conditions and the following disclaimer.
 #     * Redistributions in binary form must reproduce the above copyright
@@ -21,7 +21,7 @@
 #       its contributors may be used to endorse or promote products
 #       derived from this software without specific prior written
 #       permission.
-# 
+#
 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
@@ -44,7 +44,7 @@
 $date = "2009-02-08";
 # Sets the version of OSIS used in the OSIS header
 $osisVersion = "2.1.1";
-# Stores the USFM Version 
+# Stores the USFM Version
 $usfmVersion = "2.1"; # The USFM reference document can be found at http://confluence.ubs-icap.org/display/USFM/Home;jsessionid=97071C5C1E562036A1CAF4FF77147565 (as of 2008-07-07)
 
 # This is the hash which maps the conversion of USFM book abbreviations to OSIS book abbreviations. ***I would like to add the ability to access an external file to provide options for other languages. In other words, in preparing a USFM file for conversion, a separate file could be created which could be used to map the conversion of abbreviated book names in other languages to OSIS. This would be especially useful for cross-references, but I haven't figured out how to do it yet.
@@ -172,7 +172,7 @@
 	push (@filedata, $sfline);
     }
     close (SFM);
-    
+
     $ollevel = 0;
     $vers = 0;
     $chap = 0;
@@ -183,13 +183,13 @@
     # Creates array for the attribute "n" in cross-references
     @nCR = (a .. z);
     # Sets the initial value for the attribute "n" in cross-references.
-    $nCR = @nCR [0]; 
-    
+    $nCR = @nCR [0];
+
     #encoding stuff
     for ($i = 0; $i < scalar(@filedata); $i++) {
 	$line = @filedata[$i];
 	$line =~ s/[\r\n]//g;
-	
+
 	### Basic XML entity encoding
 	$line =~ s/&(?![a-zA-Z0-9])/&amp;/g;
 	$line =~ s/<< ?/\@/g;
@@ -240,7 +240,7 @@
 	    openTag("<\/div type=\"book\">");
 	    $line = "";
 	}
-	
+
 	# \h (running header--discard)
 	if ($line =~ /^\\h\b/) {
 	    $line = "";
@@ -265,7 +265,7 @@
 	if ($line =~ /^\\toc\d\b/) {
 	    $line = "";
 	}
-	
+
 	### Introduction--Markers Supported: \imt#, \is#, \iot, \io#, \ip
 	#### Markers Not Yet Supported: \ipi, \im, \imi, \ipq, \imq, \ipr, \iq#, \ib, \ili, \ior...\ior*, \iex, \imte, \ie
 
@@ -275,7 +275,7 @@
 #	    openTag("<\/div>");
 #	}
 
-	# \imt major title 
+	# \imt major title
 	if ($line =~ /^\\imt\b\s*(.+)/) {
 	    $line = "<div type=\"introduction\">\n<title>$1<\/title>";
 	    openTag("<\/div>");
@@ -291,12 +291,12 @@
 	    $line = "<div type=\"section\"><title>$2<\/title>";
 	    openTag("<\/div>");
 	}
-	
+
 	# \iot introduction outline title
 	if ($line =~ /^\\iot\b\s*(.*)/) {
 	    $line = "<div type=\"outline\">\n<title>$1<\/title>";
 	}
-	
+
 	# \io\d+ introduction outline item
 	if ($line =~ /^\\io(\d+)\b\s*(.*)/) {
 	    if ($ollevel == $1) {
@@ -321,7 +321,7 @@
 		}
 		$line .= "<item>$2<\/item>\n";
 	    }
-	
+
 	    if (@filedata[$i+1] !~ /^\\io/) {
 		while ($ollevel > 0) {
 		    $line .= "\n<\/list>";
@@ -340,9 +340,9 @@
 	}
 
 	### Titles, Headings, and Labels (elsewhere?)--Markers Supported: \d, \ms#, \s#, \mt#, \r, \sp
-	#### Markers Not Yet Supported: \mte#, \mr, \sr, \rq...\rq* 
-	
-	# \d \ms majorSection 
+	#### Markers Not Yet Supported: \mte#, \mr, \sr, \rq...\rq*
+
+	# \d \ms majorSection
 	if ($line =~ /^\\(ms|d)\b\s*(.+)/) {
 	    push (@outdata, closeTag("<\/p>"));
 	    push (@outdata, closeTag("<\/div type=\"majorSection\">"));
@@ -482,7 +482,7 @@
 	#### Markers Not Yet Supported: \m, \pmo, \pm, \pmc, \pmr, \pi#, \mi, \cls, \li#, \pc, \pr, \ph#, \b
 
 	# Hack to solve an issue in a module that used <R> for linebreaks in the usfm files--may be commented out (not USFM 2.1)
-	$line =~ s/\\lb\*/<lb \/>/g; 
+	$line =~ s/\\lb\*/<lb \/>/g;
 
 	# \p paragraph (From Chapters and Verses)
 	if ($line =~ /^\\p\b\s*/) {
@@ -509,17 +509,17 @@
 	}
 
 	# \b
-	$line =~ s/\\b\b//; 
+	$line =~ s/\\b\b//;
 	# \m
-	$line =~ s/\\m\b//; 
+	$line =~ s/\\m\b//;
 	# \nb
-	$line =~ s/\\nb\b//; 
+	$line =~ s/\\nb\b//;
 
 	### Poetry--Markers Supported: \q#, \qs...\qs*, \qc, \qm#
 	#### Markers Not Yet Supported: \qr, \qa, \qac...\qac*, \b
 
 	# \qt...\qt*, OT quotation (handle early)
-	$line =~ s/\\qt\b\s*(.*?)\\qt\*/<seg type="otPassage">$1<\/seg>/g; 
+	$line =~ s/\\qt\b\s*(.*?)\\qt\*/<seg type="otPassage">$1<\/seg>/g;
 
 	# \q line
 	if ($line =~ /^\\q/) {
@@ -553,7 +553,7 @@
 		}
 	    }
 	}
-	
+
 	# \qs...\qs*, Selah
 	$line =~ s/\\qs\b\s*([^\\]+)\\qs\*/<l type="selah"> $1<\/l>/;
 
@@ -570,7 +570,7 @@
 		}
 		$line =~ s/\\th\d?\b\s*(.+?)\s*(?=(\\th|$))/<cell role=\"label\">$1<\/cell>/g;
 		$line = "<row>$line<\/row>";
-	    }	
+	    }
 
 	    if ($line =~ /^\\tr\b\s*(\\tc.*)/) {
 		$line = $1;
@@ -592,7 +592,7 @@
 		    $table = 1;
 		}
 		$line = "<row><cell role=\"label\">$1<\/cell>\n";
-	    }	
+	    }
 	    elsif ($line =~ /^\\th\d+\b\s*(.*)/) {
 		$line = "<cell role=\"label\">$1<\/cell>\n";
 	    }
@@ -610,16 +610,16 @@
 		    $line .= "<\/row><\/table>\n";
 		    $table = 0;
 		}
-	    }	
+	    }
 	    elsif ($line =~ /^\\tb\d+\b\s*(.*)/) {
 		$line = "<cell>$1<\/cell>\n";
 		if (@filedata[$i+1] !~ /\\tb/) {
 		    $line .= "<\/row><\/table>\n";
 		    $table = 0;
 		}
-	    }	
+	    }
 	}
-	
+
 	sub parseRef {
 	    $ref = @_[0];
 
@@ -633,17 +633,17 @@
 	}
 
 	### Footnotes--Markers Supported: \fk, \fq, \f...\f*, \fv, \fqa
-	####Markers Not Yet Supported: \fe...\fe*, \fr, \fl, \fp, \ft, \fdc...\fdc*, \fm...\fm* 
-	
+	####Markers Not Yet Supported: \fe...\fe*, \fr, \fl, \fp, \ft, \fdc...\fdc*, \fm...\fm*
+
 	sub footnoteHandler {
 	    $note = @_[0];
 	    $note = "<note>$note</note>";
-	    
+
 	    # \fk Catch Words
 	    $note =~ s/\\fk\s(.+?)\\fk\*/<catchWord>$1<\/catchWord>/g;
 	    $note =~ s/\\fk\s(.+?)(?=\\f)/<catchWord>$1<\/catchWord>/g;
 	    $note =~ s/\\fk\*//g;
-	    
+
 	    # \fq Quotations in Footnotes
 	    # CCL--I don't know the difference, aside from length, between catch words and quotations in footnotes. It may vary by document.
 	    $note =~ s/\\fq\s(.+?)\\fq\*/<catchWord>$1<\/catchWord>/g;
@@ -657,18 +657,18 @@
 
 	    # \fv Footnote verse number
 	    $note =~ s/\\fv\s(.+?)\\fv\*/<reference osisID=\"$book.$chap.$1\">$1<\/reference>/g;
-	    $note =~ s/\\fv\s*(\d+)\b\s*(?=\\f)/<reference osisID=\"$book.$chap.$1\">$1<\/reference>/g;	
+	    $note =~ s/\\fv\s*(\d+)\b\s*(?=\\f)/<reference osisID=\"$book.$chap.$1\">$1<\/reference>/g;
 	    $note =~ s/\\fv\*//g;
-    
+
 	    # \fr Footnote origin reference (the verse where the fn appears)
 	    while ($note =~ /\\fr\s*(.+?)\s*(?=\\f)/) {
 		$sourceVal = parseRef($1);
-		$nFN++; 		
+		$nFN++;
 #		$note =~ s/\\fr\s*(.+?)\s*(?=\\f)//;
 		$note =~ s/\\fr\s*//;
 		$note =~ s/<note>/<note n="$nFN">/;
 	    }
-	    
+
 	    # \ft Footnote text
 	    $note =~ s/\\ft\s//g;
 	    $note =~ s/\\ft\*//g;
@@ -678,23 +678,23 @@
 
 	    # \f Footnote opener
 	    $note =~ s/\\f\b\s*([^\s]\s*)?//;
-	    
+
 	    return $note;
 	}
-	
+
 	$line =~ s/(\\f\b.+?\\f\*)/footnoteHandler($1)/eg;
-	
+
 	### Crossreferences--Markers Supported: \x + \xo...\x*, \xk, \xq, \xt
-	#### Markers Not Yet Supported: \xdc...\xdc* 
+	#### Markers Not Yet Supported: \xdc...\xdc*
 	sub xrefHandler {
 	    $xref = @_[0];
 	    $xref = "<note type=\"crossReference\">$xref</note>";
-	    
+
 	    # \xk Catch Words
 	    $xref =~ s/\\xk\s(.+?)\\xk\*/<catchWord>$1<\/catchWord>/g;
 	    $xref =~ s/\\xk\s(.+?)(?=\\x)/<catchWord>$1<\/catchWord>/g;
 	    $xref =~ s/\\xk\*//g;
-	    
+
 	    # \xq Quotations in Footnotes
 	    # CCL--I don't know the difference, aside from length, between catch words and quotations in footnotes. It may vary by document.
 	    $xref =~ s/\\xq\s(.+?)\\xq\*/<catchWord>$1<\/catchWord>/g;
@@ -704,7 +704,7 @@
 	    # \xo Footnote origin reference (the verse where the fn appears)
 	    while ($xref =~ /\\xo\s*(.+?)\s*(?=\\x)/) {
 		$sourceVal = parseRef($1);
-		$xFN++; 
+		$xFN++;
 #		$xref =~ s/\\xo\s*(.+?)\s*(?=\\x)//;
 		$xref =~ s/\\xo\s*//;
 		$xref =~ s/<note type=\"crossReference\">/<note type=\"crossReference\" n="$xFN">/;
@@ -720,19 +720,19 @@
 
 	    # \x Footnote opener
 	    $xref =~ s/\\x\b\s*([^\s]\s*)?//;
-	    
+
 	    return $xref;
 	}
-	
+
 	$line =~ s/(\\x\b.+?\\x\*)/xrefHandler($1)/eg;
 
-	
+
 	# crossReference osisRef=""
 	$line =~ s/<reference osisRef="">([^<]+)<\/reference>/<reference osisRef="$1">$1<\/reference>/g;
 	$line =~ s/osisRef="\s/osisRef="\s/g;
 	$line =~ s/\s">/">/g;
-	$line =~ s/<reference osisRef="([^\s\"]+)\s/<reference osisRef="$1\./g; # Changes space after book name to a period 
-	
+	$line =~ s/<reference osisRef="([^\s\"]+)\s/<reference osisRef="$1\./g; # Changes space after book name to a period
+
 	$line =~ s/<reference osisRef="([^\"]+):([^\"]+)"/<reference osisRef="$1\.$2"/g; # Gen 1:1
 	$line =~ s/<reference osisRef="([^\.\"]+)\.(\d+)\.(\d+)-(\d+)"/<reference osisRef="$1\.$2\.$3-$1\.$2\.$4"/g; # Gen 1:1-2
 	$line =~ s/<reference osisRef="([^\.\"]+).(\d+):(\d+)-(\d+).(\d+)"/<reference osisRef="$1\.$2\.$3-$1\.$4\.$5"/g; # Gen 1:1-2:3
@@ -744,19 +744,19 @@
 	#### Markers Not Yet Supported: Special Text: \add...\add*, \bk...\bk*, \dc...\dc*, \k...\k*, \lit, \ord...\ord*, \sig...\sig*, \sls...\sls*, \wj...\wj*; Character Styling: \em...\em*, \bd...\bd*, \bdit...\bdit*, \no...\no*, \sc...\sc*; Spacing and Breaks: !$, //, \pb; Special Features: \fig...\fig*, \ndx...\ndx*, \pro...\pro*, \w...\w*, \wg...\wg*, \wh...\wh*
 
 	# \it...\it*, italic text
-	$line =~ s/\\it\b\s*(.*?)\\it\*/<hi type=\"italic\">$1<\/hi>/g; 
+	$line =~ s/\\it\b\s*(.*?)\\it\*/<hi type=\"italic\">$1<\/hi>/g;
 
 	# \nd...\nd*, Divine Name
-	$line =~ s/\\nd\b\s*(.*?)\\nd\*/<divineName>$1<\/divineName>/g; 
+	$line =~ s/\\nd\b\s*(.*?)\\nd\*/<divineName>$1<\/divineName>/g;
 
 	# \pn...\pn*, Proper name
-	$line =~ s/\\pn\b\s*(.*?)\\pn\*/<name>$1<\/name>/g; 
+	$line =~ s/\\pn\b\s*(.*?)\\pn\*/<name>$1<\/name>/g;
 
 	# \tl...\tl*, Foreign Language (treated here merely as transliterated text)
-	$line =~ s/\\tl\b\s*(.*?)\\tl\*/<hi type="italic">$1<\/hi>/g; 
+	$line =~ s/\\tl\b\s*(.*?)\\tl\*/<hi type="italic">$1<\/hi>/g;
 
 	# \add...\add*, text added for translation purposes
-	$line =~ s/\\add\b\s*(.*?)\\add\*/<transChange type=\"added\">$1<\/transChange>/g; 
+	$line =~ s/\\add\b\s*(.*?)\\add\*/<transChange type=\"added\">$1<\/transChange>/g;
 
 	$line =~ s/_/ /g;