[sword-svn] r141 - trunk/modules/perlconverters

chrislit at www.crosswire.org chrislit at www.crosswire.org
Wed Jun 11 23:06:03 MST 2008


Author: chrislit
Date: 2008-06-11 23:06:02 -0700 (Wed, 11 Jun 2008)
New Revision: 141

Modified:
   trunk/modules/perlconverters/usfm2osis.pl
Log:
fixed a few bugs, improved a few things for broader USFM support

Modified: trunk/modules/perlconverters/usfm2osis.pl
===================================================================
--- trunk/modules/perlconverters/usfm2osis.pl	2008-04-03 09:46:47 UTC (rev 140)
+++ trunk/modules/perlconverters/usfm2osis.pl	2008-06-12 06:06:02 UTC (rev 141)
@@ -1,10 +1,10 @@
 #!/usr/bin/perl
 
-## USFM to OSIS (2.0) converter
+## USFM to OSIS (2.1.1) converter
 
 ## Licensed under the standard BSD license:
 
-# Copyright (c) 2002,2003,2007 CrossWire Bible Society <http://www.crosswire.org/>
+# Copyright (c) 2002-2008 CrossWire Bible Society <http://www.crosswire.org/>
 # All rights reserved.
 # 
 # Redistribution and use in source and binary forms, with or without
@@ -39,8 +39,8 @@
 
 #########################################################################
 
-$version = "1.1";
-$date = "2007-04-23";
+$version = "1.2";
+$date = "2008-05-16";
 $osisVersion = "2.1.1";
 
 %OSISbook = (
@@ -98,7 +98,7 @@
     }
 }
 
-push (@outdata, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<osis xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace osisCore.$osisVersion.xsd\">\n<osisText osisRefWork=\"Bible\" xml:lang=\"en\" osisIDWork=\"$osisWork\">\n<header>\n<work osisWork=\"$osisWork\"\/>\n<\/header>\n");
+push (@outdata, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<osis xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.$osisVersion.xsd\">\n<osisText osisRefWork=\"Bible\" xml:lang=\"en\" osisIDWork=\"$osisWork\">\n<header>\n<work osisWork=\"$osisWork\"\/>\n<\/header>\n");
 
 $tagStack = "<\/osisText><\/osis>";
 $chapClose = "";
@@ -108,7 +108,7 @@
     $tag = @_[0];
 
     if ($tagStack =~ /$tag/) {
-	$tagStack =~ s/(.*?$tag)//;
+	$tagStack =~ s/^(.*?$tag)//;
 	$taglist = $1;
 	$taglist =~ s/>/>\n/g;
 	$taglist =~ s/(<\/\w+)\s+[^>]+>/$1>/g;
@@ -126,7 +126,7 @@
 }
 
 foreach $file (@files) {
-    @filedata = `uconv -f windows-1252 -t utf-8 $file`;
+    @filedata = do { open(my $in, '<', $file) or warn "usfm2osis: cannot open $file: $!\n"; <$in> }; # read the file directly: no shell involved, so quotes/$/backticks in the name are harmless
 
     $ollevel = 0;
     $vers = 0;
@@ -159,18 +159,25 @@
 
 	### File Identification
 
+	$line =~ s/\\v\b\s+(\d+)(\-\d+|\s*\\v\b\s+\d+)\s*\\v\b\s+(\d+)/\\v $1\-$3/;
+	$line =~ s/\\v\b\s+(\d+)\s*\\v\b\s+(\d+\-)?(\d+)/\\v $1\-$3/;
+	$line =~ s/^\\(p[is]|mi)\b/\\p/;
+	$line =~ s/^\\li\b/\\p/; #\li isn't part of USFM, so we'll make it \p
+
 	# \id (book marker)
 	if ($line =~ /^\\id\b\s*([^ ]*)/) {
 	    $book = $OSISbook{$1};
 	    $chap = 0;
-	    if ($chapClose =~ "<verse") {
-		push (@outdata, $verseClose); # close verse
-		$verseClose = "";
+	    if ($versClose =~ /<verse/) {
+		push (@outdata, $versClose); # close verse
+		$versClose = "";
 	    }
-	    if ($chapClose =~ "<chapter") {
+#	    push (@outdata, closeTag("<\/div[^>]*?>")); # close section
+	    if ($chapClose =~ /<chapter/) {
 		push (@outdata, $chapClose); # close chapter
 		$chapClose = "";
 	    }
+
 	    push (@outdata, closeTag("<\/div type=\"book\">")); #close book
 	    if ($book eq "") {
 		$book = "UnknownUSFMBook";
@@ -199,6 +206,11 @@
 	    openTag("<\/div>");
 	}
 
+	# \imt major title
+	if ($line =~ /^\\imt\b\s*(.+)/) {
+	    $line = "<title>$1<\/title>";
+	}
+
 	# \is introduction section title
 	if ($line =~ /^\\is(\d*)\b\s*(.*)/) {
 	    $level = $1;
@@ -222,13 +234,16 @@
 	    elsif ($ollevel > $1) {
 		$line = "";
 		while ($ollevel > $1) {
-		    $line .= "<\/list>";
+		    $line .= "<\/list><\/item>\n";
 		    $ollevel--;
 		}
 		$line .= "<item>$2<\/item>";
 	    }
 	    elsif ($ollevel < $1) {
 		$line = "";
+		if ($ollevel != 0) {
+		    $line .= "<item>";
+		}
 		while ($ollevel < $1) {
 		    $line .= "<list>\n";
 		    $ollevel++;
@@ -239,6 +254,7 @@
 	    if (@filedata[$i+1] !~ /^\\io/) {
 		while ($ollevel > 0) {
 		    $line .= "\n<\/list>";
+		    if ($ollevel > 1) {$line .= "<\/item>";}
 		    $ollevel--;
 		}
 		if ($ollevel == 0) {
@@ -267,7 +283,7 @@
 	    push (@outdata, $versClose);
 	    $versClose = "";
 	    push (@outdata, closeTag("<\/p>"));
-	    if ($chapClose =~ "<chapter") {
+	    if ($chapClose =~ /<chapter/) {
 		push (@outdata, $chapClose); # close previous chapter
 		$chapClose = "";
 	    } else {
@@ -279,13 +295,13 @@
 	    $line =~ s/\\c\b\s*([^ ]*)//;
 	}
 
-	# \d majorSection
-	if ($line =~ /^\\d\b\s*(.+)/) {
+	# \d \ms majorSection
+	if ($line =~ /^\\(ms|d)\b\s*(.+)/) {
 	    push (@outdata, closeTag("<\/p>"));
 	    push (@outdata, closeTag("<\/div type=\"majorSection\">"));
 	    push (@outdata, "<div type=\"majorSection\">\n");
 	    openTag("<\/div type=\"majorSection\">");
-	    $line =~ s/\\d\b\s*(.+)/<title>$1<\/title>/;
+	    $line =~ s/\\(ms|d)\b\s*(.+)/<title>$2<\/title>/; # $1 is now the marker name (ms|d); the heading text is $2
 	}
 
 	# \s section
@@ -300,18 +316,18 @@
 	    }
 	}
 
-	# \ss subSection
-	if ($line =~ /^\\ss\b\s*(.+)/) {
-	    $line =~ s/\\ss\b\s*(.+)/<title>$1<\/title>/;
+	# \ss \s2 subSection
+	if ($line =~ /^\\s[s2]\b\s*(.+)/) {
+	    $line =~ s/\\s[s2]\b\s*(.+)/<title>$1<\/title>/;
 	}
 
-	# \sss x-subsubSection
-	if ($line =~ /^\\sss\b\s*(.+)/) {
+	# \sss \s3 x-subsubSection
+	if ($line =~ /^\\s(ss|3)\b\s*(.+)/) {
 	    push (@outdata, closeTag("<\/p>"));
 	    push (@outdata, closeTag("<\/div type=\"x=subSubSection\">"));
 	    push (@outdata, "<div type=\"x-subSubSection\">\n");
 	    openTag("<\/div type=\"x-subSubSection\">");
-	    $line =~ s/\\sss\b\s*(.+)/<title>$1<\/title>/;
+	    $line =~ s/\\s(ss|3)\b\s*(.+)/<title>$2<\/title>/;
 	}
 
 	# \p paragraph
@@ -386,6 +402,12 @@
 	    $line =~ s/(<note [^>]+>)([A-Z][^a-z:]*?):/$1<catchWord>$2<\/catchWord>/g;
 	}
 	
+	# \f if we STILL have notes, just change them to <note>
+	if ($line =~ /\\f\b\s*/) {
+	    $line =~ s/\\f\b\s*/<note>/;
+	}
+	
+	
 	# \x crossReference
 	for ($j = 2; $j > 0; $j--) {
 	    if ($line =~ /\\x\b\s*\\rf\s*([^\\]+)\\rf\*\s*/) {
@@ -466,6 +488,30 @@
 
 	# \th table heading
 	if ($line =~ /^\\t/) {
+	    if ($line =~ /^\\tr\b\s*(\\th.*)/) {
+		$line = "$1";
+		if ($table != 1) {
+		    push (@outdata, "<table>\n");
+		    $table = 1;
+		}
+		$line =~ s/\\th\d?\b\s*(.+?)\s*(?=(\\th|$))/<cell role=\"label\">$1<\/cell>/g;
+		$line = "<row>$line<\/row>";
+	    }	
+
+	    if ($line =~ /^\\tr\b\s*(\\tc.*)/) {
+		$line = $1;
+		if ($table != 1) {
+		    push (@outdata, "<table>\n");
+		    $table = 1;
+		}
+		$line =~ s/\\tcr?\d?\b\s*(.+?)\s*(?=(\\tc|$))/<cell>$1<\/cell>/g;
+		$line = "<row>$line<\/row>";
+		if (@filedata[$i+1] !~ /\\tr/) {
+		    $line .= "<\/table>\n";
+		    $table = 0;
+		}
+	    }
+
 	    if ($line =~ /^\\th1\b\s*(.*)/) {
 		if ($table != 1) {
 		    push (@outdata, "<table>\n");
@@ -475,7 +521,8 @@
 	    }	
 	    elsif ($line =~ /^\\th\d+\b\s*(.*)/) {
 		$line = "<cell role=\"label\">$1<\/cell>\n";
-	    }	
+	    }
+
 	    if ($line =~ /^\\tb1\b\s*(.*)/) {
 		if ($table != 1) {
 		    push (@outdata, "<table>\n");
@@ -514,11 +561,16 @@
 	    }
 	}
 
-	# \mt title
-	if ($line =~ /^\\mt\b\s*(.+)/) {
+	# \mt\mt1 title
+	if ($line =~ /^\\mt[1]?\b\s*(.+)/) {
 	    $line = "<title type=\"main\">$1<\/title>";
 	}
 
+	# \mt2 title
+	if ($line =~ /^\\mt2\b\s*(.+)/) {
+	    $line = "<title type=\"continued\">$1<\/title>";
+	}
+
 	# \st,\st2 title
 	if ($line =~ /^\\st2?\b\s*(.+)/) {
 	    $line = "<title type=\"continued\">$1<\/title>";
@@ -553,6 +605,7 @@
 	# remove unnecessary tags
 	$line =~ s/\\b\b//;
 	$line =~ s/\\m\b//;
+	$line =~ s/\\restore\b//;
 
 
 	$line =~ s/\\bq\*/<\/p><\/q>/g;
@@ -592,6 +645,8 @@
 @filedata = <INF>;
 close (INF);
 open (OUTF, ">$outputFilename");
+
+#bubble chapter down
 for ($i = 0; $i < scalar(@filedata); $i++) {
     if (@filedata[$i] =~ /^<\// && @filedata[$i-1] =~ /^<chapter.+\/>/) {
 	$temp = @filedata[$i];
@@ -603,6 +658,7 @@
 for ($i = 0; $i < scalar(@filedata); $i++) {
     $fullfile .= @filedata[$i];
 }
+$fullfile =~ s/<\/div>\n(<chapter eID[^>]+>)/$1\n<\/div>/mg; #swap the chapter back up one before the book closer
 
 $q = 1;
 




More information about the sword-cvs mailing list