[sword-svn] r179 - trunk/versification

chrislit at crosswire.org chrislit at crosswire.org
Sun Mar 15 20:10:57 MST 2009


Author: chrislit
Date: 2009-03-15 20:10:56 -0700 (Sun, 15 Mar 2009)
New Revision: 179

Modified:
   trunk/versification/v11nsys.pl
Log:
added ability to generate v11n system definitions from OSIS files
added option to either count verses/chapters or trust the number of the final verse/chapter (with optional warnings if the numbers differ)
added warning messages
added help text
added progress messages
added (but disabled) ability to generate builtin_abbrevs replacements
added v11n name & book order comments to output .h
sundry bugfixes


Modified: trunk/versification/v11nsys.pl
===================================================================
--- trunk/versification/v11nsys.pl	2009-03-16 02:34:45 UTC (rev 178)
+++ trunk/versification/v11nsys.pl	2009-03-16 03:10:56 UTC (rev 179)
@@ -2,14 +2,10 @@
 
 ###############################################################################
 # 
-#  When run without any arguments, this script reads the CCEL files:
-#  Bible.*.xml.
-#  
-#  Otherwise, supply the script with a list (or wildcard) files and it
-#  will attempt to create a v11n system definition on that basis.
-#  
-#  The --vpl switch instructs the script to interpret files as VPL files.
-#  The --imp switch instructs the script to interpret files as IMP files.
+# versification (v11n) system .h file generator:
+# processes various file types, including VPL, IMP, OSIS, & CCEL's
+# versification XML files (see http://www.ccel.org/refsys/refsys.html)
+# and generates a C++ header file for use in the Sword project
 #
 ###############################################################################
 
@@ -23,14 +19,30 @@
     }
 
     $osisBook = $osis{lc($lastBook)};
+    if ($count eq "count") {
+	$cval = $cCount;
+    }
+    else {
+	$cval = $lastChap;
+    }
     if ($otnt == 0) {
-	$otbooks .= "\t{\"$idmap{$osisBook}\", \"$osisBook\", \"$osisBook\", $cCount},\n";
+	$otbooks .= "  {\"$idmap{$osisBook}\", \"$osisBook\", \"$osisBook\", $cval},\n";
     }
     else {
-	$ntbooks .= "\t{\"$idmap{$osisBook}\", \"$osisBook\", \"$osisBook\", $cCount},\n";
+	$ntbooks .= "  {\"$idmap{$osisBook}\", \"$osisBook\", \"$osisBook\", $cval},\n";
     }
+    $bookOrder .= " $osisBook";
+    if ($warn == 1) {
+	if ($cCount ne $lastChap) {
+	    print "WARNING: chapter count ($cCount) does not equal last chapter ($lastChap) in book $osisBook of versification $v11n ($infile).\n";
+	}
+    }
 }
 
+sub printUsage() {  # Prints the command-line help text; takes no arguments and does not exit (callers decide whether to exit afterwards).
+    print " v11nsys.pl --? --warn --(vpl|imp|xml|osis) --(count|last) [files]\n\n  When run without any arguments, this script looks for the CCEL files: Bible.*.xml.\n\n  Otherwise, supply the script with a list of files (or wildcard) and it will attempt to create a v11n system definition on that basis.\n\n --? prints usage (this).\n\n --warn turns on warning messages.\n\n --vpl instructs the script to interpret files as VPL files.\n --imp instructs the script to interpret files as IMP files.\n --xml instructs the script to interpret files as XML files using CCEL's definition format (default).\n --osis instructs the script to interpret files as OSIS XML files.\n\n --count instructs the script to count chapters/verses it encounters.\n --last instructs the script to assume that the last chapter/verse it encounters is equal to the number of chapters/verses in a book/chapter (default).\n\n";
+}
+
 # @canons will contain this list of files, these are in a basic XML format.
 # Each file lists osisIDs along with the English names associated with the
 # osisID. These aren't exhaustive, and may or may not overlap (but hopefully
@@ -65,7 +77,9 @@
 		$osis{lc($name)} = $id;
 	    }
 	    else {
-#		print "ERROR: Duplicate mapping from $id found in $mapfile (<abbr>).\n";
+		if ($warn == 1) {
+		    print "ERROR: Duplicate mapping from $id found in $mapfile (<abbr>).\n";
+		}
 	    }
 	}
 	elsif ($line =~ /<name>(.+?)<\/name>/) {
@@ -75,7 +89,9 @@
 		$abbrevs{lc($id)} .= "$name;"
 	    }
 	    else {
-#		print "ERROR: Duplicate mapping from $id found in $mapfile (<name>).\n";
+		if ($warn == 1) {
+		    print "ERROR: Duplicate mapping from $id found in $mapfile (<name>).\n";
+		}
 	    }
 
 	    if ($idmap{$id} eq "") {
@@ -83,45 +99,77 @@
 	    }
 	    else {
 		# Duplicates most likely indicate alternate names, so ignore them.
-#		print "ERROR: Duplicate mapping from $id found in $mapfile.\n";
+		if ($warn == 1) {
+		    print "ERROR: Duplicate mapping from $id found in $mapfile.\n";
+		}
 	    }
 	}
     }
     close (MAP);
 }
 
+$mode = "xml";
+$count = "last";
+$warn = 0;
+
 if (@ARGV[0] ne "") {
     $n = 0;
-    $mode = "xml";
     while (@ARGV[$n] ne "") {
 	if (@ARGV[$n] eq "--vpl") {
 	    $mode = "vpl";
+	    print "Set interpretation mode to VPL.\n";
 	}
 	elsif (@ARGV[$n] eq "--imp") {
 	    $mode = "imp";
+	    print "Set interpretation mode to IMP.\n";
 	}
+	elsif (@ARGV[$n] eq "--osis") {
+	    $mode = "osis";
+	    print "Set interpretation mode to OSIS.\n";
+	}
 	elsif (@ARGV[$n] eq "--xml") {
 	    $mode = "xml";
 	}
+	elsif (@ARGV[$n] eq "--count") {
+	    $count = "count";
+	    print "Set count mode to count.\n";  # report the mode actually selected by --count
+	}
+	elsif (@ARGV[$n] eq "--last") {
+	    $count = "last";
+	}
+	elsif (@ARGV[$n] eq "--warn") {
+	    $warn = 1;
+	    print "Warning messages enabled.\n";
+	}
+	elsif (@ARGV[$n] =~ /^\-+(h|\?|usage)/) {
+	    printUsage();
+	    exit();
+	}
 	else {
 	    push @srcfiles, @ARGV[$n];
 	}
 	$n++;
     }
 }
-else {
-    opendir (DIR, ".");
-    @srcfiles = grep /Bible\.[^\.]+\.xml$/, readdir DIR;
-    closedir(DIR);
-    $mode = "xml";
+if ($mode eq "xml") {
+    print "Set interpretation mode to CCEL XML.\n";
 }
+if ($count eq "last") {
+    print "Set count mode to last.\n";
+}
 
+if (@srcfiles == 0) {
+    printUsage();
+}
+
 foreach $infile (@srcfiles) {
-    
     if ($infile =~ /^Bible.+xml$/) {
-	$infile =~ /^Bible\.([^\.]+)\.xml/;
-	$v11n = $1;
-	$outfile = lc("v11n$1.h");
+	$infile =~ /^Bible(\.(.+))?\.xml/;
+	$v11n = $2;
+	if ($v11n eq "") {
+	    $v11n = "NRSVA";
+	}
+	$outfile = lc("v11n$2.h");
     }
     else {
 	$v11n = $infile;
@@ -130,6 +178,8 @@
 	$outfile = lc("v11n$v11n.h");
     }
 
+    print "Processing $infile --> $outfile (v11n: $v11n).\n";
+
     open INF, $infile;
     open OUTF, ">$outfile";
 
@@ -145,31 +195,55 @@
     $vCount = 0;
     
     $abbrevs = "/******************************************************************************\n *	Abbreviations - MUST be in alphabetical order & by PRIORITY\n *		RULE: first match of entire key\n *			(e.g. key: \"1CH\"; match: \"1CHRONICLES\")\n */\n\nconst struct abbrev builtin_abbrevs$v11n\[\] = {\n";
-    $abbrevsCloser = "\t{\"\", \"\"}\n};\n\n\n";
+    $abbrevsCloser = "  {\"\", \"\"}\n};\n\n\n";
     $abbrevsList = "";
 
-    $vm = "/******************************************************************************\n *	Maximum verses per chapter\n */\n\nint vm$v11n\[\] = {";
-    $otbooks = "/******************************************************************************\n * [on]tbooks$v11n - initialize static instance for all canonical text names\n *		and chapmax\n */\nstruct sbook otbooks$v11n\[\] = {\n";
-    $ntbooks = "struct sbook ntbooks$v11n\[\] = {\n";
-    $booksCloser = "\t{\"\", \"\", \"\", 0}\n};\n\n";
+    $vm = "/******************************************************************************\n *	Maximum verses per chapter\n */\n\nint vm_$v11n\[\] = {";
+    $otbooks = "/******************************************************************************\n * [on]tbooks_$v11n - initialize static instance for all canonical text names\n *		and chapmax\n */\nstruct sbook otbooks_$v11n\[\] = {\n";
+    $ntbooks = "struct sbook ntbooks_$v11n\[\] = {\n";
+    $booksCloser = "  {\"\", \"\", \"\", 0}\n};\n\n";
     
+    $bookOrder = "// Book order:";
+
     $otnt = 0; # 0 = ot, 1 = nt
     
+    if ($mode eq "osis") {
+	while (<INF>) {
+	    $line = $_;
+	    $line =~ s/<verse([^>]+)(osisID=\"[^\"]+\")/$idList .= "$2\n";/eg;
+	}
+
+	close (INF);
+
+	open (TMP, ">tempfile");
+	print TMP $idList;
+	close (TMP);
+
+	open (INF, "tempfile");
+    }
+
     while (<INF>) {
 	$line = $_;
 		
 	$osisID = "";
 
-	if ($line =~ /<osisID.+?code=\"(.+?)\"\/>/) {
+	if ($line =~ /<osisID.+?code=\"([^\"]+?)\"\/>/) {
 	    $osisID = $1;
 	}
-	elsif ($mode eq "vpl" && $line =~ /^(.+?) ([0-9A-Za-z]+):([0-9A-Za-z]+)/) {
+	elsif ($mode eq "vpl" && $line =~ /^([^:]+?) ([0-9A-Za-z]+):([0-9A-Za-z]+)/) {
 	    $osisID = "$1.$2.$3";
 	}
-	elsif ($mode eq "imp" && $line =~ /^\$\$\$(.+?) ([0-9A-Za-z]+):([0-9A-Za-z]+)/) {
+	elsif ($mode eq "imp" && $line =~ /^\$\$\$([^:]+?) ([0-9A-Za-z]+):([0-9A-Za-z]+)/) {
 	    $osisID = "$1.$2.$3";
 	}
-
+	elsif ($mode eq "osis" && $line =~ /osisID=\"([^\"]+)\"/) {
+	    $osisID = $1;
+	}
+	
+	if ($osisID =~ /\.0(\.|$)/) { # in the case of chap/verse 0
+	    $osisID = "";
+	}
+	
 	if ($osisID ne "") {
 	    $lastBook = $thisBook;
 	    $lastChap = $thisChap;
@@ -181,13 +255,23 @@
 	    $thisChap = $2;
 	    $thisVers = $3;
 	    
-	    if ((($thisBook ne $lastBook) ||($thisChap ne $lastChap)) && $lastVers ne "") {
-		$vm .= "$vCount, ";
+	    if ((($thisBook ne $lastBook) || ($thisChap ne $lastChap)) && $lastVers ne "") {
+		if ($count eq "count") {
+		    $vm .= "$vCount, ";
+		}
+		else {
+		    $vm .= "$lastVers, ";
+		}
+		if ($warn == 1) {
+		    if ($vCount ne $lastVers) {
+			print "WARNING: verse count ($vCount) does not equal last verse ($lastVers) in chapter $osis{lc($lastBook)} $lastChap of versification $v11n ($infile).\n";
+		    }
+		}
 	    }
-
+	    
 	    if ($thisBook ne $lastBook) {
 		$bCount++;
-		$vm .= "\n\t// $idmap{$osis{lc($thisBook)}}\n\t";
+		$vm .= "\n  // $idmap{$osis{lc($thisBook)}}\n  ";
 		$abbrevsList .= $abbrevs{lc($osis{lc($thisBook)})};
 		
 		if ($lastBook ne "") {
@@ -205,7 +289,19 @@
 	    }
 	}
     }
-    $vm .= "$vCount\n};\n";
+    if ($count eq "count") {
+	$vm .= "$vCount";
+    }
+    else {
+	$vm .= "$thisVers";
+    }
+    $vm .= "\n};\n";
+    if ($warn == 1) {
+	if ($vCount ne $thisVers) {
+	    print "WARNING: verse count ($vCount) does not equal last verse ($thisVers) in chapter $osis{lc($lastBook)} $lastChap of versification $v11n ($infile).\n";
+	}
+    }
+
     buildBooksArrays();
     $otbooks .= $booksCloser;
     $ntbooks .= $booksCloser;
@@ -218,17 +314,23 @@
     }
     @abbrevsQueue = sort @abbrevsQueue;
     foreach $a (@abbrevsQueue) {
-	$abbrevs .= "\t{\"" . uc($a) . "\", \"" . $osis{lc($a)} . "\"},\t\t//" . $idmap{$osis{lc($a)}} . "\n";
+	$abbrevs .= "  {\"" . uc($a) . "\", \"" . $osis{lc($a)} . "\"},\t\t//" . $idmap{$osis{lc($a)}} . "\n";
     }
     $abbrevs .= $abbrevsCloser;
 
+
+
+    print OUTF "// Versification system: $v11n\n";
+    print OUTF "$bookOrder\n\n";
+    
     print OUTF $otbooks;
     print OUTF $ntbooks;
-    print OUTF $abbrevs;
+#    print OUTF $abbrevs;  # line disabled so that we don't print out replacement builtin_abbrev line
     print OUTF $vm;
     
     print OUTF "\n\nSWORD_NAMESPACE_END\n\n\n#endif\n";
     
     close (INF);
     close (OUTF);
+    unlink("tempfile");
 }




More information about the sword-cvs mailing list