[sword-svn] r175 - trunk/versification

chrislit at crosswire.org chrislit at crosswire.org
Sat Mar 14 19:23:29 MST 2009


Author: chrislit
Date: 2009-03-14 19:23:28 -0700 (Sat, 14 Mar 2009)
New Revision: 175

Added:
   trunk/versification/makeabbrevs.pl
Log:
generate a builtin_abbrevs from XML canon definitions

Added: trunk/versification/makeabbrevs.pl
===================================================================
--- trunk/versification/makeabbrevs.pl	                        (rev 0)
+++ trunk/versification/makeabbrevs.pl	2009-03-15 02:23:28 UTC (rev 175)
@@ -0,0 +1,85 @@
+#!/usr/bin/perl
+
+# Generate builtin_abbrevs.h, a C table of book-name abbreviations, from the
+# XML canon definition files listed in @canons. Each <id> and each <name>
+# found in those files becomes one upper-cased key mapped to its osisID.
+
+use strict;
+use warnings;
+
+# @canons will contain this list of files, these are in a basic XML format.
+# Each file lists osisIDs along with the English names associated with the
+# osisID. These aren't exhaustive, and may or may not overlap (but hopefully
+# don't). We are only using these to load mappings from osisIDs.
+my @canons = (
+    "canon.bible.xml",      # the Bible, broadly defined
+#    "canon.af.xml",         # Apostolic Fathers
+#    "canon.otp.xml",        # OT pseudepigrapha
+#    "canon.nta.xml",        # NT apocrypha
+#    "canon.lds.xml",        # Mormon books
+#    "canon.naghammadi.xml", # Nag Hammadi Library
+#    "canon.qumran.xml",     # Qumran mss
+#    "canon.classical.xml",  # intended for classical works, currently just Josephus
+);
+
+my $warn = 0;           # set to a true value to report duplicate mappings
+my %osis;               # lower-cased id or name => osisID
+my %idmap;              # osisID => first English name listed for it
+my @abbrevsQueue = ();  # every id and name that gets a table entry
+my $id = "";            # most recent <id>; following <name>s belong to it
+
+foreach my $mapfile (@canons) {
+    # Abort if a canon file is unreadable rather than emit a truncated table.
+    open(my $map, '<', $mapfile) or die "Cannot open $mapfile: $!\n";
+    while (my $line = <$map>) {
+        $line =~ s/<!\-\-.+?\-\->//g;   # strip XML comments that fit on one line
+        $line =~ s/\&amp;/\&/g;
+
+        if ($line =~ /<id>(.+?)<\/id>/) {
+            $id = $1;
+            $osis{lc($id)} = $id;
+            push @abbrevsQueue, $id;
+        }
+        elsif ($line =~ /<name>(.+?)<\/name>/) {
+            my $name = $1;
+            my $key  = lc($name);
+
+            # The first id or name to claim a spelling wins.
+            if (!defined $osis{$key} || $osis{$key} eq "") {
+                $osis{$key} = $id;
+                push @abbrevsQueue, $name;
+            }
+            elsif ($warn) {
+                warn "ERROR: Duplicate mapping from $name found in $mapfile.\n";
+            }
+
+            # Duplicates most likely indicate alternate names, so ignore them.
+            if (!defined $idmap{$id} || $idmap{$id} eq "") {
+                $idmap{$id} = $name;
+            }
+            elsif ($warn) {
+                warn "ERROR: Duplicate mapping from $id found in $mapfile.\n";
+            }
+        }
+    }
+    close($map);
+}
+
+my $abbrevs = "/******************************************************************************\n *	Abbreviations - MUST be in alphabetical order & by PRIORITY\n *		RULE: first match of entire key\n *			(e.g. key: \"1CH\"; match: \"1CHRONICLES\")\n */\n\nconst struct abbrev builtin_abbrevs\[\] = {\n";
+
+# The table header demands alphabetical order of the emitted (upper-cased)
+# keys, so compare upper-cased; sorting the mixed-case originals would put
+# e.g. "EpJer" ahead of "Eph".
+foreach my $abbrev (sort { uc($a) cmp uc($b) or $a cmp $b } @abbrevsQueue) {
+    my $osisID = $osis{lc($abbrev)};
+    my $name = defined $idmap{$osisID} ? $idmap{$osisID} : "";
+    # Entries with a digit anywhere after the first character are written
+    # commented out.
+    $abbrevs .= "//" if $abbrev =~ /^.+\d/;
+    $abbrevs .= "  {\"" . uc($abbrev) . "\", \"" . $osisID . "\"},\t\t//" . $name . "\n";
+}
+$abbrevs .= "  {\"\", \"\"}\n};\n\n\n";
+
+open(my $out, '>', "builtin_abbrevs.h") or die "Cannot write builtin_abbrevs.h: $!\n";
+print {$out} $abbrevs;
+close($out) or die "Cannot finish writing builtin_abbrevs.h: $!\n";




More information about the sword-cvs mailing list