[sword-svn] r142 - trunk/modules/perlconverters

chrislit at www.crosswire.org chrislit at www.crosswire.org
Thu Jun 12 02:04:39 MST 2008


Author: chrislit
Date: 2008-06-12 02:04:38 -0700 (Thu, 12 Jun 2008)
New Revision: 142

Modified:
   trunk/modules/perlconverters/usfm2osis.pl
Log:
added improved encoding support (removing need for uconv)
added some status messages


Modified: trunk/modules/perlconverters/usfm2osis.pl
===================================================================
--- trunk/modules/perlconverters/usfm2osis.pl	2008-06-12 06:06:02 UTC (rev 141)
+++ trunk/modules/perlconverters/usfm2osis.pl	2008-06-12 09:04:38 UTC (rev 142)
@@ -39,8 +39,8 @@
 
 #########################################################################
 
-$version = "1.2";
-$date = "2008-05-16";
+$version = "1.3";
+$date = "2008-06-12";
 $osisVersion = "2.1.1";
 
 %OSISbook = (
@@ -67,37 +67,63 @@
  "1MA" => "1Macc", "2MA" => "2Macc", "3MA" => "3Macc", "4MA" => "4Macc",
  "1ES" => "1Esd", "2ES" => "2Esd", "MAN" => "PrMan",
 # Following this is just an uneducated guess
-"PS2" => "Ps151", "ODA" => "Odes", "PSS" => "PssSol", "JSA" => "Josh",
+ "PS2" => "Ps151", "ODA" => "Odes", "PSS" => "PssSol", "JSA" => "Josh",
  "JSB" => "Josh", "TBS" => "Tob", "SST" => "Sus", "DNT" => "Dan",
  "BLT" => "Bel", "ADE" => "AddEsth"
 );
 
+use Encode;
+@encodingList = Encode->encodings(":all");
+foreach $enc (@encodingList) {
+    $encodings .= "$enc, ";
+}
+$encodings =~ s/\, $//;
+
+
 if (scalar(@ARGV) < 2) {
-    print "usfm2osis.pl -- USFM to OSIS $osisVersion converter version $version ($date)\nSyntax: usfm2osis.pl <osisWork> [-o OSIS-file] <USFM filenames|wildcard>\n";
+    print "usfm2osis.pl -- USFM to OSIS $osisVersion converter version $version ($date)\nSyntax: usfm2osis.pl <osisWork> [-o OSIS-file] [-e USFM encoding] <USFM filenames|wildcard>\n\n";
+    print "Supported encodings include:\n\t$encodings\n\n";
+    print "If the encoding is omitted, utf8 is the default value.\n";
     exit (-1);
 }
 
 $osisWork = $ARGV[0];
 
-if ($ARGV[1] eq "-o") {
-    $outputFilename = "$ARGV[2];"
+$nextarg = 1;
+
+if ($ARGV[$nextarg] eq "-o") {
+    $outputFilename = "$ARGV[$nextarg+1]";
+    $nextarg += 2;
 }
 else {
     $outputFilename = "$osisWork.osis.xml";
 }
-open (OUTF, ">$outputFilename") or die "Could not open file $ARGV[2] for writing.";
+open (OUTF, , ">:utf8", "$outputFilename") or die "Could not open file $ARGV[2] for writing.";
 
-if ($ARGV[1] eq "-o") {
-    for ($i = 3; $i < scalar(@ARGV); $i++) {
-	push(@files, $ARGV[$i]);
-    }
+if ($ARGV[$nextarg] eq "-e") {
+    $inputEncoding = "$ARGV[$nextarg+1]";
+    $nextarg += 2;
 }
 else {
-    for ($i = 1; $i < scalar(@ARGV); $i++) {
-	push(@files, $ARGV[$i]);
+    $inputEncoding = "utf8";
+}
+$encFound = 0;
+foreach $enc (@encodingList) {
+    if ($enc eq $inputEncoding) {
+	$encFound = 1;
     }
 }
+if ($encFound == 0) {
+    die "Encoding $inputEncoding not supported.\nSupported encodings include:\n\t$encodings\n";
+}
+else {
+    print "Encoding \"$inputEncoding\" is supported.\n"
+}
 
+for (; $nextarg < scalar(@ARGV); $nextarg++) {
+    push(@files, $ARGV[$nextarg]);
+}
+
 push (@outdata, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<osis xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.$osisVersion.xsd\">\n<osisText osisRefWork=\"Bible\" xml:lang=\"en\" osisIDWork=\"$osisWork\">\n<header>\n<work osisWork=\"$osisWork\"\/>\n<\/header>\n");
 
 $tagStack = "<\/osisText><\/osis>";
@@ -126,7 +152,15 @@
 }
 
 foreach $file (@files) {
-    @filedata = `cat \"$file\"`;
+    print "Processing $file.\n";
+    open (SFM, "$file");
+    my @filedata = "";
+    while (<SFM>) {
+	my $sfline;
+	$sfline = decode($inputEncoding, $_);
+	push (@filedata, $sfline);
+    }
+    close (SFM);
 
     $ollevel = 0;
     $vers = 0;
@@ -619,8 +653,6 @@
 	    push (@outdata, "$line\n");
 	}
     }
-
-    close (INF);
 }
 
 push (@outdata, closeTag("<\/osis>"));
@@ -638,9 +670,10 @@
 	print OUTF @outdata[$i];
     }
 }
-
 close (OUTF);
 
+print "Doing some cleanup.\n";
+
 open (INF, "$outputFilename");
 @filedata = <INF>;
 close (INF);
@@ -660,6 +693,8 @@
 }
 $fullfile =~ s/<\/div>\n(<chapter eID[^>]+>)/$1\n<\/div>/mg; #swap the chapter back up one before the book closer
 
+print "Tagging quotations.\n";
+
 $q = 1;
 
 $fullfile =~ s/\$([^\%]+?)\%/"<q level=\"2\" sID=\"q2." . $q . "\"\/>" . $1 . "<q level=\"2\" eID=\"q2." . $q++ . "\"\/>"/eg;
@@ -680,7 +715,7 @@
 
 $fullfile =~ s/\^/"<q level=\"1\" eID=\"q1." . $q++ . ".false\"\/>"/eg;
 
-
-
 print OUTF $fullfile;
 close (OUTF);
+
+print "All done! OSIS file: $outputFilename\n";




More information about the sword-cvs mailing list