[sword-svn] r321 - in trunk/modules: . misc_cleanup

refdoc at crosswire.org refdoc at crosswire.org
Sat Apr 16 15:50:09 MST 2011


Author: refdoc
Date: 2011-04-16 15:50:09 -0700 (Sat, 16 Apr 2011)
New Revision: 321

Added:
   trunk/modules/misc_cleanup/
   trunk/modules/misc_cleanup/title_cleanup.pl
Log:
improved title encoding


Added: trunk/modules/misc_cleanup/title_cleanup.pl
===================================================================
--- trunk/modules/misc_cleanup/title_cleanup.pl	                        (rev 0)
+++ trunk/modules/misc_cleanup/title_cleanup.pl	2011-04-16 22:50:09 UTC (rev 321)
@@ -0,0 +1,50 @@
+#!/usr/bin/perl
+# Post-process usfm2osis.pl output: give every <title> lacking a 'type' its
+# parent's type, and mark 'section' titles lacking a 'subType' as 'x-preverse'.
+
+use XML::LibXML;
+use utf8;
+use strict;
+use warnings;
+
+## Obtain arguments
+if (scalar(@ARGV) < 1) {
+    print "\ntitle_cleanup.pl <osisfile> [-o outputfile]-- - fix output of usfm2osis.pl \n";
+    print "- Arguments in braces < > are required. Arguments in brackets [ ] are optional.\n";
+    print "- If no -o option is specified for the output filename, the default output file is: \n\t<osisfile>.new\n";
+    print "- The script will improve the encoding of title tag.\n";
+    exit (-1);
+}
+
+my $file = $ARGV[0];
+my $outputFilename = "$file.new";    # default when -o is not given
+
+if (defined $ARGV[1] && $ARGV[1] eq "-o") {
+    # Fail early: a bare -o would otherwise try to open an empty path.
+    die "Option -o requires an output filename.\n"
+        unless defined $ARGV[2] && length $ARGV[2];
+    $outputFilename = $ARGV[2];
+}
+
+## Initialise OSIS file (parsed before the output is opened, so an unreadable
+## or malformed input does not leave a truncated output file behind)
+my $parser = XML::LibXML->new();
+my $doc = $parser->parse_file($file);
+
+## Search for titles without 'type' info and give parental type. Also add 'subType=x-preverse'
+foreach my $title ($doc->getElementsByTagName('title')) {
+    my $parent = $title->parentNode;
+    # A root-level title has the document node as parent, which has no attributes.
+    my $parentIsTyped = $parent && $parent->can('hasAttribute') && $parent->hasAttribute('type');
+    if ($parentIsTyped && !$title->hasAttribute('type')) {
+        $title->setAttribute('type', $parent->getAttribute('type'));
+    }
+    if (!$title->hasAttribute('subType') && $title->hasAttribute('type') && $title->getAttribute('type') eq 'section') {
+        $title->setAttribute('subType', 'x-preverse');
+    }
+}
+
+## toString returns bytes in the document's own encoding; no encoding layer is added
+open(my $out, '>', $outputFilename) or die "Could not open file $outputFilename for writing: $!";
+print {$out} $doc->toString() or die "Could not write to $outputFilename: $!";
+close($out) or die "Could not close $outputFilename: $!";


Property changes on: trunk/modules/misc_cleanup/title_cleanup.pl
___________________________________________________________________
Added: svn:executable
   + *




More information about the sword-cvs mailing list