[sword-svn] r331 - trunk/modules/misc_cleanup

refdoc at crosswire.org refdoc at crosswire.org
Thu Jul 7 13:44:21 MST 2011


Author: refdoc
Date: 2011-07-07 13:44:20 -0700 (Thu, 07 Jul 2011)
New Revision: 331

Added:
   trunk/modules/misc_cleanup/README
   trunk/modules/misc_cleanup/charmap.pl
   trunk/modules/misc_cleanup/numbers.pl
   trunk/modules/misc_cleanup/osis_tr.pl
Modified:
   trunk/modules/misc_cleanup/order.pl
Log:
some small utilities to clean up OSIS files


Added: trunk/modules/misc_cleanup/README
===================================================================
--- trunk/modules/misc_cleanup/README	                        (rev 0)
+++ trunk/modules/misc_cleanup/README	2011-07-07 20:44:20 UTC (rev 331)
@@ -0,0 +1,8 @@
+The scripts in this directory are meant to assist with minor clean-up jobs
+during module creation, mostly from USFM files (Paratext).
+
+As these scripts have dependencies which are not commonly fulfilled on
+normal machines, and also are often not needed in the first place, they 
+are kept out of usfm2osis.pl
+
+These scripts are maintained by Peter von Kaehne (refdoc at crosswire.org)

Added: trunk/modules/misc_cleanup/charmap.pl
===================================================================
--- trunk/modules/misc_cleanup/charmap.pl	                        (rev 0)
+++ trunk/modules/misc_cleanup/charmap.pl	2011-07-07 20:44:20 UTC (rev 331)
@@ -0,0 +1,109 @@
+#!/usr/bin/perl
+
+## Licensed under the standard BSD license:
+
+# Copyright (c) 2002-2011 CrossWire Bible Society <http://www.crosswire.org/>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#        notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of the CrossWire Bible Society nor the names of
+#       its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written
+#       permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+## For general inquiries, comments, suggestions, bug reports, etc. email:
+## sword-support at crosswire.org
+
+#########################################################################
+use XML::LibXML;
+use strict;
+use Unicode::UCD 'charinfo';
+binmode (STDOUT,":utf8");
+
+## Obtain arguments
+if (scalar(@ARGV) < 1) {
+    print "\ncharmap.pl <osisfile> [-o outputfile]\n\n"; 
+    print "- prints a list of characters in text nodes of an OSIS file, ignoring tags etc\n";
+    print "- Arguments in braces < > are required. Arguments in brackets [ ] are optional.\n";
+    print "- If no -o option is specified the output goes to <STDOUT>.\n";
+    exit (-1);
+}
+
+my $file = $ARGV[0];
+my $outputFilename;
+my $outfh;
+# Occurrence count per character; filled in by addTextToCounter().
+my %list;
+
+# With -o, every following print goes to the named file (written as UTF-8)
+# instead of STDOUT.
+if (defined $ARGV[1] && $ARGV[1] eq "-o") {
+    $outputFilename = $ARGV[2];
+    die "Option -o requires an output file name.\n"
+        unless defined $outputFilename && length $outputFilename;
+    open ($outfh, ">:utf8", $outputFilename)
+        or die "Could not open file $outputFilename for writing: $!";
+    select($outfh);
+}
+
+## Initialise OSIS file
+
+my $parser = XML::LibXML->new();
+my $doc = $parser->parse_file($file);
+
+# count out the characters in text nodes only
+
+&text_nodes($doc);
+
+# print results: one tab-separated line per distinct character, holding the
+# character itself, its code point, its count, its script and its name
+
+foreach my $key (sort keys %list) {
+    # Control characters would garble the table, so a blank is shown instead.
+    my $c = ($key =~ /\p{Cc}/) ? " " : $key;
+
+    # charinfo() yields nothing for code points it has no record for; fall
+    # back to an empty record so the line still carries the code point.
+    my $ci     = charinfo(ord($key)) || {};
+    my $code   = defined $ci->{'code'}   ? $ci->{'code'}   : sprintf("%04X", ord($key));
+    my $script = defined $ci->{'script'} ? $ci->{'script'} : "";
+    my $name   = defined $ci->{'name'}   ? $ci->{'name'}   : "";
+
+    print "\t".$c."\tU+".$code."\t".$list{$key}."\t".$script."\t".$name."\n";
+}
+
+# Buffered write errors only surface on close, so check it explicitly.
+if ($outfh) {
+    close($outfh) or die "Could not close file $outputFilename: $!";
+}
+
+##########################################
+
+# Recursively walk the DOM starting at the given node and hand the content
+# of every text node to addTextToCounter(). Nodes of any other type are only
+# searched for children.
+#
+# Argument: the XML::LibXML node to start from (the parsed document at the
+# top-level call).
+sub text_nodes {
+    my ($node) = @_;
+    if ($node->nodeType == XML_TEXT_NODE) {
+        # Count the node's character data rather than toString(): the
+        # serialised form escapes markup characters, so a single "&" would
+        # be tallied as the five characters "&amp;".
+        addTextToCounter($node->data);
+    }
+    else {
+        text_nodes($_) for $node->childNodes();
+    }
+    return;
+}
+
+###########################################
+
+# Add every character of the given string to the file-wide tally %list
+# (character => number of occurrences).
+#
+# Argument: the text whose characters are to be counted.
+sub addTextToCounter {
+    my ($text) = @_;
+    $list{$_}++ for split(//, $text);
+    return;
+}


Property changes on: trunk/modules/misc_cleanup/charmap.pl
___________________________________________________________________
Added: svn:executable
   + *

Added: trunk/modules/misc_cleanup/numbers.pl
===================================================================
--- trunk/modules/misc_cleanup/numbers.pl	                        (rev 0)
+++ trunk/modules/misc_cleanup/numbers.pl	2011-07-07 20:44:20 UTC (rev 331)
@@ -0,0 +1,86 @@
+#!/usr/bin/perl
+
+# numbers.pl translates Western digits (0-9) into Extended Arabic-Indic digits (U+06F0-U+06F9) in the text nodes of XML files
+
+## Licensed under the standard BSD license:
+
+# Copyright (c) 2002-2011 CrossWire Bible Society <http://www.crosswire.org/>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#        notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of the CrossWire Bible Society nor the names of
+#       its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written
+#       permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+## For general inquiries, comments, suggestions, bug reports, etc. email:
+## sword-support at crosswire.org
+
+#########################################################################
+use XML::LibXML;
+use utf8;
+use strict;
+
+## Obtain arguments
+if (scalar(@ARGV) < 1) {
+    print "\nnumbers.pl <osisfile> [-o outputfile]-- - fix Latin numbers in Arabic script text \n";
+    print "- Arguments in braces < > are required. Arguments in brackets [ ] are optional.\n";
+    # The script never creates <osisfile>.new; without -o it prints to STDOUT.
+    print "- If no -o option is specified, the default output is STDOUT\n";
+    exit (-1);
+}
+
+my $file = $ARGV[0];
+my $nextarg = 1;
+my $outputFilename;
+my $outfh;
+
+# With -o, the serialised document goes to the named file instead of STDOUT.
+if (defined $ARGV[$nextarg] && $ARGV[$nextarg] eq "-o") {
+    $outputFilename = $ARGV[$nextarg+1];
+    die "Option -o requires an output file name.\n"
+        unless defined $outputFilename && length $outputFilename;
+    open ($outfh, ">", $outputFilename)
+        or die "Could not open file $outputFilename for writing: $!";
+    select($outfh);
+}
+
+## Initialise OSIS file
+
+my $parser = XML::LibXML->new();
+my $doc = $parser->parse_file($file);
+
+# Rewrite the digits in all text nodes, then emit the whole document.
+&delatinize($doc);
+
+print $doc->toString();
+
+# Buffered write errors only surface on close, so check it explicitly.
+if ($outfh) {
+    close($outfh) or die "Could not close file $outputFilename: $!";
+}
+
+# Recursively walk the DOM starting at the given node and replace the
+# Western digits 0-9 in every text node with the corresponding
+# Extended Arabic-Indic digits. Element names, attributes and other markup
+# are left untouched because only nodes of type XML_TEXT_NODE are modified.
+#
+# Argument: the XML::LibXML node to start from (the parsed document at the
+# top-level call). The tree is modified in place.
+sub delatinize {
+    my ($node) = @_;
+    if ($node->nodeType == XML_TEXT_NODE) {
+        # Work on the node's character data, not on toString(): the
+        # serialised form escapes markup characters, so using it as the
+        # search string for a replacement never matches text that contains
+        # "&", "<" or ">" and such nodes would silently stay unchanged.
+        my $text = $node->data;
+        # tr takes plain character lists; no [ ] brackets are needed.
+        $text =~ tr/0123456789/۰۱۲۳۴۵۶۷۸۹/;
+        $node->setData($text);
+    }
+    else {
+        delatinize($_) for $node->childNodes();
+    }
+    return;
+}
+       
\ No newline at end of file


Property changes on: trunk/modules/misc_cleanup/numbers.pl
___________________________________________________________________
Added: svn:executable
   + *

Modified: trunk/modules/misc_cleanup/order.pl
===================================================================
--- trunk/modules/misc_cleanup/order.pl	2011-06-30 07:30:07 UTC (rev 330)
+++ trunk/modules/misc_cleanup/order.pl	2011-07-07 20:44:20 UTC (rev 331)
@@ -3,6 +3,43 @@
 # order of the books of the Bible. You need to edit the script to prepare for different versifications/canons.
 # You also need to edit line 22 for your particular file naming scheme
 
+## Licensed under the standard BSD license:
+
+# Copyright (c) 2002-2011 CrossWire Bible Society <http://www.crosswire.org/>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#        notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of the CrossWire Bible Society nor the names of
+#       its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written
+#       permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+## For general inquiries, comments, suggestions, bug reports, etc. email:
+## sword-support at crosswire.org
+
+#########################################################################
+
 use strict;
 use warnings;
 use File::Copy;

Added: trunk/modules/misc_cleanup/osis_tr.pl
===================================================================
--- trunk/modules/misc_cleanup/osis_tr.pl	                        (rev 0)
+++ trunk/modules/misc_cleanup/osis_tr.pl	2011-07-07 20:44:20 UTC (rev 331)
@@ -0,0 +1,92 @@
+#!/usr/bin/perl
+
+# osis_tr.pl does on the textnodes of OSIS files what tr does on normal text
+
+## Licensed under the standard BSD license:
+
+# Copyright (c) 2002-2011 CrossWire Bible Society <http://www.crosswire.org/>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#        notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of the CrossWire Bible Society nor the names of
+#       its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written
+#       permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+## For general inquiries, comments, suggestions, bug reports, etc. email:
+## sword-support at crosswire.org
+
+#########################################################################
+use XML::LibXML;
+#use utf8;
+use strict;
+
+## Obtain arguments
+if (scalar(@ARGV) < 3) {
+    print "\nosis_tr.pl <osisfile> <in> <out> [-o outputfile]-- - exchange characters with others, only in text nodes \n";
+    print "- Arguments in braces < > are required. Arguments in brackets [ ] are optional.\n";
+    print "- If no -o option is specified, the default output is STDOUT\n";
+    exit (-1);
+}
+
+my $file = $ARGV[0];
+my $nextarg = 1;
+
+# Characters to look for; read by translate().
+my $in = $ARGV[$nextarg];
+$nextarg++;
+
+# Replacement characters, position by position; read by translate().
+my $out = $ARGV[$nextarg];
+$nextarg++;
+ 
+my $outputFilename;
+my $outfh;
+# With -o, the serialised document goes to the named file instead of STDOUT.
+if (defined $ARGV[$nextarg] && $ARGV[$nextarg] eq "-o") {
+    $outputFilename = $ARGV[$nextarg+1];
+    die "Option -o requires an output file name.\n"
+        unless defined $outputFilename && length $outputFilename;
+    # Report the actual file name (not the <out> argument) and the OS error.
+    open ($outfh, ">", $outputFilename)
+        or die "Could not open file $outputFilename for writing: $!";
+    select($outfh);
+}
+
+## Initialise OSIS file
+
+my $parser = XML::LibXML->new();
+my $doc = $parser->parse_file($file);
+
+# Exchange the characters in all text nodes, then emit the whole document.
+&translate($doc);
+
+print $doc->toString();
+
+# Buffered write errors only surface on close, so check it explicitly.
+if ($outfh) {
+    close($outfh) or die "Could not close file $outputFilename: $!";
+}
+
+# Recursively walk the DOM starting at the given node and, in every text
+# node, replace each character listed in the file-scoped $in with the
+# character at the same position in the file-scoped $out. Markup is left
+# untouched because only nodes of type XML_TEXT_NODE are modified.
+#
+# The character lists are taken literally: ranges such as "a-z" are not
+# expanded. As with tr, the first mapping wins when a character is listed
+# twice in $in, the last character of $out is reused when $out is shorter
+# than $in, and an empty $out leaves the text unchanged.
+#
+# Arguments:
+#   $node - the XML::LibXML node to start from (the parsed document at the
+#           top-level call); the tree is modified in place.
+#   $map  - optional hash reference (character => replacement). Callers
+#           normally omit it; it is built once from $in/$out and handed
+#           down the recursion.
+sub translate {
+    my ($node, $map) = @_;
+
+    if (!$map) {
+        # tr/// builds its table at compile time and does not interpolate
+        # variables, so tr/[$in]/[$out]/ would only map the literal letters
+        # of the variable names. Build an explicit lookup table instead.
+        my ($from, $to) = ($in, $out);
+        # The DOM holds decoded characters while command-line arguments
+        # arrive as bytes; decode them so multi-byte characters line up.
+        # Assumes the arguments are UTF-8 (they are left as-is otherwise).
+        utf8::decode($from);
+        utf8::decode($to);
+        my @from = split(//, $from);
+        my @to   = split(//, $to);
+        $map = {};
+        for my $i (0 .. $#from) {
+            next if exists $map->{$from[$i]};
+            $map->{$from[$i]} = !@to     ? $from[$i]
+                              : $i < @to ? $to[$i]
+                              :            $to[-1];
+        }
+    }
+
+    if ($node->nodeType == XML_TEXT_NODE) {
+        # Work on the node's character data, not on toString(): the
+        # serialised form escapes markup characters and would not match
+        # the stored text when used as a search string.
+        my $text = $node->data;
+        $text =~ s/(.)/exists $map->{$1} ? $map->{$1} : $1/gse;
+        $node->setData($text);
+    }
+    else {
+        translate($_, $map) for $node->childNodes();
+    }
+    return;
+}
+       
\ No newline at end of file


Property changes on: trunk/modules/misc_cleanup/osis_tr.pl
___________________________________________________________________
Added: svn:executable
   + *




More information about the sword-cvs mailing list