[sword-svn] r169 - in trunk/source/data/translit/crosswire: . cldr

chrislit at crosswire.org chrislit at crosswire.org
Fri Feb 20 20:47:57 MST 2009


Author: chrislit
Date: 2009-02-20 20:47:57 -0700 (Fri, 20 Feb 2009)
New Revision: 169

Modified:
   trunk/source/data/translit/crosswire/cldr/cldr2icu.pl
   trunk/source/data/translit/crosswire/icu2cldr.pl
Log:
added NFD normalization
fixed a space (\s) over-generalization issue
fixed a mis-ordered regex substitution


Modified: trunk/source/data/translit/crosswire/cldr/cldr2icu.pl
===================================================================
--- trunk/source/data/translit/crosswire/cldr/cldr2icu.pl	2009-02-21 03:16:40 UTC (rev 168)
+++ trunk/source/data/translit/crosswire/cldr/cldr2icu.pl	2009-02-21 03:47:57 UTC (rev 169)
@@ -2,6 +2,7 @@
 
 use utf8;
 use Encode;
+use Unicode::Normalize;
 
 opendir (DIR, ".");
 @xlits = grep /\.xml$/, readdir (DIR);
@@ -28,12 +29,12 @@
     while (<INF>) {
 	$line = $_;
 	$line =~ s/^\x{FEFF}//;
-	$line =~ s/\s*$//;
-	$line =~ s/^\s*//;
+	$line =~ s/[\t\r\n ]*$//;
+	$line =~ s/^[\t\r\n ]*//;
 	
 	$line =~ s/<[^<>]+?>//g;
 	
-	if ($line !~ /^\s*$/) {
+	if ($line !~ /^[\t\r\n ]*$/) {
 
 	    $line =~ s/↔/<>/g;
 	    $line =~ s/→/>/g;
@@ -44,7 +45,9 @@
 	    $line =~ s/&amp;/&/g;
 
 	    $line =~ s/\\u([0-9A-Fa-f]{1,6})/chr(hex($1))/eg;
-
+	    
+	    $line = NFD($line);
+	    
 	    print OUTF "$line\n";
 	}
     }

Modified: trunk/source/data/translit/crosswire/icu2cldr.pl
===================================================================
--- trunk/source/data/translit/crosswire/icu2cldr.pl	2009-02-21 03:16:40 UTC (rev 168)
+++ trunk/source/data/translit/crosswire/icu2cldr.pl	2009-02-21 03:47:57 UTC (rev 169)
@@ -2,6 +2,7 @@
 
 use utf8;
 use Encode;
+use Unicode::Normalize;
 
 opendir (DIR, ".");
 @xlits = grep /\.txt$/, readdir (DIR);
@@ -31,18 +32,20 @@
     while (<INF>) {
 	$line = $_;
 	$line =~ s/^\x{FEFF}//;
-	$line =~ s/\s*$//;
-	$line =~ s/^\s*//;
-	if ($line !~ /^\s*$/) {
+	$line =~ s/[\t\r\n ]*$//;
+	$line =~ s/^[\t\r\n ]*//;
+	if ($line !~ /^[\t\r\n ]*$/) {
 
 	    $line =~ s/([^\\])<>/$1↔/g;
 	    $line =~ s/([^\\])>/$1→/g;
 	    $line =~ s/([^\\])</$1←/g;
 
+	    $line =~ s/&/&amp;/g;
 	    $line =~ s/</&lt;/g;
 	    $line =~ s/>/&gt;/g;
-	    $line =~ s/&/&amp;/g;
 
+	    $line = NFD($line);
+
 	    if ($line =~ /^\#/) {
 		print OUTF "<comment>$line<\/comment>\n";
 	    }




More information about the sword-cvs mailing list