[sword-svn] r303 - trunk/modules/portuguese

refdoc at crosswire.org refdoc at crosswire.org
Tue Sep 14 16:16:34 MST 2010


Author: refdoc
Date: 2010-09-14 16:16:34 -0700 (Tue, 14 Sep 2010)
New Revision: 303

Modified:
   trunk/modules/portuguese/transform.pl
Log:
Updated; now produces USFM for the text part.


Modified: trunk/modules/portuguese/transform.pl
===================================================================
--- trunk/modules/portuguese/transform.pl	2010-09-09 22:47:43 UTC (rev 302)
+++ trunk/modules/portuguese/transform.pl	2010-09-14 23:16:34 UTC (rev 303)
@@ -1,25 +1,48 @@
 #!/usr/bin/perl
 
+use XML::LibXSLT;
+use XML::LibXML;
 
 my @files=`ls -1 *.xml`;
-# my @ident=`cat books`;
 
 foreach (@files){ 
 
         my @lines;
         my @text;
         my $tag;
- #       my %vs;
- #       my %kw;
+ 
         
 	chop;
-	open SIMPLE, ">>$_.simple.xml";
+	open TEXT, ">>$_.text.xml";
+	open USFM, ">>$_.text.sfm";
+	open PREFACE, ">>$_.preface.xml";
+	
 	chomp(@lines=`cat $_`);
- #	@lines[1]="\\id @ident[$_-1] ";
+
  	
  	
  	foreach (@lines) {
  		
+		s/(size=\"20\"\ face=\".*?\-Bold\"\ color=\"\#6D6E70\")/class=\"chapter\"\ $1/g;
+	}
+
+SPLIT:   foreach (@lines) {
+                
+                if (/chapter/) {
+                   print (PREFACE "</page></pdf2xml>");
+                   $text='<?xml version="1.0" encoding="utf-8" ?><pdf2xml><page>';
+                   last SPLIT;
+                   }
+                else {
+                   s/(size=\"8\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"intro_para\"\ $1/g;   
+                   print (PREFACE $_."\n");
+                   $_="";
+                }
+        }
+
+        
+        foreach (@lines) {
+
  		s/(size=\"17\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"bookname\"\ \ $1/g;
 		s/(size=\"4\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"verse_no\"\ \ $1/g;
 		s/(size=\"8\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"maintext\"\ \ $1/g;
@@ -33,11 +56,12 @@
 		# s/(size=\"8\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"Intro_title\"\ $1/g;
 		s/<text.*?>/<text>\ $1/g;
 		s/(size=\"7\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"ref_key\"\ $1/g;
-		s/(size=\"20\"\ face=\".*?\-Bold\"\ color=\"\#6D6E70\")/class=\"chapter\"\ $1/g;
-		s/(size=\"8\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"intro_para\"\ $1/g;
 		s/(size=\"6\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"bookname2\"\ $1/g;
                 s/$/\n/;		
-	}
+        }
+        
+
+        
 	
 	
 	foreach (@lines) {
@@ -51,7 +75,7 @@
 	}	 
 	
 
-	$text = join ("", @lines);
+	$text = $text.join ("", @lines);
 	
 	
 	$text =~ s/\n\s*<text>//g;
@@ -60,16 +84,27 @@
 	$text =~ s/(S)<\/maintext>\n\s*<Lords_Name>\s*(ENHOR)<\/Lords_Name>\n\s*<maintext>/<Lords_Name>$1$2<\/Lords_Name>/g;
 	$text =~ s/(<verse_no>.*?<\/verse_no>)\n\s*(<maintext>.*?<\/maintext>)\n\s*?:(<verse_no>)/<verse>$1$2<\/verse>\n<verse_no>/g;
 	
-	# @lines = split(/\n/,$text);
-	# foreach (@lines) {
-	
-		
+	# create an instance of XSL::XSLT processor
+        print TEXT $text;
+        close text;
+    
+        my $parser = new XML::LibXML;
+        my $xslt   = new XML::LibXSLT;
+          
+        my $source     = $parser->parse_string($text);
+        my $style_doc  = $parser->parse_file('transform.xsl');
 
+        my $stylesheet = $xslt->parse_stylesheet($style_doc);
+        my $results    = $stylesheet->transform($source);
+                    
+        print USFM $stylesheet->output_string($results);
+        
+        close USFM;
+       
+        $text="";
+       
 	
-	print (SIMPLE $text);
-	close SIMPLE;
 	
-	
 }
 
 




More information about the sword-cvs mailing list