[sword-svn] r68 - in trunk/modules/hebrew-wlc/WLC2OSIS: . WLC2OSIS WLC2OSIS/Parse WLC2OSIS/Translate WLC2OSIS/Utilities

mgruner at crosswire.org mgruner at crosswire.org
Fri Jul 7 09:50:42 MST 2006


Author: mgruner
Date: 2006-07-07 09:50:30 -0700 (Fri, 07 Jul 2006)
New Revision: 68

Removed:
   trunk/modules/hebrew-wlc/WLC2OSIS/Utilities/
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Header.java
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Utilities/FileRead.java
Modified:
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Tokenizer.java
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java
Log:
Update; unusable at the moment.


Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java	2006-07-06 20:03:39 UTC (rev 67)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java	2006-07-07 16:50:30 UTC (rev 68)
@@ -1,5 +1,6 @@
 package WLC2OSIS.Parse ;
 
+import java.io.*;
 import WLC2OSIS.* ;
 import WLC2OSIS.Translate.* ;
 import WLC2OSIS.Utilities.* ;
@@ -56,6 +57,8 @@
 int VerseNumber ;
 int WordNumber ;
 
+BufferedReader file;
+
 //-----------------------------------------------------------------------------
 public Parser(WLC2OSIS A) {
     this.A = A ;
@@ -86,37 +89,52 @@
     boolean PreviousEOL = true ;
     System.out.println("\n") ;
     
-    A.w = new XMLWriter(A.OutputDirectory, "WLC_OSIS") ;
-     
      // Write the header
-    Header.writeHeader(A, A.w) ;
+    Header.writeHeader(A, A.wlc) ;
+    Header.writeHeader(A, A.morph) ;
+    
+   	try{
+	    file = new BufferedReader( new FileReader( A.InputFilename ));
+	}
+	catch (IOException e) {
+		System.out.println("File not found: " + e) ;
+    }
      
-    for (int k = 0; k < A.InputChars.length ; k++){
+    while ( true ){
+    	s="";
+    	try{
+	    	s= file.readLine();
+	    }
+	  	catch (IOException e) {
+			System.out.println("Read error: " + e) ;
+			break;
+        }
     
-        s = t.nextToken() ;
-        System.out.println("processing: " + s);
-
-        if(s.compareTo(t.EOF) == 0){
-            break ;
-            }
-            
-//-----------------------------------------------------------------------------
+//        System.out.println("processing: " + s);
         
-//  Process a line identifier
-
-        if (PreviousEOL){
-            int ColonIndex = s.indexOf(':') ;
-            if(ColonIndex <=0 ){
-                System.out.println("Parser: Incorrect line identifier: " + s + " !") ;
-                break ;
-                }
-            PreviousEOL= false ;
+        if ( s.startsWith(">") ){ //ignore this line
+        	continue;
+        }
+        
+        java.util.regex.Pattern p = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)\\S*\\s(\\S+)\\s(\\S+)(?:@|%)(\\S+)");
+		java.util.regex.Matcher m = p.matcher( s );
+		if (!m.matches()){
+			System.out.println("No match!");
+			System.exit(1);
+		}
+		
+// Parse the identifier
+            String BookCode = m.group(1);
+            int Chapter = Integer.parseInt( m.group(2) );
+            int Verse   = Integer.parseInt( m.group(3) );
+            int wordNumber	= Integer.parseInt( m.group(4) );
+            int subWordNumber = Integer.parseInt( m.group(5) );
+            String expression = m.group(6);
+            String lemma	= m.group(7);
+            String grammar  = m.group(8);
             
-// Parse the identifier
-            String BookCode = s.substring(0,2) ;
-            int Chapter = Integer.parseInt( s.substring(2, ColonIndex) ) ;
-            int Verse = Integer.parseInt( s.substring(ColonIndex+1) ) ;
-
+        System.out.println(BookCode + " " + Chapter + " " + Verse + " " + wordNumber + " " + subWordNumber + " " +expression+" "+lemma+" "+grammar);
+/*
 // Change in Book, start a book.
             if(BookCode.compareTo(LastBookCode) != 0){
                 v.end() ;
@@ -145,18 +163,12 @@
                 v.end() ;
                 v.start() ;
                 LastVerse = Verse ;
-                }
-            }
+                }*/
 //-----------------------------------------------------------------------------
 
 //   Process a word.
               
-        else{
-            if(s.compareTo(t.EOL) == 0){
-                PreviousEOL = true ;
-                }
-            else{
-                if (s.length() > 1){
+/*                if (s.length() > 1){
                      w.process(s) ;
                      }
                 else{
@@ -173,11 +185,8 @@
                     else{  // It's a word
                         w.process(s) ;
                         }
-                     }
-                }
-            }
-        
-        }
+                     }*/
+	        }
     v.end() ;
     c.end() ;
     b.end() ;   

Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Tokenizer.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Tokenizer.java	2006-07-06 20:03:39 UTC (rev 67)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Tokenizer.java	2006-07-07 16:50:30 UTC (rev 68)
@@ -17,7 +17,7 @@
 public final String EOF = "***EOF***" ;
 public final String EOL = "***EOL***" ;
 WLC2OSIS A ;
-CharArrayReader car ;
+//CharArrayReader car ;
 StreamTokenizer st ;
 
 //-----------------------------------------------------------------------------
@@ -27,8 +27,13 @@
 
 // Set up the tokenizer
 
-    car = new CharArrayReader(A.InputChars) ;    
-    st = new StreamTokenizer( car) ;
+//    car = new CharArrayReader(A.InputChars) ;    
+    try{
+    	st = new StreamTokenizer( new FileInputStream( A.InputFilename ) ) ;
+    }
+    catch (IOException e) {
+    	System.exit(0);
+    }
     st.resetSyntax() ;
     st.wordChars(33, 126 ) ; // All printables are word characters
     st.ordinaryChar(63) ; // ? is a special symbol, the EOL marker.

Deleted: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Header.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Header.java	2006-07-06 20:03:39 UTC (rev 67)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Header.java	2006-07-07 16:50:30 UTC (rev 68)
@@ -1,69 +0,0 @@
-package WLC2OSIS.Translate ;
-
-import WLC2OSIS.* ;
-import WLC2OSIS.Translate.* ;
-import WLC2OSIS.Utilities.* ;
-
-// import java.util.Date ;
-// import java.text.SimpleDateFormat ;
-//==============================================================================
-/**
- *  <b>Header information for Tanach.</b><p>
- */
-//==============================================================================
-public class Header{
-
-// static SimpleDateFormat DateFormat = new SimpleDateFormat("dd MMM yyyy") ;
-// static String DateTime ;
-
-public Header(){
-    }
-//-----------------------------------------------------------------------------
-
-// Writes the Notes to the XML file.
-
-public static void writeHeader(WLC2OSIS A, XMLWriter w) {
-//     Date DT = new Date() ;
-//     DateTime = DateFormat.format(DT) ;
-    A.w.openTag("osisText osisIDWork=\"WLC\" osisRefWork=\"bible\" xml:lang=\"he\"", 0) ;
-    A.w.openTag("header", 0) ;
-    
-    A.w.openTag("work osisWork=\"WLC\"", 1) ;
-    
-    A.w.writeString("title", 2, "Westminster Leningrad Codex");
-    A.w.writeAttributedString("contributor", 2, "role=\"encoder\"", "Martin Gruner");
-    A.w.writeAttributedString("type", 2, "type=\"OSIS\"", "Bible");
-    A.w.writeAttributedString("identifier", 2, "type=\"OSIS\"", "Bible.he.WLC.2004");
-    A.w.writeAttributedString("rights", 2, "type=\"x-copyright\"", 
-    	"The WLC is maintained by the Westminster Hebrew Institute, Philadelphia, PA (http://whi.wts.edu/WHI)");
-    A.w.writeString("scope", 2, "Hebrew Bible, Old Testament");
-    A.w.writeString("refSystem", 2, "MT");
-        
-    A.w.closeTag("work", 1);
-    
-    A.w.closeTag("header", 0);
-    
-//     A.w.writeString("hebrewname", 1, H.Tnk) ;
-//     A.w.writeString("title", 1, A.Title) ;
-//     A.w.writeString("shortdescription", 1, A.ShortDescription)  ;
-//     for (int i =0; i < A.Description.length; i++){
-//         A.w.writeString("description", 1, A.Description[i]) ;
-//         }
-//     //A.w.writeString("date", 1, A.Date) ;
-//     A.w.writeString("transcriptiondate", 1, DateTime) ;
-//     A.w.writeString("copyright", 1, "\u00A9 C. V. Kimball 2004") ;
-//     A.w.writeString("filename", 1, A.InputFilename) ;
-//     
-//     A.w.closeTag("header", 0) ;
-    }
-
-//==============================================================================
-
-public static void writeFooter(WLC2OSIS A, XMLWriter w) {
-        
-    A.w.closeTag("osisText", 0);
-   
-    }
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
-}//==============================================================================

Deleted: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Utilities/FileRead.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Utilities/FileRead.java	2006-07-06 20:03:39 UTC (rev 67)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Utilities/FileRead.java	2006-07-07 16:50:30 UTC (rev 68)
@@ -1,106 +0,0 @@
-package WLC2OSIS.Utilities ;
-
-import WLC2OSIS.* ;
-import Utilities.FileChooser ;
-//import Utilities.Message ;
-
-import java.io.* ;
-import javax.swing.* ;
-import java.awt.* ;
-//==============================================================================
-/**
- *  <b>Reads the input file.</b>
- */
-//==============================================================================
-public class FileRead{
-
-
-WLC2OSIS A ;
-int InputLength ; 
-public byte[] InputBuffer ;
-File F ;
-FileInputStream FIS ;
-boolean Error ;
-//-----------------------------------------------------------------------------
-
-public FileRead(WLC2OSIS A ) {
-    this.A = A ;
-    InputBuffer = new byte[A.InputBufferSize] ;
-    Error = false ;
-    }    
-//------------------------------------------------------------------------------
-
-// Reads the specified file, forming the char[] InputChars and StringBuffer Input.
-
-public void read(String Filename){
-    Error = true ;
-        
-// Open the file
-
-    F = new File(Filename) ;
-    
-    try{
-        FIS = new FileInputStream(F);
-        if (FIS.available() > InputBuffer.length){
-            System.out.println(
-                "The input file length, " + FIS.available() 
-                    + " bytes,\nis too long for the internal buffer of "
-                        + InputBuffer.length + " bytes.") ;
-            return ;
-            }
-        }
-    catch(IOException e){
-        System.out.println(
-            "FileRead.read: Error in opening FileInputStream.\n\n"
-            + F.getPath() + "\n\n"
-            + e.toString() + "\nNo further action taken.") ;
-        return ;
-        }
-//------------------------------------------------------------------------------
-
-// Read the file
-
-    try{
-        InputLength = FIS.read(InputBuffer) ;
-        }
-    catch(IOException e){
-        System.out.println(
-            "FileRead.read: Error on read of input file.\n\n"
-            + F.getPath() + "\n\n"
-            + e.toString() + "\nNo further action taken.") ;
-        return;
-        }
-
-// Close the file.
-        
-    try{
-        FIS.close() ; 
-        }
-    catch(IOException e){
-        System.out.println(
-            "FileRead.read: Error on close of input file.\n\n"
-            + F.getPath() + "\n\n"
-            + e.toString() + "\nNo further action taken.") ;
-        }              
-    
-// Convert bytes to char[] array.
-
-    A.InputChars = new char[InputLength] ;
-    for (int k = 0; k < InputLength; k++){
-        short shrt = (short) InputBuffer[k] ;
-        A.InputChars[k] = (char) shrt ;
-        }
-
-    Error = false ;
-    }
-//------------------------------------------------------------------------------
-
-// Gets the error condition.
-
-public boolean getError(){
-    return Error ;
-    }
-//-----------------------------------------------------------------------------
-}
-//==============================================================================
-//==============================================================================

Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java	2006-07-06 20:03:39 UTC (rev 67)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java	2006-07-07 16:50:30 UTC (rev 68)
@@ -70,10 +70,12 @@
 
 //-----------------------------------------------------------------------------
 
-public final int InputBufferSize = 40000000 ; // Length of input in bytes.
+//public final int InputBufferSize = 40000000 ; // Length of input in bytes.
 public char[] InputChars ;  // Input char[] array set by FileRead.
 public Parser p ;
-public XMLWriter w ;
+public XMLWriter wlc;
+public XMLWriter morph;
+
 public Fmt F = new Fmt() ;
 // public boolean StandAlone = true ;  // If used by another app, 
  
@@ -82,21 +84,43 @@
     InputFilename = file ;
     OutputDirectory = directory ;    
 
-// Identifying printouts
-
     F.bar("=", 80) ;
     System.out.println("\nWLC2OSIS: " + Title + " " + ShortDescription ) ;
     System.out.println("\nInput file:       " + InputFilename ) ;
-//    System.out.println(  "Input file date:  " + Date ) ;
     System.out.println("\nOutput directory: " + OutputDirectory ) ;
         
-    FileRead FR = new FileRead(this) ;
-    FR.read(InputFilename) ;  
+// Read, parse, and write the book files.
+
+    wlc = new XMLWriter(A.OutputDirectory, "wlc_osis") ;
+    morph = new XMLWriter(A.OutputDirectory, "morph_osis") ;
+
+
+    wlc.openTag("osisText osisIDWork=\"WLC\" osisRefWork=\"bible\" xml:lang=\"he\"", 0) ;
+    wlc.openTag("header", 0) ;
     
-// Read, parse, and write the book files.
+    wlc.openTag("work osisWork=\"WLC\"", 1) ;
+    
+    wlc.writeString("title", 2, "Westminster Leningrad Codex");
+    wlc.writeAttributedString("contributor", 2, "role=\"encoder\"", "Martin Gruner");
+    wlc.writeAttributedString("type", 2, "type=\"OSIS\"", "Bible");
+    wlc.writeAttributedString("identifier", 2, "type=\"OSIS\"", "Bible.he.WLC.2004");
+    wlc.writeAttributedString("rights", 2, "type=\"x-copyright\"", 
+      "The WLC is maintained by the Westminster Hebrew Institute, Philadelphia, PA (http://whi.wts.edu/WHI)");
+    wlc.writeString("scope", 2, "Hebrew Bible, Old Testament");
+    wlc.writeString("refSystem", 2, "MT");
+       
+    wlc.closeTag("work", 1);
+    wlc.closeTag("header", 0);
+
+
+
+
+
     p = new Parser(this) ;
     p.parse() ;    
 
+    wlc.closeTag("osisText", 0);
+
     done() ;
     } 
 



More information about the sword-cvs mailing list