[sword-svn] r72 - in trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS: . Parse Translate

mgruner at crosswire.org mgruner at crosswire.org
Mon Jul 10 12:20:17 MST 2006


Author: mgruner
Date: 2006-07-10 12:20:08 -0700 (Mon, 10 Jul 2006)
New Revision: 72

Modified:
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java
Log:


Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java	2006-07-10 16:56:50 UTC (rev 71)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java	2006-07-10 19:20:08 UTC (rev 72)
@@ -19,7 +19,6 @@
 
 public Words w ;
 Translate T ;
-Markers m ;
 public WKQ wkq ;
 
 // Current state
@@ -32,8 +31,11 @@
 public int BookVerseCount ;
 public int BookChapterCount ;
 
-int WordNumber ;
+public String MorphologicalSegmentStart  = "<seg type=\"morph\">" ;
+public String MorphologicalSegmentEnd    = "</seg>" ;
+public String MorphologicalDivisionMarker = MorphologicalSegmentEnd + MorphologicalSegmentStart;
 
+
 BufferedReader file;
 
 
@@ -44,17 +46,12 @@
     T = new Translate(A, this) ;
   
     w = new Words(A, this) ;
-    m = new Markers(A, this) ;
     wkq = new WKQ(this) ;
     new MC() ;
     Note.setNotes();
     }    
-//------------------------------------------------------------------------------
 
-// Gets the next token as a String.
 
-// EOF is indicated by a return of EOF.
-
 public void parse(){
     String s ;
     System.out.println("\n");
@@ -75,7 +72,7 @@
 	int oldSubWordNumber = 0;
 
 
-    java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)(\\]\\S)?\\s(\\S+)\\s(\\S+)(?:@|%)(\\S+)");
+    java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)(\\]\\S)?\\s(\\S+)\\s(\\S+)(@|%)(\\S+)");
      
     while ( true ){
     	s="";
@@ -115,7 +112,8 @@
 		String note = match.group(6);
 		String expression = match.group(7);
 		String lemma	= match.group(8);
-		String grammar  = match.group(9);
+		String separator = match.group(9);
+		String grammar  = match.group(10);
 		
 		if ((newBookCode != oldBookCode) || (newChapter != oldChapter) || (newVerse != oldVerse))
 		{
@@ -125,10 +123,12 @@
 		    		BookName.getBookName(bookNames, newBookCode).abbrev+"."+
 		    		newChapter+"."+
 		    		newVerse+"\"", 2);
-		    oldBookCode = newBookCode;
-		    oldChapter = newChapter;
-		    oldVerse = newVerse;
 		}
+		
+		//same verse, another word, add space
+		if ((oldVerse == newVerse) && (oldWordNumber != newWordNumber)){
+			A.writer.appendText(" ");
+		}
   
 //   Process a word.
               
@@ -143,19 +143,22 @@
                     else if( s.compareTo("S")==0){
                         m.samek() ;
                         }
-                    else if( s.compareTo("?")==0){
-                        m.line() ;       
-                        }
                     else{  // It's a word
                         w.process(s) ;
                         }
                      }*/
-	        }
+	    oldBookCode = newBookCode;
+	    oldChapter = newChapter;
+	    oldVerse = newVerse;
+	    oldWordNumber = newWordNumber;
+	    oldSubWordNumber = newSubWordNumber;
+
+    }
     
     A.writer.closeTag("verse", 2);
     
     return ;
-    }
+}
 //----------------------------------------------------------------------------
 
 // Counts the number of occurences of a character in a String.

Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java	2006-07-10 16:56:50 UTC (rev 71)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java	2006-07-10 19:20:08 UTC (rev 72)
@@ -16,19 +16,31 @@
 boolean TrailingMaqaf ;
 String MaqafWord ;
 String MaqafWordType ;
+Markers m ;
 
+
 //-----------------------------------------------------------------------------
 
 public Words(WLC2OSIS A, Parser P ) {
     this.A = A ;
     this.P = P ;
+    m = new Markers(A, P) ;
+
     }    
 //------------------------------------------------------------------------------
 
 // Processes a word,
 public void process(String W){
-        P.wkq.process(W) ;
-    }
+	if( W.compareTo("P")==0){
+        m.pe();
+	}
+	else if( W.compareTo("S")==0){
+    	m.samek() ;
+		}
+	else{
+		P.wkq.process(W);
+	}
+}
 
 
 // Translates and writes a word (simple, ketib, qere) with exception markers.
@@ -43,7 +55,6 @@
 
     if (asteriskcount > 0){
         System.out.print("Words.write: Unexpected KQ character * ") ;
-//        P.printPosition() ;
         System.out.println("Word: " + W) ;
         }
 
@@ -55,9 +66,6 @@
            Word = MaqafWord+Word ;  // Combine them.
            }
        else{
-//           System.out.print("Words: Mismatched types for combining "
-//               + MaqafWordType + ", " + Type + " at " ) ;
-//           P.printPosition() ;
            writeWord(MaqafWord, MaqafWordType) ;
            TrailingMaqaf = false ;
            }
@@ -98,17 +106,18 @@
        }
        
     if (Type.charAt(0) == 'w') {
-        A.writer.appendText(A.MorphologicalSegmentStart + Out + A.MorphologicalSegmentEnd + " ") ;
+        A.writer.appendText(P.MorphologicalSegmentStart + Out + P.MorphologicalSegmentEnd) ;
     }
     else if (Type.charAt(0) == 'k') {
-        A.writer.appendText("[" + A.MorphologicalSegmentStart + Out + A.MorphologicalSegmentEnd + " " + H.kaf + "] ") ;
+        A.writer.appendText("[" + P.MorphologicalSegmentStart + Out + P.MorphologicalSegmentEnd + " " + H.kaf + "]") ;
     }
     else if (Type.charAt(0) == 'q') {
-        A.writer.appendText("("+A.MorphologicalSegmentStart + Out + A.MorphologicalSegmentEnd + " " + H.qof+ ") ") ;
+        A.writer.appendText("("+P.MorphologicalSegmentStart + Out + P.MorphologicalSegmentEnd + " " + H.qof+ ")") ;
     }
     else {
         System.out.println("Warning: unknown word type!");
 //		P.printPosition();
+        System.exit(0);
     }
     P.MarkerWritten = false ;
     }

Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java	2006-07-10 16:56:50 UTC (rev 71)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java	2006-07-10 19:20:08 UTC (rev 72)
@@ -221,11 +221,11 @@
             }
 		//Mark morph segments when a maqef is present
 		else if ( (M.Name).compareTo("maqef") == 0 ){
-			S = S + A.MorphologicalSegmentEnd + M.Value + A.MorphologicalSegmentStart; 
+			S = S + P.MorphologicalSegmentEnd + M.Value + P.MorphologicalSegmentStart; 
 		}
 	    
         else if ((Type == MCO.MorphologicalDivision)){
-            S = S + A.MorphologicalDivisionMarker ;
+            S = S + P.MorphologicalDivisionMarker ;
             }
         else{
             S = S + M.Value ;

Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java	2006-07-10 16:56:50 UTC (rev 71)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java	2006-07-10 19:20:08 UTC (rev 72)
@@ -10,13 +10,8 @@
 */
 //=================================================================================================
 
-public class WLC2OSIS /*implements Stoppable*/ {
+public class WLC2OSIS{
 
-// Definitions of input and output to be set by user.
-
-// public String ProgramDate = "30 May 2004" ;
-public String InputFilename;
-public String OutputDirectory ;
 // Define the title and descriptions.
 public String Title = "The Westminster Leningrad Codex (WLC)" ;
 
@@ -56,15 +51,13 @@
    "The book names in English and Hebrew of the Jewish Publication Society "
  + "(JPS) Tanach have been added."} ;  
 
-public String MorphologicalSegmentStart  = "<seg type=\"morph\">" ;
-public String MorphologicalSegmentEnd    = "</seg>" ;
-public String MorphologicalDivisionMarker = MorphologicalSegmentEnd + MorphologicalSegmentStart;
 
-//-----------------------------------------------------------------------------
-
 public Parser p ;
 public XMLWriter writer;
+public String InputFilename;
+public String OutputDirectory ;
 
+
 public WLC2OSIS( String file, String directory ){
     
     InputFilename = file ;
@@ -101,15 +94,7 @@
     writer.closeTag("osisText", 0);
 	writer.close();
 
-    done() ;
-    } 
-
-//------------------------------------------------------------------------------
-/**
-  *    Universal exit.
-  */
-public void done(){
-    System.out.println("\nwriter2OSIS: Normal end.") ;
-    System.exit(0) ;
-    }
+    System.out.println("\nWLC2OSIS: Normal end.") ;
 }
+
+}
\ No newline at end of file



More information about the sword-cvs mailing list