[sword-svn] r74 - in trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS: Parse Translate

mgruner at crosswire.org mgruner at crosswire.org
Thu Jul 27 12:53:25 MST 2006


Author: mgruner
Date: 2006-07-27 12:53:15 -0700 (Thu, 27 Jul 2006)
New Revision: 74

Removed:
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java
Modified:
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
Log:
some work, still not functional

Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java	2006-07-11 20:02:44 UTC (rev 73)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Markers.java	2006-07-27 19:53:15 UTC (rev 74)
@@ -27,44 +27,18 @@
 
 // Samek
 public void samek(){ //parasah setumah, closed paragraph == small space in line
-    testMaqafWord() ;
-//     A.wlc.writeMarker("samekh", 4) ;
     A.writer.appendText("   " + H.samekh + "   ") ;
-    P.MarkerWritten = true ;
+//    P.MarkerWritten = true ;
     }
 //------------------------------------------------------------------------------
 
 // Pe
 public void pe(){ // parasah petuhah, open paragraph == new line
-    testMaqafWord() ;
-//     A.writer.writeMarker("pe", 4) ;
     A.writer.appendText(" " + H.pe + " " + "<p/>") ;
-    P.MarkerWritten = true ;
+//    P.MarkerWritten = true ;
     }
 //------------------------------------------------------------------------------
 
-// Line
-public void line(){
-    System.out.println("Markers: End-of-line encountered!") ;
-    }
-//----------------------------------------------------------------------------
-/**
- *  Tests for a preceding trailing maqaf word and writes it. 
- *
- *  Before any marker is written, the TrailingMaqaf flag must be
- *  tested.  If a trailing maqaf word precedes the mark, it must be
- *  written before the marker.
- *
- *  Apparemtly ONLY EOLs cause this test to be activated.
- */
-void testMaqafWord(){
-   if (P.w.TrailingMaqaf){
-//       System.out.print("Markers: Marker follows trailing maqaf at ") ;
-//           P.printPosition() ;
-       P.w.writeWord(P.w.MaqafWord, P.w.MaqafWordType) ;
-       P.w.TrailingMaqaf = false ;
-       }
-   }
 }
 //==============================================================================
 //==============================================================================

Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java	2006-07-11 20:02:44 UTC (rev 73)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java	2006-07-27 19:53:15 UTC (rev 74)
@@ -3,50 +3,22 @@
 import java.io.*;
 import WLC2OSIS.* ;
 import WLC2OSIS.Translate.* ;
-//==============================================================================
-/**
- *  <b>Parser dispatches tokens to  Books, Chapters, Markers, Tanach,
- *     Verses, and Words start/end methods, special to WLC. </b>
- *
- *  Extensively modified for WLC.
- */
-//==============================================================================
+
 public class Parser{
 
 WLC2OSIS A ;
-
-//  Working classes
-
-public Words w ;
 Translate T ;
-public WKQ wkq ;
 
-// Current state
+public final String MorphologicalSegmentStart  = "<seg type=\"morph\">" ;
+public final String MorphologicalSegmentEnd    = "</seg>" ;
+public final String MorphologicalDivisionMarker = MorphologicalSegmentEnd + MorphologicalSegmentStart;
 
-public boolean MarkerWritten ;  // Indicates a marker has been written
-                                // between two words.
-
-// Assorted counts
-public int ChapterVerseCount ;
-public int BookVerseCount ;
-public int BookChapterCount ;
-
-public String MorphologicalSegmentStart  = "<seg type=\"morph\">" ;
-public String MorphologicalSegmentEnd    = "</seg>" ;
-public String MorphologicalDivisionMarker = MorphologicalSegmentEnd + MorphologicalSegmentStart;
-
-
-BufferedReader file;
-
-
 //-----------------------------------------------------------------------------
 public Parser(WLC2OSIS A, boolean wlc_only) {
     this.A = A ;
 
     T = new Translate(A, this) ;
   
-    w = new Words(A, this) ;
-    wkq = new WKQ(this) ;
     new MC() ;
     Note.setNotes();
     }    
@@ -55,21 +27,29 @@
 public void parse(){
     String s ;
     System.out.println("\n");
-    
+
+    BufferedReader file;
+
    	try{
 	    file = new BufferedReader( new FileReader( A.InputFilename ));
 	}
 	catch (IOException e) {
+		file = null;
 		System.out.println("File not found: " + e) ;
     }
 	
 	BookName[] bookNames = BookName.setBookNames();
 	
 	String oldBookCode = "";
+	String newBookCode = "";
 	int oldChapter = 0;
+	int newChapter = 0;
 	int oldVerse = 0;
+	int newVerse = 0;
 	int oldWordNumber = 0;
+	int newWordNumber = 0;
 	int oldSubWordNumber = 0;
+	int newSubWordNumber = 0;
 
 
     java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)(\\]\\S)?\\s(\\S+)\\s(\\S+)(@|%)(\\S+)");
@@ -103,18 +83,19 @@
 			System.exit(1);
 		}
 		
-// Parse the identifier
-		String newBookCode = match.group(1);
-		int newChapter = Integer.parseInt( match.group(2) );
-		int newVerse   = Integer.parseInt( match.group(3) );
-		int newWordNumber	= Integer.parseInt( match.group(4) );
-		int newSubWordNumber = Integer.parseInt( match.group(5) );
+		// Parse the identifier
+		newBookCode = match.group(1);
+		newChapter = Integer.parseInt( match.group(2) );
+		newVerse   = Integer.parseInt( match.group(3) );
+		newWordNumber	= Integer.parseInt( match.group(4) );
+		newSubWordNumber = Integer.parseInt( match.group(5) );
 		String note = match.group(6);
-		String expression = match.group(7);
+		String word = match.group(7);
 		String lemma	= match.group(8);
 		String separator = match.group(9);
-		String grammar  = match.group(10);
+		String morph  = match.group(10);
 		
+		// Verse changed, close old and open new
 		if ((newBookCode != oldBookCode) || (newChapter != oldChapter) || (newVerse != oldVerse))
 		{
 			if (oldVerse >= 1) A.writer.closeTag("verse", 2);
@@ -130,16 +111,26 @@
 			A.writer.appendText(" ");
 		}
 		
-		System.out.println("Expression: " + expression);
+		//System.out.println("Expression: " + word);
 		
-		w.process(expression);
-  
-	    oldBookCode = newBookCode;
+		// Paragraph marker found
+		if (morph == "x"){
+			System.out.println("paragraph marker found!");
+			if (word == "P"){
+				A.writer.appendText("  "+constructWord(word, lemma, morph)+"<p/>");
+			}
+			else if (word == "S"){
+				A.writer.appendText("  "+constructWord(word, lemma, morph)+"  ");
+			}
+			else {System.out.println("Unknown marker."); System.exit(1);}
+		}
+		
+		//remember
+		oldBookCode = newBookCode;
 	    oldChapter = newChapter;
 	    oldVerse = newVerse;
 	    oldWordNumber = newWordNumber;
 	    oldSubWordNumber = newSubWordNumber;
-
     }
     
     A.writer.closeTag("verse", 2);
@@ -148,18 +139,8 @@
 }
 //----------------------------------------------------------------------------
 
-// Counts the number of occurences of a character in a String.
+public String constructWord(String word, String lemma, String morph){
+	return "<seg type=\"x-morph\" lemma=\""+lemma+"\" morph=\""+morph+"\">"+word+"</seg>";
+}
 
-public int countChar(String W, char c){
-   int Count = 0 ;
-   for(int k=0; k < W.length(); k++){
-       if(W.charAt(k)==c){
-           Count++ ;
-           }
-       }
-   return Count ;
-   }
-//----------------------------------------------------------------------------
-
-
 }

Deleted: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java	2006-07-11 20:02:44 UTC (rev 73)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java	2006-07-27 19:53:15 UTC (rev 74)
@@ -1,127 +0,0 @@
-package WLC2OSIS.Parse ;
-
-import WLC2OSIS.* ;
-import WLC2OSIS.Translate.H ;
-//==============================================================================
-/**
- *  <b>Processes words, sending them to the Translate class
- *  after their word, qere, ketiv properties have been determined.</b>
- */
-//==============================================================================
-public class Words{
-
-WLC2OSIS A ;
-Parser P ;
-
-boolean TrailingMaqaf ;
-String MaqafWord ;
-String MaqafWordType ;
-Markers m ;
-
-
-//-----------------------------------------------------------------------------
-
-public Words(WLC2OSIS A, Parser P ) {
-    this.A = A ;
-    this.P = P ;
-    m = new Markers(A, P) ;
-
-    }    
-//------------------------------------------------------------------------------
-
-// Processes a word,
-public void process(String W){
-	if( W.compareTo("P")==0){
-        m.pe();
-	}
-	else if( W.compareTo("S")==0){
-    	m.samek() ;
-		}
-	else{
-		P.wkq.process(W);
-	}
-}
-
-
-// Translates and writes a word (simple, ketib, qere) with exception markers.
-// All returns leave P.MarkerWritten = false ;
-public void write(String W, String Type) {
-
-   String Word = P.T.translate(W) ;
-   
-// Check for any KQ markers which should NOT be here!
-
-    int asteriskcount = P.countChar(W, '*') ;
-
-    if (asteriskcount > 0){
-        System.out.print("Words.write: Unexpected KQ character * ") ;
-        System.out.println("Word: " + W) ;
-        }
-
-// Look for a case in which there's been trailing maqaf
-// without an intervening marker.
-
-   if(TrailingMaqaf & !P.MarkerWritten ){
-       if(MaqafWordType.charAt(0)== Type.charAt(0) ){
-           Word = MaqafWord+Word ;  // Combine them.
-           }
-       else{
-           writeWord(MaqafWord, MaqafWordType) ;
-           TrailingMaqaf = false ;
-           }
-       }
-   
-// Check for a trailing maqaf.
-// Don't write the word here.
-
-   TrailingMaqaf = false ;
-   if( Word.charAt(Word.length()-1) == H.maqaf){
-       TrailingMaqaf = true ;
-       MaqafWord = Word ;
-       MaqafWordType = Type ;
-       P.MarkerWritten = false ;
-       return ;
-       }
-
-   writeWord(Word, Type) ;
-   } 
-//----------------------------------------------------------------------------------
-
-public void writeWord(String Word, String Type) {
-
-// Check for any exception markers ]x 
-   
-   String Out = "" ;
-   for (int k=0 ; k < Word.length() ; k++){
-       char c = Word.charAt(k) ;
-       if(c == ']'){
-           k++ ;
-           char ExceptionValue = Word.charAt(k) ;
-           Out = Out + "<x>" + ExceptionValue +"</x>" ;
-	   System.out.println("Exception occured");
-           }
-       else{
-           Out = Out + c ;
-           }
-       }
-       
-    if (Type.charAt(0) == 'w') {
-        A.writer.appendText(P.MorphologicalSegmentStart + Out + P.MorphologicalSegmentEnd) ;
-    }
-    else if (Type.charAt(0) == 'k') {
-        A.writer.appendText("[" + P.MorphologicalSegmentStart + Out + P.MorphologicalSegmentEnd + " " + H.kaf + "]") ;
-    }
-    else if (Type.charAt(0) == 'q') {
-        A.writer.appendText("("+P.MorphologicalSegmentStart + Out + P.MorphologicalSegmentEnd + " " + H.qof+ ")") ;
-    }
-    else {
-        System.out.println("Warning: unknown word type!");
-//		P.printPosition();
-        System.exit(0);
-    }
-    P.MarkerWritten = false ;
-    }
-}
-
-//==============================================================================
-//==============================================================================

Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java	2006-07-11 20:02:44 UTC (rev 73)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java	2006-07-27 19:53:15 UTC (rev 74)
@@ -30,7 +30,7 @@
 //------------------------------------------------------------------------------
 
 // Translates an MC word (not qere or ketib) to a Unicode String.
-// Notes are included as <note type="textual">text of note</x>.
+// Notes are included as <note type="textual">text of note</note>.
 
 public String translate(String W){
     len = W.length() ;



More information about the sword-cvs mailing list