[sword-svn] r79 - in trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS: Parse Translate

mgruner at www.crosswire.org mgruner at www.crosswire.org
Fri Sep 15 09:40:26 MST 2006


Author: mgruner
Date: 2006-09-15 09:40:20 -0700 (Fri, 15 Sep 2006)
New Revision: 79

Modified:
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
Log:
about ready
will soon send Kirk a demo OSIS file


Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java	2006-09-14 19:58:01 UTC (rev 78)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Parser.java	2006-09-15 16:40:20 UTC (rev 79)
@@ -49,15 +49,14 @@
 	int oldWordNumber = 0;
 	int newWordNumber = 0;
 
-//																		book	chap	vs	  word#	subword#  note		ketivquere			word homonym lang  lemma
-    java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)(\\]\\S)?\\s([*]+)?([^* ]+)\\s([^_]+)(_\\d+)?(@|%)(\\S+)");
+//																		book	chap	vs	  word#	subword#  note		word	lemma	homonym lang  morph
+    java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\w\\w)(\\d+):(\\d+),(\\d+)\\.(\\d+)(\\]\\S)?\\s(\\S+)\\s([^_]+)(_\\d+)?(@|%)(\\S+)");
      
     while ( true ){
     	s="";
     	try{
-	    	s= file.readLine();
-	    	system.out.println(s);
-	    	continue;
+	    	s = file.readLine();
+	    	System.out.println(s);
 	    }
 	  	catch (IOException e) {
 			System.out.println("Read error: " + e) ;
@@ -96,18 +95,15 @@
 		newWordNumber	= Integer.parseInt( match.group(4) );
 		//newSubWordNumber = Integer.parseInt( match.group(5) ); not used
 		String note = match.group(6);
-		String ketivqere = match.group(7);
-		String word = match.group(8);
-		String lemma	= match.group(9);
-		String homonym = match.group(10);
+		String word = match.group(7);
+		String lemma	= match.group(8);
+		String homonym = match.group(9);
 		if (homonym != null){
 			homonym = homonym.substring(1); //"_1" to "1"
 		}
-		String separator = match.group(11);
-		String morph  = match.group(12);
+		String separator = match.group(10);
+		String morph  = match.group(11);
 		
-		System.out.println(s);
-		
 		// Verse changed, close old and open new
 		if ((!newBookCode.equals(oldBookCode)) || (newChapter != oldChapter) || (newVerse != oldVerse))
 		{
@@ -148,28 +144,37 @@
 		
 		//special case: nonprinting article, leave out for now
 		// TODO: FIX
-		if (word.equals("_")){
+		if (word.equals("_") || word.equals("*_") || word.equals("**_")){
 			continue;
 		}
-		
-//		System.out.println("s: " + s);
-		
+		//Qere / Ketiv only
+		else if (word.equals("**qq")){
+			A.writer.appendText("**<note type=\"textual\" xml:lang=\"en\">Ketiv without Quere.</note>");
+			continue;
+		}
+		else if (word.equals("*kk")){
+			A.writer.appendText("*<note type=\"textual\" xml:lang=\"en\">Qere without Ketiv.</note>");
+			continue;
+		}
+
 		// Paragraph marker found
 		if (morph.compareTo("x") == 0){
 			if (word.compareTo("P") == 0){ //
-				//A.writer.appendText("  "+constructWord(word, lemma, homonym, morph)+"<p/>");
+				A.writer.appendText("  "+constructWord(word, lemma, homonym, morph)+"<p/>");
 			}
 			else if (word.compareTo("S") == 0){ //
-				//A.writer.appendText("  "+constructWord(word, lemma, homonym, morph)+"  ");
+				A.writer.appendText("  "+constructWord(word, lemma, homonym, morph)+"  ");
 			}
 			else if (word.compareTo("N") == 0){ //inverted nun
-				//A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+" ");
+				A.writer.appendText(" "+constructWord(word, lemma, homonym, morph)+" ");
 			}
 			else {System.out.println("Unknown paragraph marker: " + s); System.exit(1);}
 		}
 		
+
+		
 		//now the text itself
-		//A.writer.appendText( constructWord(word, lemma, homonym, morph) );
+		A.writer.appendText( constructWord(word, lemma, homonym, morph) );
 		
 		//Note found
 		if (note != null && note.length() > 0){
@@ -185,12 +190,12 @@
 //----------------------------------------------------------------------------
 
 public String constructWord(String word, String lemma, String homonym, String morph){
-	String result = "<seg type=\"x-morph\" lemma=\""+T.translate(lemma) + "\" ";
+	String result = "<seg type=\"x-morph\" lemma=\""+T.convertCompoundWord(lemma) + "\" ";
 	if (homonym != null) {
 		result += "homonym=\""+homonym + "\" ";
 	}
 	result += "morph=\""+morph+"\">";
-	result += T.translate(word)+"</seg>";
+	result += T.convertCompoundWord(word)+"</seg>";
 	return result;
 }
 

Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java	2006-09-14 19:58:01 UTC (rev 78)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java	2006-09-15 16:40:20 UTC (rev 79)
@@ -12,15 +12,16 @@
 //==============================================================================
 public class Translate{
 
-Parser P ;
-WLC2OSIS A ;
+private
+	Parser P ;
+	WLC2OSIS A ;
 
-MCO M ;
-MCO Mark ;
-Vector MCOs, OrderedMCOs ;
-int Type, I, k1, len ;
-int[] ConsonantPositions = new int[100] ;
-char c, c1 ;
+	MCO M;
+	MCO Mark ;
+	char c, c1 ;
+	int Type, I, k1, len ;
+	Vector MCOs, OrderedMCOs ;
+	int[] ConsonantPositions = new int[100] ;
 //-----------------------------------------------------------------------------
 
 public Translate(WLC2OSIS A, Parser P) {
@@ -29,11 +30,34 @@
     }    
 //------------------------------------------------------------------------------
 
+public String convertCompoundWord(String W){
+	if (W.contains("~")){ //compound word without maqqef
+		String[] tmp = W.split("~");
+		return convertWord(tmp[0]) + " " + convertWord(tmp[1]);
+	}
+	else if (W.contains("-") && !W.endsWith("-")){ //compound word with maqqef
+		String[] tmp = W.split("-");
+		return convertWord(tmp[0]) + convertWord("-") + convertWord(tmp[1]);
+	}
+	else{
+		return convertWord(W);
+	}
+}
+
+public String convertWord(String W){
+	if (W.startsWith("**"))
+		return "**<note type=\"textual\" xml:lang=\"en\">Quere.</note>"+convertChars(W.substring(2));
+	else if (W.startsWith("*"))
+		return "*<note type=\"textual\" xml:lang=\"en\">Ketiv.</note>"+convertChars(W.substring(1));
+	else return convertChars(W);
+}
+
 // Translates an MC word (not qere or ketib) to a Unicode String.
 // Notes are included as <note type="textual">text of note</note>.
 
-public String translate(String W){
-    len = W.length() ;
+public String convertChars(String W){
+
+	len = W.length() ;
         
 // Convert characters in String to MCO objects, expanding
 // ConsonantMarks, Numbers, and Notes as necessary.
@@ -41,9 +65,8 @@
 
     MCOs = new Vector() ;
     for (int k = 0; k < len; k++) {
-        c = W.charAt(k) ;
+        c = W.charAt(k) ;        
         
-        
         M = (MCO) (MC.getMCO(c)).clone() ;
         Type = M.Type ;
 
@@ -53,6 +76,7 @@
                 + "\nWord: " + W 
                 + "\nCharacter: " + c
                 + "\n                                     " ) ;
+            System.exit(1);
              }
         else if(Type <= 5){  // These types need no expansion.
             MCOs.add(M) ;




More information about the sword-cvs mailing list