[sword-svn] r39 - in trunk/modules: hebrew-wlc/WLC2OSIS/WLC2OSIS hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate mt-lxx-parallel

mgruner at crosswire.org mgruner at crosswire.org
Fri Jun 3 02:41:17 MST 2005


Author: mgruner
Date: 2005-06-03 02:41:16 -0700 (Fri, 03 Jun 2005)
New Revision: 39

Modified:
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
   trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java
   trunk/modules/mt-lxx-parallel/prepare_files.cpp
   trunk/modules/mt-lxx-parallel/run.sh
Log:
Further work on the WLC update. Morphological segmentation already works in BibleTime!



Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java	2005-06-01 20:02:04 UTC (rev 38)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Parse/Words.java	2005-06-03 09:41:16 UTC (rev 39)
@@ -114,13 +114,13 @@
        }
        
     if (Type.charAt(0) == 'w') {
-        A.w.appendText("<seg>" + Out + "</seg> ") ;
+        A.w.appendText(A.MorphologicalSegmentStart + Out + A.MorphologicalSegmentEnd + " ") ;
     }
     else if (Type.charAt(0) == 'k') {
-        A.w.appendText("[<seg>" + Out + "</seg> " + H.kaf + "] ") ;
+        A.w.appendText("[" + A.MorphologicalSegmentStart + Out + A.MorphologicalSegmentEnd + " " + H.kaf + "] ") ;
     }
     else if (Type.charAt(0) == 'q') {
-        A.w.appendText("(<seg>" + Out + "</seg> " + H.qof+ ") ") ;
+        A.w.appendText("("+A.MorphologicalSegmentStart + Out + A.MorphologicalSegmentEnd + " " + H.qof+ ") ") ;
     }
     else {
         System.out.println("Warning: unknown word type!");

Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java	2005-06-01 20:02:04 UTC (rev 38)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/Translate/Translate.java	2005-06-03 09:41:16 UTC (rev 39)
@@ -247,11 +247,13 @@
         if (Type == MCO.Note){
             S = S + "<note type=\"textual\" xml:lang=\"en\">"+ Note.Notes.get( M.Value)+ "</note>";
             }
+				//Mark morph segments when a maqef is present
+				else if ( (M.Name).compareTo("maqef") == 0 ){
+						S = S + A.MorphologicalSegmentEnd + M.Value + A.MorphologicalSegmentStart; 
+				}
 	    
-    // MG              DISABLE MORPH DIVISION!!!!!!!!!!!!!!!!
-	    
         else if ((Type == MCO.MorphologicalDivision)){
-//             S = S + A.MorphologicalDivisionMarker ;
+            S = S + A.MorphologicalDivisionMarker ;
             }
         else{
             S = S + M.Value ;

Modified: trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java
===================================================================
--- trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java	2005-06-01 20:02:04 UTC (rev 38)
+++ trunk/modules/hebrew-wlc/WLC2OSIS/WLC2OSIS/WLC2OSIS.java	2005-06-03 09:41:16 UTC (rev 39)
@@ -67,7 +67,10 @@
    "The book names in English and Hebrew of the Jewish Publication Society "
  + "(JPS) Tanach have been added."} ;  
 
-//public char MorphologicalDivisionMarker = '/' ;
+public String MorphologicalSegmentStart  = "<seg type=\"morph\">" ;
+public String MorphologicalSegmentEnd    = "</seg>" ;
+public String MorphologicalDivisionMarker = MorphologicalSegmentEnd + MorphologicalSegmentStart;
+
 //-----------------------------------------------------------------------------
 
 public final int InputBufferSize = 4000000 ; // Length of input in bytes.

Modified: trunk/modules/mt-lxx-parallel/prepare_files.cpp
===================================================================
--- trunk/modules/mt-lxx-parallel/prepare_files.cpp	2005-06-01 20:02:04 UTC (rev 38)
+++ trunk/modules/mt-lxx-parallel/prepare_files.cpp	2005-06-03 09:41:16 UTC (rev 39)
@@ -20,16 +20,21 @@
 void processDaniel();
 bool processverseDaniel(const char *sourcea, const char *sourceb, char *destination, int chapter, int verse);
 
+void processJudges();
+bool processverseJudges(const char *sourcea, const char *sourceb, char *destination, int chapter, int verse);
+
+
 void collectverses(const char * source, char * destination, const char * book, const char * title, int chapter, int verse);
 int  strfind(const char * source, const char * str, int pos);
 void strcatrange(const char * source, char * destination, int start, int size);
 
-char bufa[500000], bufb[500000], dest[100000];
+char bufa[500000], bufb[500000], dest[200000];
 
 int main(int argc, char * argv[], char * envp[])
 {
 	processJoshua();
 	processDaniel();
+	processJudges();
 	
 	return 1;
 }
@@ -60,7 +65,7 @@
 
 	output = fopen("Joshua_processed.par", "w+");
 	if (!output){
-		printf("Unable to open/create Joshua.par\n");
+		printf("Unable to open/create Joshua_processed.par\n");
 		return ;
 	}
 
@@ -113,7 +118,7 @@
 
 	output = fopen("Daniel_processed.par", "w+");
 	if (!output){
-		printf("Unable to open/create Daniel.par\n");
+		printf("Unable to open/create Daniel_processed.par\n");
 		return;
 	}
 
@@ -139,6 +144,60 @@
 	printf("\nFinished Daniel\n");
 }
 
+void processJudges(){
+	FILE * input, * output;
+	int chapter, verse;
+
+	input = fopen("08.JudgesB.par", "r");
+	
+	if (!input){
+		printf("Unable to open 08.JudgesB.par\n");
+		return;
+	}
+	readfile(input, bufa, false);
+	fclose(input);
+	
+	input = fopen("09.JudgesA.par", "r");
+	if (!input){
+		printf("Unable to open 09.JudgesA.par\n");
+		return;
+	}
+	readfile(input, bufb, false);
+	fclose(input);
+
+	output = fopen("Judges_processed.par", "w+");
+
+	if (!output){
+		printf("Unable to open/create Judges_processed.par\n");
+		return;
+	}
+
+	chapter = 1;
+	verse = 1;
+	
+	// Break only when no entries for the current chapter can be found in either file.
+	while (checkforchapter(bufa, "JudgB", chapter) == 1 || checkforchapter(bufb, "JudgA", chapter)){
+
+		while (verse < 200){
+			// Either file (or both) may lack the current verse; to avoid ending the
+			// chapter prematurely, try every verse number from 1 to 199.
+			dest[0] = '\0';
+
+			if (processverseJudges(bufb, bufa, dest, chapter, verse)){
+				fputs(dest, output);
+				printf("%i:%i\n", chapter, verse);
+			}
+			verse ++;
+		}
+		chapter ++;
+		verse = 1;
+	}
+		
+	fclose(output);
+	printf("\nFinished Judges\n");
+}
+
+
 void readfile(FILE * fs, char * destination, bool bfix)
 {
 	// Read a source file completely into memory.
@@ -264,6 +323,48 @@
 	return true;
 }
 
+bool processverseJudges(const char *sourcea, const char * sourceb, char *destination, int chapter, int verse)
+{
+	char title[32], *posa, *posb;
+
+	// Check each buffer to see if it contains the current verse.
+	sprintf(title, "JudgA %i:%i\n", chapter, verse);
+	posa = strstr(sourcea, title);
+
+	sprintf(title, "JudgB %i:%i\n", chapter, verse);
+	posb = strstr(sourceb, title);
+
+	if (!posa && !posb)
+		return false;
+
+	sprintf(destination, "Judg %i:%i", chapter, verse);
+
+	if (posa){
+		// If both files contain the verse identify which
+		// file it came from.
+		if (posb)
+			strcat(destination, "\nCodex Alexandrinus:");
+
+		sprintf(title, "JudgA %i:%i\n", chapter, verse);
+
+		// The file may contain multiple entries for the verse,
+		// collectverses will grab all entries.
+		collectverses(sourcea, destination, "JudgA", title, chapter, verse);
+	}
+
+	if (posb){
+		// As above.
+		if (posa)
+			strcat(destination, "\nCodex Vaticanus:");
+
+		sprintf(title, "JudgB %i:%i\n", chapter, verse);
+		collectverses(sourceb, destination, "JudgB", title, chapter, verse);
+	}
+
+	strcat(destination, "\n");
+	return true;
+}
+
 void collectverses(const char * source, char * destination, const char * book, const char * title, int chapter, int verse)
 {
 	// Find all verses that have the specified id in source,
@@ -291,9 +392,9 @@
 
 		hits ++;
 	}
-
 }
 
+
 int strfind(const char * source, const char * str, int pos)
 {
 	// Get the index position from strstr instead of a memory pointer.

Modified: trunk/modules/mt-lxx-parallel/run.sh
===================================================================
--- trunk/modules/mt-lxx-parallel/run.sh	2005-06-01 20:02:04 UTC (rev 38)
+++ trunk/modules/mt-lxx-parallel/run.sh	2005-06-03 09:41:16 UTC (rev 39)
@@ -60,5 +60,5 @@
 cd $TEMP_DIR; 
 prepare_files;
 #These are not needed in TEMP_DIR any more
-rm "06.JoshB.par" "07.JoshA.par" "45.DanielOG.par" "46.DanielTh.par" "prepare_files"
+rm "06.JoshB.par" "07.JoshA.par" "08.JudgesB.par" "09.JudgesA.par" "45.DanielOG.par" "46.DanielTh.par" "prepare_files"
 



More information about the sword-cvs mailing list