[sword-svn] r1812 - in trunk/src/modules: . filters

scribe at crosswire.org scribe at crosswire.org
Mon May 16 02:34:51 MST 2005


Author: scribe
Date: 2005-05-16 02:34:50 -0700 (Mon, 16 May 2005)
New Revision: 1812

Modified:
   trunk/src/modules/filters/gbfstrongs.cpp
   trunk/src/modules/filters/osisstrongs.cpp
   trunk/src/modules/filters/osiswordjs.cpp
   trunk/src/modules/filters/thmlstrongs.cpp
   trunk/src/modules/swmodule.cpp
Log:
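Fixed lemma and morph parsing and made it consistent across the GBF, OSIS, and ThML filters: word entry attributes are now stored as Lemma/LemmaClass and Morph/MorphClass (replacing the old Strongs attribute), and swmodule.cpp now looks up Lemma accordingly.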


Modified: trunk/src/modules/filters/gbfstrongs.cpp
===================================================================
--- trunk/src/modules/filters/gbfstrongs.cpp	2005-05-15 08:30:38 UTC (rev 1811)
+++ trunk/src/modules/filters/gbfstrongs.cpp	2005-05-16 09:34:50 UTC (rev 1812)
@@ -58,17 +58,17 @@
 		}
 		if (*from == '>') {	// process tokens
 			intoken = false;
-
 			if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) {	// Strongs
 				if (module->isProcessEntryAttributes()) {
 					valto = val;
-					for (unsigned int i = 2; ((token[i]) && (i < 150)); i++)
+					for (unsigned int i = 1; ((token[i]) && (i < 150)); i++)
 						*valto++ = token[i];
 					*valto = 0;
 					if (atoi((!isdigit(*val))?val+1:val) < 5627) {
 						// normal strongs number
 						sprintf(wordstr, "%03d", word++);
-						module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val;
+						module->getEntryAttributes()["Word"][wordstr]["Lemma"] = val;
+						module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = "strong";
 						tmp = "";
 						tmp.append(text.c_str()+textStart, (int)(textEnd - textStart));
 						module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp;
@@ -78,8 +78,10 @@
 						// verb morph
 						sprintf(wordstr, "%03d", word-1);
 						module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+						module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = "OLBMorph";
 					}
 				}
+
 				if (!option) {
 					if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) {
 						if (lastspace)
@@ -89,6 +91,18 @@
 					continue;
 				}
 			}
+			if (module->isProcessEntryAttributes()) {
+				if ((*token == 'W') && (token[1] == 'T')) {	// Strongs
+					valto = val;
+					for (unsigned int i = 2; ((token[i]) && (i < 150)); i++)
+						*valto++ = token[i];
+					*valto = 0;
+					sprintf(wordstr, "%03d", word-1);
+					module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = "GBFMorph";
+					module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+					newText = true;
+				}
+			}
 			// if not a strongs token, keep token in text
 			text += '<';
 			text += token;

Modified: trunk/src/modules/filters/osisstrongs.cpp
===================================================================
--- trunk/src/modules/filters/osisstrongs.cpp	2005-05-15 08:30:38 UTC (rev 1811)
+++ trunk/src/modules/filters/osisstrongs.cpp	2005-05-16 09:34:50 UTC (rev 1812)
@@ -8,7 +8,8 @@
 #include <stdlib.h>
 #include <osisstrongs.h>
 #include <swmodule.h>
-#include <ctype.h>
+#include <versekey.h>
+#include <utilxml.h>
 
 SWORD_NAMESPACE_START
 
@@ -33,11 +34,12 @@
 	int tokpos = 0;
 	bool intoken = false;
 	bool lastspace = false;
-	int word = 1;
+	int wordNum = 1;
 	char val[128];
 	char wordstr[5];
 	char *valto;
 	char *ch;
+	const char *wordStart = 0;
 
 	const SWBuf orig = text;
 	const char * from = orig.c_str();
@@ -54,53 +56,67 @@
 		if (*from == '>') {	// process tokens
 			intoken = false;
 			if ((*token == 'w') && (token[1] == ' ')) {	// Word
-				*wordstr = 0;
 				if (module->isProcessEntryAttributes()) {
-					valto = val;
-					char *num = strstr(token, "lemma=\"x-Strongs:");					
-					int strongMarkerLength = 17;
-					if (!num) { //try alternative strong marker value
-						num = strstr(token, "lemma=\"strong:");
-						strongMarkerLength = 14;
+					wordStart = from+1;
+					char gh = 0;
+					VerseKey *vkey = 0;
+					if (key) {
+						vkey = SWDYNAMIC_CAST(VerseKey, key);
 					}
+					XMLTag wtag(token);
+					SWBuf lemma      = wtag.getAttribute("lemma");
+					SWBuf morph      = wtag.getAttribute("morph");
+					SWBuf src        = wtag.getAttribute("src");
+					SWBuf morphClass = "";
+					SWBuf lemmaClass = "";
 
-					if (num) {
-						for (num+=strongMarkerLength; ((*num) && (*num != '\"')); num++) {
-							*valto++ = *num;
-						}
-						*valto = 0;
-						
-						if (atoi((!isdigit(*val))?val+1:val) < 5627) {
-							// normal strongs number
-							sprintf(wordstr, "%03d", word++);
-							module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val;
-							
-							//now try to find the end tag to get the text between <w> and </w> to set the entry attribute
-							
-							const char* startTagEnd = strstr(from, ">"); //end of the opening tag
-							if (startTagEnd) {
-								startTagEnd++;
-								
-								const char* endTagStart = strstr(startTagEnd, "</w>"); //end of the opening tag
-								if (endTagStart && endTagStart > startTagEnd) { //content in between
-									SWBuf tmp;
-									tmp.append(startTagEnd, endTagStart - startTagEnd);
-									module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp;
-								}
+
+					const char *m = strchr(morph.c_str(), ':');
+					if (m) {
+						int len = m-morph.c_str();
+						morphClass.append(morph.c_str(), len);
+						morph << len+1;
+					}
+					m = strchr(lemma.c_str(), ':');
+					if (m) {
+						int len = m-lemma.c_str();
+						lemmaClass.append(lemma.c_str(), len);
+						lemma << len+1;
+					}
+
+					if ((lemmaClass == "x-Strongs") || (lemmaClass == "strong")) {
+						gh = isdigit(lemma[0]) ? 0:lemma[0];
+						if (!gh) {
+							if (vkey) {
+								gh = vkey->Testament() ? 'H' : 'G';
 							}
 						}
-						else {
-							// verb morph
-							sprintf(wordstr, "%03d", word-1);
-							module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
-						}
+						else lemma << 1;
+						lemmaClass = "strong";
 					}
-				}
-				if (wordstr) {
+					if ((morphClass == "x-Robinsons") || (morphClass == "x-Robinson") || (morphClass == "Robinson")) {
+						morphClass = "robinson";
+					}
+
+					sprintf(wordstr, "%03d", wordNum);
+					if (gh) lemma.insert(0,gh);
+					if (lemma.length())
+					module->getEntryAttributes()["Word"][wordstr]["Lemma"] = lemma;
+					if (lemmaClass.length())
+					module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = lemmaClass;
+					if (morph.length())
+					module->getEntryAttributes()["Word"][wordstr]["Morph"] = morph;
+					if (morphClass.length())
+					module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = morphClass;
+					if (src.length())
+						module->getEntryAttributes()["Word"][wordstr]["Src"] = src;
 					strcat(token, " wn=\"");
 					strcat(token, wordstr);
 					strcat(token, "\"");
+
+					wordNum++;
 				}
+
 				if (!option) {
 					char *num = strstr(token, "lemma=\"x-Strongs:");
 					if (num) {
@@ -114,6 +130,17 @@
 					}
 				}
 			}
+			if ((*token == '/') && (token[1] == 'w')) {	// Word End
+				if (module->isProcessEntryAttributes()) {
+					if (wordStart) {
+						SWBuf tmp;
+						tmp.append(wordStart, (from-wordStart)-3);
+						sprintf(wordstr, "%03d", wordNum-1);
+						module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp;
+					}
+				}
+				wordStart = 0;
+			}
 			
 			// if not a strongs token, keep token in text
 			text.append('<');

Modified: trunk/src/modules/filters/osiswordjs.cpp
===================================================================
--- trunk/src/modules/filters/osiswordjs.cpp	2005-05-15 08:30:38 UTC (rev 1811)
+++ trunk/src/modules/filters/osiswordjs.cpp	2005-05-16 09:34:50 UTC (rev 1812)
@@ -125,11 +125,8 @@
 					const char *m = strchr(morph.c_str(), ':');
 					if (m) m++;
 					else m = morph.c_str();
-					text.appendFormatted("<span onclick=\"p(\'%s\', \'%s\', '%s', '%s');\" >", lexName.c_str(), strong.c_str(), wordID.c_str(), m);
+					text.appendFormatted("<span onclick=\"p(\'%s\',\'%s\','%s','%s');\" >", lexName.c_str(), strong.c_str(), wordID.c_str(), m);
 					wordNum++;
-
-
-
 				}
 				if ((*token == '/') && (token[1] == 'w') && option) {	// Word
 					text += "</w></span>";

Modified: trunk/src/modules/filters/thmlstrongs.cpp
===================================================================
--- trunk/src/modules/filters/thmlstrongs.cpp	2005-05-15 08:30:38 UTC (rev 1811)
+++ trunk/src/modules/filters/thmlstrongs.cpp	2005-05-16 09:34:50 UTC (rev 1812)
@@ -68,8 +68,9 @@
 					*valto = 0;
 					if (atoi((!isdigit(*val))?val+1:val) < 5627) {
 						// normal strongs number
-						sprintf(wordstr, "%03d", word++);
-						module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val;
+						sprintf(wordstr, "%03d", word);
+						module->getEntryAttributes()["Word"][wordstr]["Lemma"] = val;
+						module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = "strong";
 						tmp = "";
 						tmp.append(text.c_str()+textStart, (int)(textEnd - textStart));
 						module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp;
@@ -77,9 +78,11 @@
 					}
 					else {
 						// verb morph
-						sprintf(wordstr, "%03d", word-1);
+						sprintf(wordstr, "%03d", word);
 						module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+						module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = "OLBMorph";
 					}
+					word++;
 				}
 
 				if (!option) {	// if we don't want strongs
@@ -100,6 +103,9 @@
 								*valto++ = ch[i];
 							*valto = 0;
 							sprintf(wordstr, "%03d", word-1);
+							if ((!stricmp(val, "Robinsons")) || (!stricmp(val, "Robinson"))) {
+								strcpy(val, "robinson");
+							}
 							module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = val;
 						}
 						if (!strncmp(ch, "value=\"", 7)) {
@@ -111,6 +117,7 @@
 							module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
 						}
 					}
+					newText = true;
 				}
 			}
 			// if not a strongs token, keep token in text

Modified: trunk/src/modules/swmodule.cpp
===================================================================
--- trunk/src/modules/swmodule.cpp	2005-05-15 08:30:38 UTC (rev 1811)
+++ trunk/src/modules/swmodule.cpp	2005-05-16 09:34:50 UTC (rev 1812)
@@ -373,7 +373,7 @@
  *				>=0 - regex
  *				-1  - phrase
  *				-2  - multiword
- *				-3  - entryAttrib (eg. Word//Strongs/G1234/)
+ *				-3  - entryAttrib (eg. Word//Lemma/G1234/)
  *				-4  - clucene
  * 	flags		- options flags for search
  *	justCheckIfSupported	- if set, don't search, only tell if this
@@ -976,7 +976,7 @@
 			words = getEntryAttributes().find("Word");
 			if (words != getEntryAttributes().end()) {
 				for (word = words->second.begin();word != words->second.end(); word++) {
-					strongVal = word->second.find("Strongs");
+					strongVal = word->second.find("Lemma");
 					if (strongVal != word->second.end()) {
 						// cheeze.  skip empty article tags that weren't assigned to any text
 						if (strongVal->second == "G3588") {
@@ -994,7 +994,7 @@
 			doc->add( Field::UnIndexed(_T("key"), keyText ) );
 			doc->add( Field::UnStored(_T("content"), content) );
 			if (strong.length() > 0)
-				doc->add( Field::UnStored(_T("strong"), strong) );
+				doc->add( Field::UnStored(_T("lemma"), strong) );
 			writer->addDocument(*doc);
 			delete doc;
 		}



More information about the sword-cvs mailing list