[sword-svn] r1946 - in trunk: include src/modules/filters src/utilfuns utilities utilities/diatheke

Sat Jul 15 13:41:30 MST 2006

Author: scribe
Date: 2006-07-15 13:41:24 -0700 (Sat, 15 Jul 2006)
New Revision: 1946

Modified:
   trunk/include/osishtmlhref.h
   trunk/include/osisrtf.h
   trunk/include/osiswebif.h
   trunk/include/utilxml.h
   trunk/src/modules/filters/osishtmlhref.cpp
   trunk/src/modules/filters/osisplain.cpp
   trunk/src/modules/filters/osisrtf.cpp
   trunk/src/modules/filters/osiswebif.cpp
   trunk/src/utilfuns/utilxml.cpp
   trunk/utilities/diatheke/osiscgi.cpp
   trunk/utilities/osis2mod.cpp
Log:
Updated support for more OSIS markup, including q marker (DM Smith)
Basic filter cleanups


Modified: trunk/include/osishtmlhref.h
===================================================================

--- trunk/include/osishtmlhref.h	2006-07-10 20:24:28 UTC (rev 1945)
+++ trunk/include/osishtmlhref.h	2006-07-15 20:41:24 UTC (rev 1946)
@@ -34,7 +34,12 @@
 	public:
 		bool osisQToTick;
 		bool inBold;
+		bool inQuote;
 		bool inName;
+		bool providesQuote;
+		SWBuf quoteMark;
+		SWBuf wordsOfChristStart;
+		SWBuf wordsOfChristEnd;
 		SWBuf lastTransChange;
 		SWBuf w;
 		SWBuf fn;

Modified: trunk/include/osisrtf.h
===================================================================
--- trunk/include/osisrtf.h	2006-07-10 20:24:28 UTC (rev 1945)
+++ trunk/include/osisrtf.h	2006-07-15 20:41:24 UTC (rev 1946)
@@ -37,6 +37,8 @@
 		bool BiblicalText;
 		bool inXRefNote;
 		bool inQuote;
+		bool providesQuote;
+		SWBuf quoteMark;
 		SWBuf w;
 		SWBuf version;
 		MyUserData(const SWModule *module, const SWKey *key);

Modified: trunk/include/osiswebif.h
===================================================================
--- trunk/include/osiswebif.h	2006-07-10 20:24:28 UTC (rev 1945)
+++ trunk/include/osiswebif.h	2006-07-15 20:41:24 UTC (rev 1946)
@@ -34,6 +34,7 @@
 
 protected:
 	virtual bool handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData);
+	virtual BasicFilterUserData *createUserData(const SWModule *module, const SWKey *key);
 public:
 	OSISWEBIF();
 	void setJavascript(bool mode) { javascript = mode; }

Modified: trunk/include/utilxml.h
===================================================================
--- trunk/include/utilxml.h	2006-07-10 20:24:28 UTC (rev 1945)
+++ trunk/include/utilxml.h	2006-07-15 20:41:24 UTC (rev 1946)
@@ -65,6 +65,8 @@
 		if (!parsed)
 			parse();
 		empty = value;
+		if (value)
+			endTag = false;
 	}
 
 	inline bool isEndTag() const { return endTag; }

Modified: trunk/src/modules/filters/osishtmlhref.cpp
===================================================================
--- trunk/src/modules/filters/osishtmlhref.cpp	2006-07-10 20:24:28 UTC (rev 1945)
+++ trunk/src/modules/filters/osishtmlhref.cpp	2006-07-15 20:41:24 UTC (rev 1946)
@@ -9,8 +9,7 @@
  *                                                                         *
  *   This program is free software; you can redistribute it and/or modify  *
  *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
+ *   the Free Software Foundation version 2 of the License.                *
  *                                                                         *
  ***************************************************************************/
 
@@ -26,6 +25,9 @@
 
 
 OSISHTMLHREF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+	providesQuote = false;
+	wordsOfChristStart = "<font color=\"red\"> ";
+	wordsOfChristEnd   = "</font> ";
 	if (module) {
 		osisQToTick = ((!module->getConfigEntry("OSISqToTick")) || (strcmp(module->getConfigEntry("OSISqToTick"), "false")));
 		version = module->Name();
@@ -46,18 +48,25 @@
 
 	setEscapeStringCaseSensitive(true);
 
-	addEscapeStringSubstitute("amp",  "&");
-	addEscapeStringSubstitute("apos", "'");
-	addEscapeStringSubstitute("lt",   "<");
-	addEscapeStringSubstitute("gt",   ">");
-	addEscapeStringSubstitute("quot", "\"");
-	
+//   commenting these out.  If someone is sure we shouldn't
+//   convert these since we are outputting to a markup that
+//   recognizes them, then please delete these lines
+//   addEscapeStringSubstitute("amp",  "&");
+//   addEscapeStringSubstitute("apos", "'");
+//   addEscapeStringSubstitute("lt",   "<");
+//   addEscapeStringSubstitute("gt",   ">");
+//   addEscapeStringSubstitute("quot", "\"");
+
 	setTokenCaseSensitive(true);
 	
 	addTokenSubstitute("lg",  "<br />");
 	addTokenSubstitute("/lg", "<br />");
 }
 
+// though this might be slightly slower, possibly causing an extra bool check, this is a renderFilter
+// so speed isn't the absolute highest priority, and this is a very minor possible hit
+static inline void outText(const char *t, SWBuf &o, BasicFilterUserData *u) { if (!u->suspendTextPassThru) o += t; }
+static inline void outText(char t, SWBuf &o, BasicFilterUserData *u) { if (!u->suspendTextPassThru) o += t; }
 
 bool OSISHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
   // manually process if it wasn't a simple substitution
@@ -90,14 +99,14 @@
 				if (attrib = tag.getAttribute("xlit")) {
 					val = strchr(attrib, ':');
 					val = (val) ? (val + 1) : attrib;
-					if (!u->suspendTextPassThru)
-						buf.appendFormatted(" %s", val);
+					outText(" ", buf, u);
+					outText(val, buf, u);
 				}
 				if (attrib = tag.getAttribute("gloss")) {
 					val = strchr(attrib, ':');
 					val = (val) ? (val + 1) : attrib;
-					if (!u->suspendTextPassThru)
-						buf.appendFormatted(" %s", val);
+					outText(" ", buf, u);
+					outText(val, buf, u);
 				}
 				if (attrib = tag.getAttribute("lemma")) {
 					int count = tag.getAttributePartCount("lemma", ' ');
@@ -118,11 +127,12 @@
 						//if ((!strcmp(val2, "3588")) && (lastText.length() < 1))
 						//	show = false;
 						//else {
-							if (!u->suspendTextPassThru)
+							if (!u->suspendTextPassThru) {
 								buf.appendFormatted(" <small><em>&lt;<a href=\"passagestudy.jsp?action=showStrongs&type=%s&value=%s\">%s</a>&gt;</em></small> ", 
 										(gh.length()) ? gh.c_str() : "", 
 										URL::encode(val2).c_str(), 
 										val2);
+							}
 						//}
 						
 					} while (++i < count);
@@ -142,19 +152,20 @@
 							const char *val2 = val;
 							if ((*val == 'T') && (strchr("GH", val[1])) && (isdigit(val[2])))
 								val2+=2;
-							if (!u->suspendTextPassThru)
+							if (!u->suspendTextPassThru) {
 								buf.appendFormatted(" <small><em>(<a href=\"passagestudy.jsp?action=showMorph&type=%s&value=%s\">%s</a>)</em></small> ", 
 										URL::encode(tag.getAttribute("morph")).c_str(),
 										URL::encode(val).c_str(), 
 										val2);
+							}
 						} while (++i < count);
 					//}
 				}
 				if (attrib = tag.getAttribute("POS")) {
 					val = strchr(attrib, ':');
 					val = (val) ? (val + 1) : attrib;
-					if (!u->suspendTextPassThru)
-						buf.appendFormatted(" %s", val);
+					outText(" ", buf, u);
+					outText(val, buf, u);
 				}
 
 				/*if (endTag)
@@ -203,17 +214,14 @@
 		// <p> paragraph tag
 		else if (!strcmp(tag.getName(), "p")) {
 			if ((!tag.isEndTag()) && (!tag.isEmpty())) {	// non-empty start tag
-				if (!u->suspendTextPassThru)
-					buf += "<!P><br />";
+				outText("<!P><br />", buf, u);
 			}
 			else if (tag.isEndTag()) {	// end tag
-				if (!u->suspendTextPassThru)
-					buf += "<!/P><br />";
+				outText("<!/P><br />", buf, u);
 				userData->supressAdjacentWhitespace = true;
 			}
 			else {					// empty paragraph break marker
-				if (!u->suspendTextPassThru)
-					buf += "<!P><br />";
+				outText("<!P><br />", buf, u);
 				userData->supressAdjacentWhitespace = true;
 			}
 		}
@@ -221,68 +229,65 @@
 		// <reference> tag
 		else if (!strcmp(tag.getName(), "reference")) {			
 			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
-				if (!u->suspendTextPassThru)
-					buf += "<a href=\"\">";
+				outText("<a href=\"\">", buf, u);
 			}
 			else if (tag.isEndTag()) {
-				if (!u->suspendTextPassThru)
-					buf += "</a>";
+				outText("</a>", buf, u);
 			}
 		}
 
 		// <l> poetry, etc
 		else if (!strcmp(tag.getName(), "l")) {
-			if (tag.isEmpty()) {
-				if (!u->suspendTextPassThru)
-					buf += "<br />";
+			// end line marker
+			if (tag.getAttribute("eID")) {
+				outText("<br />", buf, u);
 			}
+			// <l/> without eID or sID
+			// Note: this is improper osis. This should be <lb/>
+			else if (tag.isEmpty() && !tag.getAttribute("sID")) {
+				outText("<br />", buf, u);
+			}
+			// end of the line
 			else if (tag.isEndTag()) {
-				if (!u->suspendTextPassThru)
-					buf += "<br />";
+				outText("<br />", buf, u);
 			}
-			else if (tag.getAttribute("sID")) {	// empty line marker
-				if (!u->suspendTextPassThru)
-					buf += "<br />";
-			}
 		}
 
+		// <lb.../>
+		else if (!strcmp(tag.getName(), "lb")) {
+			outText("<br />", buf, u);
+			userData->supressAdjacentWhitespace = true;
+		}
 		// <milestone type="line"/>
 		else if ((!strcmp(tag.getName(), "milestone")) && (tag.getAttribute("type"))) {
 			if(!strcmp(tag.getAttribute("type"), "line")) {
-				if (!u->suspendTextPassThru)
-					buf += "<br />";
-					userData->supressAdjacentWhitespace = true;
+				outText("<br />", buf, u);
+				userData->supressAdjacentWhitespace = true;
 			}
 			else if(!strcmp(tag.getAttribute("type"),"x-p"))  {
-			//	buf += tag.getAttribute("marker");
 				if( tag.getAttribute("marker"))
-					buf += tag.getAttribute("marker");
-				else
-					buf +=  "<!p>";
+					outText(tag.getAttribute("marker"), buf, u);
+				else outText("<!p>", buf, u);
 			}
 		}
 
 		// <title>
 		else if (!strcmp(tag.getName(), "title")) {
 			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
-				if (!u->suspendTextPassThru)
-					buf += "<b>";
+				outText("<b>", buf, u);
 			}
 			else if (tag.isEndTag()) {
-				if (!u->suspendTextPassThru)
-					buf += "</b><br />";
+				outText("</b><br />", buf, u);
 			}
 		}
 
 		// <catchWord> & <rdg> tags (italicize)
 		else if (!strcmp(tag.getName(), "rdg") || !strcmp(tag.getName(), "catchWord")) {
 			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
-				if (!u->suspendTextPassThru)
-					buf += "<i>";
+				outText("<i>", buf, u);
 			}
 			else if (tag.isEndTag()) {
-				if (!u->suspendTextPassThru)
-					buf += "</i>";
+				outText("</i>", buf, u);
 			}
 		}
 
@@ -301,11 +306,12 @@
 					char firstChar = *u->lastTextNode.c_str();
 					const char *name = u->lastTextNode.c_str();
 					++name;
-					buf += firstChar;
-					buf += "<font size=\"-1\">";
+					outText(firstChar, buf, u);
+					outText("<font size=\"-1\">", buf, u);
+					
 					for(int i=0;i<strlen(name);i++)
-						buf += toupper(name[i]);
-					buf += "</font>";
+						outText(toupper(name[i]), buf, u);
+					outText("</font>", buf, u);
 					u->inName = false;
 					u->suspendTextPassThru = false;
 				}
@@ -317,25 +323,20 @@
 			SWBuf type = tag.getAttribute("type");
 			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
 				if (type == "b" || type == "x-b") {
-					if (!u->suspendTextPassThru)
-						buf += "<b>";
+					outText("<b>", buf, u);
 					u->inBold = true;
 				}
 				else {	// all other types
-					if (!u->suspendTextPassThru)
-						buf += "<i>";
+					outText("<i>", buf, u);
 					u->inBold = false;
 				}
 			}
 			else if (tag.isEndTag()) {
 				if(u->inBold) {
-					if (!u->suspendTextPassThru)
-						buf += "</b>";
+					outText("</b>", buf, u);
 					u->inBold = false;
 				}
-				else
-					if (!u->suspendTextPassThru)
-						 buf += "</i>";
+				else outText("</i>", buf, u);
 			}
 		}
 
@@ -344,50 +345,88 @@
 			SWBuf type = tag.getAttribute("type");
 			SWBuf who = tag.getAttribute("who");
 			const char *lev = tag.getAttribute("level");
+			const char *mark = tag.getAttribute("marker");
 			int level = (lev) ? atoi(lev) : 1;
 
-			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
-				/*buf += "{";*/
+			// open <q> or <q sID... />
+			if ((!tag.isEmpty()) || (tag.getAttribute("sID"))) {
 
+				// Honor the marker attribute, ignoring the osisQToTick
+				u->providesQuote = false;
+				if (mark) {
+					if (*mark) {
+						outText(mark, buf, u);
+					}
+					u->quoteMark = mark;
+					u->providesQuote = true;
+				}
 				//alternate " and '
-				if (u->osisQToTick)
-					if (!u->suspendTextPassThru)
-						buf += (level % 2) ? '\"' : '\'';
+				else if (u->osisQToTick)
+					outText((level % 2) ? '\"' : '\'', buf, u);
 				
-				if (who == "Jesus") {
-					if (!u->suspendTextPassThru)
-						buf += "<font color=\"red\">";
+				if (who == "Jesus" && !u->suspendTextPassThru) {
+					outText(u->wordsOfChristStart, buf, u);
+					u->inQuote = true;
 				}
 			}
-			else if (tag.isEndTag()) {
-				//alternate " and '
-				if (u->osisQToTick)
-					if (!u->suspendTextPassThru)
-						buf += (level % 2) ? '\"' : '\'';
-				//buf += "</font>";
+			// close </q> or <q eID... />
+			else if ((tag.isEndTag()) || (tag.getAttribute("eID"))) {
+				// if we've changed font color, we should put it back
+				if (u->inQuote) {
+					outText(u->wordsOfChristEnd, buf, u);
+					u->inQuote = false;
+				}
+				// first check to see if we've been given an explicit mark
+				if (mark) {
+					if (*mark) {
+						outText(mark, buf, u);
+					}
+				}
+				// next check to see if our opening q provided an explicit mark
+				else if (u->providesQuote) {
+					if (u->quoteMark.length()) {
+						outText(u->quoteMark, buf, u);
+					}
+				}
+				// finally, alternate " and ', if config says we should supply a mark
+				else if (u->osisQToTick)
+					outText((level % 2) ? '\"' : '\'', buf, u);
 			}
-			else {	// empty quote marker
-				//alternate " and '
-				if (u->osisQToTick)
-					if (!u->suspendTextPassThru)
-						buf += (level % 2) ? '\"' : '\'';
+		}
+
+		// <milestone type="cQuote" marker="x"/>
+		else if (!strcmp(tag.getName(), "milestone") && tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "cQuote")) {
+			const char *mark = tag.getAttribute("marker");
+			const char *lev = tag.getAttribute("level");
+			int level = (lev) ? atoi(lev) : 1;
+
+			// first check to see if we've been given an explicit mark
+			if (mark) {
+				if (*mark) {
+					outText(mark, buf, u);
+				}
 			}
+			// finally, alternate " and ', if config says we should supply a mark
+			else if (u->osisQToTick)
+				outText((level % 2) ? '\"' : '\'', buf, u);
 		}
 
 		// <transChange>
 		else if (!strcmp(tag.getName(), "transChange")) {
-			SWBuf type = tag.getAttribute("type");
-
 			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+				SWBuf type = tag.getAttribute("type");
+				u->lastTransChange = type;
 
-// just do all transChange tags this way for now
-//				if (type == "supplied")
-					if (!u->suspendTextPassThru)
-						buf += "<i>";
+				// just do all transChange tags this way for now
+				if ((type == "added") || (type == "supplied"))
+					outText("<i>", buf, u);
+				else if (type == "tenseChange")
+					buf += "*";
 			}
 			else if (tag.isEndTag()) {
-				if (!u->suspendTextPassThru)
-					buf += "</i>";
+				SWBuf type = u->lastTransChange;
+				if ((type == "added") || (type == "supplied"))
+					outText("</i>", buf, u);
 			}
 			else {	// empty transChange marker?
 			}
@@ -408,11 +447,9 @@
 			filepath += src;
 
 // we do this because BibleCS looks for this EXACT format for an image tag
-				if (!u->suspendTextPassThru) {
-					buf+="<image src=\"";
-					buf+=filepath;
-					buf+="\" />";
-				}
+			outText("<image src=\"", buf, u);
+			outText(filepath, buf, u);
+			outText("\" />", buf, u);
 		}
 
 		else {

Modified: trunk/src/modules/filters/osisplain.cpp
===================================================================
--- trunk/src/modules/filters/osisplain.cpp	2006-07-10 20:24:28 UTC (rev 1945)
+++ trunk/src/modules/filters/osisplain.cpp	2006-07-15 20:41:24 UTC (rev 1946)
@@ -155,6 +155,12 @@
 				buf.append('\n');
 		}
 
+                // <lb .../>
+                else if (!strncmp(token, "lb", 2)) {
+			userData->supressAdjacentWhitespace = true;
+			buf.append('\n');
+		}
+
                 // <milestone type="line"/>
                 else if (!strncmp(token, "milestone", 9)) {
 			const char* type = strstr(token+10, "type=\"");

Modified: trunk/src/modules/filters/osisrtf.cpp
===================================================================
--- trunk/src/modules/filters/osisrtf.cpp	2006-07-10 20:24:28 UTC (rev 1945)
+++ trunk/src/modules/filters/osisrtf.cpp	2006-07-15 20:41:24 UTC (rev 1946)
@@ -9,8 +9,7 @@
  *                                                                         *
  *   This program is free software; you can redistribute it and/or modify  *
  *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
+ *   the Free Software Foundation version 2 of the License.                *
  *                                                                         *
  ***************************************************************************/
 
@@ -26,9 +25,10 @@
 
 
 OSISRTF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
-	inXRefNote = false;
-	BiblicalText = false;
-	inQuote = false;
+	inXRefNote    = false;
+	BiblicalText  = false;
+	inQuote       = false;
+	providesQuote = false;
 	if (module) {
 		version = module->Name();
 		BiblicalText = (!strcmp(module->Type(), "Biblical Texts"));
@@ -203,19 +203,23 @@
 
 		// <l> poetry
 		else if (!strcmp(tag.getName(), "l")) {
-			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
-				buf += "";
+			// end line marker
+			if (tag.getAttribute("eID")) {
+				buf += "{\\par}";
 			}
-			else if (tag.isEndTag()) {
+			// <l/> without eID or sID
+			// Note: this is improper osis. This should be <lb/>
+			else if (tag.isEmpty() && !tag.getAttribute("sID")) {
 				buf += "{\\par}";
 			}
-			else if (tag.getAttribute("sID")) {	// empty line marker
+			// end of the line
+			else if (tag.isEndTag()) {
 				buf += "{\\par}";
 			}
 		}
 
-		// <milestone type="line"/>
-		else if ((!strcmp(tag.getName(), "milestone")) && (tag.getAttribute("type")) && (!strcmp(tag.getAttribute("type"), "line"))) {
+		// <milestone type="line"/> or <lb.../>
+		else if ((!strcmp(tag.getName(), "lb")) || ((!strcmp(tag.getName(), "milestone")) && (tag.getAttribute("type")) && (!strcmp(tag.getAttribute("type"), "line")))) {
 			buf += "{\\par}";
 			userData->supressAdjacentWhitespace = true;
 		}
@@ -259,39 +263,70 @@
 			SWBuf type = tag.getAttribute("type");
 			SWBuf who = tag.getAttribute("who");
 			const char *lev = tag.getAttribute("level");
+			const char *mark = tag.getAttribute("marker");
 			int level = (lev) ? atoi(lev) : 1;
 
-			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
-				buf += "{";
+			// open <q> or <q sID... />
+			if ((!tag.isEmpty()) || (tag.getAttribute("sID"))) {
 
+				// Honor the marker attribute, ignoring the osisQToTick
+				u->providesQuote = false;
+				if (mark) {
+					if (*mark) {
+						buf += mark;
+					}
+					u->quoteMark = mark;
+					u->providesQuote = true;
+				}
 				//alternate " and '
-				if (u->osisQToTick)
+				else if (u->osisQToTick)
 					buf += (level % 2) ? '\"' : '\'';
 
-				if (who == "Jesus")
+				if (who == "Jesus") {
 					buf += "\\cf6 ";
+					u->inQuote = true;
+				}
 			}
-			else if (tag.isEndTag()) {
-				//alternate " and '
-				if (u->osisQToTick)
+			// close </q> or <q eID... />
+			else if ((tag.isEndTag()) || (tag.getAttribute("eID"))) {
+				// if we've changed color, we should put it back
+				if (u->inQuote) {
+					buf += "\\cf0 ";
+					u->inQuote = false;
+				}
+				// first check to see if we've been given an explicit mark
+				if (mark) {
+					if (*mark) {
+						buf += mark;
+					}
+				}
+				// next check to see if our opening q provided an explicit mark
+				else if (u->providesQuote) {
+					if (u->quoteMark.length()) {
+						buf += u->quoteMark;
+					}
+				}
+				// finally, alternate " and ', if config says we should supply a mark
+				else if (u->osisQToTick)
 					buf += (level % 2) ? '\"' : '\'';
-				buf += "}";
 			}
-			else {	// empty quote marker
-				//alternate " and '
-				if (u->osisQToTick)
-					buf += (level % 2) ? '\"' : '\'';
-				if (!u->inQuote) {
-					if (who == "Jesus")
-						buf += "\\cf6 ";
-					u->inQuote = 1;
+		}
+
+		// <milestone type="cQuote" marker="x"/>
+		else if (!strcmp(tag.getName(), "milestone") && tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "cQuote")) {
+			const char *mark = tag.getAttribute("marker");
+			const char *lev = tag.getAttribute("level");
+			int level = (lev) ? atoi(lev) : 1;
+
+			// first check to see if we've been given an explicit mark
+			if (mark) {
+				if (*mark) {
+					buf += mark;
 				}
-				else {
-					if (who == "Jesus")
-						buf += "\\cf0 ";
-					u->inQuote = 0;
-				}
 			}
+			// finally, alternate " and ', if config says we should supply a mark
+			else if (u->osisQToTick)
+				buf += (level % 2) ? '\"' : '\'';
 		}
 
 		// <transChange>

Modified: trunk/src/modules/filters/osiswebif.cpp
===================================================================
--- trunk/src/modules/filters/osiswebif.cpp	2006-07-10 20:24:28 UTC (rev 1945)
+++ trunk/src/modules/filters/osiswebif.cpp	2006-07-15 20:41:24 UTC (rev 1946)
@@ -30,6 +30,15 @@
 OSISWEBIF::OSISWEBIF() : baseURL(""), passageStudyURL(baseURL + "passagestudy.jsp"), javascript(false) {
 }
 
+
+BasicFilterUserData *OSISWEBIF::createUserData(const SWModule *module, const SWKey *key) {
+	MyUserData *u = new MyUserData(module, key);
+	u->wordsOfChristStart = "<span class=\"wordsOfJesus\"> ";
+	u->wordsOfChristEnd   = "</span> ";
+	return u;
+}
+
+
 bool OSISWEBIF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
   // manually process if it wasn't a simple substitution
 	if (!substituteToken(buf, token)) {
@@ -158,96 +167,6 @@
 			}
 		}
 
-		// <catchWord> & <rdg> tags (italicize)
-		else if (!strcmp(tag.getName(), "rdg") || !strcmp(tag.getName(), "catchWord")) {
-			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
-				if (!u->suspendTextPassThru)
-					buf += "<i>";
-			}
-			else if (tag.isEndTag()) {
-				if (!u->suspendTextPassThru)
-					buf += "</i>";
-			}
-		}
-
-                // <hi> text highlighting
-		else if (!strcmp(tag.getName(), "hi")) {
-			SWBuf type = tag.getAttribute("type");
-			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
-				if (type == "b" || type == "x-b") {
-					if (!u->suspendTextPassThru)
-						buf += "<b>";
-					u->inBold = true;
-				}
-				else {	// all other types
-					if (!u->suspendTextPassThru)
-						buf += "<i>";
-					u->inBold = false;
-				}
-			}
-			else if (tag.isEndTag()) {
-				if(u->inBold) {
-					if (!u->suspendTextPassThru)
-						buf += "</b>";
-				}
-				else {
-					if (!u->suspendTextPassThru)
-						 buf += "</i>";
-				}
-			}
-		}
-
-		// <q> quote
-		else if (!strcmp(tag.getName(), "q")) {
-			SWBuf type = tag.getAttribute("type");
-			SWBuf who = tag.getAttribute("who");
-			const char *lev = tag.getAttribute("level");
-			int level = (lev) ? atoi(lev) : 1;
-			
-			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
-				/*buf += "{";*/
-
-				//alternate " and '
-				if (u->osisQToTick)
-					buf += (level % 2) ? '\"' : '\'';
-				
-				if (who == "Jesus") {
-					buf += "<span class=\"wordsOfJesus\"> ";
-				}
-			}
-			else if (tag.isEndTag()) {
-				//alternate " and '
-				if (u->osisQToTick)
-					buf += (level % 2) ? '\"' : '\'';
-				buf += "</span>";
-			}
-			else {	// empty quote marker
-				//alternate " and '
-				if (u->osisQToTick)
-					buf += (level % 2) ? '\"' : '\'';
-			}
-		}
-
-		// <transChange>
-		else if (!strcmp(tag.getName(), "transChange")) {
-			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
-				SWBuf type = tag.getAttribute("type");
-				u->lastTransChange = type;
-
-				// just do all transChange tags this way for now
-				if ((type == "added") || (type == "supplied"))
-					buf += "<i>";
-				else if (type == "tenseChange")
-					buf += "*";
-			}
-			else if (tag.isEndTag()) {
-				SWBuf type = u->lastTransChange;
-				if ((type == "added") || (type == "supplied"))
-					buf += "</i>";
-			}
-			else {	// empty transChange marker?
-			}
-		}
 		// ok to leave these in
 		else if (!strcmp(tag.getName(), "div")) {
 			buf += tag;
@@ -258,6 +177,13 @@
 		else if (!strcmp(tag.getName(), "br")) {
 			buf += tag;
 		}
+
+		// handled appropriately in base class
+		// <catchWord> & <rdg> tags (italicize)
+		// <hi> text highlighting
+		// <q> quote
+		// <milestone type="cQuote" marker="x"/>
+		// <transChange>
 		else {
 			return OSISHTMLHREF::handleToken(buf, token, userData);
 		}

Modified: trunk/src/utilfuns/utilxml.cpp
===================================================================
--- trunk/src/utilfuns/utilxml.cpp	2006-07-10 20:24:28 UTC (rev 1945)
+++ trunk/src/utilfuns/utilxml.cpp	2006-07-15 20:41:24 UTC (rev 1946)
@@ -41,7 +41,10 @@
 			for (; ((buf[i]) && (!isalpha(buf[i]))); i++);
 			if (buf[i]) {		// we have an attribute name
 				start = i;
+				// Deprecated: check for following whitespace
+				// Should be: for (; buf[i] && buf[i] != '='; i++);
 				for (; ((buf[i]) && (!strchr(" =", buf[i]))); i++);
+
 				if (i-start) {
 					if (name)
 						delete [] name;
@@ -49,23 +52,47 @@
 					strncpy(name, buf+start, i-start);
 					name[i-start] = 0;
 				}
-				for (; ((buf[i]) && (strchr(" =\"\'", buf[i]))); i++);
+
+				// The following does not allow for empty attributes
+				//for (; ((buf[i]) && (strchr(" =\"\'", buf[i]))); i++);
+
+				// skip space preceding the = sign
+				// Deprecated: this is not part of the xml spec
+				for (; buf[i] == ' '; i++) ;
+
+				// skip the = sign
+				i++;
+
+				// skip space following the = sign
+				// Deprecated: this is not part of the xml spec
+				for (; buf[i] == ' '; i++) ;
+
+				// remember and skip the quote sign
+				char quoteChar = buf[i];
+				i++;
+
 				if (buf[i]) {	// we have attribute value
 					start = i;
-					for (; ((buf[i]) && (buf[i] != '\"') && (buf[i] != '\'')); i++);
-					if (i-start) {
+					// Skip until matching quote character
+					for (; ((buf[i]) && (buf[i] != quoteChar)); i++);
+
+					// Allow for empty quotes
+					//if (i-start) {
 						if (value)
 							delete [] value;
 						value = new char [ (i-start) + 1 ];
-						strncpy(value, buf+start, i-start);
+						if (i-start) {
+							strncpy(value, buf+start, i-start);
+						}
 						value[i-start] = 0;
 						attributes[name] = value;
-					}
+					//}
 				}
 			}
 		}
-		if (!buf[i])
-			break;
+		if (buf[i])
+			buf[i] = ' ';
+		else break;
 	}
 	for (;i;i--) {
 		if (buf[i] == '/')

Modified: trunk/utilities/diatheke/osiscgi.cpp
===================================================================
--- trunk/utilities/diatheke/osiscgi.cpp	2006-07-10 20:24:28 UTC (rev 1945)
+++ trunk/utilities/diatheke/osiscgi.cpp	2006-07-15 20:41:24 UTC (rev 1946)
@@ -226,8 +226,8 @@
 			}
 		}
 
-		// <milestone type="line"/>
-		else if ((!strcmp(tag.getName(), "milestone")) && (tag.getAttribute("type")) && (!strcmp(tag.getAttribute("type"), "line"))) {
+		// <milestone type="line"/> or <lb.../>
+		else if ((!strcmp(tag.getName(), "lb")) || ((!strcmp(tag.getName(), "milestone")) && (tag.getAttribute("type")) && (!strcmp(tag.getAttribute("type"), "line")))) {
 			buf += "<br />";
 			userData->supressAdjacentWhitespace = true;
 		}

Modified: trunk/utilities/osis2mod.cpp
===================================================================
--- trunk/utilities/osis2mod.cpp	2006-07-10 20:24:28 UTC (rev 1945)
+++ trunk/utilities/osis2mod.cpp	2006-07-15 20:41:24 UTC (rev 1946)
@@ -32,6 +32,7 @@
 
 SWText *module = 0;
 VerseKey *currentVerse = 0;
+char activeOsisID[255];
 
 
 // remove subverse elements from osisIDs
@@ -58,7 +59,7 @@
 	test = buf;
 
 	if (vk.Testament() && vk.Book() && vk.Chapter() && vk.Verse()) { // if we're not a heading
-//		std::cerr << (const char*)vk << " == "  << (const char*)test << std::endl;
+//		cout << (const char*)vk << " == "  << (const char*)test << endl;
 		return (vk == test);
 	}
 	else return true;	// no check if we're a heading... Probably bad.
@@ -79,28 +80,57 @@
 }
 
 
-void writeEntry(VerseKey &key, SWBuf &text, bool suppressOutput = false) {
-//	cout << "Verse: " << key << "\n";
-//	cout << "TEXT: " << text << "\n\n";
+void writeEntry(VerseKey &key, SWBuf &text, bool force = false) {
+	static SWBuf activeVerseText;
+        char keyOsisID[255];
+	strcpy(keyOsisID, key.getOSISRef());
+
+	// set keyOsisID to anything that an osisID cannot be.
+	if (force) {
+		strcpy(keyOsisID, "-force");
+	}
+
+	static VerseKey lastKey;
+	lastKey.AutoNormalize(0);
+	lastKey.Headings(1);
+
 	VerseKey saveKey;
 	saveKey.AutoNormalize(0);
 	saveKey.Headings(1);
 	saveKey = key;
 
-	if (!isKJVRef(key)) {
-		makeKJVRef(key);
-	}
+	// If we have seen a verse and the supplied one is different then we output the collected one.
+	if (*activeOsisID && strcmp(activeOsisID, keyOsisID)) {
 
-	SWBuf currentText = module->getRawEntry();
-	if (currentText.length()) {
-		if (!suppressOutput) {
-			cout << "Appending entry: " << key << endl;
+		key = lastKey;
+
+		if (!isKJVRef(key)) {
+			makeKJVRef(key);
 		}
-		text = currentText + " " + text;
+
+		SWBuf currentText = module->getRawEntry();
+		if (currentText.length()) {
+			cout << "Appending entry: " << key.getOSISRef() << ": " << activeVerseText << endl;
+			activeVerseText = currentText + " " + activeVerseText;
+		}
+
+//		cout << "Write: " << activeOsisID << ":" << key.getOSISRef() << ": " << activeVerseText << endl;
+
+		module->setEntry(activeVerseText);
+		activeVerseText = "";
 	}
-	module->setEntry(text);
 
+	if (activeVerseText.length()) {
+		activeVerseText += " ";
+		activeVerseText += text;
+	}
+	else {
+		activeVerseText = text;
+	}
+
 	key = saveKey;
+	lastKey = key;
+	strcpy(activeOsisID, keyOsisID);
 }
 
 
@@ -138,7 +168,7 @@
 	static bool inBook = false;
 	static bool inChapter = false;
 	static bool inVerse = true;
-	
+
 	static SWBuf header = "";
 
 	// Used to remember titles that need to be handle specially
@@ -175,6 +205,7 @@
 		lastTitle = "";
 		inTitle = true;
 		tagStack.push(token);
+//		cout << "push " << token->getName() << endl;
 		titleDepth = tagStack.size();
 		return false; 
 	}
@@ -183,10 +214,12 @@
 		lastTitle.append(text.c_str() + titleOffset); //<title ...> up to the end </title>
 		lastTitle.append(*token); //</title>
 
-// 		printf("lastTitle:	%s\n", lastTitle.c_str());
-// 		printf("text-lastTitle:	%s\n", text.c_str()+titleOffset);
+// 		cout << "lastTitle:      " << lastTitle.c_str() << endl;
+// 		cout << "text-lastTitle: " << text.c_str()+titleOffset << endl;
+//		cout << "text:           " << text.c_str() << endl;
 		inTitle = false;
 		titleDepth = 0;
+//		cout << "pop " << tagStack.top()->getName() << endl;
 		tagStack.pop();
 		return false; // don't add </title> to the text itself
 	}
@@ -200,11 +233,12 @@
 		// Remember non-empty start tags
 		if (!token->isEmpty()) {
 			tagStack.push(token);
+//			cout << "push " << token->getName() << endl;
 		}
 
 		//-- WITH OSIS ID -------------------------------------------------------------------------
 		if (token->getAttribute("osisID")) {
-	
+
 			// BOOK START
 			if ((!strcmp(tokenName, "div")) && (!strcmp(typeAttr, "book"))) {
 				inVerse = false;
@@ -248,7 +282,7 @@
 				text = "";
 				chapterDepth = tagStack.size();
 				verseDepth = 0;
-				
+
 				return true;
 			}
 
@@ -256,29 +290,24 @@
 			else if (!strcmp(tokenName, "verse")) {
 				inVerse = true;
 				if (inChapterHeader) {
+					SWBuf heading = text;
+
 					//make sure we don't insert the preverse title which belongs to the first verse of this chapter!
 					// Did we have a preverse title?
 					if (lastTitle.length())
 					{
 						//Was the preVerse title in the header (error if not)?
-						const char* header = text.c_str();
+						const char* header = heading.c_str();
 						const char* preVerse = strstr(header, lastTitle);
 						if (preVerse) {
 							if (preVerse == header) {
-								; // do nothing
+								heading = ""; // do nothing
 							}
 							else {
-								int preVerseLen = strlen(preVerse);
-								int headerLen = strlen(header);
-								// Was it the last thing?
-								if (header == preVerseLen + preVerse - headerLen) {
-									// Remove it from the end of the header.
-									text.setSize(headerLen - preVerseLen);
-								}
-								// It was not the last thing so it cannot be preVerse
-								else {
-									lastTitle = "";
-								}
+								// remove everything before the title from the beginning.
+								text = preVerse;
+								// Remove text from the end of the header.
+								heading.setSize(preVerse - header);
 							}
 						}
 						else {
@@ -286,10 +315,11 @@
 						}
 					}
 
-//					cout << "CHAPTER HEADING "<< text.c_str() << endl;
-					writeEntry(*currentVerse, text);
-			
-					text = "";
+					if (heading.length()) {
+//						cout << "CHAPTER HEADING "<< heading.c_str() << endl;
+						writeEntry(*currentVerse, heading);
+					}
+
 					inChapterHeader = false;
 				}
 
@@ -318,6 +348,26 @@
 				return true;
 			}
 		}
+		// Handle stuff between the verses
+		// Whitespace producing empty tokens are appended to prior entry
+		// Also the quote
+		// This is a hack to get ESV to work
+		else if (!inTitle && !inVerse && token->isEmpty()) { // && !inBookHeader && !inChapterHeader) {
+			if (!strcmp(tokenName, "p") ||
+					!strcmp(tokenName, "div") ||
+					!strcmp(tokenName, "q")  ||
+					!strcmp(tokenName, "l") ||
+					!strcmp(tokenName, "lb") ||
+					!strcmp(tokenName, "lg")
+					) {
+//					if (token) {
+//						cout << "start token " << *token << ":" << text.c_str() << endl;
+//					}
+				SWBuf tmp = token->toString();
+				writeEntry(*currentVerse, tmp);
+				return true;
+			}
+		}
 	}
 
 //-- END TAG ---------------------------------------------------------------------------------------------
@@ -329,15 +379,19 @@
 			exit(1);
 		}
 
-		XMLTag *topToken = tagStack.top();
-		tagDepth = tagStack.size();
-		tagStack.pop();
+		XMLTag *topToken = 0;
+		if (!token->isEmpty()) {
+			topToken = tagStack.top();
+			tagDepth = tagStack.size();
+//			cout << "pop " << topToken->getName() << endl;
+			tagStack.pop();
 
-		if (strcmp(topToken->getName(), tokenName)) {
-			cout << "Expected " << topToken->getName() << " found " << tokenName << endl;
-			exit(1);
+			if (strcmp(topToken->getName(), tokenName)) {
+				cout << "Error: " << *currentVerse << ": Expected " << topToken->getName() << " found " << tokenName << endl;
+				exit(1);
+			}
 		}
-		
+
 		// VERSE END
 		if (!strcmp(tokenName, "verse")) {
 			inVerse = false;
@@ -349,11 +403,11 @@
 			if (lastTitle.length()) {
 				const char* end = strchr(lastTitle, '>');
 //				cout << lastTitle << endl;
-//	 			printf("length=%d, tag; %s\n", end+1 - lastTitle.c_str(), lastTitle.c_str());
+//	 			cout << "length=" << int(end+1 - lastTitle.c_str()) << ", tag:" << lastTitle.c_str() << endl;
 
 				SWBuf titleTagText;
 				titleTagText.append(lastTitle.c_str(), end+1 - lastTitle.c_str());
-//				printf("tagText: %s\n", titleTagText.c_str());
+//				cout << "tagText: " << titleTagText.c_str() << endl;;
 
 				XMLTag titleTag(titleTagText);
 				titleTag.setAttribute("type", "section");
@@ -367,7 +421,7 @@
 					temp.append(pos+lastTitle.length());
 					text = temp;
 				}
-			
+
 				//if a title was already inserted at the beginning insert this one after that first title
 				int titlePos = 0;
 				if (!strncmp(text.c_str(),"<title ",7)) {
@@ -402,7 +456,8 @@
 			verseDepth = 0;
 			return true;
 		}
-		else if (!inVerse) {
+		else if (!inTitle && !inVerse && !inBookHeader && !inChapterHeader) {
+//			cout << "End tag not in verse: " << tokenName << "(" << tagDepth << "," << chapterDepth << "," << bookDepth << ")" << endl;
 			// Is this the end of a chapter.
 			if (tagDepth == chapterDepth && (!strcmp(tokenName, "div") || !strcmp(tokenName, "chapter"))) {
 				chapterDepth = 0;
@@ -412,6 +467,7 @@
 			}
 			// Or is it the end of a book
 			else if (tagDepth == bookDepth && (!strcmp(tokenName, "div"))) {
+//				cout << "Saw an end div: " << *topToken << endl;
 				bookDepth = 0;
 				chapterDepth = 0;
 				verseDepth = 0;
@@ -421,13 +477,17 @@
 			// OTHER MISC END TAGS WHEN !INVERSE
 			// Test that is between verses, or after the last is appended to the preceeding verse.
 			else if (!strcmp(tokenName, "p") ||
-					 !strcmp(tokenName, "div") ||
-					 !strcmp(tokenName, "q")  ||
-					 !strcmp(tokenName, "l") ||
-					 !strcmp(tokenName, "lg")
-					  ) {
+					!strcmp(tokenName, "div") ||
+					!strcmp(tokenName, "q")  ||
+					!strcmp(tokenName, "l") ||
+					!strcmp(tokenName, "lb") ||
+					!strcmp(tokenName, "lg")
+					) {
+//				if (topToken) {
+//					cout << "start token " << *topToken << endl;
+//				}
 				text.append(*token);
-				writeEntry(*currentVerse, text, true);
+				writeEntry(*currentVerse, text);
 				text = "";
 				return true;
 			}
@@ -436,6 +496,50 @@
 	return false;
 }
 
+XMLTag* transform(XMLTag* t) { // Rewrites non-empty <q> and <p> start/end tags in place and returns the same pointer t.
+	static std::stack<XMLTag*> tagStack; // start tags seen so far; static, so it persists across calls and an end tag can find its start tag
+	static int sID = 1; // counter used to generate the sID values "q1", "q2", ...
+	char buf[11]; // receives "q<sID>"; NOTE(review): 11 bytes holds 'q' + at most 9 digits + NUL, and sprintf below is unbounded
+
+	// Support simplification transformations; empty tags skip this block and are returned unchanged
+	if (!t->isEmpty()) {
+		if (!t->isEndTag()) {
+			tagStack.push(t); // pushed before any rewrite below, so the stack holds the same object that gets modified
+			// Transform <q> into an empty <q sID="qN"/>, except for <q who="Jesus"> which is left as it is
+			if ((!strcmp(t->getName(), "q")) && (!t->getAttribute("who") || strcmp(t->getAttribute("who"), "Jesus"))) {
+				t->setEmpty(true);
+				sprintf(buf, "q%d", sID++);
+				t->setAttribute("sID", buf);
+			}
+
+			// Transform <p> into <lb type="x-begin-paragraph"/>
+			else if (!strcmp(t->getName(), "p")) {
+				// note there is no process that should care about type, it is there for reversibility
+				t->setText("<lb type=\"x-begin-paragraph\" />");
+			}
+		}
+		else {
+			XMLTag *topToken = tagStack.top(); // NOTE(review): no empty() check; an end tag with no pushed start tag would call top() on an empty stack
+			tagStack.pop();
+
+			// If we have found an end tag for a <q> that was transformed (i.e. not who="Jesus") then transform this one as well.
+			if ((!strcmp(t->getName(), "q")) && (!strcmp(topToken->getName(), "q")) && (!topToken->getAttribute("who") || strcmp(topToken->getAttribute("who"), "Jesus"))) {
+				// make this a clone of the start tag with sID changed to eID
+				*t = *topToken;
+				t->setAttribute("eID", t->getAttribute("sID"));
+				t->setAttribute("sID", 0); // presumably a null value removes the attribute -- TODO confirm against XMLTag::setAttribute
+			}
+
+			// Look for paragraph tags.
+			// If we have found an end tag for a <p> that was transformed then transform this as well.
+			else if ((!strcmp(t->getName(), "p")) && (!strcmp(topToken->getName(), "lb"))) { // start <p> was rewritten via setText above; presumably its name now reads "lb" -- TODO confirm
+				t->setText("<lb type=\"x-end-paragraph\" />");
+			}
+		}
+	}
+	return t;
+}
+
 int main(int argc, char **argv) {
 
 	// Let's test our command line arguments
@@ -454,8 +558,8 @@
 	string cipherKey = "";
 	SWCompress *compressor = 0;
 // 	SWModule *outModule    = 0;
-	
 
+
 	if (argc > 4) {
 		compType = atoi(argv[4]);
 		if (argc > 5) {
@@ -472,8 +576,8 @@
 		case 2: compressor = new ZipCompress(); break;
 	}
 
-//	cerr << "path: " << argv[1] << " osisDoc: " << argv[2] << " create: " << argv[3] << " compressType: " << compType << " blockType: " << iType << " cipherKey: " << cipherKey.c_str() << "\n";
-//	cerr << "";
+//	cout << "path: " << argv[1] << " osisDoc: " << argv[2] << " create: " << argv[3] << " compressType: " << compType << " blockType: " << iType << " cipherKey: " << cipherKey.c_str() << "\n";
+//	cout << "";
 //	exit(-3);
 
 
@@ -498,7 +602,7 @@
 		fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[2]);
 		exit(-2);
 	}
-	
+
 	// Do some initialization stuff
 	SWBuf buffer;
 
@@ -521,8 +625,8 @@
 		fprintf(stderr, "The module is not writable. Writing text to it will not work.\nExiting.\n" );
 		exit(-1);
 	}
-	
 
+	activeOsisID[0] = '\0';
 
 	currentVerse = new VerseKey();
 	currentVerse->AutoNormalize(0);
@@ -554,8 +658,9 @@
 				if ((isalpha(token[1])) || (isalpha(token[2]))) {
 					//cout << "Handle:" << token.c_str() << endl;
 					XMLTag *t = new XMLTag(token.c_str());
-					if (!handleToken(text, t)) {
-						text.append(token);
+
+					if (!handleToken(text, transform(t))) {
+						text.append(*t);
 					}
 				}
 				continue;
@@ -566,10 +671,14 @@
 			else	
 				text.append(*from);
 		}
-		
+
 		if (intoken)
 			token.append("\n");
 	}
+
+	// Force the last entry from the buffer.
+	text = "";
+	writeEntry(*currentVerse, text, true);
 	delete module;
 	delete currentVerse;
 	if (cipherFilter)