[sword-svn] r2435 - trunk/utilities

Sat Jun 6 07:32:14 MST 2009

Author: dmsmith
Date: 2009-06-06 07:32:13 -0700 (Sat, 06 Jun 2009)
New Revision: 2435

Modified:
   trunk/utilities/osis2mod.cpp
Log:
Made a title without a type attribute, or with type="sub", be ignored when determining the pre-verse boundary.
Added more DEBUG(TITLE) diagnostics.
Fixed an improper usage of SWBuf, where it was being checked as if it were a char*.
Changed strcmp calls to == when first argument was an SWBuf.


Modified: trunk/utilities/osis2mod.cpp
===================================================================

--- trunk/utilities/osis2mod.cpp	2009-06-04 09:59:29 UTC (rev 2434)
+++ trunk/utilities/osis2mod.cpp	2009-06-06 14:32:13 UTC (rev 2435)
@@ -572,11 +572,11 @@
 //		false if the what has been seen is to be accumulated and considered later.
 bool handleToken(SWBuf &text, XMLTag token) {
 
-	// Everything between the begin book tag and the first begin chapter tag is inBookHeader
-	static bool               inBookHeader    = false;
+	// Everything between the begin book tag and the first begin chapter tag is inBookIntro
+	static bool               inBookIntro     = false;
 
-	// Everything between the begin chapter tag and the first begin verse tag is inChapterHeader
-	static bool               inChapterHeader = false;
+	// Everything between the begin chapter tag and the first begin verse tag is inChapterIntro
+	static bool               inChapterIntro  = false;
 
 	// Flags indicating whether we are processing the content of a chapter
 	static bool               inChapter       = false;
@@ -637,7 +637,7 @@
 
 		// throw away everything up to the first div
 		if (!firstDiv) {
-			if (!strcmp(tokenName, "div")) {
+			if (tokenName == "div") {
 				if (debug & DEBUG_OTHER) {
 					cout << "DEBUG(FOUND): Found first div and pitching prior material: " << text << endl;
 				}
@@ -657,13 +657,13 @@
 		if (token.getAttribute("osisID") || token.getAttribute("annotateRef")) {
 
 			// BOOK START, <div type="book" ...>
-			if ((!strcmp(tokenName, "div")) && (typeAttr && !strcmp(typeAttr, "book"))) {
-				if (inBookHeader || inChapterHeader) {	// this one should never happen, but just in case
+			if (tokenName == "div" && typeAttr == "book") {
+				if (inBookIntro || inChapterIntro) {	// this one should never happen, but just in case
 
 					if (debug & DEBUG_TITLE) {
-						cout << "DEBUG(TITLE): " << currentOsisID << ": OOPS HEADING " << endl;
-						cout << "\tinChapterHeader = " << inChapterHeader << endl;
-						cout << "\tinBookHeader = " << inBookHeader << endl;
+						cout << "DEBUG(TITLE): " << currentOsisID << ": OOPS INTRO " << endl;
+						cout << "\tinChapterIntro = " << inChapterIntro << endl;
+						cout << "\tinBookIntro = " << inBookIntro << endl;
 					}
 
 					currentVerse.Testament(0);
@@ -681,9 +681,13 @@
 				inChapter       = false;
 				inVerse         = false;
 				inPreVerse      = false;
-				inBookHeader    = true;
-				inChapterHeader = false;
+				inBookIntro     = true;
+				inChapterIntro  = false;
 
+				if (debug & DEBUG_TITLE) {
+					cout << "DEBUG(TITLE): " << currentOsisID << ": Looking for book introduction" << endl;
+				}
+
 				bookDepth       = tagStack.size();
 				chapterDepth    = 0;
 				verseDepth      = 0;
@@ -699,13 +703,13 @@
 				return false;
 			}
 
-			// CHAPTER START, <div type="chapter" ...> or <chapter ...>
-			if (((!strcmp(tokenName, "div")) && (typeAttr && !strcmp(typeAttr, "chapter"))) ||
-			     (!strcmp(tokenName, "chapter"))
-			   ) {
-				if (inBookHeader) {
+			// CHAPTER START, <chapter> or <div type="chapter" ...>
+			if ((tokenName == "chapter") ||
+			    (tokenName == "div" && typeAttr == "chapter")
+			) {
+				if (inBookIntro) {
 					if (debug & DEBUG_TITLE) {
-						cout << "DEBUG(TITLE): " << currentOsisID << ": BOOK HEADING "<< text.c_str() << endl;
+						cout << "DEBUG(TITLE): " << currentOsisID << ": BOOK INTRO "<< text << endl;
 					}
 
 					writeEntry(text);
@@ -724,9 +728,13 @@
 				inChapter       = true;
 				inVerse         = false;
 				inPreVerse      = false;
-				inBookHeader    = false;
-				inChapterHeader = true;
+				inBookIntro     = false;
+				inChapterIntro  = true;
 
+				if (debug & DEBUG_TITLE) {
+					cout << "DEBUG(TITLE): " << currentOsisID << ": Looking for chapter introduction" << endl;
+				}
+
 				chapterDepth    = tagStack.size();
 				verseDepth      = 0;
 
@@ -734,25 +742,25 @@
 			}
 
 			// VERSE, <verse ...> OR COMMENTARY START, <div annotateType="xxx" ...>
-			if (!strcmp(tokenName, "verse") ||
-			   (!strcmp(tokenName, "div") && token.getAttribute("annotateType"))) {
+			if ((tokenName == "verse") ||
+			    (tokenName == "div" && token.getAttribute("annotateType"))
+			) {
 				if (debug & DEBUG_OTHER) {
 					cout << "DEBUG(FOUND): Entering verse" << endl;
 				}
 
-				if (inChapterHeader) {
-					SWBuf heading = text;
-					text = "";
+				if (inChapterIntro) {
+					if (debug & DEBUG_TITLE) {
+						cout << "DEBUG(TITLE): " << currentOsisID << ": Done looking for chapter introduction" << endl;
+					}
 
-					if (heading.length()) {
+					if (text.length()) {
 						if (debug & DEBUG_TITLE) {
-							cout << "DEBUG(TITLE): " << currentOsisID << ": CHAPTER HEADING "<< heading.c_str() << endl;
+							cout << "DEBUG(TITLE): " << currentOsisID << ": CHAPTER INTRO "<< text << endl;
 						}
 
-						writeEntry(heading);
+						writeEntry(text);
 					}
-
-					inChapterHeader = false;
 				}
 
 				// Did we have pre-verse material that needs to be marked?
@@ -763,7 +771,7 @@
 				}
 
 				// Get osisID for verse or annotateRef for commentary
-				SWBuf keyVal = token.getAttribute(strcmp(tokenName, "verse") ? "annotateRef" : "osisID");
+				SWBuf keyVal = token.getAttribute(tokenName == "verse" ? "osisID" : "annotateRef");
 
 				// Massage the key into a form that ParseVerseList can accept
 				prepareSWVerseKey(keyVal);
@@ -788,7 +796,7 @@
 					}
 				}
 				else {
-					cout << "ERROR(REF): Invalid osisID/annotateRef: " << token.getAttribute(strcmp(tokenName, "verse") ? "annotateRef" : "osisID") << endl;
+					cout << "ERROR(REF): Invalid osisID/annotateRef: " << token.getAttribute((tokenName == "verse") ? "osisID" : "annotateRef") << endl;
 				}
 
 				strcpy(currentOsisID, currentVerse.getOSISRef());
@@ -801,12 +809,12 @@
 				sidVerse        = token.getAttribute("sID");
 				inVerse         = true;
 				inPreVerse      = false;
-				inBookHeader    = false;
-				inChapterHeader = false;
+				inBookIntro     = false;
+				inChapterIntro  = false;
 				verseDepth      = tagStack.size();
 
 				// Include the token if it is not a verse
-				if (strcmp(tokenName, "verse")) {
+				if (tokenName != "verse") {
 					text.append(token);
 				}
 				else if (debug & DEBUG_VERSE)
@@ -834,7 +842,7 @@
 		// Handle WOC quotes.
 		// Note this requires transformBSP to make them into milestones
 		// Otherwise have to do it here
-		if (!strcmp(tokenName, "q")) {
+		if (tokenName == "q") {
 			quoteStack.push(token);
 
 			if (debug & DEBUG_QUOTE) {
@@ -866,30 +874,35 @@
 		// 1) Between the opening of a book and the first chapter, all the material is handled as an introduction to the book.
 		// 2) Between the opening of a chapter and the first verse, the material is split between the introduction of the chapter
 		//    and the first verse of the chapter.
-		//    A <div> with a type other than section will be taken as a chapter introduction.
-		//    A <title> of type acrostic, psalm or no type, will be taken as a title for the verse.
-		//    A <title> of type main or chapter will be seen as a chapter title.
+		//    A <div> with a type of section will be taken as surrounding verses.
+		//    A <title> of type other than main, chapter or sub, will be taken as a title for the verse.
+		//    Once one of these conditions is met, the division between chapter introduction and pre-verse is set.
 		// 3) Between verses, the material is split between the prior verse and the next verse.
 		//    Basically, while end and empty tags are found, they belong to the prior verse.
 		//    Once a begin tag is found, it belongs to the next verse.
-		// If the title has an attribute type of "main" or "chapter"
-		// it belongs to its <div> or <chapter> and is treated as part of its heading
-		// Otherwise if it a title in a chapter before the first the first verse it
-		// is put into the verse as a preverse title.
-
-		if (!inPreVerse && !inBookHeader) {
-			if (inChapterHeader) {
+		if (!inPreVerse && !inBookIntro) {
+			if (inChapterIntro) {
 				// Determine when we are no longer in a chapter heading, but in pre-verse material:
 				// If we see one of the following:
 				// 	a section div
-				// 	a title that is not main or chapter
-				if ((!strcmp(tokenName, "div") && (typeAttr && !strcmp(typeAttr, "section"))) ||
-				    (!strcmp(tokenName, "title") && (!typeAttr || (strcmp(typeAttr, "main") && strcmp(typeAttr, "chapter"))))
-				   ) {
-					// Since we have found the boundary, we need to write out the chapter heading
-					writeEntry(text);
+				// 	a title that is not main, chapter or sub or unclassified (no type attribute)
+				if ((tokenName == "div" && typeAttr == "section") ||
+				    (tokenName == "title" && typeAttr.length() != 0 && typeAttr != "main" && typeAttr != "chapter" && typeAttr != "sub")
+				) {
+					if (debug & DEBUG_TITLE) {
+						cout << "DEBUG(TITLE): " << currentOsisID << ": Done looking for chapter introduction" << endl;
+					}
+
+					if (text.length()) {
+						if (debug & DEBUG_TITLE) {
+							cout << "DEBUG(TITLE): " << currentOsisID << ": CHAPTER INTRO "<< text << endl;
+						}
+
+						// Since we have found the boundary, we need to write out the chapter heading
+						writeEntry(text);
+					}
 					// And we are no longer in the chapter heading
-					inChapterHeader = false;
+					inChapterIntro  = false;
 					// But rather, we are now in pre-verse material
 					inPreVerse      = true;
 				}
@@ -906,7 +919,7 @@
 		}
 
 		if (debug & DEBUG_INTERVERSE) {
-			if (!inVerse && !inBookHeader && !inChapterHeader) {
+			if (!inVerse && !inBookIntro && !inChapterIntro) {
 				cout << "DEBUG(INTERVERSE): " << currentOsisID << ": interverse start token " << token << ":" << text.c_str() << endl;
 			}
 		}
@@ -933,7 +946,7 @@
 
 			tagStack.pop();
 
-			if (strcmp(topToken.getName(), tokenName)) {
+			if (tokenName != topToken.getName()) {
 				cout << "FATAL(NESTING): " << currentOsisID << ": Expected " << topToken.getName() << " found " << tokenName << endl;
 //				exit(EXIT_BAD_NESTING);	// (OSK) I'm sure this validity check is a good idea, but there's a but somewhere that's killing the converter here.
 						// So I'm disabling this line. Unvalidated OSIS files shouldn't be run through the converter anyway.
@@ -948,7 +961,9 @@
 		}
 
 		// VERSE and COMMENTARY END
-		if ((!strcmp(tokenName, "verse")) || ((!strcmp(tokenName, "div")) && (eidAttr == sidVerse))) {
+		if ((tokenName == "verse") ||
+		    (tokenName == "div" && eidAttr == sidVerse)
+		) {
 
 			if (tagDepth != verseDepth) {
 				cout << "WARNING(NESTING): verse " << currentOsisID << " is not well formed:(" << verseDepth << "," << tagDepth << ")" << endl;
@@ -961,7 +976,7 @@
 
 
 			// Include the token if it is not a verse
-			if (strcmp(tokenName, "verse")) {
+			if (tokenName != "verse") {
 				text.append(token);
 			}
 			else if (debug & DEBUG_VERSE)
@@ -989,7 +1004,7 @@
 		// Handle WOC quotes.
 		// Note this requires transformBSP to make them into milestones
 		// Otherwise have to manage it here
-		if (!strcmp(tokenName, "q")) {
+		if (tokenName == "q") {
 			XMLTag topToken = quoteStack.top();
 
 			if (debug & DEBUG_QUOTE) {
@@ -1039,9 +1054,11 @@
 
 		// Look for the end of document, book and chapter
 		// Also for material that goes with last entry
-		if (!inVerse && !inBookHeader && !inChapterHeader) {
+		if (!inVerse && !inBookIntro && !inChapterIntro) {
 			// Is this the end of a chapter.
-			if (((!strcmp(tokenName, "div")) && (eidAttr == sidChapter)) || (!strcmp(tokenName, "chapter"))) {
+			if ((tokenName == "chapter") ||
+			    (tokenName == "div" && eidAttr == sidChapter)
+			) {
 				text.append(token);
 				writeEntry(text);
 				inChapter    = false;
@@ -1052,7 +1069,7 @@
 			}
 
 			// Is it the end of a book
-			if ((!strcmp(tokenName, "div")) && (eidAttr == sidBook)) {
+			if (tokenName == "div" && eidAttr == sidBook) {
 				text.append(token);
 				writeEntry(text);
 				bookDepth    = 0;
@@ -1062,7 +1079,7 @@
 			}
 
 			// Do not include the end of an osis document
-			if (!strcmp(tokenName, "osisText") || !strcmp(tokenName, "osis")) {
+			if (tokenName == "osisText" || tokenName == "osis") {
 				bookDepth    = 0;
 				chapterDepth = 0;
 				verseDepth   = 0;
@@ -1122,10 +1139,10 @@
 		return t;
 	}
 
-	const char* tagName = t.getName();
+	SWBuf tagName = t.getName();
 	if (!t.isEndTag()) {
 		// Transform <p> into <div type="paragraph"> and milestone it
-		if (!strcmp(tagName, "p")) {
+		if (tagName == "p") {
 			t.setText("<div type=\"paragraph\" />");
 			sprintf(buf, "gen%d", sID++);
 			t.setAttribute("sID", buf);
@@ -1138,17 +1155,17 @@
 		//   abbr	When would this ever cross a boundary?
 		//   seg	as it is used for a divineName hack
 		//   foreign	so that it can be easily italicized
-		else if (!strcmp(tagName, "chapter") ||
-			 !strcmp(tagName, "closer")  ||
-			 !strcmp(tagName, "div")     ||
-			 !strcmp(tagName, "l")       ||
-			 !strcmp(tagName, "lg")      ||
-			 !strcmp(tagName, "q")       ||
-			 !strcmp(tagName, "salute")  ||
-			 !strcmp(tagName, "signed")  ||
-			 !strcmp(tagName, "speech")  ||
-			 !strcmp(tagName, "verse")
-			) {
+		else if (tagName == "chapter" ||
+			 tagName == "closer"  ||
+			 tagName == "div"     ||
+			 tagName == "l"       ||
+			 tagName == "lg"      ||
+			 tagName == "q"       ||
+			 tagName == "salute"  ||
+			 tagName == "signed"  ||
+			 tagName == "speech"  ||
+			 tagName == "verse"
+		) {
 			t.setEmpty(true);
 			sprintf(buf, "gen%d", sID++);
 			t.setAttribute("sID", buf);
@@ -1171,18 +1188,18 @@
 		bspTagStack.pop();
 
 		// Look for the milestoneable container tags handled above.
-		if (!strcmp(tagName, "chapter") ||
-			 !strcmp(tagName, "closer")  ||
-			 !strcmp(tagName, "div")     ||
-			 !strcmp(tagName, "l")       ||
-			 !strcmp(tagName, "lg")      ||
-			 !strcmp(tagName, "p")       ||
-			 !strcmp(tagName, "q")       ||
-			 !strcmp(tagName, "salute")  ||
-			 !strcmp(tagName, "signed")  ||
-			 !strcmp(tagName, "speech")  ||
-			 !strcmp(tagName, "verse")
-			) {
+		if (tagName == "chapter" ||
+		    tagName == "closer"  ||
+		    tagName == "div"     ||
+		    tagName == "l"       ||
+		    tagName == "lg"      ||
+		    tagName == "p"       ||
+		    tagName == "q"       ||
+		    tagName == "salute"  ||
+		    tagName == "signed"  ||
+		    tagName == "speech"  ||
+		    tagName == "verse"
+		) {
 			// make this a clone of the start tag with sID changed to eID
 			// Note: in the case of </p> the topToken is a <div type="paragraph">
 			t = topToken;