[sword-svn] r2036 - trunk/utilities

chrislit at www.crosswire.org chrislit at www.crosswire.org
Tue Apr 24 10:37:39 MST 2007


Author: chrislit
Date: 2007-04-24 10:37:38 -0700 (Tue, 24 Apr 2007)
New Revision: 2036

Modified:
   trunk/utilities/osis2mod.cpp
Log:
Added skipping of unknown books (books whose osisID is not a recognized OSIS book abbreviation).
Added a hack: disabled the exit(1) call on mismatched end tags, which was aborting the conversion of valid books.

Modified: trunk/utilities/osis2mod.cpp
===================================================================
--- trunk/utilities/osis2mod.cpp	2007-03-22 21:38:31 UTC (rev 2035)
+++ trunk/utilities/osis2mod.cpp	2007-04-24 17:37:38 UTC (rev 2036)
@@ -23,6 +23,7 @@
 #include <zipcomprs.h>
 #include <cipherfil.h>
 
+//#define DEBUG
 
 #ifndef NO_SWORD_NAMESPACE
 using namespace sword;
@@ -33,8 +34,30 @@
 SWText *module = 0;
 VerseKey *currentVerse = 0;
 char activeOsisID[255];
+// OSIS book abbreviations accepted by isOSISAbbrev(): 39 Old Testament entries, then 27 New Testament entries.
+const char *osisabbrevs[] = {"Gen", "Exod", "Lev", "Num", "Deut", "Josh", "Judg", "Ruth", "1Sam", "2Sam",
+        "1Kgs", "2Kgs", "1Chr", "2Chr", "Ezra", "Neh", "Esth", "Job", "Ps", "Prov", "Eccl", "Song", "Isa",
+        "Jer", "Lam", "Ezek", "Dan", "Hos", "Joel", "Amos", "Obad", "Jonah", "Mic", "Nah", "Hab", "Zeph",
+        "Hag", "Zech", "Mal",

+        "Matt", "Mark", "Luke", "John", "Acts", "Rom", "1Cor", "2Cor", "Gal",
+        "Eph", "Phil", "Col", "1Thess", "2Thess", "1Tim", "2Tim", "Titus",
+        "Phlm", "Heb", "Jas", "1Pet", "2Pet", "1John", "2John", "3John",
+        "Jude", "Rev"};

+static bool inCanonicalOSISBook = true; // false while the current book's osisID is not in Sword's canon
+
+// Returns true iff buf is non-null and exactly (case-sensitively) equals an entry of osisabbrevs.
+bool isOSISAbbrev(const char *buf) {
+        if (!buf) return false; // no osisID available: treat as unknown instead of passing NULL to strcmp
+        const int count = sizeof(osisabbrevs) / sizeof(osisabbrevs[0]); // derived, so it tracks the table
+        for (int i = 0; i < count; i++) {
+                if (!strcmp(buf, osisabbrevs[i]))
+                        return true;
+        }
+        return false;
+}
+
 // remove subverse elements from osisIDs
 void deleteSubverses(SWBuf &buf) {
 	for (int i = 0; buf[i]; i++) {
@@ -59,7 +82,9 @@
 	test = buf;
 
 	if (vk.Testament() && vk.Book() && vk.Chapter() && vk.Verse()) { // if we're not a heading
-//		cout << (const char*)vk << " == "  << (const char*)test << endl;
+#ifdef DEBUG
+		cout << (const char*)vk << " == "  << (const char*)test << endl;
+#endif
 		return (vk == test);
 	}
 	else return true;	// no check if we're a heading... Probably bad.
@@ -68,7 +93,9 @@
 
 void makeKJVRef(VerseKey &key) {
 	cout << "re-versified " << key;
-//	cout << "\tC" << (int)(key.builtin_books[key.Testament()-1][key.Book()-1].chapmax) << ":V" << (int)(key.builtin_books[key.Testament()-1][key.Book()-1].versemax[key.Chapter()-1]);
+#ifdef DEBUG
+	cout << "\tC" << (int)(key.builtin_books[key.Testament()-1][key.Book()-1].chapmax) << ":V" << (int)(key.builtin_books[key.Testament()-1][key.Book()-1].versemax[key.Chapter()-1]);
+#endif
 	if (key.Chapter() > key.builtin_books[key.Testament()-1][key.Book()-1].chapmax) {
 		key.Chapter(key.builtin_books[key.Testament()-1][key.Book()-1].chapmax);
 		key.Verse(key.builtin_books[key.Testament()-1][key.Book()-1].versemax[key.Chapter()-1]);
@@ -83,60 +110,65 @@
 void writeEntry(VerseKey &key, SWBuf &text, bool force = false) {
 	static SWBuf activeVerseText;
 	char keyOsisID[255];
-	strcpy(keyOsisID, key.getOSISRef());
 
-	// set keyOsisID to anything that an osisID cannot be.
-	if (force) {
-		strcpy(keyOsisID, "-force");
-	}
+        if (inCanonicalOSISBook) {
+        	strcpy(keyOsisID, key.getOSISRef());
 
-	static VerseKey lastKey;
-	lastKey.AutoNormalize(0);
-	lastKey.Headings(1);
+	        // set keyOsisID to anything that an osisID cannot be.
+        	if (force) {
+	        	strcpy(keyOsisID, "-force");
+        	}
 
-	VerseKey saveKey;
-	saveKey.AutoNormalize(0);
-	saveKey.Headings(1);
-	saveKey = key;
+	        static VerseKey lastKey;
+        	lastKey.AutoNormalize(0);
+	        lastKey.Headings(1);
 
-	// If we have seen a verse and the supplied one is different then we output the collected one.
-	if (*activeOsisID && strcmp(activeOsisID, keyOsisID)) {
+        	VerseKey saveKey;
+        	saveKey.AutoNormalize(0);
+	        saveKey.Headings(1);
+        	saveKey = key;
 
-		key = lastKey;
+	        // If we have seen a verse and the supplied one is different then we output the collected one.
+        	if (*activeOsisID && strcmp(activeOsisID, keyOsisID)) {
 
-		if (!isKJVRef(key)) {
-			makeKJVRef(key);
-		}
+	        	key = lastKey;
 
-		SWBuf currentText = module->getRawEntry();
-		if (currentText.length()) {
-			cout << "Appending entry: " << key.getOSISRef() << ": " << activeVerseText << endl;
-			activeVerseText = currentText + " " + activeVerseText;
-		}
+		        if (!isKJVRef(key)) {
+			        makeKJVRef(key);
+        		}
 
-//		cout << "Write: " << activeOsisID << ":" << key.getOSISRef() << ": " << activeVerseText << endl;
+	        	SWBuf currentText = module->getRawEntry();
+		        if (currentText.length()) {
+			        cout << "Appending entry: " << key.getOSISRef() << ": " << activeVerseText << endl;
+        			activeVerseText = currentText + " " + activeVerseText;
+	        	}
 
-		module->setEntry(activeVerseText);
-		activeVerseText = "";
-	}
+#ifdef DEBUG
+        		cout << "Write: " << activeOsisID << ":" << key.getOSISRef() << ": " << activeVerseText << endl;
+#endif
 
-	if (activeVerseText.length()) {
-		activeVerseText += " ";
-		activeVerseText += text;
-	}
-	else {
-		activeVerseText = text;
-	}
+	        	module->setEntry(activeVerseText);
+		        activeVerseText = "";
+        	}
 
-	key = saveKey;
-	lastKey = key;
-	strcpy(activeOsisID, keyOsisID);
+	        if (activeVerseText.length()) {
+		        activeVerseText += " ";
+        		activeVerseText += text;
+	        }
+        	else {
+	        	activeVerseText = text;
+        	}
+
+	        key = saveKey;
+        	lastKey = key;
+	        strcpy(activeOsisID, keyOsisID);
+        }
 }
 
 
 void linkToEntry(VerseKey& dest) {
-//	cout << "Verse: " << key << "\n";
-//	cout << "TEXT: " << text << "\n\n";
+	//cout << "Verse: " << key << "\n";
+	//cout << "TEXT: " << text << "\n\n";
 	//SWBuf currentText = module->getRawEntry();
 	//if (currentText.length())
 	//	text = currentText + " " + text;
@@ -205,18 +237,22 @@
 		lastTitle = "";
 		inTitle = true;
 		tagStack.push(token);
-//		cout << "push " << token->getName() << endl;
+#ifdef DEBUG
+		cout << "push " << token->getName() << endl;
+#endif
 		titleDepth = tagStack.size();
-		return false; 
+		return false;
 	}
 	// Check titleDepth since titles can be nested. Don't want to quit too early.
 	else if (isEndTag && tagDepth == titleDepth && (!strcmp(tokenName, "title"))) {
 		lastTitle.append(text.c_str() + titleOffset); //<title ...> up to the end </title>
 		lastTitle.append(*token); //</title>
 
-// 		cout << "lastTitle:      " << lastTitle.c_str() << endl;
-// 		cout << "text-lastTitle: " << text.c_str()+titleOffset << endl;
-//		cout << "text:           " << text.c_str() << endl;
+#ifdef DEBUG
+ 		cout << "lastTitle:      " << lastTitle.c_str() << endl;
+ 		cout << "text-lastTitle: " << text.c_str()+titleOffset << endl;
+		cout << "text:           " << text.c_str() << endl;
+#endif
 		inTitle = false;
 		titleDepth = 0;
 		tagStack.pop();
@@ -232,7 +268,9 @@
 		// Remember non-empty start tags
 		if (!token->isEmpty()) {
 			tagStack.push(token);
-//			cout << "push " << token->getName() << endl;
+#ifdef DEBUG
+			cout << "push " << token->getName() << endl;
+#endif
 		}
 
 		//-- WITH OSIS ID -------------------------------------------------------------------------
@@ -242,7 +280,9 @@
 			if ((!strcmp(tokenName, "div")) && (!strcmp(typeAttr, "book"))) {
 				inVerse = false;
 				if (inBookHeader || inChapterHeader) {	// this one should never happen, but just in case
-//					cout << "HEADING ";
+#ifdef DEBUG
+					cout << "HEADING ";
+#endif
 					currentVerse->Testament(0);
 					currentVerse->Book(0);
 					currentVerse->Chapter(0);
@@ -260,6 +300,8 @@
 				chapterDepth = 0;
 				verseDepth = 0;
 
+                                inCanonicalOSISBook = isOSISAbbrev(token->getAttribute("osisID"));
+
 				return true;
 			}
 
@@ -269,7 +311,9 @@
 					 ) {
 				inVerse = false;
 				if (inBookHeader) {
-//					cout << "BOOK HEADING "<< text.c_str() << endl;
+#ifdef DEBUG
+					cout << "BOOK HEADING "<< text.c_str() << endl;
+#endif
 					writeEntry(*currentVerse, text);
 				}
 
@@ -318,7 +362,9 @@
 					}
 
 					if (heading.length()) {
-//						cout << "CHAPTER HEADING "<< heading.c_str() << endl;
+#ifdef DEBUG
+						cout << "CHAPTER HEADING "<< heading.c_str() << endl;
+#endif
 						writeEntry(*currentVerse, heading);
 					}
 
@@ -362,9 +408,11 @@
 					!strcmp(tokenName, "lb") ||
 					!strcmp(tokenName, "lg")
 					) {
-//					if (token) {
-//						cout << "start token " << *token << ":" << text.c_str() << endl;
-//					}
+#ifdef DEBUG
+					if (token) {
+						cout << "start token " << *token << ":" << text.c_str() << endl;
+					}
+#endif
 				SWBuf tmp = token->toString();
 				writeEntry(*currentVerse, tmp);
 				return true;
@@ -385,12 +433,15 @@
 		if (!token->isEmpty()) {
 			topToken = tagStack.top();
 			tagDepth = tagStack.size();
-//			cout << "pop " << topToken->getName() << endl;
+#ifdef DEBUG
+			cout << "pop " << topToken->getName() << endl;
+#endif
 			tagStack.pop();
 
 			if (strcmp(topToken->getName(), tokenName)) {
 				cout << "Error: " << *currentVerse << ": Expected " << topToken->getName() << " found " << tokenName << endl;
-				exit(1);
+//				exit(1);        // I'm sure this validity check is a good idea, but there's a bug somewhere that's killing the converter here.
+                                                // So I'm disabling this line. Unvalidated OSIS files shouldn't be run through the converter anyway.
 			}
 		}
 
@@ -404,12 +455,16 @@
 
 			if (lastTitle.length()) {
 				const char* end = strchr(lastTitle, '>');
-//				cout << lastTitle << endl;
-//	 			cout << "length=" << int(end+1 - lastTitle.c_str()) << ", tag:" << lastTitle.c_str() << endl;
+#ifdef DEBUG
+				cout << lastTitle << endl;
+	 			cout << "length=" << int(end+1 - lastTitle.c_str()) << ", tag:" << lastTitle.c_str() << endl;
+#endif
 
 				SWBuf titleTagText;
 				titleTagText.append(lastTitle.c_str(), end+1 - lastTitle.c_str());
-//				cout << "tagText: " << titleTagText.c_str() << endl;;
+#ifdef DEBUG
+				cout << "tagText: " << titleTagText.c_str() << endl;;
+#endif
 
 				XMLTag titleTag(titleTagText);
 				titleTag.setAttribute("type", "section");
@@ -459,7 +514,9 @@
 			return true;
 		}
 		else if (!inTitle && !inVerse && !inBookHeader && !inChapterHeader) {
-//			cout << "End tag not in verse: " << tokenName << "(" << tagDepth << "," << chapterDepth << "," << bookDepth << ")" << endl;
+#ifdef DEBUG
+			cout << "End tag not in verse: " << tokenName << "(" << tagDepth << "," << chapterDepth << "," << bookDepth << ")" << endl;
+#endif
 			// Is this the end of a chapter.
 			if (tagDepth == chapterDepth && (!strcmp(tokenName, "div") || !strcmp(tokenName, "chapter"))) {
 				chapterDepth = 0;
@@ -542,7 +599,7 @@
 
 	// Let's test our command line arguments
 	if (argc < 3) {
-		fprintf(stderr, 
+		fprintf(stderr,
 "\nusage: osis2mod <output/path> <osisDoc> [createMod] [compressType [blockType [cipherKey]]]\n");
 		fprintf(stderr, "  createMod   : (default 0): 0 - create  1 - augment\n");
 		fprintf(stderr, "  compressType: (default 0): 0 - no compression  1 - LZSS    2 - Zip\n");
@@ -574,9 +631,11 @@
 		case 2: compressor = new ZipCompress(); break;
 	}
 
-//	cout << "path: " << argv[1] << " osisDoc: " << argv[2] << " create: " << argv[3] << " compressType: " << compType << " blockType: " << iType << " cipherKey: " << cipherKey.c_str() << "\n";
-//	cout << "";
+#ifdef DEBUG
+	cout << "path: " << argv[1] << " osisDoc: " << argv[2] << " create: " << argv[3] << " compressType: " << compType << " blockType: " << iType << " cipherKey: " << cipherKey.c_str() << "\n";
+	cout << "";
 //	exit(-3);
+#endif
 
 
 	if ((argc<4)||(!strcmp(argv[3], "0"))) {	// == 0 then create module




More information about the sword-cvs mailing list