[sword-svn] r3731 - in trunk: examples/cmdline include src/modules

scribe at crosswire.org scribe at crosswire.org
Mon May 4 16:59:18 MST 2020


Author: scribe
Date: 2020-05-04 16:59:18 -0700 (Mon, 04 May 2020)
New Revision: 3731

Modified:
   trunk/examples/cmdline/search.cpp
   trunk/include/swmodule.h
   trunk/src/modules/swmodule.cpp
Log:
Better documented search flags.  Added new flag SEARCHFLAG_STRICTBOUNDARIES to turn off checks across verse boundaries


Modified: trunk/examples/cmdline/search.cpp
===================================================================
--- trunk/examples/cmdline/search.cpp	2020-05-04 23:57:14 UTC (rev 3730)
+++ trunk/examples/cmdline/search.cpp	2020-05-04 23:59:18 UTC (rev 3731)
@@ -50,6 +50,8 @@
 int flags = 0
 // for case insensitivity
 | REG_ICASE
+// for enforcing strict verse boundaries
+| SEARCHFLAG_STRICTBOUNDARIES
 // for use with entryAttrib search type to match whole entry to value, e.g., G1234 and not G12345
 //| SEARCHFLAG_MATCHWHOLEENTRY
 ;

Modified: trunk/include/swmodule.h
===================================================================
--- trunk/include/swmodule.h	2020-05-04 23:57:14 UTC (rev 3730)
+++ trunk/include/swmodule.h	2020-05-04 23:59:18 UTC (rev 3731)
@@ -44,8 +44,13 @@
 class SWOptionFilter;
 class SWFilter;
 
+// used for matching whole entry (not substring) in entry attributes searches.
 #define SEARCHFLAG_MATCHWHOLEENTRY 4096
 
+// used for turning off the default behavior of SWORD to use a sliding search window 
+// which allows hits across verse boundaries.
+#define SEARCHFLAG_STRICTBOUNDARIES 8192
+
 #define SWMODULE_OPERATORS \
 	operator SWBuf() { return renderText(); } \
 	operator SWKey &() { return *getKey(); } \
@@ -388,7 +393,10 @@
 	 *			-3  - entryAttrib (eg. Word//Lemma./G1234/)	 (Lemma with dot means check components (Lemma.[1-9]) also)
 	 *			-4  - Lucene
 	 *			-5  - multilemma window; set 'flags' param to window size (NOT DONE)
-	 * @param flags options flags for search
+	 * @param flags bitwise options flags for search.  Each search type supports different options.
+	 * 			REG_ICASE	- perform case-insensitive search.  Supported by most search types
+	 * 			SEARCHFLAG_*	- SWORD-specific search flags for various search types.  See defines for details
+	 *
 	 * @param scope Key containing the scope. VerseKey or ListKey are useful here.
 	 * @param justCheckIfSupported If set, don't search but instead set this variable to true/false if the requested search is supported,
 	 * @param percent Callback function to get the current search status in %.

Modified: trunk/src/modules/swmodule.cpp
===================================================================
--- trunk/src/modules/swmodule.cpp	2020-05-04 23:57:14 UTC (rev 3730)
+++ trunk/src/modules/swmodule.cpp	2020-05-04 23:59:18 UTC (rev 3731)
@@ -387,6 +387,18 @@
 	SWBuf term = istr;
 	bool includeComponents = false;	// for entryAttrib e.g., /Lemma.1/ 
 
+	// this only works for 1 or 2 verses right now, and for some search types (regex and multi word).
+	// future plans are to extend functionality
+	// By default SWORD allows searches to cross the artificial boundaries of verse markers.
+	// Searches are done in a sliding window of 2 verses right now.
+	// To turn this off, include SEARCHFLAG_STRICTBOUNDARIES in search flags
+	int windowSize = 2;
+	if ((flags & SEARCHFLAG_STRICTBOUNDARIES) && (searchType == -2 || searchType > 0)) {
+		// remove custom SWORD flag to prevent possible overlap with unknown regex option
+		flags ^= SEARCHFLAG_STRICTBOUNDARIES;
+		windowSize = 1;
+	}
+
 	SWBuf target = getConfigEntry("AbsoluteDataPath");
 	if (!target.endsWith("/") && !target.endsWith("\\")) {
 		target.append('/');
@@ -653,6 +665,8 @@
 				"Serious error: new percentage complete is less than previous value\nindex: %d\nhighIndex: %d\nnewperc == %d%% is smaller than\nperc == %d%%",
 				key->getIndex(), highIndex, (int)newperc, (int )perc);
 		}
+
+		// regex
 		if (searchType >= 0) {
 			SWBuf textBuf = stripText();
 #ifdef USECXX11REGEX
@@ -683,23 +697,22 @@
 #endif
 				lastKey->clearBound();
 				listKey << *lastKey;
-				lastBuf = textBuf;
+				lastBuf = (windowSize > 1) ? textBuf : "";
 			}
 			else {
-				lastBuf = textBuf;
+				lastBuf = (windowSize > 1) ? textBuf : "";
 			}
 #if defined(USEICUREGEX)
 			}
 #endif
 		}
 
-		// phrase
 		else {
 			SWBuf textBuf;
 			switch (searchType) {
 
 			// phrase
-			case -1:
+			case -1: {
 				textBuf = stripText();
 				if ((flags & REG_ICASE) == REG_ICASE) textBuf.toUpper();
 				sres = strstr(textBuf.c_str(), term.c_str());
@@ -709,6 +722,7 @@
 					listKey << *resultKey;
 				}
 				break;
+			}
 
 			// multiword
 			case -2: { // enclose our allocations
@@ -754,19 +768,21 @@
 						++stripped;
 					} while ( (stripped < 2) && (foundWords == words.size()));
 					++multiVerse;
-				} while ( (multiVerse < 2) && (stripped != 2 || foundWords != words.size()));
+				} while ((windowSize > 1) && (multiVerse < 2) && (stripped != 2 || foundWords != words.size()));
 
 				if ((stripped == 2) && (foundWords == words.size())) { //we found the right words in both raw and stripped text, which means it's a valid result item
 					*resultKey = (multiVerse == 1) ? *getKey() : *lastKey;
 					resultKey->clearBound();
 					listKey << *resultKey;
 					lastBuf = "";
-					if (twoVerse == 2) {
+					// if we're searching windowSize > 1 and we had a hit which required the current verse
+					// let's start the next window with our current verse in case we have another hit adjacent
+					if (multiVerse == 2) {
 						lastBuf = textBuf;
 					}
 				}
 				else {
-					lastBuf = textBuf;
+					lastBuf = (windowSize > 1) ? textBuf : "";
 				}
 			}
 			break;




More information about the sword-cvs mailing list