[sword-cvs] sword/src/modules/texts/ztext ztext.cpp,1.41,1.42

sword@www.crosswire.org sword@www.crosswire.org
Thu, 28 Aug 2003 23:00:19 -0700


Update of /usr/local/cvsroot/sword/src/modules/texts/ztext
In directory www:/tmp/cvs-serv27914/src/modules/texts/ztext

Modified Files:
	ztext.cpp 
Log Message:
	Abstracted the search interface from SWModule
	Added experimental implementation of fast
		search framework to zText using
		clucene



Index: ztext.cpp
===================================================================
RCS file: /usr/local/cvsroot/sword/src/modules/texts/ztext/ztext.cpp,v
retrieving revision 1.41
retrieving revision 1.42
diff -u -d -r1.41 -r1.42
--- ztext.cpp	12 Aug 2003 05:36:31 -0000	1.41
+++ ztext.cpp	29 Aug 2003 06:00:16 -0000	1.42
@@ -19,6 +19,23 @@
 #include <ztext.h>
 //#include <zlib.h>
 
+#include <map>
+#include <list>
+#include <algorithm>
+#include <regex.h>	// GNU
+
+
+
+using std::map;
+using std::list;
+using std::find;
+
+#ifdef USELUCENE
+#include <CLucene/CLucene.h>
+using namespace lucene::search;
+using namespace lucene::queryParser;
+#endif
+
 SWORD_NAMESPACE_START
 
 /******************************************************************************
@@ -36,6 +53,17 @@
 		: zVerse(ipath, -1, iblockType, icomp), SWText(iname, idesc, idisp, enc, dir, mark, ilang) {
 	blockType = iblockType;
 	lastWriteKey = 0;
+	SWBuf fname;
+	fname = path;
+	ir = 0;
+	is = 0;
+	char ch = fname.c_str()[strlen(fname.c_str())-1];
+	if ((ch != '/') && (ch != '\\'))
+		fname += "/lucene";
+	if (IndexReader::indexExists(fname.c_str())) {
+		ir = &IndexReader::open(fname);
+		is = new IndexSearcher(*ir);
+	}
 }
 
 
@@ -49,6 +77,12 @@
 
 	if (lastWriteKey)
 		delete lastWriteKey;
+
+	if (is)
+		((IndexSearcher *)is)->close();
+
+	if (ir)
+		delete (IndexReader *)ir;
 }
 
 
@@ -216,6 +250,195 @@
 		return tmpVK;
 	}
 	else	return *key;
+}
+
+
+
+typedef  map < SWBuf, list<long> > strlist;
+typedef list<long> longlist;
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+
+signed char zText::createSearchFramework() {
+#ifdef USELUCENE
+	SWKey *savekey = 0;
+	SWKey *searchkey = 0;
+	SWKey textkey;
+	char *word = 0;
+	char *wordBuf = 0;
+
+
+	// save key information so as not to disrupt original
+	// module position
+	if (!key->Persist()) {
+		savekey = CreateKey();
+		*savekey = *key;
+	}
+	else	savekey = key;
+
+	searchkey = (key->Persist())?key->clone():0;
+	if (searchkey) {
+		searchkey->Persist(1);
+		setKey(*searchkey);
+	}
+
+	// position module at the beginning
+	*this = TOP;
+
+	VerseKey *lkey = (VerseKey *)key;
+
+	// iterate thru each entry in module
+
+	IndexWriter* writer = NULL;
+	Directory* d = NULL;
+ 
+	lucene::analysis::SimpleAnalyzer& an = *new lucene::analysis::SimpleAnalyzer();
+	SWBuf target = path;
+	char ch = target.c_str()[strlen(target.c_str())-1];
+	if ((ch != '/') && (ch != '\\'))
+		target += "/lucene";
+
+	if (IndexReader::indexExists(target.c_str())) {
+		d = &FSDirectory::getDirectory(target.c_str(), false);
+		if (IndexReader::isLocked(*d)) {
+			IndexReader::unlock(*d);
+		}
+																		   
+		writer = new IndexWriter(*d, an, false);
+	} else {
+		d = &FSDirectory::getDirectory(target.c_str(), true);
+		writer = new IndexWriter( *d ,an, true);
+	}
+
+
+ 
+	while (!Error()) {
+		Document &doc = *new Document();
+		doc.add( Field::Text(_T("key"), (const char *)*lkey ) );
+		doc.add( Field::Text(_T("content"), StripText()) );
+		writer->addDocument(doc);
+		delete &doc;
+
+		(*this)++;
+	}
+
+	writer->optimize();
+	writer->close();
+	delete writer;
+	delete &an;
+
+	// reposition module back to where it was before we were called
+	setKey(*savekey);
+
+	if (!savekey->Persist())
+		delete savekey;
+
+	if (searchkey)
+		delete searchkey;
+
+	
+#endif
+	return 0;
+}
+
+
+/******************************************************************************
+ * SWModule::Search 	- Searches a module for a string
+ *
+ * ENT:	istr		- string for which to search
+ * 	searchType	- type of search to perform
+ *				>=0 - regex
+ *				-1  - phrase
+ *				-2  - multiword
+ * 	flags		- options flags for search
+ *	justCheckIfSupported	- if set, don't search, only tell if this
+ *							function supports requested search.
+ *
+ * RET: listkey set to verses that contain istr
+ */
+
+ListKey &zText::search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) {
+#ifdef USELUCENE
+	listkey.ClearList();
+
+	if ((is) && (ir)) {
+
+		switch (searchType) {
+		case -3: {
+
+
+			// test to see if our scope for this search is bounded by a
+			// VerseKey
+			VerseKey *testKeyType = 0, vk;
+			try {
+				testKeyType = SWDYNAMIC_CAST(VerseKey, ((scope)?scope:key));
+			}
+			catch ( ... ) {}
+			// if we don't have a VerseKey * decendant we can't handle
+			// because of scope.
+			// In the future, add bool SWKey::isValid(const char *tryString);
+			if (!testKeyType)
+				break;
+
+
+			// check if we just want to see if search is supported.
+			// If we've gotten this far, then it is supported.
+			if (justCheckIfSupported) {
+				*justCheckIfSupported = true;
+				return listkey;
+			}
+
+			(*percent)(10, percentUserData);
+
+			standard::StandardAnalyzer analyzer;
+			Query &q =  QueryParser::Parse(istr, _T("content"), analyzer);
+			(*percent)(20, percentUserData);
+			Hits &h = is->search(q);
+			(*percent)(80, percentUserData);
+
+
+			// iterate thru each good module position that meets the search
+			for (long i = 0; i < h.Length(); i++) {
+				Document &doc = h.doc(i);
+
+				// set a temporary verse key to this module position
+				vk = doc.get(_T("key"));
+
+				// check scope
+				// Try to set our scope key to this verse key
+				if (scope) {
+					*testKeyType = vk;
+
+					// check to see if it set ok and if so, add to our return list
+					if (*testKeyType == vk)
+						listkey << (const char *) vk;
+				}
+				else listkey << (const char*) vk;
+			}
+			(*percent)(98, percentUserData);
+
+			delete &h;
+			delete &q;
+
+			listkey = TOP;
+			(*percent)(100, percentUserData);
+			return listkey;
+		}
+
+		default:
+			break;
+		}
+	}
+
+	// check if we just want to see if search is supported
+	if (justCheckIfSupported) {
+		*justCheckIfSupported = false;
+		return listkey;
+	}
+#endif
+	// if we don't support this search, fall back to base class
+	return SWModule::Search(istr, searchType, flags, scope, justCheckIfSupported, percent, percentUserData);
 }