[sword-svn] r2661 - in trunk: . m4 src/modules

scribe at crosswire.org scribe at crosswire.org
Sat Oct 29 08:16:41 MST 2011


Author: scribe
Date: 2011-10-29 08:16:41 -0700 (Sat, 29 Oct 2011)
New Revision: 2661

Modified:
   trunk/configure.ac
   trunk/m4/acx_clucene.m4
   trunk/src/modules/swmodule.cpp
Log:
Committing patch from GHellings to support both CLucene 0.9x and 2.x
Leaving the CMake stuff for Greg to commit himself so CMake blame stays with him ;)


Modified: trunk/configure.ac
===================================================================
--- trunk/configure.ac	2011-10-25 18:09:21 UTC (rev 2660)
+++ trunk/configure.ac	2011-10-29 15:16:41 UTC (rev 2661)
@@ -39,7 +39,10 @@
 
 AC_C_BIGENDIAN
 
+PKG_CHECK_MODULES([CLUCENE2], [libclucene-core >= 2.3])
+if test "x$CLUCENE2_LIBS" = x; then
 ACX_CLUCENE
+fi
 
 # ---------------------------------------------------------------------
 # With options
@@ -57,7 +60,7 @@
 #AC_ARG_WITH(lucene,
 #	AC_HELP_STRING([--with-lucene],[include lucene support for searching (default=no)]),,with_lucene=no)
 AC_ARG_WITH([internalregex],
-	AS_HELP_STRING([--with-internalregex], [Compile using SWORD's internal copy of regex]))
+	AS_HELP_STRING([--with-internalregex], [Compile using SWORDs internal copy of regex]))
 
 
 # ---------------------------------------------------------------------
@@ -250,15 +253,23 @@
 # ---------------------------------------------------------------------
 
 with_clucene=no
-if test -z "$CLUCENE_LIBS"; then
-   echo "lucene searching options not available"
+if test "x$CLUCENE2_LIBS" != x; then
+   echo "clucene 2.x found - lucene searching options available"
+   AM_CXXFLAGS="$AM_CXXFLAGS $CLUCENE2_CFLAGS -DUSELUCENE -DCLUCENE2"
+   AM_CFLAGS="$AM_CFLAGS $CLUCENE2_CFLAGS -DUSELUCENE -DCLUCENE2"
+   LIBS="$LIBS $CLUCENE2_LIBS"
+   with_clucene="yes 2.x"
 else
-   echo "lucene found - lucene searching options available"
+if test "x$CLUCENE_LIBS" != x; then
+   echo "lucene 0.x found - lucene searching options available"
    AM_CXXFLAGS="$AM_CXXFLAGS $CLUCENE_CXXFLAGS -DUSELUCENE"
    AM_CFLAGS="$AM_CFLAGS -DUSELUCENE"
    LIBS="$LIBS $CLUCENE_LIBS"
-   with_clucene="yes"
+   with_clucene="yes 0.x"
+else
+   echo "lucene searching options not available"
 fi
+fi
 
 AC_CHECK_FUNCS(vsnprintf, [have_vsnprintf="yes"])
 
@@ -298,7 +309,6 @@
 AC_SUBST(target_mingw32)
 
 AC_SUBST(CURL_LIBS)
-AC_SUBST(CLUCENE_LIBS)
 AC_SUBST(ICU_LIBS)
 AC_SUBST(ICU_IOLIBS)
 
@@ -313,7 +323,7 @@
 AM_CONDITIONAL(HAVE_ICUSWORD, test x$with_icusword = xyes)
 AM_CONDITIONAL(HAVE_VSNPRINTF, test x$have_vsnprintf = xyes)
 
-AM_CONDITIONAL(USELUCENE, test x$with_clucene = xyes)
+AM_CONDITIONAL(USELUCENE, test "x$with_clucene" != xno)
 AM_CONDITIONAL(SHAREDLIB, test x$enable_shared = xyes)
 AM_CONDITIONAL(INSTCONF, test x$with_conf = xyes)
 AM_CONDITIONAL(WITHCURL, test x$with_curl = xyes)

Modified: trunk/m4/acx_clucene.m4
===================================================================
--- trunk/m4/acx_clucene.m4	2011-10-25 18:09:21 UTC (rev 2660)
+++ trunk/m4/acx_clucene.m4	2011-10-29 15:16:41 UTC (rev 2661)
@@ -9,7 +9,7 @@
 	AC_HELP_STRING([ --with-clucene=<path>],
 		[prefix of CLucene-Core installation. e.g. /usr/local or /usr]),,)
 
-AC_MSG_CHECKING([how to include clucene])
+AC_MSG_CHECKING([how to include clucene 0.x])
 if test "x$with_clucene" = "xno"; then
 	AC_MSG_RESULT(excluding support)
 else

Modified: trunk/src/modules/swmodule.cpp
===================================================================
--- trunk/src/modules/swmodule.cpp	2011-10-25 18:09:21 UTC (rev 2660)
+++ trunk/src/modules/swmodule.cpp	2011-10-29 15:16:41 UTC (rev 2661)
@@ -40,7 +40,6 @@
 
 #ifdef USELUCENE
 #include <CLucene.h>
-#include <CLucene/CLBackwards.h>
 
 //Lucence includes
 //#include "CLucene.h"
@@ -497,10 +496,6 @@
 
 #ifdef USELUCENE
 	if (searchType == -4) {	// lucene
-		//Buffers for the wchar<->utf8 char* conversion
-		const unsigned short int MAX_CONV_SIZE = 2047;
-		wchar_t wcharBuffer[MAX_CONV_SIZE + 1];
-		char utfBuffer[MAX_CONV_SIZE + 1];
 		
 		lucene::index::IndexReader    *ir = 0;
 		lucene::search::IndexSearcher *is = 0;
@@ -513,20 +508,18 @@
 
 			const TCHAR *stopWords[] = { 0 };
 			standard::StandardAnalyzer analyzer(stopWords);
-			lucene_utf8towcs(wcharBuffer, istr, MAX_CONV_SIZE); //TODO Is istr always utf8?
-			q = QueryParser::parse(wcharBuffer, _T("content"), &analyzer);
+			q = QueryParser::parse((wchar_t *)utf8ToWChar(istr).getRawData(), _T("content"), &analyzer);
 			(*percent)(20, percentUserData);
 			h = is->search(q);
 			(*percent)(80, percentUserData);
 
 			// iterate thru each good module position that meets the search
 			bool checkBounds = getKey()->isBoundSet();
-			for (long i = 0; i < h->length(); i++) {
+			for (unsigned long i = 0; i < h->length(); i++) {
 				Document &doc = h->doc(i);
 
 				// set a temporary verse key to this module position
-				lucene_wcstoutf8(utfBuffer, doc.get(_T("key")), MAX_CONV_SIZE);	
-				*resultKey = utfBuffer; //TODO Does a key always accept utf8?
+				*resultKey = wcharToUTF8(doc.get(_T("key"))); //TODO Does a key always accept utf8?
 
 				// check to see if it sets ok (within our bounds) and if not, skip
 				if (checkBounds) {
@@ -1022,7 +1015,6 @@
 	SWBuf c;
 
 	const int MAX_CONV_SIZE = 1024 * 1024;
-	wchar_t *wcharBuffer = new wchar_t[MAX_CONV_SIZE + 1];
 
 	// turn all filters to default values
 	StringList filterSettings;
@@ -1156,11 +1148,8 @@
 				}
 			}
 
-			lucene_utf8towcs(wcharBuffer, keyText, MAX_CONV_SIZE); //keyText must be utf8
-//			doc->add( *(new Field("key", wcharBuffer, Field::STORE_YES | Field::INDEX_TOKENIZED)));
-			doc->add( *Field::Text(_T("key"), wcharBuffer ) );
+			doc->add(*_CLNEW Field(_T("key"), (wchar_t *)utf8ToWChar(keyText).getRawData(), Field::STORE_YES | Field::INDEX_UNTOKENIZED));
 
-
 			if (includeKeyInSearch) {
 				c = keyText;
 				c += " ";
@@ -1168,12 +1157,10 @@
 				content = c.c_str();
 			}
 
-			lucene_utf8towcs(wcharBuffer, content, MAX_CONV_SIZE); //content must be utf8
-			doc->add( *Field::UnStored(_T("content"), wcharBuffer) );
+			doc->add(*_CLNEW Field(_T("content"), (wchar_t *)utf8ToWChar(content).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED));
 
 			if (strong.length() > 0) {
-				lucene_utf8towcs(wcharBuffer, strong, MAX_CONV_SIZE);
-				doc->add( *Field::UnStored(_T("lemma"), wcharBuffer) );
++				doc->add(*_CLNEW Field(_T("lemma"), (wchar_t *)utf8ToWChar(strong).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED));
 //printf("setting fields (%s).\ncontent: %s\nlemma: %s\n", (const char *)*key, content, strong.c_str());
 			}
 
@@ -1290,16 +1277,11 @@
 
 		if (proxBuf.length() > 0) {
 
-			lucene_utf8towcs(wcharBuffer, proxBuf, MAX_CONV_SIZE); //keyText must be utf8
-
-//printf("proxBuf after (%s).\nprox: %s\nproxLem: %s\n", (const char *)*key, proxBuf.c_str(), proxLem.c_str());
-
-			doc->add( *Field::UnStored(_T("prox"), wcharBuffer) );
+			doc->add(*_CLNEW Field(_T("prox"), (wchar_t *)utf8ToWChar(proxBuf).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED));
 			good = true;
 		}
 		if (proxLem.length() > 0) {
-			lucene_utf8towcs(wcharBuffer, proxLem, MAX_CONV_SIZE); //keyText must be utf8
-			doc->add( *Field::UnStored(_T("proxlem"), wcharBuffer) );
+			doc->add(*_CLNEW Field(_T("proxlem"), (wchar_t *)utf8ToWChar(proxLem).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED) );
 			good = true;
 		}
 		if (good) {
@@ -1317,20 +1299,32 @@
 	//coreWriter->optimize();
 	coreWriter->close();
 
+#ifdef CLUCENE2
+	d = FSDirectory::getDirectory(target.c_str());
+#endif
 	if (IndexReader::indexExists(target.c_str())) {
+#ifndef CLUCENE2
 		d = FSDirectory::getDirectory(target.c_str(), false);
+#endif
 		if (IndexReader::isLocked(d)) {
 			IndexReader::unlock(d);
 		}
-
 		fsWriter = new IndexWriter( d, an, false);
-	} else {
+	}
+	else {
+#ifndef CLUCENE2
 		d = FSDirectory::getDirectory(target.c_str(), true);
+#endif
 		fsWriter = new IndexWriter(d, an, true);
 	}
 
 	Directory *dirs[] = { ramDir, 0 };
+#ifdef CLUCENE2
+	lucene::util::ConstValueArray< lucene::store::Directory *>dirsa(dirs, 1);
+	fsWriter->addIndexes(dirsa);
+#else
 	fsWriter->addIndexes(dirs);
+#endif
 	fsWriter->close();
 
 	delete ramDir;
@@ -1357,8 +1351,6 @@
 		(*filter)->setOptionValue(*origVal++);
 	}
 
-	delete [] wcharBuffer;
-
 	return 0;
 #else
 	return SWSearchable::createSearchFramework(percent, percentUserData);




More information about the sword-cvs mailing list