[sword-svn] r3464 - in trunk: . examples/cmdline src/modules

scribe at crosswire.org scribe at crosswire.org
Sun May 21 00:38:20 MST 2017


Author: scribe
Date: 2017-05-21 00:38:20 -0700 (Sun, 21 May 2017)
New Revision: 3464

Modified:
   trunk/ChangeLog
   trunk/configure.ac
   trunk/examples/cmdline/search.cpp
   trunk/src/modules/swmodule.cpp
   trunk/usrinst.sh
Log:
Added ICU-REGEX option to use the ICU regex engine for searching


Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog	2017-05-17 21:21:16 UTC (rev 3463)
+++ trunk/ChangeLog	2017-05-21 07:38:20 UTC (rev 3464)
@@ -1,5 +1,8 @@
 API ChangeLog
 
+21-May-2017	Troy A. Griffitts <scribe at crosswire.org>
+	Added --with-icuregex option to use ICU regex engine
+
 24-Apr-2017	Troy A. Griffitts <scribe at crosswire.org>
 	Branching 1.8.x
 

Modified: trunk/configure.ac
===================================================================
--- trunk/configure.ac	2017-05-17 21:21:16 UTC (rev 3463)
+++ trunk/configure.ac	2017-05-21 07:38:20 UTC (rev 3464)
@@ -72,6 +72,8 @@
 #	AC_HELP_STRING([--with-lucene],[include lucene support for searching (default=no)]),,with_lucene=no)
 AC_ARG_WITH([internalregex],
 	AS_HELP_STRING([--with-internalregex], [Compile using SWORDs internal copy of regex]))
+AC_ARG_WITH([icuregex],
+	AS_HELP_STRING([--with-icuregex], [use ICU regex engine]))
 AC_ARG_WITH(xapian,
 	AC_HELP_STRING([--with-xapian],[use xapian search engine (default=yes)]),,with_xapian=yes)
 
@@ -306,13 +308,24 @@
 fi
 fi
 
-if test x$with_xapian  = xyes; then
+if test x$with_xapian = xyes; then
 	AC_LANG_CPLUSPLUS
 	AC_CHECK_LIB(xapian,main,,with_xapian="no")
 else
 	with_xapian="no"
 fi
 
+if test x$with_icuregex = xyes; then
+	if test x$with_icu = xno; then
+		with_icuregex="requested; but using ICU not enabled"
+	else
+		AM_CFLAGS="$AM_CFLAGS -DUSEICUREGEX"
+		AM_CXXFLAGS="$AM_CXXFLAGS -DUSEICUREGEX"
+	fi
+else
+	with_icuregex="no"
+fi
+
 if test x$with_xapian = xyes; then
 	AM_CFLAGS="$AM_CFLAGS -DUSEXAPIAN"
 	AM_CXXFLAGS="$AM_CXXFLAGS -DUSEXAPIAN"
@@ -425,6 +438,7 @@
 AM_CONDITIONAL(SHAREDLIB, test x$enable_shared = xyes)
 AM_CONDITIONAL(INSTCONF, test x$with_conf = xyes)
 AM_CONDITIONAL(USECXX11REGEX, test x$with_cxx11regex = xyes)
+AM_CONDITIONAL(USEICUREGEX, test x$with_icuregex = xyes)
 AM_CONDITIONAL(WITHCURL, test x$with_curl = xyes)
 AM_CONDITIONAL(WITHCURLSFTP, test x$with_curl_sftp = xyes)
 AM_CONDITIONAL(INTERNALFTPLIB, test x$with_internalftplib = xyes)
@@ -465,7 +479,8 @@
 echo     "     BZIP2:                $with_bzip2"
 echo     "     XZ:                   $with_xz"
 echo     "     ICUSWORD:             $with_icusword"
-echo     "     CXX11REGEX:           $with_cxx11regex"
+echo     "     CXX11-REGEX:          $with_cxx11regex"
+echo     "     ICU-REGEX:            $with_icuregex"
 echo     "     XAPIAN-CORE:          $with_xapian"
 
 

Modified: trunk/examples/cmdline/search.cpp
===================================================================
--- trunk/examples/cmdline/search.cpp	2017-05-17 21:21:16 UTC (rev 3463)
+++ trunk/examples/cmdline/search.cpp	2017-05-21 07:38:20 UTC (rev 3464)
@@ -89,7 +89,7 @@
 
 	SWBuf searchTerm = argv[2];
 	manager.setGlobalOption("Greek Accents", "Off");
-	manager.setGlobalOption("Strong's Numbers", "On");
+	manager.setGlobalOption("Strong's Numbers", "Off");
 	manager.setGlobalOption("Hebrew Vowel Points", "Off");
 	manager.filterText("Greek Accents", searchTerm);
 

Modified: trunk/src/modules/swmodule.cpp
===================================================================
--- trunk/src/modules/swmodule.cpp	2017-05-17 21:21:16 UTC (rev 3463)
+++ trunk/src/modules/swmodule.cpp	2017-05-21 07:38:20 UTC (rev 3464)
@@ -46,6 +46,9 @@
 #endif
 #elif defined(USEICUREGEX)
 #include <unicode/regex.h>
+#ifndef REG_ICASE
+#define REG_ICASE UREGEX_CASE_INSENSITIVE
+#endif
 #else
 #include <regex.h>	// GNU
 #endif
@@ -418,6 +421,8 @@
 	std::locale::global(std::locale("en_US.UTF-8"));
 
 	std::regex preg;
+#elif defined(USEICUREGEX)
+	RegexMatcher *matcher = 0;
 #else
 	regex_t preg;
 #endif
@@ -461,6 +466,14 @@
 	if (searchType >= 0) {
 #ifdef USECXX11REGEX
 		preg = std::regex((SWBuf(".*")+istr+".*").c_str(), std::regex_constants::extended | searchType | flags);
+#elif defined(USEICUREGEX)
+		UErrorCode        status    = U_ZERO_ERROR;
+		matcher = new RegexMatcher(istr, searchType | flags, status);
+		if (U_FAILURE(status)) {
+			SWLog::getSystemLog()->logError("Error compiling Regex: %d", status);
+			return listKey;
+		}
+
 #else
 		flags |=searchType|REG_NOSUB|REG_EXTENDED;
 		int err = regcomp(&preg, istr, flags);
@@ -648,6 +661,11 @@
 			SWBuf textBuf = stripText();
 #ifdef USECXX11REGEX
 			if (std::regex_match(std::string(textBuf.c_str()), preg)) {
+#elif defined(USEICUREGEX)
+			UnicodeString stringToTest = textBuf.c_str();
+			matcher->reset(stringToTest);
+
+			if (matcher->find()) {
 #else
 			if (!regexec(&preg, textBuf, 0, 0, 0)) {
 #endif
@@ -658,6 +676,12 @@
 			}
 #ifdef USECXX11REGEX
 			else if (std::regex_match(std::string((lastBuf + ' ' + textBuf).c_str()), preg)) {
+#elif defined(USEICUREGEX)
+			else {
+				stringToTest = (lastBuf + ' ' + textBuf).c_str();
+				matcher->reset(stringToTest);
+
+				if (matcher->find()) {
 #else
 			else if (!regexec(&preg, lastBuf + ' ' + textBuf, 0, 0, 0)) {
 #endif
@@ -668,6 +692,9 @@
 			else {
 				lastBuf = textBuf;
 			}
+#if defined(USEICUREGEX)
+			}
+#endif
 		}
 
 		// phrase
@@ -851,6 +878,8 @@
 	if (searchType >= 0) {
 #ifdef USECXX11REGEX
 		std::locale::global(oldLocale);
+#elif defined(USEICUREGEX)
+		delete matcher;
 #else
 		regfree(&preg);
 #endif

Modified: trunk/usrinst.sh
===================================================================
--- trunk/usrinst.sh	2017-05-17 21:21:16 UTC (rev 3463)
+++ trunk/usrinst.sh	2017-05-21 07:38:20 UTC (rev 3464)
@@ -35,6 +35,7 @@
 #OPTIONS="--enable-profile $OPTIONS"
 
 #OPTIONS="--with-cxx11regex $OPTIONS"
+OPTIONS="--with-icuregex $OPTIONS"
 #OPTIONS="--with-icusword $OPTIONS"
 #OPTIONS="--without-icu $OPTIONS"
 #OPTIONS="--without-clucene $OPTIONS"




More information about the sword-cvs mailing list