The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
swmodule.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * swmodule.cpp - code for base class 'SWModule'. SWModule is the basis
4  * for all types of modules (e.g. texts, commentaries,
5  * maps, lexicons, etc.)
6  *
7  * $Id: swmodule.cpp 3846 2021-02-24 21:04:04Z scribe $
8  *
9  * Copyright 1999-2013 CrossWire Bible Society (http://www.crosswire.org)
10  * CrossWire Bible Society
11  * P. O. Box 2528
12  * Tempe, AZ 85280-2528
13  *
14  * This program is free software; you can redistribute it and/or modify it
15  * under the terms of the GNU General Public License as published by the
16  * Free Software Foundation version 2.
17  *
18  * This program is distributed in the hope that it will be useful, but
19  * WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  * General Public License for more details.
22  *
23  */
24 
25 
26 #include <vector>
27 
28 #include <swlog.h>
29 #include <sysdata.h>
30 #include <swmodule.h>
31 #include <utilstr.h>
32 #include <swfilter.h>
33 #include <versekey.h> // KLUDGE for Search
34 #include <treekeyidx.h> // KLUDGE for Search
35 #include <swoptfilter.h>
36 #include <filemgr.h>
37 #include <stringmgr.h>
38 #ifndef _MSC_VER
39 #include <iostream>
40 #endif
41 
42 #if defined(USECXX11REGEX)
43 #include <regex>
44 #ifndef REG_ICASE
45 #define REG_ICASE std::regex::icase
46 #endif
47 #elif defined(USEICUREGEX)
48 #include <unicode/regex.h>
49 #ifndef REG_ICASE
50 #define REG_ICASE UREGEX_CASE_INSENSITIVE
51 #endif
52 #else
53 #include <regex.h> // GNU
54 #endif
55 
56 #if defined USEXAPIAN
57 #include <xapian.h>
58 #elif defined USELUCENE
59 #include <CLucene.h>
60 
61 //Lucene includes
62 //#include "CLucene.h"
63 //#include "CLucene/util/Reader.h"
64 //#include "CLucene/util/Misc.h"
65 //#include "CLucene/util/dirent.h"
66 
67 using namespace lucene::index;
68 using namespace lucene::analysis;
69 using namespace lucene::util;
70 using namespace lucene::store;
71 using namespace lucene::document;
72 using namespace lucene::queryParser;
73 using namespace lucene::search;
74 #endif
75 
76 using std::vector;
77 
79 
81 
82 const signed int SWModule::SEARCHFLAG_MATCHWHOLEENTRY = 4096;
83 const signed int SWModule::SEARCHFLAG_STRICTBOUNDARIES = 8192;
84 
85 const signed int SWModule::SEARCHTYPE_REGEX = 0;
86 const signed int SWModule::SEARCHTYPE_PHRASE = -1;
87 const signed int SWModule::SEARCHTYPE_MULTIWORD = -2;
88 const signed int SWModule::SEARCHTYPE_ENTRYATTR = -3;
89 const signed int SWModule::SEARCHTYPE_EXTERNAL = -4;
90 
91 typedef std::list<SWBuf> StringList;
92 
93 /******************************************************************************
94  * SWModule Constructor - Initializes data for instance of SWModule
95  *
96  * ENT: imodname - Internal name for module
97  * imoddesc - Name to display to user for module
98  * idisp - Display object to use for displaying
99  * imodtype - Type of Module (All modules will be displayed with
100  * others of same type under their modtype heading
101  * unicode - if this module is unicode
102  */
103 
104 SWModule::SWModule(const char *imodname, const char *imoddesc, SWDisplay *idisp, const char *imodtype, SWTextEncoding encoding, SWTextDirection direction, SWTextMarkup markup, const char *imodlang) {
105  key = createKey();
106  entryBuf = "";
107  config = &ownConfig;
108  modname = 0;
109  error = 0;
110  moddesc = 0;
111  modtype = 0;
112  modlang = 0;
113  this->encoding = encoding;
114  this->direction = direction;
115  this->markup = markup;
116  entrySize= -1;
117  disp = (idisp) ? idisp : &rawdisp;
118  stdstr(&modname, imodname);
119  stdstr(&moddesc, imoddesc);
120  stdstr(&modtype, imodtype);
121  stdstr(&modlang, imodlang);
122  stripFilters = new FilterList();
123  rawFilters = new FilterList();
124  renderFilters = new FilterList();
125  optionFilters = new OptionFilterList();
126  encodingFilters = new FilterList();
127  skipConsecutiveLinks = true;
128  procEntAttr = true;
129 }
130 
131 
132 /******************************************************************************
133  * SWModule Destructor - Cleans up instance of SWModule
134  */
135 
137 {
138  if (modname)
139  delete [] modname;
140  if (moddesc)
141  delete [] moddesc;
142  if (modtype)
143  delete [] modtype;
144  if (modlang)
145  delete [] modlang;
146 
147  if (key) {
148  if (!key->isPersist())
149  delete key;
150  }
151 
152  stripFilters->clear();
153  rawFilters->clear();
154  renderFilters->clear();
155  optionFilters->clear();
156  encodingFilters->clear();
157  entryAttributes.clear();
158 
159  delete stripFilters;
160  delete rawFilters;
161  delete renderFilters;
162  delete optionFilters;
163  delete encodingFilters;
164 }
165 
166 
167 /******************************************************************************
168  * SWModule::createKey - Allocates a key of specific type for module
169  *
170  * RET: pointer to allocated key
171  */
172 
174 {
175  return new SWKey();
176 }
177 
178 
179 /******************************************************************************
180  * SWModule::popError - Gets and clears error status
181  *
182  * RET: error status
183  */
184 
186 {
187  char retval = error;
188 
189  error = 0;
190  if (!retval) retval = key->popError();
191  return retval;
192 }
193 
194 
195 /******************************************************************************
196  * SWModule::Name - Sets/gets module name
197  *
198  * ENT: imodname - value which to set modname
199  * [0] - only get
200  *
201  * RET: pointer to modname
202  */
203 
204 const char *SWModule::getName() const {
205  return modname;
206 }
207 
208 
209 /******************************************************************************
210  * SWModule::Description - Sets/gets module description
211  *
212  * ENT: imoddesc - value which to set moddesc
213  * [0] - only get
214  *
215  * RET: pointer to moddesc
216  */
217 
218 const char *SWModule::getDescription() const {
219  return moddesc;
220 }
221 
222 
223 /******************************************************************************
224  * SWModule::Type - Sets/gets module type
225  *
226  * ENT: imodtype - value which to set modtype
227  * [0] - only get
228  *
229  * RET: pointer to modtype
230  */
231 
232 const char *SWModule::getType() const {
233  return modtype;
234 }
235 
236 /******************************************************************************
237  * SWModule::getDirection - Sets/gets module direction
238  *
239  * ENT: newdir - value which to set direction
240  * [-1] - only get
241  *
242  * RET: char direction
243  */
245  return direction;
246 }
247 
248 
249 /******************************************************************************
250  * SWModule::Disp - Sets/gets display driver
251  *
252  * ENT: idisp - value which to set disp
253  * [0] - only get
254  *
255  * RET: pointer to disp
256  */
257 
259  return disp;
260 }
261 
263  disp = idisp;
264 }
265 
266 /******************************************************************************
267  * SWModule::Display - Calls this module's display object and passes itself
268  *
269  * RET: error status
270  */
271 
273  disp->display(*this);
274  return 0;
275 }
276 
277 /******************************************************************************
278  * SWModule::getKey - Gets the key from this module that points to the position
279  * record
280  *
281  * RET: key object
282  */
283 
285  return key;
286 }
287 
288 
289 /******************************************************************************
290  * SWModule::setKey - Sets a key to this module for position to a particular
291  * record
292  *
293  * ENT: ikey - key with which to set this module
294  *
295  * RET: error status
296  */
297 
298 char SWModule::setKey(const SWKey *ikey) {
299  SWKey *oldKey = 0;
300 
301  if (key) {
302  if (!key->isPersist()) // if we have our own copy
303  oldKey = key;
304  }
305 
306  if (!ikey->isPersist()) { // if we are to keep our own copy
307  key = createKey();
308  *key = *ikey;
309  }
310  else key = (SWKey *)ikey; // if we are to just point to an external key
311 
312  if (oldKey)
313  delete oldKey;
314 
315  return error = key->getError();
316 }
317 
318 
319 /******************************************************************************
320  * SWModule::setPosition(SW_POSITION) - Positions this modules to an entry
321  *
322  * ENT: p - position (e.g. TOP, BOTTOM)
323  *
324  * RET: *this
325  */
326 
328  *key = p;
329  char saveError = key->popError();
330 
331  switch (p) {
332  case POS_TOP:
333  this->increment();
334  this->decrement();
335  break;
336 
337  case POS_BOTTOM:
338  this->decrement();
339  this->increment();
340  break;
341  }
342 
343  error = saveError;
344 }
345 
346 
347 /******************************************************************************
348  * SWModule::increment - Increments module key a number of entries
349  *
350  * ENT: increment - Number of entries to jump forward
351  *
352  * RET: *this
353  */
354 
355 void SWModule::increment(int steps) {
356  (*key) += steps;
357  error = key->popError();
358 }
359 
360 
361 /******************************************************************************
362  * SWModule::decrement - Decrements module key a number of entries
363  *
364  * ENT: decrement - Number of entries to jump backward
365  *
366  * RET: *this
367  */
368 
369 void SWModule::decrement(int steps) {
370  (*key) -= steps;
371  error = key->popError();
372 }
373 
374 
397 ListKey &SWModule::search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) {
398 
399  listKey.clear();
400  SWBuf term = istr;
401  bool includeComponents = false; // for entryAttrib e.g., /Lemma.1/
402 
403  // this only works for 1 or 2 verses right now, and for some search types (regex and multi word).
404  // future plans are to extend functionality
405  // By default SWORD defaults to allowing searches to cross the artificial boundaries of verse markers
406  // Searching are done in a sliding window of 2 verses right now.
407  // To turn this off, include SEARCHFLAG_STRICTBOUNDARIES in search flags
408  int windowSize = 2;
409  if ((flags & SEARCHFLAG_STRICTBOUNDARIES) && (searchType == SEARCHTYPE_MULTIWORD || searchType > 0)) {
410  // remove custom SWORD flag to prevent possible overlap with unknown regex option
411  flags ^= SEARCHFLAG_STRICTBOUNDARIES;
412  windowSize = 1;
413  }
414 
415  SWBuf target = getConfigEntry("AbsoluteDataPath");
416  if (!target.endsWith("/") && !target.endsWith("\\")) {
417  target.append('/');
418  }
419 #if defined USEXAPIAN
420  target.append("xapian");
421 #elif defined USELUCENE
422  target.append("lucene");
423 #endif
424  if (justCheckIfSupported) {
425  *justCheckIfSupported = (searchType >= SEARCHTYPE_ENTRYATTR);
426 #if defined USEXAPIAN
427  if ((searchType == SEARCHTYPE_EXTERNAL) && (FileMgr::existsDir(target))) {
428  *justCheckIfSupported = true;
429  }
430 #elif defined USELUCENE
431  if ((searchType == SEARCHTYPE_EXTERNAL) && (IndexReader::indexExists(target.c_str()))) {
432  *justCheckIfSupported = true;
433  }
434 #endif
435  return listKey;
436  }
437 
438  SWKey *saveKey = 0;
439  SWKey *searchKey = 0;
440  SWKey *resultKey = createKey();
441  SWKey *lastKey = createKey();
442  VerseKey *vkCheck = SWDYNAMIC_CAST(VerseKey, resultKey);
443  SWBuf lastBuf = "";
444 
445 #ifdef USECXX11REGEX
446  std::locale oldLocale;
447  std::locale::global(std::locale("en_US.UTF-8"));
448 
449  std::regex preg;
450 #elif defined(USEICUREGEX)
451  icu::RegexMatcher *matcher = 0;
452 #else
453  regex_t preg;
454 #endif
455 
456  vector<SWBuf> words;
457  vector<SWBuf> window;
458  const char *sres;
459  terminateSearch = false;
460  char perc = 1;
461  bool savePEA = isProcessEntryAttributes();
462 
463  // determine if we might be doing special strip searches. useful for knowing if we can use shortcuts
464  bool specialStrips = (getConfigEntry("LocalStripFilter")
465  || (getConfig().has("GlobalOptionFilter", "UTF8GreekAccents"))
466  || (getConfig().has("GlobalOptionFilter", "UTF8HebrewPoints"))
467  || (getConfig().has("GlobalOptionFilter", "UTF8ArabicPoints"))
468  || (strchr(istr, '<')));
469 
470  setProcessEntryAttributes(searchType == SEARCHTYPE_ENTRYATTR);
471 
472 
473  if (!key->isPersist()) {
474  saveKey = createKey();
475  *saveKey = *key;
476  }
477  else saveKey = key;
478 
479  searchKey = (scope)?scope->clone():(key->isPersist())?key->clone():0;
480  if (searchKey) {
481  searchKey->setPersist(true);
482  setKey(*searchKey);
483  }
484 
485  (*percent)(perc, percentUserData);
486 
487  *this = BOTTOM;
488  long highIndex = key->getIndex();
489  if (!highIndex)
490  highIndex = 1; // avoid division by zero errors.
491  *this = TOP;
492  if (searchType >= 0) {
493 #ifdef USECXX11REGEX
494  preg = std::regex((SWBuf(".*")+istr+".*").c_str(), std::regex_constants::extended | searchType | flags);
495 #elif defined(USEICUREGEX)
496  UErrorCode status = U_ZERO_ERROR;
497  matcher = new icu::RegexMatcher(istr, searchType | flags, status);
498  if (U_FAILURE(status)) {
499  SWLog::getSystemLog()->logError("Error compiling Regex: %d", status);
500  return listKey;
501  }
502 
503 #else
504  flags |=searchType|REG_NOSUB|REG_EXTENDED;
505  int err = regcomp(&preg, istr, flags);
506  if (err) {
507  SWLog::getSystemLog()->logError("Error compiling Regex: %d", err);
508  return listKey;
509  }
510 #endif
511  }
512 
513  (*percent)(++perc, percentUserData);
514 
515 
516 #if defined USEXAPIAN || defined USELUCENE
517  (*percent)(10, percentUserData);
518  if (searchType == SEARCHTYPE_EXTERNAL) { // indexed search
519 #if defined USEXAPIAN
520  SWTRY {
521  Xapian::Database database(target.c_str());
522  Xapian::QueryParser queryParser;
523  queryParser.set_default_op(Xapian::Query::OP_AND);
524  SWTRY {
525  queryParser.set_stemmer(Xapian::Stem(getLanguage()));
526  } SWCATCH(...) {}
527  queryParser.set_stemming_strategy(queryParser.STEM_SOME);
528  queryParser.add_prefix("content", "C");
529  queryParser.add_prefix("lemma", "L");
530  queryParser.add_prefix("morph", "M");
531  queryParser.add_prefix("prox", "P");
532  queryParser.add_prefix("proxlem", "PL");
533  queryParser.add_prefix("proxmorph", "PM");
534 
535 #elif defined USELUCENE
536 
537  lucene::index::IndexReader *ir = 0;
538  lucene::search::IndexSearcher *is = 0;
539  Query *q = 0;
540  Hits *h = 0;
541  SWTRY {
542  ir = IndexReader::open(target);
543  is = new IndexSearcher(ir);
544  const TCHAR *stopWords[] = { 0 };
545  standard::StandardAnalyzer analyzer(stopWords);
546 #endif
547 
548  // parse the query
549 #if defined USEXAPIAN
550  Xapian::Query q = queryParser.parse_query(istr);
551  Xapian::Enquire enquire = Xapian::Enquire(database);
552 #elif defined USELUCENE
553  q = QueryParser::parse((wchar_t *)utf8ToWChar(istr).getRawData(), _T("content"), &analyzer);
554 #endif
555  (*percent)(20, percentUserData);
556 
557  // perform the search
558 #if defined USEXAPIAN
559  enquire.set_query(q);
560  Xapian::MSet h = enquire.get_mset(0, 99999);
561 #elif defined USELUCENE
562  h = is->search(q);
563 #endif
564  (*percent)(80, percentUserData);
565 
566  // iterate thru each good module position that meets the search
567  bool checkBounds = getKey()->isBoundSet();
568 #if defined USEXAPIAN
569  Xapian::MSetIterator i;
570  for (i = h.begin(); i != h.end(); ++i) {
571 // cout << "Document ID " << *i << "\t";
572  SW_u64 score = i.get_percent();
573  Xapian::Document doc = i.get_document();
574  *resultKey = doc.get_data().c_str();
575 #elif defined USELUCENE
576  for (unsigned long i = 0; i < (unsigned long)h->length(); i++) {
577  Document &doc = h->doc(i);
578  // set a temporary verse key to this module position
579  *resultKey = wcharToUTF8(doc.get(_T("key"))); //TODO Does a key always accept utf8?
580  SW_u64 score = (SW_u64)((SW_u32)(h->score(i) * 100));
581 #endif
582 
583  // check to see if it sets ok (within our bounds) and if not, skip
584  if (checkBounds) {
585  *getKey() = *resultKey;
586  if (*getKey() != *resultKey) {
587  continue;
588  }
589  }
590  listKey << *resultKey;
591  listKey.getElement()->userData = score;
592  }
593  (*percent)(98, percentUserData);
594  }
595  SWCATCH (...) {
596 #if defined USEXAPIAN
597 #elif defined USELUCENE
598  q = 0;
599 #endif
600  // invalid clucene query
601  }
602 #if defined USEXAPIAN
603 #elif defined USELUCENE
604  delete h;
605  delete q;
606 
607  delete is;
608  if (ir) {
609  ir->close();
610  }
611 #endif
612  }
613 #endif
614 
615  // some pre-loop processing
616  switch (searchType) {
617 
618  case SEARCHTYPE_PHRASE:
619  // let's see if we're told to ignore case. If so, then we'll touppstr our term
620  if ((flags & REG_ICASE) == REG_ICASE) term.toUpper();
621  break;
622 
623  case SEARCHTYPE_MULTIWORD:
624  case -5:
625  // let's break the term down into our words vector
626  while (1) {
627  const char *word = term.stripPrefix(' ');
628  if (!word) {
629  words.push_back(term);
630  break;
631  }
632  words.push_back(word);
633  }
634  if ((flags & REG_ICASE) == REG_ICASE) {
635  for (unsigned int i = 0; i < words.size(); i++) {
636  words[i].toUpper();
637  }
638  }
639  break;
640 
641  // entry attributes
642  case SEARCHTYPE_ENTRYATTR:
643  // let's break the attribute segs down. We'll reuse our words vector for each segment
644  while (1) {
645  const char *word = term.stripPrefix('/');
646  if (!word) {
647  words.push_back(term);
648  break;
649  }
650  words.push_back(word);
651  }
652  if ((words.size()>2) && words[2].endsWith(".")) {
653  includeComponents = true;
654  words[2]--;
655  }
656  break;
657  }
658 
659 
660  // our main loop to iterate the module and find the stuff
661  perc = 5;
662  (*percent)(perc, percentUserData);
663 
664 
665  while ((searchType != SEARCHTYPE_EXTERNAL) && !popError() && !terminateSearch) {
666  long mindex = key->getIndex();
667  float per = (float)mindex / highIndex;
668  per *= 93;
669  per += 5;
670  char newperc = (char)per;
671  if (newperc > perc) {
672  perc = newperc;
673  (*percent)(perc, percentUserData);
674  }
675  else if (newperc < perc) {
677  "Serious error: new percentage complete is less than previous value\nindex: %d\nhighIndex: %d\nnewperc == %d%% is smaller than\nperc == %d%%",
678  key->getIndex(), highIndex, (int)newperc, (int )perc);
679  }
680 
681  // regex
682  if (searchType >= 0) {
683  SWBuf textBuf = stripText();
684 #ifdef USECXX11REGEX
685  if (std::regex_match(std::string(textBuf.c_str()), preg)) {
686 #elif defined(USEICUREGEX)
687  icu::UnicodeString stringToTest = textBuf.c_str();
688  matcher->reset(stringToTest);
689 
690  if (matcher->find()) {
691 #else
692  if (!regexec(&preg, textBuf, 0, 0, 0)) {
693 #endif
694  *resultKey = *getKey();
695  resultKey->clearBounds();
696  listKey << *resultKey;
697  lastBuf = "";
698  }
699 #ifdef USECXX11REGEX
700  else if (std::regex_match(std::string((lastBuf + ' ' + textBuf).c_str()), preg)) {
701 #elif defined(USEICUREGEX)
702  else {
703  stringToTest = (lastBuf + ' ' + textBuf).c_str();
704  matcher->reset(stringToTest);
705 
706  if (matcher->find()) {
707 #else
708  else if (!regexec(&preg, lastBuf + ' ' + textBuf, 0, 0, 0)) {
709 #endif
710  lastKey->clearBounds();
711  if (vkCheck) {
712  resultKey->clearBounds();
713  *resultKey = *getKey();
714  vkCheck->setUpperBound(resultKey);
715  vkCheck->setLowerBound(lastKey);
716  }
717  else {
718  *resultKey = *lastKey;
719  resultKey->clearBounds();
720  }
721  listKey << *resultKey;
722  lastBuf = (windowSize > 1) ? textBuf.c_str() : "";
723  }
724  else {
725  lastBuf = (windowSize > 1) ? textBuf.c_str() : "";
726  }
727 #if defined(USEICUREGEX)
728  }
729 #endif
730  }
731 
732  else {
733  SWBuf textBuf;
734  switch (searchType) {
735 
736  case SEARCHTYPE_PHRASE: {
737  textBuf = stripText();
738  if ((flags & REG_ICASE) == REG_ICASE) textBuf.toUpper();
739  sres = strstr(textBuf.c_str(), term.c_str());
740  if (sres) { //it's also in the stripText(), so we have a valid search result item now
741  *resultKey = *getKey();
742  resultKey->clearBounds();
743  listKey << *resultKey;
744  }
745  break;
746  }
747 
748  case SEARCHTYPE_MULTIWORD: { // enclose our allocations
749  int stripped = 0;
750  int multiVerse = 0;
751  unsigned int foundWords = 0;
752  textBuf = getRawEntry();
753  SWBuf testBuf;
754 
755  // Here we loop twice, once for the current verse, to see if we have a simple match within our verse.
756  // This always takes precedence over a windowed search. If we match a window, but also one verse within
757  // our window matches by itself, prefer the single verse as the hit address-- the larger window is not needed.
758  //
759  // The second loop includes our current verse within the context of the sliding window
760  // Currrently that window size is set to 2 verses, but future plans include allowing this to be configurable
761  //
762  do {
763  // Herein lies optimization.
764  //
765  // First we check getRawEntry because it's the fastest;
766  // it might return false positives because all the markup is include, but is the quickest
767  // way to eliminate a verse. If it passes, then we do the real work to strip the markup and
768  // really test the verse for our keywords.
769  //
770  stripped = 0;
771  do {
772  if (stripped||specialStrips||multiVerse) {
773  testBuf = multiVerse ? lastBuf + ' ' + textBuf : textBuf;
774  if (stripped) testBuf = stripText(testBuf);
775  }
776  else testBuf.setSize(0);
777  foundWords = 0;
778 
779  if ((flags & REG_ICASE) == REG_ICASE) testBuf.size() ? testBuf.toUpper() : textBuf.toUpper();
780  for (unsigned int i = 0; i < words.size(); i++) {
781  sres = strstr(testBuf.size() ? testBuf.c_str() : textBuf.c_str(), words[i].c_str());
782  if (!sres) {
783  break; //for loop
784  }
785  foundWords++;
786  }
787 
788  ++stripped;
789  } while ( (stripped < 2) && (foundWords == words.size()));
790  ++multiVerse;
791  } while ((windowSize > 1) && (multiVerse < 2) && (stripped != 2 || foundWords != words.size()));
792 
793  if ((stripped == 2) && (foundWords == words.size())) { //we found the right words in both raw and stripped text, which means it's a valid result item
794  lastKey->clearBounds();
795  resultKey->clearBounds();
796  *resultKey = (multiVerse > 1 && !vkCheck) ? *lastKey : *getKey();
797  if (multiVerse > 1 && vkCheck) {
798  vkCheck->setUpperBound(resultKey);
799  vkCheck->setLowerBound(lastKey);
800  }
801  else {
802  resultKey->clearBounds();
803  }
804  listKey << *resultKey;
805  lastBuf = "";
806  // if we're searching windowSize > 1 and we had a hit which required the current verse
807  // let's start the next window with our current verse in case we have another hit adjacent
808  if (multiVerse == 2) {
809  lastBuf = textBuf;
810  }
811  }
812  else {
813  lastBuf = (windowSize > 1) ? textBuf.c_str() : "";
814  }
815  }
816  break;
817 
818  case SEARCHTYPE_ENTRYATTR: {
819  renderText(); // force parse
820  AttributeTypeList &entryAttribs = getEntryAttributes();
821  AttributeTypeList::iterator i1Start, i1End;
822  AttributeList::iterator i2Start, i2End;
823  AttributeValue::iterator i3Start, i3End;
824 
825  if ((words.size()) && (words[0].length())) {
826 // cout << "Word: " << words[0] << endl;
827  for (i1Start = entryAttribs.begin(); i1Start != entryAttribs.end(); ++i1Start) {
828 // cout << "stuff: " << i1Start->first.c_str() << endl;
829  }
830  i1Start = entryAttribs.find(words[0]);
831  i1End = i1Start;
832  if (i1End != entryAttribs.end()) {
833  i1End++;
834  }
835  }
836  else {
837  i1Start = entryAttribs.begin();
838  i1End = entryAttribs.end();
839  }
840  for (;i1Start != i1End; i1Start++) {
841  if ((words.size()>1) && (words[1].length())) {
842  i2Start = i1Start->second.find(words[1]);
843  i2End = i2Start;
844  if (i2End != i1Start->second.end())
845  i2End++;
846  }
847  else {
848  i2Start = i1Start->second.begin();
849  i2End = i1Start->second.end();
850  }
851  for (;i2Start != i2End; i2Start++) {
852  if ((words.size()>2) && (words[2].length()) && (!includeComponents)) {
853  i3Start = i2Start->second.find(words[2]);
854  i3End = i3Start;
855  if (i3End != i2Start->second.end())
856  i3End++;
857  }
858  else {
859  i3Start = i2Start->second.begin();
860  i3End = i2Start->second.end();
861  }
862  for (;i3Start != i3End; i3Start++) {
863  if ((words.size()>3) && (words[3].length())) {
864  if (includeComponents) {
865  SWBuf key = i3Start->first.c_str();
866  key = key.stripPrefix('.', true);
867  // we're iterating all 3 level keys, so be sure we match our
868  // prefix (e.g., Lemma, Lemma.1, Lemma.2, etc.)
869  if (key != words[2]) continue;
870  }
871  if (flags & SEARCHFLAG_MATCHWHOLEENTRY) {
872  bool found = !(((flags & REG_ICASE) == REG_ICASE) ? sword::stricmp(i3Start->second.c_str(), words[3]) : strcmp(i3Start->second.c_str(), words[3]));
873  sres = (found) ? i3Start->second.c_str() : 0;
874  }
875  else {
876  sres = ((flags & REG_ICASE) == REG_ICASE) ? stristr(i3Start->second.c_str(), words[3]) : strstr(i3Start->second.c_str(), words[3]);
877  }
878  if (sres) {
879  *resultKey = *getKey();
880  resultKey->clearBounds();
881  listKey << *resultKey;
882  break;
883  }
884  }
885  }
886  if (i3Start != i3End)
887  break;
888  }
889  if (i2Start != i2End)
890  break;
891  }
892  break;
893  }
894  // NOT DONE
895  case -5:
896  AttributeList &words = getEntryAttributes()["Word"];
897  SWBuf kjvWord = "";
898  SWBuf bibWord = "";
899  for (AttributeList::iterator it = words.begin(); it != words.end(); it++) {
900  int parts = atoi(it->second["PartCount"]);
901  SWBuf lemma = "";
902  SWBuf morph = "";
903  for (int i = 1; i <= parts; i++) {
904  SWBuf key = "";
905  key = (parts == 1) ? "Lemma" : SWBuf().setFormatted("Lemma.%d", i).c_str();
906  AttributeValue::iterator li = it->second.find(key);
907  if (li != it->second.end()) {
908  if (i > 1) lemma += " ";
909  key = (parts == 1) ? "LemmaClass" : SWBuf().setFormatted("LemmaClass.%d", i).c_str();
910  AttributeValue::iterator lci = it->second.find(key);
911  if (lci != it->second.end()) {
912  lemma += lci->second + ":";
913  }
914  lemma += li->second;
915  }
916  key = (parts == 1) ? "Morph" : SWBuf().setFormatted("Morph.%d", i).c_str();
917  li = it->second.find(key);
918  // silly. sometimes morph counts don't equal lemma counts
919  if (i == 1 && parts != 1 && li == it->second.end()) {
920  li = it->second.find("Morph");
921  }
922  if (li != it->second.end()) {
923  if (i > 1) morph += " ";
924  key = (parts == 1) ? "MorphClass" : SWBuf().setFormatted("MorphClass.%d", i).c_str();
925  AttributeValue::iterator lci = it->second.find(key);
926  // silly. sometimes morph counts don't equal lemma counts
927  if (i == 1 && parts != 1 && lci == it->second.end()) {
928  lci = it->second.find("MorphClass");
929  }
930  if (lci != it->second.end()) {
931  morph += lci->second + ":";
932  }
933  morph += li->second;
934  }
935  // TODO: add src tags and maybe other attributes
936  }
937  while (window.size() < (unsigned)flags) {
938 
939  }
940  }
941  break;
942  } // end switch
943  }
944  *lastKey = *getKey();
945  (*this)++;
946  }
947 
948 
949  // cleaup work
950  if (searchType >= 0) {
951 #ifdef USECXX11REGEX
952  std::locale::global(oldLocale);
953 #elif defined(USEICUREGEX)
954  delete matcher;
955 #else
956  regfree(&preg);
957 #endif
958  }
959 
960  setKey(*saveKey);
961 
962  if (!saveKey->isPersist())
963  delete saveKey;
964 
965  if (searchKey)
966  delete searchKey;
967  delete resultKey;
968  delete lastKey;
969 
970  listKey = TOP;
971  setProcessEntryAttributes(savePEA);
972 
973 
974  (*percent)(100, percentUserData);
975 
976 
977  return listKey;
978 }
979 
980 
981 /******************************************************************************
982  * SWModule::stripText() - calls all stripfilters on current text
983  *
984  * ENT: buf - buf to massage instead of this modules current text
985  * len - max len of buf
986  *
987  * RET: this module's text at current key location massaged by Strip filters
988  */
989 
990 const char *SWModule::stripText(const char *buf, int len) {
991  static SWBuf local;
992  local = renderText(buf, len, false);
993  return local.c_str();
994 }
995 
996 
1002 const char *SWModule::getRenderHeader() const {
1003  FilterList::const_iterator first = getRenderFilters().begin();
1004  if (first != getRenderFilters().end()) {
1005  return (*first)->getHeader();
1006  }
1007  return "";
1008 }
1009 
1010 
1011 /******************************************************************************
1012  * SWModule::renderText - calls all renderfilters on current module
1013  * position
1014  *
1015  * RET: this module's text at current key location massaged by renderText filters
1016  */
1018  return renderText((const char *)0);
1019 }
1020 
1021 /******************************************************************************
1022  * SWModule::renderText - calls all renderfilters on provided text
1023  * or current module position provided text null
1024  *
1025  * ENT: buf - buffer to render
1026  *
1027  * RET: this module's text at current key location massaged by renderText filters
1028  *
1029  * NOTES: This method is only truly const if called with a provided text; using
1030  * module's current position may produce a new entry attributes map which
1031  * logically violates the const semantic, which is why the above method
1032  * which takes no params is not const, i.e., don't call this method with
1033  * null as text param, but instead use non-const method above. The public
1034  * interface for this method expects a value for the text param. We use it
1035  * internally sometimes calling with null to save duplication of code.
1036  */
1037 
1038 SWBuf SWModule::renderText(const char *buf, int len, bool render) const {
1039  bool savePEA = isProcessEntryAttributes();
1040  if (!buf) {
1041  entryAttributes.clear();
1042  }
1043  else {
1044  setProcessEntryAttributes(false);
1045  }
1046 
1047  SWBuf local;
1048  if (buf)
1049  local = buf;
1050 
1051  SWBuf &tmpbuf = (buf) ? local : getRawEntryBuf();
1052  SWKey *key = 0;
1053  static const char *null = "";
1054 
1055  if (tmpbuf) {
1056  unsigned long size = (len < 0) ? ((getEntrySize()<0) ? strlen(tmpbuf) : getEntrySize()) : len;
1057  if (size > 0) {
1058  key = this->getKey();
1059 
1060  optionFilter(tmpbuf, key);
1061 
1062  if (render) {
1063  renderFilter(tmpbuf, key);
1064  encodingFilter(tmpbuf, key);
1065  }
1066  else stripFilter(tmpbuf, key);
1067  }
1068  }
1069  else {
1070  tmpbuf = null;
1071  }
1072 
1073  setProcessEntryAttributes(savePEA);
1074 
1075  return tmpbuf;
1076 }
1077 
1078 
1079 /******************************************************************************
1080  * SWModule::renderText - calls all renderfilters on current text
1081  *
1082  * ENT: tmpKey - key to use to grab text
1083  *
1084  * RET: this module's text at the tmpKey location massaged by RenderFilters
 *
 * The module is temporarily positioned at tmpKey, rendered, and then put
 * back on the key it had before the call.
1085  */
1086 
1088  SWKey *saveKey;
1089  const char *retVal;
1090 
// remember the current position: copy a non-persistent key, or just keep the
// pointer when the key is persistent
1091  if (!key->isPersist()) {
1092  saveKey = createKey();
1093  *saveKey = *key;
1094  }
1095  else saveKey = key;
1096 
1097  setKey(*tmpKey);
1098 
// NOTE(review): the parameterless renderText() appears to return an SWBuf by
// value (its signature is not visible here -- confirm).  If so, retVal is taken
// from a temporary that is destroyed at the end of this statement and dangles
// by the time it is returned below.
1099  retVal = renderText();
1100 
// restore the original position
1101  setKey(*saveKey);
1102 
// free the copy made above; a persistent saveKey is the caller's original key
1103  if (!saveKey->isPersist())
1104  delete saveKey;
1105 
1106  return retVal;
1107 }
1108 
1109 
1110 /******************************************************************************
1111  * SWModule::stripText - calls all StripTextFilters on current text
1112  *
1113  * ENT: tmpKey - key to use to grab text
1114  *
1115  * RET: this module's text at specified key location massaged by Strip filters
1116  */
1117 
1118 const char *SWModule::stripText(const SWKey *tmpKey) {
1119  SWKey *saveKey;
1120  const char *retVal;
1121 
1122  if (!key->isPersist()) {
1123  saveKey = createKey();
1124  *saveKey = *key;
1125  }
1126  else saveKey = key;
1127 
1128  setKey(*tmpKey);
1129 
1130  retVal = stripText();
1131 
1132  setKey(*saveKey);
1133 
1134  if (!saveKey->isPersist())
1135  delete saveKey;
1136 
1137  return retVal;
1138 }
1139 
1140 /******************************************************************************
1141  * SWModule::getBibliography -Returns bibliographic data for a module in the
1142  * requested format
1143  *
1144  * ENT: bibFormat format of the bibliographic data
1145  *
1146  * RET: bibliographic data in the requested format as a string (BibTeX by default)
1147  */
1148 
1149 SWBuf SWModule::getBibliography(unsigned char bibFormat) const {
1150  SWBuf s;
1151  switch (bibFormat) {
1152  case BIB_BIBTEX:
1153  s.append("@Book {").append(modname).append(", Title = \"").append(moddesc).append("\", Publisher = \"CrossWire Bible Society\"}");
1154  break;
1155  }
1156  return s;
1157 }
1158 
1159 const char *SWModule::getConfigEntry(const char *key) const {
1160  ConfigEntMap::iterator it = config->find(key);
1161  return (it != config->end()) ? it->second.c_str() : 0;
1162 }
1163 
1164 
// NOTE(review): the opening line of this function (inner listing line 1165) is
// absent from this listing.
// Stores the supplied pointer in the member of the same name; nothing is copied here.
1166  this->config = config;
1167 }
1168 
1169 
// NOTE(review): the opening line of this function (inner listing line 1170) and
// the statement of the #else branch (inner listing line 1174) are absent from
// this listing.
// Builds compiled with USELUCENE always report true.
1171 #ifdef USELUCENE
1172  return true;
1173 #else
1175 #endif
1176 }
1177 
// NOTE(review): the opening line of this function (inner listing line 1178) and
// the statement of the #else branch (inner listing line 1188) are absent from
// this listing.
// With USELUCENE this removes the "lucene" directory under the module's
// AbsoluteDataPath config entry.
// NOTE(review): createSearchFramework also writes a "xapian" directory when
// built with USEXAPIAN; no matching removal is visible here -- confirm.
1179 #ifdef USELUCENE
1180  SWBuf target = getConfigEntry("AbsoluteDataPath");
// make sure the path ends in a separator before appending the index dir name
1181  if (!target.endsWith("/") && !target.endsWith("\\")) {
1182  target.append('/');
1183  }
1184  target.append("lucene");
1185 
1186  FileMgr::removeDir(target.c_str());
1187 #else
1189 #endif
1190 }
1191 
1192 
// Builds the full-text search index for this module when compiled with
// USEXAPIAN or USELUCENE; otherwise the call is forwarded to
// SWSearchable::createSearchFramework.
//
// The index lives in <AbsoluteDataPath>/xapian or <AbsoluteDataPath>/lucene.
// Every entry from TOP to the end of the module is visited.  For each one a
// document is assembled from the key, the stripped text, the lemma and morph
// codes, and proximity fields built from the whole chapter or sibling group.
//
// ENT: percent         - progress callback; called with values that only
//                        increase, computed as 5 + 93 * (index / highest index)
//      percentUserData - passed back to percent untouched
//
// RET: 0 on success, -1 when the index directory could not be created
//
// NOTE(review): percent is called without a null check.
// NOTE(review): the "build strong field" loop appears three times verbatim
// below (entry, VerseKey chapter walk, TreeKey sibling walk); it is a natural
// candidate for a shared static helper.
1193 signed char SWModule::createSearchFramework(void (*percent)(char, void *), void *percentUserData) {
1194 
1195 #if defined USELUCENE || defined USEXAPIAN
1196  SWBuf target = getConfigEntry("AbsoluteDataPath");
1197  if (!target.endsWith("/") && !target.endsWith("\\")) {
1198  target.append('/');
1199  }
1200 #if defined USEXAPIAN
1201  target.append("xapian");
1202 #elif defined USELUCENE
1203  const int MAX_CONV_SIZE = 1024 * 1024;
1204  target.append("lucene");
1205 #endif
// createParent is handed a dummy child name so that `target` itself gets created
1206  int status = FileMgr::createParent(target+"/dummy");
1207  if (status) return -1;
1208 
1209  SWKey *saveKey = 0;
1210  SWKey *searchKey = 0;
1211  SWKey textkey;
1212  SWBuf c;
1213 
1214 
1215  // turn all filters to default values
// each filter's current value is remembered in filterSettings, in list order,
// and restored in that same order at the end of this function
1216  StringList filterSettings;
1217  for (OptionFilterList::iterator filter = optionFilters->begin(); filter != optionFilters->end(); filter++) {
1218  filterSettings.push_back((*filter)->getOptionValue());
1219  (*filter)->setOptionValue(*((*filter)->getOptionValues().begin()));
1220 
// these three options are forced to "Off" for indexing, whatever their first listed value is
1221  if ( (!strcmp("Greek Accents", (*filter)->getOptionName())) ||
1222  (!strcmp("Hebrew Vowel Points", (*filter)->getOptionName())) ||
1223  (!strcmp("Arabic Vowel Points", (*filter)->getOptionName()))
1224  ) {
1225  (*filter)->setOptionValue("Off");
1226  }
1227  }
1228 
1229 
// NOTE(review): no statement follows the next comment in this listing (inner
// listing line 1231 is absent).
1230  // be sure we give CLucene enough file handles
1232 
1233  // save key information so as not to disrupt original
1234  // module position
1235  if (!key->isPersist()) {
1236  saveKey = createKey();
1237  *saveKey = *key;
1238  }
1239  else saveKey = key;
1240 
// with a persistent key, iterate over a persistent clone instead of the original object
1241  searchKey = (key->isPersist())?key->clone():0;
1242  if (searchKey) {
1243  searchKey->setPersist(1);
1244  setKey(*searchKey);
1245  }
1246 
1247  bool includeKeyInSearch = getConfig().has("SearchOption", "IncludeKeyInSearch");
1248 
1249  // lets create or open our search index
1250 #if defined USEXAPIAN
1251  Xapian::WritableDatabase database(target.c_str(), Xapian::DB_CREATE_OR_OPEN);
1252  Xapian::TermGenerator termGenerator;
// a failure while setting the stemmer for this language is deliberately ignored
1253  SWTRY {
1254  termGenerator.set_stemmer(Xapian::Stem(getLanguage()));
1255  } SWCATCH(...) {}
1256 
1257 #elif defined USELUCENE
// Lucene: documents are first written to an in-memory index (ramDir/coreWriter)
// and merged into the on-disk index after the main loop
1258  RAMDirectory *ramDir = 0;
1259  IndexWriter *coreWriter = 0;
1260  IndexWriter *fsWriter = 0;
1261  Directory *d = 0;
1262 
// empty stop-word list
1263  const TCHAR *stopWords[] = { 0 };
1264  standard::StandardAnalyzer *an = new standard::StandardAnalyzer(stopWords);
1265 
1266  ramDir = new RAMDirectory();
1267  coreWriter = new IndexWriter(ramDir, an, true);
1268  coreWriter->setMaxFieldLength(MAX_CONV_SIZE);
1269 #endif
1270 
1271 
1272 
1273 
1274  char perc = 1;
// find out which kind of key drives this module; that selects how proximity blocks are built
1275  VerseKey *vkcheck = 0;
1276  vkcheck = SWDYNAMIC_CAST(VerseKey, key);
1277  VerseKey *chapMax = 0;
1278  if (vkcheck) chapMax = (VerseKey *)vkcheck->clone();
1279 
1280  TreeKeyIdx *tkcheck = 0;
1281  tkcheck = SWDYNAMIC_CAST(TreeKeyIdx, key);
1282 
1283 
// jump to the last entry to learn the highest index; used as the progress denominator
1284  *this = BOTTOM;
1285  long highIndex = key->getIndex();
1286  if (!highIndex)
1287  highIndex = 1; // avoid division by zero errors.
1288 
// entry attributes are read below for the lemma/morph data, so processing is forced on
1289  bool savePEA = isProcessEntryAttributes();
1290  setProcessEntryAttributes(true);
1291 
1292  // prox chapter blocks
1293  // position module at the beginning
1294  *this = TOP;
1295 
1296  SWBuf proxBuf;
1297  SWBuf proxLem;
1298  SWBuf proxMorph;
1299  SWBuf strong;
1300  SWBuf morph;
1301 
// main loop: one pass per entry until advancing the module reports an error
1302  char err = popError();
1303  while (!err) {
1304  long mindex = key->getIndex();
1305 
1306  proxBuf = "";
1307  proxLem = "";
1308  proxMorph = "";
1309 
1310  // compute percent complete so we can report to our progress callback
1311  float per = (float)mindex / highIndex;
1312  // between 5%-98%
1313  per *= 93; per += 5;
1314  char newperc = (char)per;
1315  if (newperc > perc) {
1316  perc = newperc;
1317  (*percent)(perc, percentUserData);
1318  }
1319 
1320  // get "content" field
1321  const char *content = stripText();
1322 
// `good` becomes true once the document has anything worth writing
1323  bool good = false;
1324 
1325  // start out entry
1326 #if defined USEXAPIAN
1327  Xapian::Document doc;
1328  termGenerator.set_document(doc);
1329 #elif defined USELUCENE
1330  Document *doc = new Document();
1331 #endif
1332  // get "key" field
1333  SWBuf keyText = (vkcheck) ? vkcheck->getOSISRef() : getKeyText();
1334  if (content && *content) {
1335  good = true;
1336 
1337 
1338  // build "strong" field
// strong collects the Lemma values separated by spaces; morph collects
// "<lemma>@<morph>" pairs (the morph part is empty when no Morph attribute exists)
1339  AttributeTypeList::iterator words;
1340  AttributeList::iterator word;
1341  AttributeValue::iterator strongVal;
1342  AttributeValue::iterator morphVal;
1343 
1344  strong="";
1345  morph="";
1346  words = getEntryAttributes().find("Word");
1347  if (words != getEntryAttributes().end()) {
1348  for (word = words->second.begin();word != words->second.end(); word++) {
// multi-part words carry numbered attributes ("Lemma.1", "Morph.1", ...)
1349  int partCount = atoi(word->second["PartCount"]);
1350  if (!partCount) partCount = 1;
1351  for (int i = 0; i < partCount; i++) {
1352  SWBuf tmp = "Lemma";
1353  if (partCount > 1) tmp.appendFormatted(".%d", i+1);
1354  strongVal = word->second.find(tmp);
1355  if (strongVal != word->second.end()) {
1356  // cheeze. skip empty article tags that weren't assigned to any text
1357  if (strongVal->second == "G3588") {
1358  if (word->second.find("Text") == word->second.end())
1359  continue; // no text? let's skip
1360  }
1361  strong.append(strongVal->second);
1362  morph.append(strongVal->second);
1363  morph.append('@');
// NOTE(review): this inner tmp shadows the "Lemma" tmp declared a few lines up
1364  SWBuf tmp = "Morph";
1365  if (partCount > 1) tmp.appendFormatted(".%d", i+1);
1366  morphVal = word->second.find(tmp);
1367  if (morphVal != word->second.end()) {
1368  morph.append(morphVal->second);
1369  }
1370  strong.append(' ');
1371  morph.append(' ');
1372  }
1373  }
1374  }
1375  }
1376 
1377 #if defined USEXAPIAN
1378  doc.set_data(keyText.c_str());
1379 #elif defined USELUCENE
1380  doc->add(*_CLNEW Field(_T("key"), (wchar_t *)utf8ToWChar(keyText).getRawData(), Field::STORE_YES | Field::INDEX_UNTOKENIZED));
1381 #endif
1382 
// optionally make the key text itself searchable by prefixing it to the content
1383  if (includeKeyInSearch) {
1384  c = keyText;
1385  c += " ";
1386  c += content;
1387  content = c.c_str();
1388  }
1389 
1390 #if defined USEXAPIAN
// content is indexed twice: once without a prefix and once under prefix "C"
1391  termGenerator.index_text(content);
1392  termGenerator.index_text(content, 1, "C");
1393 #elif defined USELUCENE
1394  doc->add(*_CLNEW Field(_T("content"), (wchar_t *)utf8ToWChar(content).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED));
1395 #endif
1396 
1397  if (strong.length() > 0) {
1398 #if defined USEXAPIAN
1399  termGenerator.index_text(strong.c_str(), 1, "L");
1400  termGenerator.index_text(morph.c_str(), 1, "M");
1401 #elif defined USELUCENE
1402  doc->add(*_CLNEW Field(_T("lemma"), (wchar_t *)utf8ToWChar(strong).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED));
1403  doc->add(*_CLNEW Field(_T("morph"), (wchar_t *)utf8ToWChar(morph).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED));
1404 #endif
1405 //printf("setting fields (%s).\ncontent: %s\nlemma: %s\n", (const char *)*key, content, strong.c_str());
1406  }
1407 
1408 //printf("setting fields (%s).\n", (const char *)*key);
1409 //fflush(stdout);
1410  }
1411  // don't write yet, cuz we have to see if we're the first of a prox block (5:1 or chapter5/verse1
1412 
1413  // for VerseKeys use chapter
1414  if (vkcheck) {
1415  *chapMax = *vkcheck;
1416  // we're the first verse in a chapter
1417  if (vkcheck->getVerse() == 1) {
1418  *chapMax = MAXVERSE;
// NOTE(review): this local VerseKey shadows the outer `SWKey *saveKey`; here
// it holds the chapter's first verse so the walk below can be undone
1419  VerseKey saveKey = *vkcheck;
// walk the chapter, accumulating text and lemma/morph data into the prox buffers
1420  while ((!err) && (*vkcheck <= *chapMax)) {
1421 //printf("building proxBuf from (%s).\nproxBuf.c_str(): %s\n", (const char *)*key, proxBuf.c_str());
1422 //printf("building proxBuf from (%s).\n", (const char *)*key);
1423 
1424  content = stripText();
1425  if (content && *content) {
1426  // build "strong" field
1427  strong = "";
1428  morph = "";
1429  AttributeTypeList::iterator words;
1430  AttributeList::iterator word;
1431  AttributeValue::iterator strongVal;
1432  AttributeValue::iterator morphVal;
1433 
1434  words = getEntryAttributes().find("Word");
1435  if (words != getEntryAttributes().end()) {
1436  for (word = words->second.begin();word != words->second.end(); word++) {
1437  int partCount = atoi(word->second["PartCount"]);
1438  if (!partCount) partCount = 1;
1439  for (int i = 0; i < partCount; i++) {
1440  SWBuf tmp = "Lemma";
1441  if (partCount > 1) tmp.appendFormatted(".%d", i+1);
1442  strongVal = word->second.find(tmp);
1443  if (strongVal != word->second.end()) {
1444  // cheeze. skip empty article tags that weren't assigned to any text
1445  if (strongVal->second == "G3588") {
1446  if (word->second.find("Text") == word->second.end())
1447  continue; // no text? let's skip
1448  }
1449  strong.append(strongVal->second);
1450  morph.append(strongVal->second);
1451  morph.append('@');
1452  SWBuf tmp = "Morph";
1453  if (partCount > 1) tmp.appendFormatted(".%d", i+1);
1454  morphVal = word->second.find(tmp);
1455  if (morphVal != word->second.end()) {
1456  morph.append(morphVal->second);
1457  }
1458  strong.append(' ');
1459  morph.append(' ');
1460  }
1461  }
1462  }
1463  }
1464  proxBuf += content;
1465  proxBuf.append(' ');
1466  proxLem += strong;
1467  proxMorph += morph;
// entries are separated by a newline once any lemma data has been collected
1468  if (proxLem.length()) {
1469  proxLem.append("\n");
1470  proxMorph.append("\n");
1471  }
1472  }
1473  (*this)++;
1474  err = popError();
1475  }
// an error raised while walking the chapter must not end the main loop
1476  err = 0;
1477  *vkcheck = saveKey;
1478  }
1479  }
1480 
1481  // for TreeKeys use siblings if we have no children
1482  else if (tkcheck) {
1483  if (!tkcheck->hasChildren()) {
// no previous sibling: this is the first sibling, so build the proximity block for the group
1484  if (!tkcheck->previousSibling()) {
1485  do {
1486 //printf("building proxBuf from (%s).\n", (const char *)*key);
1487 //fflush(stdout);
1488 
1489  content = stripText();
1490  if (content && *content) {
1491  // build "strong" field
1492  strong = "";
1493  morph = "";
1494  AttributeTypeList::iterator words;
1495  AttributeList::iterator word;
1496  AttributeValue::iterator strongVal;
1497  AttributeValue::iterator morphVal;
1498 
1499  words = getEntryAttributes().find("Word");
1500  if (words != getEntryAttributes().end()) {
1501  for (word = words->second.begin();word != words->second.end(); word++) {
1502  int partCount = atoi(word->second["PartCount"]);
1503  if (!partCount) partCount = 1;
1504  for (int i = 0; i < partCount; i++) {
1505  SWBuf tmp = "Lemma";
1506  if (partCount > 1) tmp.appendFormatted(".%d", i+1);
1507  strongVal = word->second.find(tmp);
1508  if (strongVal != word->second.end()) {
1509  // cheeze. skip empty article tags that weren't assigned to any text
1510  if (strongVal->second == "G3588") {
1511  if (word->second.find("Text") == word->second.end())
1512  continue; // no text? let's skip
1513  }
1514  strong.append(strongVal->second);
1515  morph.append(strongVal->second);
1516  morph.append('@');
1517  SWBuf tmp = "Morph";
1518  if (partCount > 1) tmp.appendFormatted(".%d", i+1);
1519  morphVal = word->second.find(tmp);
1520  if (morphVal != word->second.end()) {
1521  morph.append(morphVal->second);
1522  }
1523  strong.append(' ');
1524  morph.append(' ');
1525  }
1526  }
1527  }
1528  }
1529 
1530  proxBuf += content;
1531  proxBuf.append(' ');
1532  proxLem += strong;
1533  proxMorph += morph;
1534  if (proxLem.length()) {
1535  proxLem.append("\n");
1536  proxMorph.append("\n");
1537  }
1538  }
1539  } while (tkcheck->nextSibling());
// return to the first sibling, where this walk started
1540  tkcheck->parent();
1541  tkcheck->firstChild();
1542  }
1543  else tkcheck->nextSibling(); // reposition from our previousSibling test
1544  }
1545  }
1546 
// attach the proximity fields, if any were built, to this entry's document
1547  if (proxBuf.length() > 0) {
1548 
1549 #if defined USEXAPIAN
1550  termGenerator.index_text(proxBuf.c_str(), 1, "P");
1551 #elif defined USELUCENE
1552  doc->add(*_CLNEW Field(_T("prox"), (wchar_t *)utf8ToWChar(proxBuf).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED));
1553 #endif
1554  good = true;
1555  }
1556  if (proxLem.length() > 0) {
1557 #if defined USEXAPIAN
1558  termGenerator.index_text(proxLem.c_str(), 1, "PL");
1559  termGenerator.index_text(proxMorph.c_str(), 1, "PM");
1560 #elif defined USELUCENE
1561  doc->add(*_CLNEW Field(_T("proxlem"), (wchar_t *)utf8ToWChar(proxLem).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED) );
1562  doc->add(*_CLNEW Field(_T("proxmorph"), (wchar_t *)utf8ToWChar(proxMorph).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED) );
1563 #endif
1564  good = true;
1565  }
1566  if (good) {
1567 //printf("writing (%s).\n", (const char *)*key);
1568 //fflush(stdout);
1569 #if defined USEXAPIAN
// the document is stored under the term "Q<key index>"; replace_document is
// given that same term
1570  SWBuf idTerm;
1571  idTerm.setFormatted("Q%ld", key->getIndex());
1572  doc.add_boolean_term(idTerm.c_str());
1573  database.replace_document(idTerm.c_str(), doc);
1574 #elif defined USELUCENE
1575  coreWriter->addDocument(doc);
1576 #endif
1577  }
1578 #if defined USEXAPIAN
1579 #elif defined USELUCENE
1580  delete doc;
1581 #endif
1582 
1583  (*this)++;
1584  err = popError();
1585  }
1586 
1587  // Optimizing automatically happens with the call to addIndexes
1588  //coreWriter->optimize();
1589 #if defined USEXAPIAN
1590 #elif defined USELUCENE
// Lucene: close the in-memory index, then merge it into the on-disk index at
// `target`, which is opened for update if it already exists and created otherwise
1591  coreWriter->close();
1592 
1593 #ifdef CLUCENE2
1594  d = FSDirectory::getDirectory(target.c_str());
1595 #endif
1596  if (IndexReader::indexExists(target.c_str())) {
1597 #ifndef CLUCENE2
1598  d = FSDirectory::getDirectory(target.c_str(), false);
1599 #endif
// an existing lock on the index directory is cleared before writing
1600  if (IndexReader::isLocked(d)) {
1601  IndexReader::unlock(d);
1602  }
1603  fsWriter = new IndexWriter( d, an, false);
1604  }
1605  else {
1606 #ifndef CLUCENE2
1607  d = FSDirectory::getDirectory(target.c_str(), true);
1608 #endif
1609  fsWriter = new IndexWriter(d, an, true);
1610  }
1611 
1612  Directory *dirs[] = { ramDir, 0 };
1613 #ifdef CLUCENE2
1614  lucene::util::ConstValueArray< lucene::store::Directory *>dirsa(dirs, 1);
1615  fsWriter->addIndexes(dirsa);
1616 #else
1617  fsWriter->addIndexes(dirs);
1618 #endif
1619  fsWriter->close();
1620 
// NOTE(review): `d` is not released anywhere in the visible code -- confirm
// whether FSDirectory::getDirectory hands back a reference the caller must drop.
1621  delete ramDir;
1622  delete coreWriter;
1623  delete fsWriter;
1624  delete an;
1625 #endif
1626 
1627  // reposition module back to where it was before we were called
1628  setKey(*saveKey);
1629 
1630  if (!saveKey->isPersist())
1631  delete saveKey;
1632 
1633  if (searchKey)
1634  delete searchKey;
1635 
1636  delete chapMax;
1637 
1638  setProcessEntryAttributes(savePEA);
1639 
1640  // reset option filters back to original values
1641  StringList::iterator origVal = filterSettings.begin();
1642  for (OptionFilterList::iterator filter = optionFilters->begin(); filter != optionFilters->end(); filter++) {
1643  (*filter)->setOptionValue(*origVal++);
1644  }
1645 
1646  return 0;
1647 #else
1648  return SWSearchable::createSearchFramework(percent, percentUserData);
1649 #endif
1650 }
1651 
1657 void SWModule::filterBuffer(OptionFilterList *filters, SWBuf &buf, const SWKey *key) const {
1658  OptionFilterList::iterator it;
1659  for (it = filters->begin(); it != filters->end(); it++) {
1660  (*it)->processText(buf, key, this);
1661  }
1662 }
1663 
1669 void SWModule::filterBuffer(FilterList *filters, SWBuf &buf, const SWKey *key) const {
1670  FilterList::iterator it;
1671  for (it = filters->begin(); it != filters->end(); it++) {
1672  (*it)->processText(buf, key, this);
1673  }
1674 }
1675 
1676 signed char SWModule::createModule(const char*) {
1677  return -1;
1678 }
1679 
1680 void SWModule::setEntry(const char*, long) {
1681 }
1682 
1684 }
1685 
1686 
1687 /******************************************************************************
1688  * SWModule::prepText - Prepares the text before returning it to external
1689  * objects
1690  *
1691  * ENT: buf - buffer where text is stored and where to store the prep'd
1692  * text.
1693  */
1694 
// NOTE(review): the opening line of this function (inner listing line 1695) is
// absent from this listing; only the body is visible here.
// The body compacts buf in place: `from` scans the raw bytes and `to` marks
// where the next kept byte is written.  State flags:
//   space    - a space is owed before the next ordinary character
//   cr       - the last line-break byte handled was a CR (13)
//   realdata - at least one ordinary character has been seen
//   nlcnt    - LFs (10) seen since the last ordinary character
1696  unsigned int to, from;
1697  char space = 0, cr = 0, realdata = 0, nlcnt = 0;
1698  char *rawBuf = buf.getRawData();
1699  for (to = from = 0; rawBuf[from]; from++) {
1700  switch (rawBuf[from]) {
1701  case 10:
// leading line breaks (before any ordinary character) are dropped
1702  if (!realdata)
1703  continue;
// an LF owes a space unless the cr flag is set (the CR already wrote a newline)
1704  space = (cr) ? 0 : 1;
1705  cr = 0;
1706  nlcnt++;
// from the second LF in a row onward, an LF is written to the output
1707  if (nlcnt > 1) {
1708 // *to++ = nl;
1709  rawBuf[to++] = 10;
1710 // *to++ = nl[1];
1711 // nlcnt = 0;
1712  }
1713  continue;
1714  case 13:
1715  if (!realdata)
1716  continue;
1717 // *to++ = nl[0];
// a CR is written out as an LF
1718  rawBuf[to++] = 10;
1719  space = 0;
1720  cr = 1;
1721  continue;
1722  }
// ordinary character
1723  realdata = 1;
1724  nlcnt = 0;
1725  if (space) {
1726  space = 0;
// emit the owed space, then step back so this same character is handled again
// on the next pass (with space now cleared); nothing extra is written when the
// character is itself a space
1727  if (rawBuf[from] != ' ') {
1728  rawBuf[to++] = ' ';
1729  from--;
1730  continue;
1731  }
1732  }
1733  rawBuf[to++] = rawBuf[from];
1734  }
1735  buf.setSize(to);
1736 
// NOTE(review): rawBuf is still used after setSize -- assumes setSize does not
// move the storage here; confirm.
// Index 0 is never examined by this loop, so the first character is always kept.
1737  while (to > 1) { // remove trailing excess
1738  to--;
1739  if ((rawBuf[to] == 10) || (rawBuf[to] == ' '))
1740  buf.setSize(to);
1741  else break;
1742  }
1743 }
1744 
const char * getName() const
Definition: swmodule.cpp:204
#define SWTextEncoding
Definition: swmodule.h:78
#define TOP
Definition: swkey.h:68
#define SWORD_NAMESPACE_START
Definition: defs.h:39
SWBuf & appendFormatted(const char *format,...)
Definition: swbuf.cpp:81
virtual SWKey * clone() const
Definition: versekey.cpp:278
static int removeDir(const char *targetDir)
Definition: filemgr.cpp:639
Definition: swbuf.h:47
unsigned long length() const
Definition: swbuf.h:197
virtual bool hasSearchFramework()
Definition: swmodule.cpp:1170
static signed char createModule(const char *path)
Definition: swmodule.cpp:1676
static void prepText(SWBuf &buf)
Definition: swmodule.cpp:1695
#define MAXVERSE
Definition: versekey.h:43
const char * getType() const
Definition: swmodule.cpp:232
static signed char existsDir(const char *ipath, const char *idirName=0)
Definition: filemgr.cpp:357
virtual signed char createSearchFramework(void(*percent)(char, void *)=&nullPercent, void *percentUserData=0)
Definition: swmodule.cpp:1193
virtual SWBuf getBibliography(unsigned char bibFormat=BIB_BIBTEX) const
Definition: swmodule.cpp:1149
virtual void deleteSearchFramework()
virtual const char * getConfigEntry(const char *key) const
Definition: swmodule.cpp:1159
static SWLog * getSystemLog()
Definition: swlog.cpp:53
virtual void setPosition(SW_POSITION pos)
Definition: swmodule.cpp:327
std::map< SWBuf, AttributeList, std::less< SWBuf > > AttributeTypeList
Definition: swmodule.h:75
SWBuf wcharToUTF8(const wchar_t *buf)
Definition: utilstr.cpp:263
static StdOutDisplay rawdisp
Definition: swmodule.h:139
virtual void setEntry(const char *inbuf, long len=-1)
Definition: swmodule.cpp:1680
#define BOTTOM
Definition: swkey.h:69
std::list< SWOptionFilter * > OptionFilterList
Definition: swmgr.h:81
virtual SWKey * getKey() const
Definition: swmodule.cpp:284
size_t length
Definition: regex.c:7928
virtual void clear()
Definition: listkey.cpp:87
virtual char setKey(const SWKey *ikey)
Definition: swmodule.cpp:298
virtual char getDirection() const
Definition: swmodule.cpp:244
SWBuf utf8ToWChar(const char *buf)
Definition: utilstr.cpp:239
#define SWTRY
Definition: defs.h:57
#define REG_ICASE
Definition: regex.h:261
bool endsWith(const SWBuf &postfix) const
Definition: swbuf.h:501
int stricmp(const char *s1, const char *s2)
Definition: utilstr.cpp:194
virtual signed char createSearchFramework(void(*percent)(char, void *)=&nullPercent, void *percentUserData=0)
virtual bool previousSibling()
Definition: treekeyidx.cpp:178
virtual SWKey * clone() const
Definition: swkey.cpp:75
void setLowerBound(const VerseKey &lb)
Definition: versekey.cpp:1112
virtual void clearBounds() const
Definition: swkey.h:190
virtual bool nextSibling()
Definition: treekeyidx.cpp:168
void setPersist(bool ipersist)
Definition: swkey.cpp:135
SWORD_NAMESPACE_START char * stdstr(char **ipstr, const char *istr, unsigned int memPadFactor=1)
Definition: utilstr.h:44
bool isPersist() const
Definition: swkey.cpp:99
virtual void linkEntry(const SWKey *sourceKey)
Definition: swmodule.cpp:1683
virtual const char * getRenderHeader() const
Definition: swmodule.cpp:1002
static const signed int SEARCHTYPE_MULTIWORD
Definition: swmodule.h:173
virtual void deleteSearchFramework()
Definition: swmodule.cpp:1178
#define SWCATCH(x)
Definition: defs.h:58
char * getRawData()
Definition: swbuf.h:379
const char * stristr(const char *s1, const char *s2)
Definition: utilstr.cpp:145
const char * c_str() const
Definition: swbuf.h:158
std::list< SWBuf > StringList
Definition: swmodule.cpp:91
SWBuf & append(const char *str, long max=-1)
Definition: swbuf.h:274
#define POS_TOP
Definition: swkey.h:65
SWModule(const char *imodname=0, const char *imoddesc=0, SWDisplay *idisp=0, const char *imodtype=0, SWTextEncoding encoding=ENC_UNKNOWN, SWTextDirection dir=DIRECTION_LTR, SWTextMarkup markup=FMT_UNKNOWN, const char *modlang=0)
Definition: swmodule.cpp:104
virtual bool hasSearchFramework()
Definition: swsearchable.h:89
virtual void flush()
Definition: filemgr.cpp:657
std::list< SWFilter * > FilterList
Definition: swmgr.h:80
static const signed int SEARCHTYPE_REGEX
Definition: swmodule.h:171
static const signed int SEARCHTYPE_EXTERNAL
Definition: swmodule.h:175
virtual char display()
Definition: swmodule.cpp:272
virtual bool parent()
Definition: treekeyidx.cpp:148
virtual int getVerse() const
Definition: versekey.cpp:1534
virtual void decrement(int steps=1)
Definition: swmodule.cpp:369
virtual const char * getOSISRef() const
Definition: versekey.cpp:1810
virtual void increment(int steps=1)
Definition: swmodule.cpp:355
virtual char popError()
Definition: swmodule.cpp:185
virtual void filterBuffer(OptionFilterList *filters, SWBuf &buf, const SWKey *key) const
Definition: swmodule.cpp:1657
#define REG_EXTENDED
Definition: regex.h:257
#define SWDYNAMIC_CAST(className, object)
Definition: defs.h:47
void setUpperBound(const VerseKey &ub)
Definition: versekey.cpp:1135
virtual bool hasChildren()
Definition: treekeyidx.cpp:198
static int createParent(const char *pName)
Definition: filemgr.cpp:426
unsigned long size() const
Definition: swbuf.h:185
const char * stripPrefix(char separator, bool endOfStringAsSeparator=false)
Definition: swbuf.h:457
virtual ListKey & search(const char *istr, int searchType=0, int flags=0, SWKey *scope=0, bool *justCheckIfSupported=0, void(*percent)(char, void *)=&nullPercent, void *percentUserData=0)
Definition: swmodule.cpp:397
#define POS_BOTTOM
Definition: swkey.h:66
const char * getDescription() const
Definition: swmodule.cpp:218
unsigned long long SW_u64
Definition: sysdata.h:56
int size
Definition: regex.c:5043
SWBuf & toUpper()
Definition: swbuf.cpp:132
virtual void setConfig(ConfigEntMap *config)
Definition: swmodule.cpp:1165
SWORD_NAMESPACE_START typedef multimapwithdefault< SWBuf, SWBuf, std::less< SWBuf > > ConfigEntMap
Definition: swconfig.h:35
virtual const char * stripText(const char *buf=0, int len=-1)
Definition: swmodule.cpp:990
int regex_t * preg
Definition: regex.c:8079
virtual void setDisplay(SWDisplay *idisp)
Definition: swmodule.cpp:262
void logError(const char *fmt,...) const
Definition: swlog.cpp:87
#define REG_NOSUB
Definition: regex.h:270
unsigned int SW_u32
Definition: sysdata.h:41
static const signed int SEARCHFLAG_MATCHWHOLEENTRY
Definition: swmodule.h:165
virtual SWDisplay * getDisplay() const
Definition: swmodule.cpp:258
SW_u64 userData
Definition: swkey.h:115
#define SWTextDirection
Definition: swmodule.h:77
#define SWORD_NAMESPACE_END
Definition: defs.h:40
virtual char getError() const
Definition: swkey.h:164
virtual SWKey * createKey() const
Definition: swmodule.cpp:173
const char * string
Definition: regex.c:5014
SWBuf & setFormatted(const char *format,...)
Definition: swbuf.cpp:50
Definition: swkey.h:77
virtual ~SWModule()
Definition: swmodule.cpp:136
#define SWTextMarkup
Definition: swmodule.h:79
std::map< SWBuf, AttributeValue, std::less< SWBuf > > AttributeList
Definition: swmodule.h:74
virtual bool firstChild()
Definition: treekeyidx.cpp:158
void setSize(unsigned long len)
Definition: swbuf.h:255
SWBuf renderText()
Definition: swmodule.cpp:1017
static const signed int SEARCHTYPE_PHRASE
Definition: swmodule.h:172
static FileMgr * getSystemFileMgr()
Definition: filemgr.cpp:101
static const signed int SEARCHTYPE_ENTRYATTR
Definition: swmodule.h:174
static const signed int SEARCHFLAG_STRICTBOUNDARIES
Definition: swmodule.h:169