00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include <vector>
00025
00026 #include <swlog.h>
00027 #include <sysdata.h>
00028 #include <swmodule.h>
00029 #include <utilstr.h>
00030 #include <regex.h>
00031 #include <swfilter.h>
00032 #include <versekey.h>
00033 #include <treekeyidx.h>
00034 #include <swoptfilter.h>
00035 #include <filemgr.h>
00036 #include <stringmgr.h>
00037 #ifndef _MSC_VER
00038 #include <iostream>
00039 #endif
00040
00041 #ifdef USELUCENE
00042 #include <CLucene.h>
00043
00044
00045
00046
00047
00048
00049
00050 using namespace lucene::index;
00051 using namespace lucene::analysis;
00052 using namespace lucene::util;
00053 using namespace lucene::store;
00054 using namespace lucene::document;
00055 using namespace lucene::queryParser;
00056 using namespace lucene::search;
00057 #endif
00058
00059 using std::vector;
00060
00061 SWORD_NAMESPACE_START
00062
// Shared fallback display object: the SWModule constructor points a module at
// it whenever no SWDisplay is supplied by the caller.
00063 SWModule::StdOutDisplay SWModule::rawdisp;
00064
// List of SWBuf strings; createSearchFramework() uses it to remember the
// option filter values it temporarily overrides.
00065 typedef std::list<SWBuf> StringList;
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078 SWModule::SWModule(const char *imodname, const char *imoddesc, SWDisplay *idisp, const char *imodtype, SWTextEncoding encoding, SWTextDirection direction, SWTextMarkup markup, const char *imodlang) {
00079 key = createKey();
00080 entryBuf = "";
00081 config = &ownConfig;
00082 modname = 0;
00083 error = 0;
00084 moddesc = 0;
00085 modtype = 0;
00086 modlang = 0;
00087 this->encoding = encoding;
00088 this->direction = direction;
00089 this->markup = markup;
00090 entrySize= -1;
00091 disp = (idisp) ? idisp : &rawdisp;
00092 stdstr(&modname, imodname);
00093 stdstr(&moddesc, imoddesc);
00094 stdstr(&modtype, imodtype);
00095 stdstr(&modlang, imodlang);
00096 stripFilters = new FilterList();
00097 rawFilters = new FilterList();
00098 renderFilters = new FilterList();
00099 optionFilters = new OptionFilterList();
00100 encodingFilters = new FilterList();
00101 skipConsecutiveLinks = true;
00102 procEntAttr = true;
00103 }
00104
00105
00106
00107
00108
00109
00110 SWModule::~SWModule()
00111 {
00112 if (modname)
00113 delete [] modname;
00114 if (moddesc)
00115 delete [] moddesc;
00116 if (modtype)
00117 delete [] modtype;
00118 if (modlang)
00119 delete [] modlang;
00120
00121 if (key) {
00122 if (!key->isPersist())
00123 delete key;
00124 }
00125
00126 stripFilters->clear();
00127 rawFilters->clear();
00128 renderFilters->clear();
00129 optionFilters->clear();
00130 encodingFilters->clear();
00131 entryAttributes.clear();
00132
00133 delete stripFilters;
00134 delete rawFilters;
00135 delete renderFilters;
00136 delete optionFilters;
00137 delete encodingFilters;
00138 }
00139
00140
00141
00142
00143
00144
00145
00146
00147 SWKey *SWModule::createKey() const
00148 {
00149 return new SWKey();
00150 }
00151
00152
00153
00154
00155
00156
00157
00158
00159 char SWModule::popError()
00160 {
00161 char retval = error;
00162
00163 error = 0;
00164 return retval;
00165 }
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177 const char *SWModule::getName() const {
00178 return modname;
00179 }
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191 const char *SWModule::getDescription() const {
00192 return moddesc;
00193 }
00194
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204
00205 const char *SWModule::getType() const {
00206 return modtype;
00207 }
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217 char SWModule::getDirection() const {
00218 return direction;
00219 }
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231 SWDisplay *SWModule::getDisplay() const {
00232 return disp;
00233 }
00234
00235 void SWModule::setDisplay(SWDisplay *idisp) {
00236 disp = idisp;
00237 }
00238
00239
00240
00241
00242
00243
00244
00245 char SWModule::display() {
00246 disp->display(*this);
00247 return 0;
00248 }
00249
00250
00251
00252
00253
00254
00255
00256
00257 SWKey *SWModule::getKey() const {
00258 return key;
00259 }
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271 char SWModule::setKey(const SWKey *ikey) {
00272 SWKey *oldKey = 0;
00273
00274 if (key) {
00275 if (!key->isPersist())
00276 oldKey = key;
00277 }
00278
00279 if (!ikey->isPersist()) {
00280 key = createKey();
00281 *key = *ikey;
00282 }
00283 else key = (SWKey *)ikey;
00284
00285 if (oldKey)
00286 delete oldKey;
00287
00288 return 0;
00289 }
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300 void SWModule::setPosition(SW_POSITION p) {
00301 *key = p;
00302 char saveError = key->popError();
00303
00304 switch (p) {
00305 case POS_TOP:
00306 (*this)++;
00307 (*this)--;
00308 break;
00309
00310 case POS_BOTTOM:
00311 (*this)--;
00312 (*this)++;
00313 break;
00314 }
00315
00316 error = saveError;
00317 }
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328 void SWModule::increment(int steps) {
00329 (*key) += steps;
00330 error = key->popError();
00331 }
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342 void SWModule::decrement(int steps) {
00343 (*key) -= steps;
00344 error = key->popError();
00345 }
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360
00361
00362
00363
00364
00365
00366 ListKey &SWModule::search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) {
00367
00368 listKey.ClearList();
00369 SWBuf term = istr;
00370 bool includeComponents = false;
00371
00372 #ifdef USELUCENE
00373 SWBuf target = getConfigEntry("AbsoluteDataPath");
00374 if (!target.endsWith("/") && !target.endsWith("\\")) {
00375 target.append('/');
00376 }
00377 target.append("lucene");
00378 #endif
00379 if (justCheckIfSupported) {
00380 *justCheckIfSupported = (searchType >= -3);
00381 #ifdef USELUCENE
00382 if ((searchType == -4) && (IndexReader::indexExists(target.c_str()))) {
00383 *justCheckIfSupported = true;
00384 }
00385 #endif
00386 return listKey;
00387 }
00388
00389 SWKey *saveKey = 0;
00390 SWKey *searchKey = 0;
00391 SWKey *resultKey = createKey();
00392 regex_t preg;
00393 vector<SWBuf> words;
00394 vector<SWBuf> window;
00395 const char *sres;
00396 terminateSearch = false;
00397 char perc = 1;
00398 bool savePEA = isProcessEntryAttributes();
00399
00400
00401 bool specialStrips = (getConfigEntry("LocalStripFilter")
00402 || (getConfig().has("GlobalOptionFilter", "UTF8GreekAccents"))
00403 || (getConfig().has("GlobalOptionFilter", "UTF8HebrewPoints"))
00404 || (getConfig().has("GlobalOptionFilter", "UTF8ArabicPoints"))
00405 || (strchr(istr, '<')));
00406
00407 setProcessEntryAttributes(searchType == -3);
00408
00409
00410 if (!key->isPersist()) {
00411 saveKey = createKey();
00412 *saveKey = *key;
00413 }
00414 else saveKey = key;
00415
00416 searchKey = (scope)?scope->clone():(key->isPersist())?key->clone():0;
00417 if (searchKey) {
00418 searchKey->setPersist(true);
00419 setKey(*searchKey);
00420 }
00421
00422 (*percent)(perc, percentUserData);
00423
00424 *this = BOTTOM;
00425 long highIndex = key->getIndex();
00426 if (!highIndex)
00427 highIndex = 1;
00428 *this = TOP;
00429 if (searchType >= 0) {
00430 flags |=searchType|REG_NOSUB|REG_EXTENDED;
00431 regcomp(&preg, istr, flags);
00432 }
00433
00434 (*percent)(++perc, percentUserData);
00435
00436
00437 #ifdef USELUCENE
00438 if (searchType == -4) {
00439
00440 lucene::index::IndexReader *ir = 0;
00441 lucene::search::IndexSearcher *is = 0;
00442 Query *q = 0;
00443 Hits *h = 0;
00444 SWTRY {
00445 ir = IndexReader::open(target);
00446 is = new IndexSearcher(ir);
00447 (*percent)(10, percentUserData);
00448
00449 const TCHAR *stopWords[] = { 0 };
00450 standard::StandardAnalyzer analyzer(stopWords);
00451 q = QueryParser::parse((wchar_t *)utf8ToWChar(istr).getRawData(), _T("content"), &analyzer);
00452 (*percent)(20, percentUserData);
00453 h = is->search(q);
00454 (*percent)(80, percentUserData);
00455
00456
00457 bool checkBounds = getKey()->isBoundSet();
00458 for (unsigned long i = 0; i < (unsigned long)h->length(); i++) {
00459 Document &doc = h->doc(i);
00460
00461
00462 *resultKey = wcharToUTF8(doc.get(_T("key")));
00463
00464
00465 if (checkBounds) {
00466 *getKey() = *resultKey;
00467 if (*getKey() != *resultKey) {
00468 continue;
00469 }
00470 }
00471 listKey << *resultKey;
00472 listKey.GetElement()->userData = (__u64)((__u32)(h->score(i)*100));
00473 }
00474 (*percent)(98, percentUserData);
00475 }
00476 SWCATCH (...) {
00477 q = 0;
00478
00479 }
00480 delete h;
00481 delete q;
00482
00483 delete is;
00484 if (ir) {
00485 ir->close();
00486 }
00487 }
00488 #endif
00489
00490
00491 switch (searchType) {
00492
00493
00494 case -1:
00495
00496 if ((flags & REG_ICASE) == REG_ICASE) toupperstr(term);
00497 break;
00498
00499
00500 case -2:
00501 case -5:
00502
00503 while (1) {
00504 const char *word = term.stripPrefix(' ');
00505 if (!word) {
00506 words.push_back(term);
00507 break;
00508 }
00509 words.push_back(word);
00510 }
00511 if ((flags & REG_ICASE) == REG_ICASE) {
00512 for (unsigned int i = 0; i < words.size(); i++) {
00513 toupperstr(words[i]);
00514 }
00515 }
00516 break;
00517
00518
00519 case -3:
00520
00521 while (1) {
00522 const char *word = term.stripPrefix('/');
00523 if (!word) {
00524 words.push_back(term);
00525 break;
00526 }
00527 words.push_back(word);
00528 }
00529 if ((words.size()>2) && words[2].endsWith(".")) {
00530 includeComponents = true;
00531 words[2]--;
00532 }
00533 break;
00534 }
00535
00536
00537
00538 perc = 5;
00539 (*percent)(perc, percentUserData);
00540
00541
00542 while ((searchType != -4) && !popError() && !terminateSearch) {
00543 long mindex = key->getIndex();
00544 float per = (float)mindex / highIndex;
00545 per *= 93;
00546 per += 5;
00547 char newperc = (char)per;
00548 if (newperc > perc) {
00549 perc = newperc;
00550 (*percent)(perc, percentUserData);
00551 }
00552 else if (newperc < perc) {
00553 #ifndef _MSC_VER
00554 std::cerr << "Serious error: new percentage complete is less than previous value\n";
00555 std::cerr << "index: " << (key->getIndex()) << "\n";
00556 std::cerr << "highIndex: " << highIndex << "\n";
00557 std::cerr << "newperc ==" << (int)newperc << "%" << "is smaller than\n";
00558 std::cerr << "perc == " << (int )perc << "% \n";
00559 #endif
00560 }
00561 if (searchType >= 0) {
00562 if (!regexec(&preg, stripText(), 0, 0, 0)) {
00563 *resultKey = *getKey();
00564 resultKey->clearBound();
00565 listKey << *resultKey;
00566 }
00567 }
00568
00569
00570 else {
00571 SWBuf textBuf;
00572 switch (searchType) {
00573
00574
00575 case -1:
00576 textBuf = stripText();
00577 if ((flags & REG_ICASE) == REG_ICASE) toupperstr(textBuf);
00578 sres = strstr(textBuf.c_str(), term.c_str());
00579 if (sres) {
00580 *resultKey = *getKey();
00581 resultKey->clearBound();
00582 listKey << *resultKey;
00583 }
00584 break;
00585
00586
00587 case -2: {
00588 int loopCount = 0;
00589 unsigned int foundWords = 0;
00590 do {
00591 textBuf = ((loopCount == 0)&&(!specialStrips)) ? getRawEntry() : stripText();
00592 foundWords = 0;
00593
00594 for (unsigned int i = 0; i < words.size(); i++) {
00595 if ((flags & REG_ICASE) == REG_ICASE) toupperstr(textBuf);
00596 sres = strstr(textBuf.c_str(), words[i].c_str());
00597 if (!sres) {
00598 break;
00599 }
00600 foundWords++;
00601 }
00602
00603 loopCount++;
00604 } while ( (loopCount < 2) && (foundWords == words.size()));
00605
00606 if ((loopCount == 2) && (foundWords == words.size())) {
00607 *resultKey = *getKey();
00608 resultKey->clearBound();
00609 listKey << *resultKey;
00610 }
00611 }
00612 break;
00613
00614
00615 case -3: {
00616 renderText();
00617 AttributeTypeList &entryAttribs = getEntryAttributes();
00618 AttributeTypeList::iterator i1Start, i1End;
00619 AttributeList::iterator i2Start, i2End;
00620 AttributeValue::iterator i3Start, i3End;
00621
00622 if ((words.size()) && (words[0].length())) {
00623
00624 for (i1Start = entryAttribs.begin(); i1Start != entryAttribs.end(); ++i1Start) {
00625
00626 }
00627 i1Start = entryAttribs.find(words[0]);
00628 i1End = i1Start;
00629 if (i1End != entryAttribs.end()) {
00630 i1End++;
00631 }
00632 }
00633 else {
00634 i1Start = entryAttribs.begin();
00635 i1End = entryAttribs.end();
00636 }
00637 for (;i1Start != i1End; i1Start++) {
00638 if ((words.size()>1) && (words[1].length())) {
00639 i2Start = i1Start->second.find(words[1]);
00640 i2End = i2Start;
00641 if (i2End != i1Start->second.end())
00642 i2End++;
00643 }
00644 else {
00645 i2Start = i1Start->second.begin();
00646 i2End = i1Start->second.end();
00647 }
00648 for (;i2Start != i2End; i2Start++) {
00649 if ((words.size()>2) && (words[2].length()) && (!includeComponents)) {
00650 i3Start = i2Start->second.find(words[2]);
00651 i3End = i3Start;
00652 if (i3End != i2Start->second.end())
00653 i3End++;
00654 }
00655 else {
00656 i3Start = i2Start->second.begin();
00657 i3End = i2Start->second.end();
00658 }
00659 for (;i3Start != i3End; i3Start++) {
00660 if ((words.size()>3) && (words[3].length())) {
00661 if (includeComponents) {
00662 SWBuf key = i3Start->first.c_str();
00663 key = key.stripPrefix('.', true);
00664
00665
00666 if (key != words[2]) continue;
00667 }
00668 if (flags & SEARCHFLAG_MATCHWHOLEENTRY) {
00669 bool found = !(((flags & REG_ICASE) == REG_ICASE) ? sword::stricmp(i3Start->second.c_str(), words[3]) : strcmp(i3Start->second.c_str(), words[3]));
00670 sres = (found) ? i3Start->second.c_str() : 0;
00671 }
00672 else {
00673 sres = ((flags & REG_ICASE) == REG_ICASE) ? stristr(i3Start->second.c_str(), words[3]) : strstr(i3Start->second.c_str(), words[3]);
00674 }
00675 if (sres) {
00676 *resultKey = *getKey();
00677 resultKey->clearBound();
00678 listKey << *resultKey;
00679 break;
00680 }
00681 }
00682 }
00683 if (i3Start != i3End)
00684 break;
00685 }
00686 if (i2Start != i2End)
00687 break;
00688 }
00689 break;
00690 }
00691 case -5:
00692 AttributeList &words = getEntryAttributes()["Word"];
00693 SWBuf kjvWord = "";
00694 SWBuf bibWord = "";
00695 for (AttributeList::iterator it = words.begin(); it != words.end(); it++) {
00696 int parts = atoi(it->second["PartCount"]);
00697 SWBuf lemma = "";
00698 SWBuf morph = "";
00699 for (int i = 1; i <= parts; i++) {
00700 SWBuf key = "";
00701 key = (parts == 1) ? "Lemma" : SWBuf().setFormatted("Lemma.%d", i).c_str();
00702 AttributeValue::iterator li = it->second.find(key);
00703 if (li != it->second.end()) {
00704 if (i > 1) lemma += " ";
00705 key = (parts == 1) ? "LemmaClass" : SWBuf().setFormatted("LemmaClass.%d", i).c_str();
00706 AttributeValue::iterator lci = it->second.find(key);
00707 if (lci != it->second.end()) {
00708 lemma += lci->second + ":";
00709 }
00710 lemma += li->second;
00711 }
00712 key = (parts == 1) ? "Morph" : SWBuf().setFormatted("Morph.%d", i).c_str();
00713 li = it->second.find(key);
00714
00715 if (i == 1 && parts != 1 && li == it->second.end()) {
00716 li = it->second.find("Morph");
00717 }
00718 if (li != it->second.end()) {
00719 if (i > 1) morph += " ";
00720 key = (parts == 1) ? "MorphClass" : SWBuf().setFormatted("MorphClass.%d", i).c_str();
00721 AttributeValue::iterator lci = it->second.find(key);
00722
00723 if (i == 1 && parts != 1 && lci == it->second.end()) {
00724 lci = it->second.find("MorphClass");
00725 }
00726 if (lci != it->second.end()) {
00727 morph += lci->second + ":";
00728 }
00729 morph += li->second;
00730 }
00731
00732 }
00733 while (window.size() < (unsigned)flags) {
00734
00735 }
00736 }
00737 break;
00738 }
00739 }
00740 (*this)++;
00741 }
00742
00743
00744
00745 if (searchType >= 0)
00746 regfree(&preg);
00747
00748 setKey(*saveKey);
00749
00750 if (!saveKey->isPersist())
00751 delete saveKey;
00752
00753 if (searchKey)
00754 delete searchKey;
00755 delete resultKey;
00756
00757 listKey = TOP;
00758 setProcessEntryAttributes(savePEA);
00759
00760
00761 (*percent)(100, percentUserData);
00762
00763
00764 return listKey;
00765 }
00766
00767
00768
00769
00770
00771
00772
00773
00774
00775
00776
00777 const char *SWModule::stripText(const char *buf, int len) {
00778 return renderText(buf, len, false);
00779 }
00780
00781
00787 const char *SWModule::getRenderHeader() const {
00788 FilterList::const_iterator first = getRenderFilters().begin();
00789 if (first != getRenderFilters().end()) {
00790 return (*first)->getHeader();
00791 }
00792 return "";
00793 }
00794
00795
00796
00797
00798
00799
00800
00801
00802
00803
00804 const char *SWModule::renderText(const char *buf, int len, bool render) {
00805 bool savePEA = isProcessEntryAttributes();
00806 if (!buf) {
00807 entryAttributes.clear();
00808 }
00809 else {
00810 setProcessEntryAttributes(false);
00811 }
00812
00813 static SWBuf local;
00814 if (buf)
00815 local = buf;
00816
00817 SWBuf &tmpbuf = (buf) ? local : getRawEntryBuf();
00818 SWKey *key = 0;
00819 static const char *null = "";
00820
00821 if (tmpbuf) {
00822 unsigned long size = (len < 0) ? ((getEntrySize()<0) ? strlen(tmpbuf) : getEntrySize()) : len;
00823 if (size > 0) {
00824 key = (SWKey *)*this;
00825
00826 optionFilter(tmpbuf, key);
00827
00828 if (render) {
00829 renderFilter(tmpbuf, key);
00830 encodingFilter(tmpbuf, key);
00831 }
00832 else stripFilter(tmpbuf, key);
00833 }
00834 }
00835 else {
00836 tmpbuf = null;
00837 }
00838
00839 setProcessEntryAttributes(savePEA);
00840
00841 return tmpbuf;
00842 }
00843
00844
00845
00846
00847
00848
00849
00850
00851
00852
00853 const char *SWModule::renderText(const SWKey *tmpKey) {
00854 SWKey *saveKey;
00855 const char *retVal;
00856
00857 if (!key->isPersist()) {
00858 saveKey = createKey();
00859 *saveKey = *key;
00860 }
00861 else saveKey = key;
00862
00863 setKey(*tmpKey);
00864
00865 retVal = renderText();
00866
00867 setKey(*saveKey);
00868
00869 if (!saveKey->isPersist())
00870 delete saveKey;
00871
00872 return retVal;
00873 }
00874
00875
00876
00877
00878
00879
00880
00881
00882
00883
00884 const char *SWModule::stripText(const SWKey *tmpKey) {
00885 SWKey *saveKey;
00886 const char *retVal;
00887
00888 if (!key->isPersist()) {
00889 saveKey = createKey();
00890 *saveKey = *key;
00891 }
00892 else saveKey = key;
00893
00894 setKey(*tmpKey);
00895
00896 retVal = stripText();
00897
00898 setKey(*saveKey);
00899
00900 if (!saveKey->isPersist())
00901 delete saveKey;
00902
00903 return retVal;
00904 }
00905
00906
00907 const char *SWModule::getConfigEntry(const char *key) const {
00908 ConfigEntMap::iterator it = config->find(key);
00909 return (it != config->end()) ? it->second.c_str() : 0;
00910 }
00911
00912
00913 void SWModule::setConfig(ConfigEntMap *config) {
00914 this->config = config;
00915 }
00916
00917
00918 bool SWModule::hasSearchFramework() {
00919 #ifdef USELUCENE
00920 return true;
00921 #else
00922 return SWSearchable::hasSearchFramework();
00923 #endif
00924 }
00925
00926 void SWModule::deleteSearchFramework() {
00927 #ifdef USELUCENE
00928 SWBuf target = getConfigEntry("AbsoluteDataPath");
00929 if (!target.endsWith("/") && !target.endsWith("\\")) {
00930 target.append('/');
00931 }
00932 target.append("lucene");
00933
00934 FileMgr::removeDir(target.c_str());
00935 #else
00936 SWSearchable::deleteSearchFramework();
00937 #endif
00938 }
00939
00940
00941 signed char SWModule::createSearchFramework(void (*percent)(char, void *), void *percentUserData) {
00942
00943 #ifdef USELUCENE
00944 SWBuf target = getConfigEntry("AbsoluteDataPath");
00945 if (!target.endsWith("/") && !target.endsWith("\\")) {
00946 target.append('/');
00947 }
00948 target.append("lucene");
00949 int status = FileMgr::createParent(target+"/dummy");
00950 if (status) return -1;
00951
00952 SWKey *saveKey = 0;
00953 SWKey *searchKey = 0;
00954 SWKey textkey;
00955 SWBuf c;
00956
00957 const int MAX_CONV_SIZE = 1024 * 1024;
00958
00959
00960 StringList filterSettings;
00961 for (OptionFilterList::iterator filter = optionFilters->begin(); filter != optionFilters->end(); filter++) {
00962 filterSettings.push_back((*filter)->getOptionValue());
00963 (*filter)->setOptionValue(*((*filter)->getOptionValues().begin()));
00964
00965 if ( (!strcmp("Greek Accents", (*filter)->getOptionName())) ||
00966 (!strcmp("Hebrew Vowel Points", (*filter)->getOptionName())) ||
00967 (!strcmp("Arabic Vowel Points", (*filter)->getOptionName()))
00968 ) {
00969 (*filter)->setOptionValue("Off");
00970 }
00971 }
00972
00973
00974
00975 FileMgr::getSystemFileMgr()->flush();
00976
00977
00978
00979 if (!key->isPersist()) {
00980 saveKey = createKey();
00981 *saveKey = *key;
00982 }
00983 else saveKey = key;
00984
00985 searchKey = (key->isPersist())?key->clone():0;
00986 if (searchKey) {
00987 searchKey->setPersist(1);
00988 setKey(*searchKey);
00989 }
00990
00991 RAMDirectory *ramDir = 0;
00992 IndexWriter *coreWriter = 0;
00993 IndexWriter *fsWriter = 0;
00994 Directory *d = 0;
00995
00996 const TCHAR *stopWords[] = { 0 };
00997 standard::StandardAnalyzer *an = new standard::StandardAnalyzer(stopWords);
00998 bool includeKeyInSearch = getConfig().has("SearchOption", "IncludeKeyInSearch");
00999
01000 ramDir = new RAMDirectory();
01001 coreWriter = new IndexWriter(ramDir, an, true);
01002 coreWriter->setMaxFieldLength(MAX_CONV_SIZE);
01003
01004
01005
01006
01007 char perc = 1;
01008 VerseKey *vkcheck = 0;
01009 vkcheck = SWDYNAMIC_CAST(VerseKey, key);
01010 VerseKey *chapMax = 0;
01011 if (vkcheck) chapMax = (VerseKey *)vkcheck->clone();
01012
01013 TreeKeyIdx *tkcheck = 0;
01014 tkcheck = SWDYNAMIC_CAST(TreeKeyIdx, key);
01015
01016
01017 *this = BOTTOM;
01018 long highIndex = key->getIndex();
01019 if (!highIndex)
01020 highIndex = 1;
01021
01022 bool savePEA = isProcessEntryAttributes();
01023 setProcessEntryAttributes(true);
01024
01025
01026
01027 *this = TOP;
01028
01029 SWBuf proxBuf;
01030 SWBuf proxLem;
01031 SWBuf proxMorph;
01032 SWBuf strong;
01033 SWBuf morph;
01034
01035 char err = popError();
01036 while (!err) {
01037 long mindex = key->getIndex();
01038
01039 proxBuf = "";
01040 proxLem = "";
01041 proxMorph = "";
01042
01043
01044 float per = (float)mindex / highIndex;
01045
01046 per *= 93; per += 5;
01047 char newperc = (char)per;
01048 if (newperc > perc) {
01049 perc = newperc;
01050 (*percent)(perc, percentUserData);
01051 }
01052
01053
01054 const char *content = stripText();
01055
01056 bool good = false;
01057
01058
01059 Document *doc = new Document();
01060
01061 SWBuf keyText = (vkcheck) ? vkcheck->getOSISRef() : getKeyText();
01062 if (content && *content) {
01063 good = true;
01064
01065
01066
01067 AttributeTypeList::iterator words;
01068 AttributeList::iterator word;
01069 AttributeValue::iterator strongVal;
01070 AttributeValue::iterator morphVal;
01071
01072 strong="";
01073 morph="";
01074 words = getEntryAttributes().find("Word");
01075 if (words != getEntryAttributes().end()) {
01076 for (word = words->second.begin();word != words->second.end(); word++) {
01077 int partCount = atoi(word->second["PartCount"]);
01078 if (!partCount) partCount = 1;
01079 for (int i = 0; i < partCount; i++) {
01080 SWBuf tmp = "Lemma";
01081 if (partCount > 1) tmp.appendFormatted(".%d", i+1);
01082 strongVal = word->second.find(tmp);
01083 if (strongVal != word->second.end()) {
01084
01085 if (strongVal->second == "G3588") {
01086 if (word->second.find("Text") == word->second.end())
01087 continue;
01088 }
01089 strong.append(strongVal->second);
01090 morph.append(strongVal->second);
01091 morph.append('@');
01092 SWBuf tmp = "Morph";
01093 if (partCount > 1) tmp.appendFormatted(".%d", i+1);
01094 morphVal = word->second.find(tmp);
01095 if (morphVal != word->second.end()) {
01096 morph.append(morphVal->second);
01097 }
01098 strong.append(' ');
01099 morph.append(' ');
01100 }
01101 }
01102 }
01103 }
01104
01105 doc->add(*_CLNEW Field(_T("key"), (wchar_t *)utf8ToWChar(keyText).getRawData(), Field::STORE_YES | Field::INDEX_UNTOKENIZED));
01106
01107 if (includeKeyInSearch) {
01108 c = keyText;
01109 c += " ";
01110 c += content;
01111 content = c.c_str();
01112 }
01113
01114 doc->add(*_CLNEW Field(_T("content"), (wchar_t *)utf8ToWChar(content).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED));
01115
01116 if (strong.length() > 0) {
01117 doc->add(*_CLNEW Field(_T("lemma"), (wchar_t *)utf8ToWChar(strong).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED));
01118 doc->add(*_CLNEW Field(_T("morph"), (wchar_t *)utf8ToWChar(morph).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED));
01119
01120 }
01121
01122
01123
01124 }
01125
01126
01127
01128 if (vkcheck) {
01129 *chapMax = *vkcheck;
01130
01131 if (vkcheck->getVerse() == 1) {
01132 *chapMax = MAXVERSE;
01133 VerseKey saveKey = *vkcheck;
01134 while ((!err) && (*vkcheck <= *chapMax)) {
01135
01136
01137
01138 content = stripText();
01139 if (content && *content) {
01140
01141 strong = "";
01142 morph = "";
01143 AttributeTypeList::iterator words;
01144 AttributeList::iterator word;
01145 AttributeValue::iterator strongVal;
01146 AttributeValue::iterator morphVal;
01147
01148 words = getEntryAttributes().find("Word");
01149 if (words != getEntryAttributes().end()) {
01150 for (word = words->second.begin();word != words->second.end(); word++) {
01151 int partCount = atoi(word->second["PartCount"]);
01152 if (!partCount) partCount = 1;
01153 for (int i = 0; i < partCount; i++) {
01154 SWBuf tmp = "Lemma";
01155 if (partCount > 1) tmp.appendFormatted(".%d", i+1);
01156 strongVal = word->second.find(tmp);
01157 if (strongVal != word->second.end()) {
01158
01159 if (strongVal->second == "G3588") {
01160 if (word->second.find("Text") == word->second.end())
01161 continue;
01162 }
01163 strong.append(strongVal->second);
01164 morph.append(strongVal->second);
01165 morph.append('@');
01166 SWBuf tmp = "Morph";
01167 if (partCount > 1) tmp.appendFormatted(".%d", i+1);
01168 morphVal = word->second.find(tmp);
01169 if (morphVal != word->second.end()) {
01170 morph.append(morphVal->second);
01171 }
01172 strong.append(' ');
01173 morph.append(' ');
01174 }
01175 }
01176 }
01177 }
01178 proxBuf += content;
01179 proxBuf.append(' ');
01180 proxLem += strong;
01181 proxMorph += morph;
01182 if (proxLem.length()) {
01183 proxLem.append("\n");
01184 proxMorph.append("\n");
01185 }
01186 }
01187 (*this)++;
01188 err = popError();
01189 }
01190 err = 0;
01191 *vkcheck = saveKey;
01192 }
01193 }
01194
01195
01196 else if (tkcheck) {
01197 if (!tkcheck->hasChildren()) {
01198 if (!tkcheck->previousSibling()) {
01199 do {
01200
01201
01202
01203 content = stripText();
01204 if (content && *content) {
01205
01206 strong = "";
01207 morph = "";
01208 AttributeTypeList::iterator words;
01209 AttributeList::iterator word;
01210 AttributeValue::iterator strongVal;
01211 AttributeValue::iterator morphVal;
01212
01213 words = getEntryAttributes().find("Word");
01214 if (words != getEntryAttributes().end()) {
01215 for (word = words->second.begin();word != words->second.end(); word++) {
01216 int partCount = atoi(word->second["PartCount"]);
01217 if (!partCount) partCount = 1;
01218 for (int i = 0; i < partCount; i++) {
01219 SWBuf tmp = "Lemma";
01220 if (partCount > 1) tmp.appendFormatted(".%d", i+1);
01221 strongVal = word->second.find(tmp);
01222 if (strongVal != word->second.end()) {
01223
01224 if (strongVal->second == "G3588") {
01225 if (word->second.find("Text") == word->second.end())
01226 continue;
01227 }
01228 strong.append(strongVal->second);
01229 morph.append(strongVal->second);
01230 morph.append('@');
01231 SWBuf tmp = "Morph";
01232 if (partCount > 1) tmp.appendFormatted(".%d", i+1);
01233 morphVal = word->second.find(tmp);
01234 if (morphVal != word->second.end()) {
01235 morph.append(morphVal->second);
01236 }
01237 strong.append(' ');
01238 morph.append(' ');
01239 }
01240 }
01241 }
01242 }
01243
01244 proxBuf += content;
01245 proxBuf.append(' ');
01246 proxLem += strong;
01247 proxMorph += morph;
01248 if (proxLem.length()) {
01249 proxLem.append("\n");
01250 proxMorph.append("\n");
01251 }
01252 }
01253 } while (tkcheck->nextSibling());
01254 tkcheck->parent();
01255 tkcheck->firstChild();
01256 }
01257 else tkcheck->nextSibling();
01258 }
01259 }
01260
01261 if (proxBuf.length() > 0) {
01262
01263 doc->add(*_CLNEW Field(_T("prox"), (wchar_t *)utf8ToWChar(proxBuf).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED));
01264 good = true;
01265 }
01266 if (proxLem.length() > 0) {
01267 doc->add(*_CLNEW Field(_T("proxlem"), (wchar_t *)utf8ToWChar(proxLem).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED) );
01268 doc->add(*_CLNEW Field(_T("proxmorph"), (wchar_t *)utf8ToWChar(proxMorph).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED) );
01269 good = true;
01270 }
01271 if (good) {
01272
01273
01274 coreWriter->addDocument(doc);
01275 }
01276 delete doc;
01277
01278 (*this)++;
01279 err = popError();
01280 }
01281
01282
01283
01284 coreWriter->close();
01285
01286 #ifdef CLUCENE2
01287 d = FSDirectory::getDirectory(target.c_str());
01288 #endif
01289 if (IndexReader::indexExists(target.c_str())) {
01290 #ifndef CLUCENE2
01291 d = FSDirectory::getDirectory(target.c_str(), false);
01292 #endif
01293 if (IndexReader::isLocked(d)) {
01294 IndexReader::unlock(d);
01295 }
01296 fsWriter = new IndexWriter( d, an, false);
01297 }
01298 else {
01299 #ifndef CLUCENE2
01300 d = FSDirectory::getDirectory(target.c_str(), true);
01301 #endif
01302 fsWriter = new IndexWriter(d, an, true);
01303 }
01304
01305 Directory *dirs[] = { ramDir, 0 };
01306 #ifdef CLUCENE2
01307 lucene::util::ConstValueArray< lucene::store::Directory *>dirsa(dirs, 1);
01308 fsWriter->addIndexes(dirsa);
01309 #else
01310 fsWriter->addIndexes(dirs);
01311 #endif
01312 fsWriter->close();
01313
01314 delete ramDir;
01315 delete coreWriter;
01316 delete fsWriter;
01317 delete an;
01318
01319
01320 setKey(*saveKey);
01321
01322 if (!saveKey->isPersist())
01323 delete saveKey;
01324
01325 if (searchKey)
01326 delete searchKey;
01327
01328 delete chapMax;
01329
01330 setProcessEntryAttributes(savePEA);
01331
01332
01333 StringList::iterator origVal = filterSettings.begin();
01334 for (OptionFilterList::iterator filter = optionFilters->begin(); filter != optionFilters->end(); filter++) {
01335 (*filter)->setOptionValue(*origVal++);
01336 }
01337
01338 return 0;
01339 #else
01340 return SWSearchable::createSearchFramework(percent, percentUserData);
01341 #endif
01342 }
01343
01349 void SWModule::filterBuffer(OptionFilterList *filters, SWBuf &buf, const SWKey *key) const {
01350 OptionFilterList::iterator it;
01351 for (it = filters->begin(); it != filters->end(); it++) {
01352 (*it)->processText(buf, key, this);
01353 }
01354 }
01355
01361 void SWModule::filterBuffer(FilterList *filters, SWBuf &buf, const SWKey *key) const {
01362 FilterList::iterator it;
01363 for (it = filters->begin(); it != filters->end(); it++) {
01364 (*it)->processText(buf, key, this);
01365 }
01366 }
01367
01368 signed char SWModule::createModule(const char*) {
01369 return -1;
01370 }
01371
01372 void SWModule::setEntry(const char*, long) {
01373 }
01374
01375 void SWModule::linkEntry(const SWKey*) {
01376 }
01377
01378
01379
01380
01381
01382
01383
01384
01385
01386
01387 void SWModule::prepText(SWBuf &buf) {
01388 unsigned int to, from;
01389 char space = 0, cr = 0, realdata = 0, nlcnt = 0;
01390 char *rawBuf = buf.getRawData();
01391 for (to = from = 0; rawBuf[from]; from++) {
01392 switch (rawBuf[from]) {
01393 case 10:
01394 if (!realdata)
01395 continue;
01396 space = (cr) ? 0 : 1;
01397 cr = 0;
01398 nlcnt++;
01399 if (nlcnt > 1) {
01400
01401 rawBuf[to++] = 10;
01402
01403
01404 }
01405 continue;
01406 case 13:
01407 if (!realdata)
01408 continue;
01409
01410 rawBuf[to++] = 10;
01411 space = 0;
01412 cr = 1;
01413 continue;
01414 }
01415 realdata = 1;
01416 nlcnt = 0;
01417 if (space) {
01418 space = 0;
01419 if (rawBuf[from] != ' ') {
01420 rawBuf[to++] = ' ';
01421 from--;
01422 continue;
01423 }
01424 }
01425 rawBuf[to++] = rawBuf[from];
01426 }
01427 buf.setSize(to);
01428
01429 while (to > 1) {
01430 to--;
01431 if ((rawBuf[to] == 10) || (rawBuf[to] == ' '))
01432 buf.setSize(to);
01433 else break;
01434 }
01435 }
01436
01437 SWORD_NAMESPACE_END