1
22 package org.crosswire.jsword.index.lucene;
23
24 import java.io.File;
25 import java.io.IOException;
26 import java.net.URI;
27 import java.util.ArrayList;
28 import java.util.List;
29
30 import org.apache.lucene.analysis.Analyzer;
31 import org.apache.lucene.document.Document;
32 import org.apache.lucene.document.Field;
33 import org.apache.lucene.index.IndexWriter;
34 import org.apache.lucene.queryParser.ParseException;
35 import org.apache.lucene.queryParser.QueryParser;
36 import org.apache.lucene.search.IndexSearcher;
37 import org.apache.lucene.search.Query;
38 import org.apache.lucene.search.ScoreDoc;
39 import org.apache.lucene.search.Searcher;
40 import org.apache.lucene.search.TopScoreDocCollector;
41 import org.apache.lucene.store.Directory;
42 import org.apache.lucene.store.FSDirectory;
43 import org.apache.lucene.store.RAMDirectory;
44 import org.apache.lucene.util.Version;
45 import org.crosswire.common.activate.Activatable;
46 import org.crosswire.common.activate.Activator;
47 import org.crosswire.common.activate.Lock;
48 import org.crosswire.common.progress.JobManager;
49 import org.crosswire.common.progress.Progress;
50 import org.crosswire.common.util.Logger;
51 import org.crosswire.common.util.NetUtil;
52 import org.crosswire.common.util.Reporter;
53 import org.crosswire.jsword.JSMsg;
54 import org.crosswire.jsword.book.Book;
55 import org.crosswire.jsword.book.BookData;
56 import org.crosswire.jsword.book.BookException;
57 import org.crosswire.jsword.book.FeatureType;
58 import org.crosswire.jsword.book.OSISUtil;
59 import org.crosswire.jsword.index.AbstractIndex;
60 import org.crosswire.jsword.index.IndexStatus;
61 import org.crosswire.jsword.index.lucene.analysis.LuceneAnalyzer;
62 import org.crosswire.jsword.index.search.SearchModifier;
63 import org.crosswire.jsword.passage.AbstractPassage;
64 import org.crosswire.jsword.passage.Key;
65 import org.crosswire.jsword.passage.NoSuchKeyException;
66 import org.crosswire.jsword.passage.NoSuchVerseException;
67 import org.crosswire.jsword.passage.PassageTally;
68 import org.crosswire.jsword.passage.VerseFactory;
69 import org.crosswire.jsword.versification.Versification;
70 import org.crosswire.jsword.versification.system.Versifications;
71 import org.jdom.Element;
72
73
80 public class LuceneIndex extends AbstractIndex implements Activatable {
81
85
88 public static final String FIELD_KEY = "key";
89
90
93 public static final String FIELD_BODY = "content";
94
95
98 public static final String FIELD_STRONG = "strong";
99
100
103 public static final String FIELD_HEADING = "heading";
104
105
108 public static final String FIELD_XREF = "xref";
109
110
113 public static final String FIELD_NOTE = "note";
114
115
121 public LuceneIndex(Book book, URI storage) throws BookException {
122 this.book = book;
123
124 try {
125 this.path = NetUtil.getAsFile(storage).getCanonicalPath();
126 } catch (IOException ex) {
127 throw new BookException(JSMsg.gettext("Failed to initialize Lucene search engine."), ex);
129 }
130 }
131
132
138 public LuceneIndex(Book book, URI storage, boolean create) throws BookException {
139 assert create;
140
141 this.book = book;
142 File finalPath = null;
143 try {
144 finalPath = NetUtil.getAsFile(storage);
145 this.path = finalPath.getCanonicalPath();
146 } catch (IOException ex) {
147 throw new BookException(JSMsg.gettext("Failed to initialize Lucene search engine."), ex);
149 }
150
151 String jobName = JSMsg.gettext("Creating index. Processing {0}", book.getInitials());
153 Progress job = JobManager.createJob(jobName, Thread.currentThread());
154 job.beginJob(jobName);
155
156 IndexStatus finalStatus = IndexStatus.UNDONE;
157
158 Analyzer analyzer = new LuceneAnalyzer(book);
159
160 List<Key> errors = new ArrayList<Key>();
161 File tempPath = new File(path + '.' + IndexStatus.CREATING.toString());
162
163 try {
164 synchronized (CREATING) {
165
166 book.setIndexStatus(IndexStatus.CREATING);
167
168
173 final RAMDirectory ramDir = new RAMDirectory();
175 IndexWriter writer = new IndexWriter(ramDir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
176
177 generateSearchIndexImpl(job, errors, writer, book.getGlobalKeyList(), 0);
178
179 job.setSectionName(JSMsg.gettext("Optimizing"));
181 job.setWork(95);
182
183 writer.close();
186
187 final Directory destination = FSDirectory.open(new File(tempPath.getCanonicalPath()));
189 IndexWriter fsWriter = new IndexWriter(destination, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
190 fsWriter.addIndexesNoOptimize(new Directory[] {
191 ramDir
192 });
193 fsWriter.optimize();
194 fsWriter.close();
195
196 ramDir.close();
198
199 job.setCancelable(false);
200 if (!job.isFinished()) {
201 if (!tempPath.renameTo(finalPath)) {
202 throw new BookException(JSMsg.gettext("Installation failed."));
204 }
205 }
206
207 if (finalPath.exists()) {
208 finalStatus = IndexStatus.DONE;
209 }
210
211 if (!errors.isEmpty()) {
212 StringBuilder buf = new StringBuilder();
213 for (Key error : errors) {
214 buf.append(error);
215 buf.append('\n');
216 }
217 Reporter.informUser(this, JSMsg.gettext("The following verses have errors and could not be indexed\n{0}", buf));
220 }
221
222 }
223 } catch (IOException ex) {
224 job.cancel();
225 throw new BookException(JSMsg.gettext("Failed to initialize Lucene search engine."), ex);
227 } finally {
228 book.setIndexStatus(finalStatus);
229 job.done();
230 }
231 }
232
233
238 public Key find(String search) throws BookException {
239 checkActive();
240 String v11nName = book.getBookMetaData().getProperty("Versification").toString();
241 Versification v11n = Versifications.instance().getVersification(v11nName);
242
243 SearchModifier modifier = getSearchModifier();
244 Key results = null;
245
246 if (search != null) {
247 try {
248 Analyzer analyzer = new LuceneAnalyzer(book);
249
250 QueryParser parser = new QueryParser(Version.LUCENE_29, LuceneIndex.FIELD_BODY, analyzer);
251 parser.setAllowLeadingWildcard(true);
252 Query query = parser.parse(search);
253 log.info("ParsedQuery-" + query.toString());
254
255 if (modifier != null && modifier.isRanked()) {
257 PassageTally tally = new PassageTally(v11n);
258 tally.raiseEventSuppresion();
259 tally.raiseNormalizeProtection();
260 results = tally;
261
262 TopScoreDocCollector collector = TopScoreDocCollector.create(modifier.getMaxResults(), false);
263 searcher.search(query, collector);
264 tally.setTotal(collector.getTotalHits());
265 ScoreDoc[] hits = collector.topDocs().scoreDocs;
266 for (int i = 0; i < hits.length; i++) {
267 int docId = hits[i].doc;
268 Document doc = searcher.doc(docId);
269 Key key = VerseFactory.fromString(v11n, doc.get(LuceneIndex.FIELD_KEY));
270 int score = (int) (hits[i].score * 100 + 1);
273 tally.add(key, score);
274 }
275 tally.lowerNormalizeProtection();
276 tally.lowerEventSuppresionAndTest();
277 } else {
278 results = book.createEmptyKeyList();
279 AbstractPassage passage = null;
282 if (results instanceof AbstractPassage) {
283 passage = (AbstractPassage) results;
284 passage.raiseEventSuppresion();
285 passage.raiseNormalizeProtection();
286 }
287 searcher.search(query, new VerseCollector(v11n, searcher, results));
288 if (passage != null) {
289 passage.lowerNormalizeProtection();
290 passage.lowerEventSuppresionAndTest();
291 }
292 }
293 } catch (IOException e) {
294 Throwable cause = e.getCause();
297 if (cause instanceof NoSuchVerseException) {
298 throw new BookException(JSMsg.gettext("Search failed."), cause);
300 }
301
302 throw new BookException(JSMsg.gettext("Search failed."), e);
304 } catch (NoSuchVerseException e) {
305 throw new BookException(JSMsg.gettext("Search failed."), e);
307 } catch (ParseException e) {
308 throw new BookException(JSMsg.gettext("Search failed."), e);
310 } finally {
311 Activator.deactivate(this);
312 }
313 }
314
315 if (results == null) {
316 if (modifier != null && modifier.isRanked()) {
317 results = new PassageTally(v11n);
318 } else {
319 results = book.createEmptyKeyList();
320 }
321 }
322 return results;
323 }
324
325
330 public Key getKey(String name) throws NoSuchKeyException {
331 return book.getKey(name);
332 }
333
334
341 public final void activate(Lock lock) {
342 try {
343 directory = FSDirectory.open(new File(path));
344 searcher = new IndexSearcher(directory, true);
345 } catch (IOException ex) {
346 log.warn("second load failure", ex);
347 }
348
349 active = true;
350 }
351
352
359 public final void deactivate(Lock lock) {
360 try {
361 searcher.close();
362 directory.close();
363 } catch (IOException ex) {
364 Reporter.informUser(this, ex);
365 } finally {
366 searcher = null;
367 directory = null;
368 }
369
370 active = false;
371 }
372
373
376 protected final void checkActive() {
377 if (!active) {
378 Activator.activate(this);
379 }
380 }
381
382
385 private void generateSearchIndexImpl(Progress job, List<Key> errors, IndexWriter writer, Key key, int count) throws BookException, IOException {
386 String v11nName = book.getBookMetaData().getProperty("Versification").toString();
387 Versification v11n = Versifications.instance().getVersification(v11nName);
388 boolean hasStrongs = book.getBookMetaData().hasFeature(FeatureType.STRONGS_NUMBERS);
389 boolean hasXRefs = book.getBookMetaData().hasFeature(FeatureType.SCRIPTURE_REFERENCES);
390 boolean hasNotes = book.getBookMetaData().hasFeature(FeatureType.FOOTNOTES);
391 boolean hasHeadings = book.getBookMetaData().hasFeature(FeatureType.HEADINGS);
392
393 String oldRootName = "";
394 int percent = 0;
395 String rootName = "";
396 BookData data = null;
397 Element osis = null;
398
399 Document doc = new Document();
401 Field keyField = new Field(FIELD_KEY, "", Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO);
402 Field bodyField = new Field(FIELD_BODY, "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
403 Field strongField = new Field(FIELD_STRONG, "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
404 Field xrefField = new Field(FIELD_XREF, "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
405 Field noteField = new Field(FIELD_NOTE, "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
406 Field headingField = new Field(FIELD_HEADING, "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
407
408 int size = key.getCardinality();
409 int subCount = count;
410 for (Key subkey : key) {
411 if (subkey.canHaveChildren()) {
412 generateSearchIndexImpl(job, errors, writer, subkey, subCount);
413 } else {
414 data = new BookData(book, subkey);
415 osis = null;
416
417 try {
418 osis = data.getOsisFragment();
419 } catch (BookException e) {
420 errors.add(subkey);
421 continue;
422 }
423
424 doc.getFields().clear();
426
427 keyField.setValue(subkey.getOsisRef());
430 doc.add(keyField);
431
432 addField(doc, bodyField, OSISUtil.getCanonicalText(osis));
433
434 if (hasStrongs) {
435 addField(doc, strongField, OSISUtil.getStrongsNumbers(osis));
436 }
437
438 if (hasXRefs) {
439 addField(doc, xrefField, OSISUtil.getReferences(v11n, osis));
440 }
441
442 if (hasNotes) {
443 addField(doc, noteField, OSISUtil.getNotes(osis));
444 }
445
446 if (hasHeadings) {
447 addField(doc, headingField, OSISUtil.getHeadings(osis));
448 }
449
450 if (doc.getFields().size() > 1) {
452 writer.addDocument(doc);
453 }
454
455 rootName = subkey.getRootName();
457 if (!rootName.equals(oldRootName)) {
458 oldRootName = rootName;
459 job.setSectionName(rootName);
460 }
461
462 subCount++;
463 int oldPercent = percent;
464 percent = 95 * subCount / size;
465
466 if (oldPercent != percent) {
467 job.setWork(percent);
468 }
469
470 Thread.yield();
472 if (Thread.currentThread().isInterrupted()) {
473 break;
474 }
475 }
476 }
477 }
478
479 private void addField(Document doc, Field field, String text) {
480 if (text != null && text.length() > 0) {
481 field.setValue(text);
482 doc.add(field);
483 }
484 }
485
486
490 private static final Object CREATING = new Object();
491
492
495 private boolean active;
496
497
500 private static final Logger log = Logger.getLogger(LuceneIndex.class);
501
502
505 protected Book book;
506
507
510 private String path;
511
512
515 protected Directory directory;
516
517
520 protected Searcher searcher;
521 }
522