| LuceneAnalyzer.java |
1 /**
2 * Distribution License:
3 * JSword is free software; you can redistribute it and/or modify it under
4 * the terms of the GNU Lesser General Public License, version 2.1 as published by
5 * the Free Software Foundation. This program is distributed in the hope
6 * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8 * See the GNU Lesser General Public License for more details.
9 *
10 * The License is available on the internet at:
11 * http://www.gnu.org/copyleft/lgpl.html
12 * or by writing to:
13 * Free Software Foundation, Inc.
14 * 59 Temple Place - Suite 330
15 * Boston, MA 02111-1307, USA
16 *
17 * Copyright: 2005
18 * The copyright to this program is held by its authors.
19 *
20 * ID: $Id:LuceneIndex.java 984 2006-01-23 14:18:33 -0500 (Mon, 23 Jan 2006) dmsmith $
21 */
22 package org.crosswire.jsword.index.lucene.analysis;
23
24 import java.io.Reader;
25
26 import org.apache.lucene.analysis.Analyzer;
27 import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
28 import org.apache.lucene.analysis.SimpleAnalyzer;
29 import org.apache.lucene.analysis.TokenStream;
30 import org.crosswire.jsword.book.Book;
31 import org.crosswire.jsword.index.lucene.IndexMetadata;
32 import org.crosswire.jsword.index.lucene.LuceneIndex;
33
34 /**
35 * A specialized analyzer for Books that analyzes different fields differently.
36 * This is book specific since it is possible that each book has specialized
37 * search requirements.
38 *
39 * Uses AnalyzerFactory for InstalledIndexVersion > 1.1
40 *
41 * @see gnu.lgpl.License for license details.<br>
42 * The copyright to this program is held by its authors.
43 * @author DM Smith [dmsmith555 at yahoo dot com]
44 */
45 public class LuceneAnalyzer extends Analyzer {
46
47 public LuceneAnalyzer(Book book) {
48 // The default analysis
49 analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
50
51 if (IndexMetadata.instance().getInstalledIndexVersion() > IndexMetadata.INDEX_VERSION_1_1) {
52 // Content is analyzed using natural language analyzer
53 // (stemming, stopword etc)
54 Analyzer myNaturalLanguageAnalyzer = AnalyzerFactory.getInstance().createAnalyzer(book);
55 analyzer.addAnalyzer(LuceneIndex.FIELD_BODY, myNaturalLanguageAnalyzer);
56 }
57
58 // Keywords are normalized to osisIDs
59 analyzer.addAnalyzer(LuceneIndex.FIELD_KEY, new KeyAnalyzer());
60
61 // Strong's Numbers are normalized to a consistent representation
62 analyzer.addAnalyzer(LuceneIndex.FIELD_STRONG, new StrongsNumberAnalyzer());
63
64 // XRefs are normalized from ranges into a list of osisIDs
65 analyzer.addAnalyzer(LuceneIndex.FIELD_XREF, new XRefAnalyzer());
66 }
67
68 @Override
69 public TokenStream tokenStream(String fieldName, Reader reader) {
70 return analyzer.tokenStream(fieldName, reader);
71 }
72
73 private PerFieldAnalyzerWrapper analyzer;
74 }
75