1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 as published by
5    * the Free Software Foundation. This program is distributed in the hope
6    * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7    * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *       http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * Copyright: 2005
18   *     The copyright to this program is held by its authors.
19   *
20   * ID: $Id:LuceneIndex.java 984 2006-01-23 14:18:33 -0500 (Mon, 23 Jan 2006) dmsmith $
21   */
22  package org.crosswire.jsword.index.lucene.analysis;
23  
24  import java.io.Reader;
25  
26  import org.apache.lucene.analysis.Analyzer;
27  import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
28  import org.apache.lucene.analysis.SimpleAnalyzer;
29  import org.apache.lucene.analysis.TokenStream;
30  import org.crosswire.jsword.book.Book;
31  import org.crosswire.jsword.index.lucene.IndexMetadata;
32  import org.crosswire.jsword.index.lucene.LuceneIndex;
33  
34  /**
35   * A specialized analyzer for Books that analyzes different fields differently.
36   * This is book specific since it is possible that each book has specialized
37   * search requirements.
38   * 
39   * Uses AnalyzerFactory for InstalledIndexVersion > 1.1
40   * 
41   * @see gnu.lgpl.License for license details.<br>
42   *      The copyright to this program is held by it's authors.
43   * @author DM Smith [dmsmith555 at yahoo dot com]
44   */
45  public class LuceneAnalyzer extends Analyzer {
46  
47      public LuceneAnalyzer(Book book) {
48          // The default analysis
49          analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
50  
51          if (IndexMetadata.instance().getInstalledIndexVersion() > IndexMetadata.INDEX_VERSION_1_1) {
52              // Content is analyzed using natural language analyzer
53              // (stemming, stopword etc)
54              Analyzer myNaturalLanguageAnalyzer = AnalyzerFactory.getInstance().createAnalyzer(book);
55              analyzer.addAnalyzer(LuceneIndex.FIELD_BODY, myNaturalLanguageAnalyzer);
56          }
57  
58          // Keywords are normalized to osisIDs
59          analyzer.addAnalyzer(LuceneIndex.FIELD_KEY, new KeyAnalyzer());
60  
61          // Strong's Numbers are normalized to a consistent representation
62          analyzer.addAnalyzer(LuceneIndex.FIELD_STRONG, new StrongsNumberAnalyzer());
63  
64          // XRefs are normalized from ranges into a list of osisIDs
65          analyzer.addAnalyzer(LuceneIndex.FIELD_XREF, new XRefAnalyzer());
66      }
67  
68      @Override
69      public TokenStream tokenStream(String fieldName, Reader reader) {
70          return analyzer.tokenStream(fieldName, reader);
71      }
72  
73      private PerFieldAnalyzerWrapper analyzer;
74  }
75