1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 as published by
5    * the Free Software Foundation. This program is distributed in the hope
6    * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7    * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *       http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * Copyright: 2007
18   *     The copyright to this program is held by it's authors.
19   *
20   * ID: $Id: KeyAnalyzer.java 1376 2007-06-01 18:27:01Z dmsmith $
21   */
22  package org.crosswire.jsword.index.lucene.analysis;
23  
24  import java.io.IOException;
25  import java.io.Reader;
26  
27  import org.apache.lucene.analysis.LowerCaseTokenizer;
28  import org.apache.lucene.analysis.PorterStemFilter;
29  import org.apache.lucene.analysis.StopAnalyzer;
30  import org.apache.lucene.analysis.StopFilter;
31  import org.apache.lucene.analysis.TokenStream;
32  import org.apache.lucene.util.Version;
33  
34  /**
35   * English Analyzer works like lucene SimpleAnalyzer + Stemming.
36   * (LowerCaseTokenizer > PorterStemFilter). Like the AbstractAnalyzer,
37   * {@link StopFilter} is off by default.
38   * 
39   * 
40   * @see gnu.lgpl.License for license details.<br>
41   *      The copyright to this program is held by it's authors.
42   * @author sijo cherian [sijocherian at yahoo dot com]
43   */
44  public class EnglishLuceneAnalyzer extends AbstractBookAnalyzer {
45  
46      public EnglishLuceneAnalyzer() {
47          stopSet = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
48      }
49  
50      /**
51       * Constructs a {@link LowerCaseTokenizer} filtered by a language filter
52       * {@link StopFilter} and {@link PorterStemFilter} for English.
53       */
54      @Override
55      public final TokenStream tokenStream(String fieldName, Reader reader) {
56          TokenStream result = new LowerCaseTokenizer(reader);
57  
58          if (doStopWords && stopSet != null) {
59              result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet);
60          }
61  
62          // Using Porter Stemmer
63          if (doStemming) {
64              result = new PorterStemFilter(result);
65          }
66  
67          return result;
68      }
69  
70      /* (non-Javadoc)
71       * @see org.apache.lucene.analysis.Analyzer#reusableTokenStream(java.lang.String, java.io.Reader)
72       */
73      @Override
74      public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
75          SavedStreams streams = (SavedStreams) getPreviousTokenStream();
76          if (streams == null) {
77              streams = new SavedStreams(new LowerCaseTokenizer(reader));
78              if (doStopWords && stopSet != null) {
79                  streams.setResult(new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), streams.getResult(), stopSet));
80              }
81  
82              if (doStemming) {
83                  streams.setResult(new PorterStemFilter(streams.getResult()));
84              }
85  
86              setPreviousTokenStream(streams);
87          } else {
88              streams.getSource().reset(reader);
89          }
90          return streams.getResult();
91      }
92  
93      private final Version matchVersion = Version.LUCENE_29;
94  }
95