| EnglishLuceneAnalyzer.java |
1 /**
2 * Distribution License:
3 * JSword is free software; you can redistribute it and/or modify it under
4 * the terms of the GNU Lesser General Public License, version 2.1 as published by
5 * the Free Software Foundation. This program is distributed in the hope
6 * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8 * See the GNU Lesser General Public License for more details.
9 *
10 * The License is available on the internet at:
11 * http://www.gnu.org/copyleft/lgpl.html
12 * or by writing to:
13 * Free Software Foundation, Inc.
14 * 59 Temple Place - Suite 330
15 * Boston, MA 02111-1307, USA
16 *
17 * Copyright: 2007
18 * The copyright to this program is held by its authors.
19 *
20 * ID: $Id: KeyAnalyzer.java 1376 2007-06-01 18:27:01Z dmsmith $
21 */
22 package org.crosswire.jsword.index.lucene.analysis;
23
24 import java.io.IOException;
25 import java.io.Reader;
26
27 import org.apache.lucene.analysis.LowerCaseTokenizer;
28 import org.apache.lucene.analysis.PorterStemFilter;
29 import org.apache.lucene.analysis.StopAnalyzer;
30 import org.apache.lucene.analysis.StopFilter;
31 import org.apache.lucene.analysis.TokenStream;
32 import org.apache.lucene.util.Version;
33
34 /**
35 * English Analyzer works like lucene SimpleAnalyzer + Stemming.
36 * (LowerCaseTokenizer > PorterStemFilter). Like the AbstractAnalyzer,
37 * {@link StopFilter} is off by default.
38 *
39 *
40 * @see gnu.lgpl.License for license details.<br>
41 * The copyright to this program is held by it's authors.
42 * @author sijo cherian [sijocherian at yahoo dot com]
43 */
44 public class EnglishLuceneAnalyzer extends AbstractBookAnalyzer {
45
46 public EnglishLuceneAnalyzer() {
47 stopSet = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
48 }
49
50 /**
51 * Constructs a {@link LowerCaseTokenizer} filtered by a language filter
52 * {@link StopFilter} and {@link PorterStemFilter} for English.
53 */
54 @Override
55 public final TokenStream tokenStream(String fieldName, Reader reader) {
56 TokenStream result = new LowerCaseTokenizer(reader);
57
58 if (doStopWords && stopSet != null) {
59 result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet);
60 }
61
62 // Using Porter Stemmer
63 if (doStemming) {
64 result = new PorterStemFilter(result);
65 }
66
67 return result;
68 }
69
70 /* (non-Javadoc)
71 * @see org.apache.lucene.analysis.Analyzer#reusableTokenStream(java.lang.String, java.io.Reader)
72 */
73 @Override
74 public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
75 SavedStreams streams = (SavedStreams) getPreviousTokenStream();
76 if (streams == null) {
77 streams = new SavedStreams(new LowerCaseTokenizer(reader));
78 if (doStopWords && stopSet != null) {
79 streams.setResult(new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), streams.getResult(), stopSet));
80 }
81
82 if (doStemming) {
83 streams.setResult(new PorterStemFilter(streams.getResult()));
84 }
85
86 setPreviousTokenStream(streams);
87 } else {
88 streams.getSource().reset(reader);
89 }
90 return streams.getResult();
91 }
92
93 private final Version matchVersion = Version.LUCENE_29;
94 }
95