1
22 package org.crosswire.jsword.index.lucene.analysis;
23
24 import java.io.IOException;
25 import java.io.Reader;
26 import java.util.HashMap;
27 import java.util.Set;
28 import java.util.regex.Pattern;
29
30 import org.apache.lucene.analysis.LowerCaseTokenizer;
31 import org.apache.lucene.analysis.PorterStemFilter;
32 import org.apache.lucene.analysis.StopAnalyzer;
33 import org.apache.lucene.analysis.StopFilter;
34 import org.apache.lucene.analysis.TokenStream;
35 import org.apache.lucene.analysis.de.GermanAnalyzer;
36 import org.apache.lucene.analysis.fr.FrenchAnalyzer;
37 import org.apache.lucene.analysis.nl.DutchAnalyzer;
38 import org.apache.lucene.analysis.snowball.SnowballFilter;
39 import org.apache.lucene.util.Version;
40 import org.crosswire.jsword.book.Book;
41
42
76 public class ConfigurableSnowballAnalyzer extends AbstractBookAnalyzer {
77 public ConfigurableSnowballAnalyzer() {
78 }
79
80
84 @Override
85 public final TokenStream tokenStream(String fieldName, Reader reader) {
86 TokenStream result = new LowerCaseTokenizer(reader);
87 if (doStopWords && stopSet != null) {
88 result = new StopFilter(false, result, stopSet);
89 }
90
91 if (doStemming) {
93 result = new SnowballFilter(result, stemmerName);
94 }
95
96 return result;
97 }
98
99
102 @Override
103 public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
104 SavedStreams streams = (SavedStreams) getPreviousTokenStream();
105 if (streams == null) {
106 streams = new SavedStreams(new LowerCaseTokenizer(reader));
107 if (doStopWords && stopSet != null) {
108 streams.setResult(new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), streams.getResult(), stopSet));
109 }
110
111 if (doStemming) {
112 streams.setResult(new PorterStemFilter(streams.getResult()));
113 }
114
115 setPreviousTokenStream(streams);
116 } else {
117 streams.getSource().reset(reader);
118 }
119 return streams.getResult();
120 }
121
122 @Override
123 public void setBook(Book newBook) {
124 book = newBook;
125 stemmerName = null;
126 if (book != null) {
127 pickStemmer(book.getLanguage().getName());
129 }
130 }
131
132
137 public void pickStemmer(String language) {
138 stemmerName = language;
139 if (stemmerName != null) {
140 if (!allowedStemmers.matcher(stemmerName).matches()) {
142 throw new IllegalArgumentException("SnowballAnalyzer configured for unavailable stemmer " + stemmerName);
143 }
144
145 if (defaultStopWordMap.containsKey(stemmerName)) {
147 stopSet = defaultStopWordMap.get(stemmerName);
148 }
149 }
150 }
151
152
155 private String stemmerName;
156
157 private static Pattern allowedStemmers = Pattern
158 .compile("(Danish|Dutch|English|Finnish|French|German2|German|Italian|Kp|Lovins|Norwegian|Porter|Portuguese|Russian|Spanish|Swedish)");
159
160 private static HashMap<String, Set<?>> defaultStopWordMap = new HashMap<String, Set<?>>();
162 static {
163 defaultStopWordMap.put("French", FrenchAnalyzer.getDefaultStopSet());
164 defaultStopWordMap.put("German", GermanAnalyzer.getDefaultStopSet());
165 defaultStopWordMap.put("German2", GermanAnalyzer.getDefaultStopSet());
166 defaultStopWordMap.put("Dutch", DutchAnalyzer.getDefaultStopSet());
167 defaultStopWordMap.put("English", StopAnalyzer.ENGLISH_STOP_WORDS_SET);
168 defaultStopWordMap.put("Porter", StopAnalyzer.ENGLISH_STOP_WORDS_SET);
169 }
170
171 private final Version matchVersion = Version.LUCENE_29;
172 }
173