| GreekLuceneAnalyzer.java |
1 /**
2 * Distribution License:
3 * JSword is free software; you can redistribute it and/or modify it under
4 * the terms of the GNU Lesser General Public License, version 2.1 as published by
5 * the Free Software Foundation. This program is distributed in the hope
6 * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8 * See the GNU Lesser General Public License for more details.
9 *
10 * The License is available on the internet at:
11 * http://www.gnu.org/copyleft/lgpl.html
12 * or by writing to:
13 * Free Software Foundation, Inc.
14 * 59 Temple Place - Suite 330
15 * Boston, MA 02111-1307, USA
16 *
17 * Copyright: 2007
18 * The copyright to this program is held by its authors.
19 *
20 * ID: $Id: KeyAnalyzer.java 1376 2007-06-01 18:27:01Z dmsmith $
21 */
22 package org.crosswire.jsword.index.lucene.analysis;
23
24 import java.io.IOException;
25 import java.io.Reader;
26
27 import org.apache.lucene.analysis.StopFilter;
28 import org.apache.lucene.analysis.TokenStream;
29 import org.apache.lucene.analysis.el.GreekAnalyzer;
30 import org.apache.lucene.analysis.el.GreekLowerCaseFilter;
31 import org.apache.lucene.analysis.standard.StandardTokenizer;
32 import org.apache.lucene.util.Version;
33
34 /**
35 * Uses org.apache.lucene.analysis.el.GreekAnalyzer to do lowercasing and
36 * stopword(off by default). Stemming not implemented yet
37 *
38 * @see gnu.lgpl.License for license details.<br>
39 * The copyright to this program is held by it's authors.
40 * @author Sijo Cherian [sijocherian at yahoo dot com]
41 */
42 public class GreekLuceneAnalyzer extends AbstractBookAnalyzer {
43 public GreekLuceneAnalyzer() {
44 stopSet = GreekAnalyzer.getDefaultStopSet();
45 }
46
47 /**
48 * Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}.
49 *
50 * @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
51 * {@link GreekLowerCaseFilter} and {@link StopFilter}
52 */
53 @Override
54 public TokenStream tokenStream(String fieldName, Reader reader) {
55 TokenStream result = new StandardTokenizer(matchVersion, reader);
56 result = new GreekLowerCaseFilter(result);
57 if (doStopWords && stopSet != null) {
58 result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet);
59 }
60 return result;
61 }
62
63 /**
64 * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text
65 * in the provided {@link Reader}.
66 *
67 * @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
68 * {@link GreekLowerCaseFilter} and {@link StopFilter}
69 */
70 @Override
71 public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
72 SavedStreams streams = (SavedStreams) getPreviousTokenStream();
73 if (streams == null) {
74 streams = new SavedStreams(new StandardTokenizer(matchVersion, reader));
75 streams.setResult(new GreekLowerCaseFilter(streams.getResult()));
76 if (doStopWords && stopSet != null) {
77 streams.setResult(new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), streams.getResult(), stopSet));
78 }
79 setPreviousTokenStream(streams);
80 } else {
81 streams.getSource().reset(reader);
82 }
83 return streams.getResult();
84 }
85
86 private final Version matchVersion = Version.LUCENE_29;
87 }
88