1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 as published by
5    * the Free Software Foundation. This program is distributed in the hope
6    * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7    * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *       http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * Copyright: 2007
18   *     The copyright to this program is held by its authors.
19   *
20   * ID: $Id: KeyAnalyzer.java 1376 2007-06-01 18:27:01Z dmsmith $
21   */
22  package org.crosswire.jsword.index.lucene.analysis;
23  
24  import java.io.IOException;
25  import java.io.Reader;
26  
27  import org.apache.lucene.analysis.StopFilter;
28  import org.apache.lucene.analysis.TokenStream;
29  import org.apache.lucene.analysis.el.GreekAnalyzer;
30  import org.apache.lucene.analysis.el.GreekLowerCaseFilter;
31  import org.apache.lucene.analysis.standard.StandardTokenizer;
32  import org.apache.lucene.util.Version;
33  
34  /**
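35   * Lowercases Greek text with org.apache.lucene.analysis.el.GreekLowerCaseFilter and
36   * removes the default stop words of org.apache.lucene.analysis.el.GreekAnalyzer
36   * (stop-word removal is off by default). Stemming is not implemented yet.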
37   * 
38   * @see gnu.lgpl.License for license details.<br>
39   *      The copyright to this program is held by its authors.
40   * @author Sijo Cherian [sijocherian at yahoo dot com]
41   */
42  public class GreekLuceneAnalyzer extends AbstractBookAnalyzer {
43      public GreekLuceneAnalyzer() {
44          stopSet = GreekAnalyzer.getDefaultStopSet();
45      }
46  
47      /**
48       * Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}.
49       *
50       * @return  A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
51       *                  {@link GreekLowerCaseFilter} and {@link StopFilter}
52       */
53      @Override
54      public TokenStream tokenStream(String fieldName, Reader reader) {
55          TokenStream result = new StandardTokenizer(matchVersion, reader);
56          result = new GreekLowerCaseFilter(result);
57          if (doStopWords && stopSet != null) {
58              result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet);
59          }
60          return result;
61      }
62  
63      /**
64       * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text 
65       * in the provided {@link Reader}.
66       *
67       * @return  A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
68       *                  {@link GreekLowerCaseFilter} and {@link StopFilter}
69       */
70      @Override
71      public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
72          SavedStreams streams = (SavedStreams) getPreviousTokenStream();
73          if (streams == null) {
74              streams = new SavedStreams(new StandardTokenizer(matchVersion, reader));
75              streams.setResult(new GreekLowerCaseFilter(streams.getResult()));
76              if (doStopWords && stopSet != null) {
77                  streams.setResult(new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), streams.getResult(), stopSet));
78              }
79              setPreviousTokenStream(streams);
80          } else {
81              streams.getSource().reset(reader);
82          }
83          return streams.getResult();
84      }
85  
86      private final Version matchVersion = Version.LUCENE_29;
87  }
88