1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 as published by
5    * the Free Software Foundation. This program is distributed in the hope
6    * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7    * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *       http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * Copyright: 2007
18   *     The copyright to this program is held by its authors.
19   *
20   * ID: $Id: KeyAnalyzer.java 1376 2007-06-01 18:27:01Z dmsmith $
21   */
22  package org.crosswire.jsword.index.lucene.analysis;
23  
24  import java.io.Reader;
25  
26  import org.apache.lucene.analysis.ASCIIFoldingFilter;
27  import org.apache.lucene.analysis.LowerCaseTokenizer;
28  import org.apache.lucene.analysis.TokenStream;
29  
30  /**
31   * Simple Analyzer providing same function as
32   * org.apache.lucene.analysis.SimpleAnalyzer This is intended to be the default
33   * analyzer for natural language fields. Additionally performs: Normalize
34   * Diacritics (Changes Accented characters to their unaccented equivalent) for
35   * ISO 8859-1 languages
36   * 
37   * Note: Next Lucene release (beyond 2.2.0) will have a major performance
38   * enhancement using method - public TokenStream reusableTokenStream(String
39   * fieldName, Reader reader) We should use that. Ref:
40   * https://issues.apache.org/jira/browse/LUCENE-969
41   * 
42   * @see gnu.lgpl.License for license details.<br>
43   *      The copyright to this program is held by it's authors.
44   * @author Sijo Cherian [sijocherian at yahoo dot com]
45   */
46  public class SimpleLuceneAnalyzer extends AbstractBookAnalyzer {
47  
48      public SimpleLuceneAnalyzer() {
49          doStemming = false;
50      }
51  
52      @Override
53      public TokenStream tokenStream(String fieldName, Reader reader) {
54          TokenStream result = new LowerCaseTokenizer(reader);
55          result = new ASCIIFoldingFilter(result);
56          return result;
57      }
58  }
59