| SimpleLuceneAnalyzer.java |
1 /**
2 * Distribution License:
3 * JSword is free software; you can redistribute it and/or modify it under
4 * the terms of the GNU Lesser General Public License, version 2.1 as published by
5 * the Free Software Foundation. This program is distributed in the hope
6 * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8 * See the GNU Lesser General Public License for more details.
9 *
10 * The License is available on the internet at:
11 * http://www.gnu.org/copyleft/lgpl.html
12 * or by writing to:
13 * Free Software Foundation, Inc.
14 * 59 Temple Place - Suite 330
15 * Boston, MA 02111-1307, USA
16 *
17 * Copyright: 2007
18 * The copyright to this program is held by its authors.
19 *
20 * ID: $Id: KeyAnalyzer.java 1376 2007-06-01 18:27:01Z dmsmith $
21 */
22 package org.crosswire.jsword.index.lucene.analysis;
23
24 import java.io.Reader;
25
26 import org.apache.lucene.analysis.ASCIIFoldingFilter;
27 import org.apache.lucene.analysis.LowerCaseTokenizer;
28 import org.apache.lucene.analysis.TokenStream;
29
30 /**
31 * Simple Analyzer providing the same function as
32 * org.apache.lucene.analysis.SimpleAnalyzer. This is intended to be the default
33 * analyzer for natural language fields. Additionally, it normalizes
34 * diacritics (changes accented characters to their unaccented equivalent) for
35 * ISO 8859-1 languages.
36 *
37 * Note: The next Lucene release (beyond 2.2.0) will have a major performance
38 * enhancement using the method {@code public TokenStream
39 * reusableTokenStream(String fieldName, Reader reader)}. We should use that.
40 * Ref: https://issues.apache.org/jira/browse/LUCENE-969
41 *
42 * @see gnu.lgpl.License for license details.<br>
43 * The copyright to this program is held by its authors.
44 * @author Sijo Cherian [sijocherian at yahoo dot com]
45 */
46 public class SimpleLuceneAnalyzer extends AbstractBookAnalyzer {
47
48 public SimpleLuceneAnalyzer() {
49 doStemming = false;
50 }
51
52 @Override
53 public TokenStream tokenStream(String fieldName, Reader reader) {
54 TokenStream result = new LowerCaseTokenizer(reader);
55 result = new ASCIIFoldingFilter(result);
56 return result;
57 }
58 }
59