Coverage Report

Coverage Report - org.crosswire.jsword.index.lucene.analysis.PersianLuceneAnalyzer

Classes in this File

Line Coverage

Branch Coverage

Complexity

PersianLuceneAnalyzer

0/22

0/10

2.667

 /**
  * Distribution License:
  * JSword is free software; you can redistribute it and/or modify it under
  * the terms of the GNU Lesser General Public License, version 2.1 or later
  * as published by the Free Software Foundation. This program is distributed
  * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
  * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  * See the GNU Lesser General Public License for more details.
  *
  * The License is available on the internet at:
  *      http://www.gnu.org/copyleft/lgpl.html
  * or by writing to:
  *      Free Software Foundation, Inc.
  *      59 Temple Place - Suite 330
  *      Boston, MA 02111-1307, USA
  *
  * © CrossWire Bible Society, 2009 - 2016
  *
  */
 package org.crosswire.jsword.index.lucene.analysis;
 
 import java.io.IOException;
 import java.io.Reader;
 
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.ar.ArabicLetterTokenizer;
 import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
 import org.apache.lucene.analysis.fa.PersianAnalyzer;
 import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
 import org.apache.lucene.util.Version;
 
 /**
  * An Analyzer whose {@link TokenStream} is built from a
  * {@link ArabicLetterTokenizer} filtered with {@link LowerCaseFilter},
  * {@link ArabicNormalizationFilter}, {@link PersianNormalizationFilter} and
  * Persian {@link StopFilter} (optional)
  * 
  * @see gnu.lgpl.License The GNU Lesser General Public License for details.
  * @author DM Smith
  */
 public class PersianLuceneAnalyzer extends AbstractBookAnalyzer {
     public PersianLuceneAnalyzer() {
         stopSet = PersianAnalyzer.getDefaultStopSet();
     }
 
     /*
      * (non-Javadoc)
      * 
      * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String,
      * java.io.Reader)
      */
     @Override
     public final TokenStream tokenStream(String fieldName, Reader reader) {
         TokenStream result = new ArabicLetterTokenizer(reader);
         result = new LowerCaseFilter(result);
         result = new ArabicNormalizationFilter(result);
         /* additional persian-specific normalization */
         result = new PersianNormalizationFilter(result);
         /*
          * the order here is important: the stop set is normalized with the
          * above!
          */
         if (doStopWords && stopSet != null) {
             result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet);
         }
 
         return result;
     }
 
     /**
      * Returns a (possibly reused) {@link TokenStream} which tokenizes all the
      * text in the provided {@link Reader}.
      * 
      * @return A {@link TokenStream} built from a {@link ArabicLetterTokenizer}
      *         filtered with {@link LowerCaseFilter},
      *         {@link ArabicNormalizationFilter},
      *         {@link PersianNormalizationFilter} and Persian Stop words
      */
     @Override
     public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
         SavedStreams streams = (SavedStreams) getPreviousTokenStream();
         if (streams == null) {
             streams = new SavedStreams(new ArabicLetterTokenizer(reader));
             streams.setResult(new LowerCaseFilter(streams.getResult()));
             streams.setResult(new ArabicNormalizationFilter(streams.getResult()));
             /* additional persian-specific normalization */
             streams.setResult(new PersianNormalizationFilter(streams.getResult()));
             /*
              * the order here is important: the stop set is normalized with the
              * above!
              */
             if (doStopWords && stopSet != null) {
                 streams.setResult(new StopFilter(false, streams.getResult(), stopSet));
             }
             setPreviousTokenStream(streams);
         } else {
             streams.getSource().reset(reader);
         }
         return streams.getResult();
     }
     private final Version matchVersion = Version.LUCENE_29;
 }

1		/**
2		* Distribution License:
3		* JSword is free software; you can redistribute it and/or modify it under
4		* the terms of the GNU Lesser General Public License, version 2.1 or later
5		* as published by the Free Software Foundation. This program is distributed
6		* in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
7		* the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8		* See the GNU Lesser General Public License for more details.
9		*
10		* The License is available on the internet at:
11		* http://www.gnu.org/copyleft/lgpl.html
12		* or by writing to:
13		* Free Software Foundation, Inc.
14		* 59 Temple Place - Suite 330
15		* Boston, MA 02111-1307, USA
16		*
17		* © CrossWire Bible Society, 2009 - 2016
18		*
19		*/
20		package org.crosswire.jsword.index.lucene.analysis;
21
22		import java.io.IOException;
23		import java.io.Reader;
24
25		import org.apache.lucene.analysis.LowerCaseFilter;
26		import org.apache.lucene.analysis.StopFilter;
27		import org.apache.lucene.analysis.TokenStream;
28		import org.apache.lucene.analysis.ar.ArabicLetterTokenizer;
29		import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
30		import org.apache.lucene.analysis.fa.PersianAnalyzer;
31		import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
32		import org.apache.lucene.util.Version;
33
34		/**
35		* An Analyzer whose {@link TokenStream} is built from a
36		* {@link ArabicLetterTokenizer} filtered with {@link LowerCaseFilter},
37		* {@link ArabicNormalizationFilter}, {@link PersianNormalizationFilter} and
38		* Persian {@link StopFilter} (optional)
39		*
40		* @see gnu.lgpl.License The GNU Lesser General Public License for details.
41		* @author DM Smith
42		*/
43		public class PersianLuceneAnalyzer extends AbstractBookAnalyzer {
44	0	public PersianLuceneAnalyzer() {
45	0	stopSet = PersianAnalyzer.getDefaultStopSet();
46	0	}
47
48		/*
49		* (non-Javadoc)
50		*
51		* @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String,
52		* java.io.Reader)
53		*/
54		@Override
55		public final TokenStream tokenStream(String fieldName, Reader reader) {
56	0	TokenStream result = new ArabicLetterTokenizer(reader);
57	0	result = new LowerCaseFilter(result);
58	0	result = new ArabicNormalizationFilter(result);
59		/* additional persian-specific normalization */
60	0	result = new PersianNormalizationFilter(result);
61		/*
62		* the order here is important: the stop set is normalized with the
63		* above!
64		*/
65	0	if (doStopWords && stopSet != null) {
66	0	result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet);
67		}
68
69	0	return result;
70		}
71
72		/**
73		* Returns a (possibly reused) {@link TokenStream} which tokenizes all the
74		* text in the provided {@link Reader}.
75		*
76		* @return A {@link TokenStream} built from a {@link ArabicLetterTokenizer}
77		* filtered with {@link LowerCaseFilter},
78		* {@link ArabicNormalizationFilter},
79		* {@link PersianNormalizationFilter} and Persian Stop words
80		*/
81		@Override
82		public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
83	0	SavedStreams streams = (SavedStreams) getPreviousTokenStream();
84	0	if (streams == null) {
85	0	streams = new SavedStreams(new ArabicLetterTokenizer(reader));
86	0	streams.setResult(new LowerCaseFilter(streams.getResult()));
87	0	streams.setResult(new ArabicNormalizationFilter(streams.getResult()));
88		/* additional persian-specific normalization */
89	0	streams.setResult(new PersianNormalizationFilter(streams.getResult()));
90		/*
91		* the order here is important: the stop set is normalized with the
92		* above!
93		*/
94	0	if (doStopWords && stopSet != null) {
95	0	streams.setResult(new StopFilter(false, streams.getResult(), stopSet));
96		}
97	0	setPreviousTokenStream(streams);
98		} else {
99	0	streams.getSource().reset(reader);
100		}
101	0	return streams.getResult();
102		}
103	0	private final Version matchVersion = Version.LUCENE_29;
104		}