Coverage Report - org.crosswire.jsword.index.lucene.analysis.PersianLuceneAnalyzer
 
Classes in this File     Line Coverage    Branch Coverage    Complexity
PersianLuceneAnalyzer    0% (0/22)        0% (0/10)          2.667
 
/**
 * Distribution License:
 * JSword is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License, version 2.1 or later
 * as published by the Free Software Foundation. This program is distributed
 * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 *
 * The License is available on the internet at:
 *      http://www.gnu.org/copyleft/lgpl.html
 * or by writing to:
 *      Free Software Foundation, Inc.
 *      59 Temple Place - Suite 330
 *      Boston, MA 02111-1307, USA
 *
 * © CrossWire Bible Society, 2009 - 2016
 *
 */
package org.crosswire.jsword.index.lucene.analysis;

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ar.ArabicLetterTokenizer;
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
import org.apache.lucene.analysis.fa.PersianAnalyzer;
import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
import org.apache.lucene.util.Version;

/**
 * An Analyzer whose {@link TokenStream} is built from an
 * {@link ArabicLetterTokenizer} filtered with {@link LowerCaseFilter},
 * {@link ArabicNormalizationFilter}, {@link PersianNormalizationFilter} and
 * an optional Persian {@link StopFilter}.
 * 
 * @see gnu.lgpl.License The GNU Lesser General Public License for details.
 * @author DM Smith
 */
public class PersianLuceneAnalyzer extends AbstractBookAnalyzer {
    public PersianLuceneAnalyzer() {
        stopSet = PersianAnalyzer.getDefaultStopSet();
    }

    /*
     * (non-Javadoc)
     * 
     * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String,
     * java.io.Reader)
     */
    @Override
    public final TokenStream tokenStream(String fieldName, Reader reader) {
        TokenStream result = new ArabicLetterTokenizer(reader);
        result = new LowerCaseFilter(result);
        result = new ArabicNormalizationFilter(result);
        /* additional Persian-specific normalization */
        result = new PersianNormalizationFilter(result);
        /*
         * the order here is important: the stop set is normalized with the
         * above!
         */
        if (doStopWords && stopSet != null) {
            result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet);
        }

        return result;
    }

    /**
     * Returns a (possibly reused) {@link TokenStream} which tokenizes all the
     * text in the provided {@link Reader}.
     * 
     * @return A {@link TokenStream} built from an {@link ArabicLetterTokenizer}
     *         filtered with {@link LowerCaseFilter},
     *         {@link ArabicNormalizationFilter},
     *         {@link PersianNormalizationFilter} and Persian stop words
     */
    @Override
    public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
        SavedStreams streams = (SavedStreams) getPreviousTokenStream();
        if (streams == null) {
            streams = new SavedStreams(new ArabicLetterTokenizer(reader));
            streams.setResult(new LowerCaseFilter(streams.getResult()));
            streams.setResult(new ArabicNormalizationFilter(streams.getResult()));
            /* additional Persian-specific normalization */
            streams.setResult(new PersianNormalizationFilter(streams.getResult()));
            /*
             * the order here is important: the stop set is normalized with the
             * above!
             */
            if (doStopWords && stopSet != null) {
                streams.setResult(new StopFilter(false, streams.getResult(), stopSet));
            }
            setPreviousTokenStream(streams);
        } else {
            streams.getSource().reset(reader);
        }
        return streams.getResult();
    }

    private final Version matchVersion = Version.LUCENE_29;
}
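
For reference, a minimal sketch of how this analyzer might be driven against the Lucene 2.9 attribute API it targets (Version.LUCENE_29). The field name "content", the demo class name, and the sample Persian text are illustrative only; TermAttribute is the pre-3.1 way to read each term produced by the ArabicLetterTokenizer / LowerCaseFilter / ArabicNormalizationFilter / PersianNormalizationFilter chain described above.

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

import org.crosswire.jsword.index.lucene.analysis.PersianLuceneAnalyzer;

public class PersianLuceneAnalyzerDemo {
    public static void main(String[] args) throws Exception {
        // Illustrative field name and text; any Persian content would do here.
        PersianLuceneAnalyzer analyzer = new PersianLuceneAnalyzer();
        TokenStream stream = analyzer.tokenStream("content", new StringReader("کتاب مقدس"));

        // Lucene 2.9-style consumption: pull each normalized term via TermAttribute.
        TermAttribute term = stream.addAttribute(TermAttribute.class);
        while (stream.incrementToken()) {
            System.out.println(term.term());
        }
        stream.close();
    }
}

Note that the Persian StopFilter is only added when doStopWords is enabled (see the check in tokenStream above), so whether stop words appear in the output depends on how the AbstractBookAnalyzer base class is configured.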