[jsword-svn] r1865 - in trunk/jsword/src: main/java/org/crosswire/jsword/book main/java/org/crosswire/jsword/book/study main/java/org/crosswire/jsword/index/lucene main/java/org/crosswire/jsword/index/lucene/analysis test/java/org/crosswire/jsword/index/lucene/analysis

dmsmith at www.crosswire.org dmsmith at www.crosswire.org
Sat May 17 19:15:03 MST 2008


Author: dmsmith
Date: 2008-05-17 19:15:02 -0700 (Sat, 17 May 2008)
New Revision: 1865

Added:
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractBookAnalyzer.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractBookTokenFilter.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/KeyAnalyzer.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/KeyFilter.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/LuceneAnalyzer.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/StrongsNumberAnalyzer.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/StrongsNumberFilter.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/XRefAnalyzer.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/XRefFilter.java
Removed:
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyAnalyzer.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyFilter.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberAnalyzer.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberFilter.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefAnalyzer.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefFilter.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractAnalyzer.java
Modified:
   trunk/jsword/src/main/java/org/crosswire/jsword/book/OSISUtil.java
   trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsNumber.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.properties
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneIndex.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactory.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ChineseLuceneAnalyzer.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ConfigurableSnowballAnalyzer.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/CzechLuceneAnalyzer.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/EnglishLuceneAnalyzer.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/GermanLuceneAnalyzer.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/GreekLuceneAnalyzer.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/SimpleLuceneAnalyzer.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ThaiLuceneAnalyzer.java
   trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactoryTest.java
   trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ChineseLuceneAnalyzerTest.java
   trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ConfigurableSnowballAnalyzerTest.java
   trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/EnglishLuceneAnalyzerTest.java
   trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/GreekLuceneAnalyzerTest.java
   trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ThaiLuceneAnalyzerTest.java
Log:
Changes to indexing:
Refactored analyzers into analyzer package.
Stemming is now enabled
Strong's numbers now index H19a and the like
Made analyzers and filters take a book argument and not a language.
Optimized filters to reuse tokens.

Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/OSISUtil.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/OSISUtil.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/OSISUtil.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -115,7 +115,7 @@
     public static final String HI_UNDERLINE = "underline"; //$NON-NLS-1$
 
     /**
-     * Constant for rendering uppercase text
+     * Constant for rendering upper case text
      */
     public static final String HI_X_CAPS = "x-caps"; //$NON-NLS-1$
 
@@ -704,37 +704,19 @@
             String attr = ele.getAttributeValue(OSISUtil.ATTRIBUTE_W_LEMMA);
             if (attr != null)
             {
-                if (buffer.length() > 0)
+                Matcher matcher = strongsNumberPattern.matcher(attr);
+                while (matcher.find())
                 {
-                    buffer.append(' ');
+                    String strongsNum = matcher.group(1);
+                    if (buffer.length() > 0)
+                    {
+                        buffer.append(' ');
+                    }
+                    buffer.append(strongsNum);
                 }
-
-                buffer.append(attr);
             }
         }
 
-        String lemmas = buffer.toString();
-
-        // Clear out the buffer for re-use
-        int len = buffer.length();
-        if (len > 0)
-        {
-            buffer.delete(0, len);
-        }
-
-        Matcher matcher = strongsNumberPattern.matcher(lemmas);
-        while (matcher.find())
-        {
-            String strongType = matcher.group(1);
-            String strongsNum = matcher.group(2);
-            if (buffer.length() > 0)
-            {
-                buffer.append(' ');
-            }
-            buffer.append(strongType);
-            buffer.append(strongsNum);
-        }
-
         return buffer.toString().trim();
     }
 
@@ -1204,6 +1186,6 @@
         }
     }
 
-    private static String strongsNumber = "strong:([GH])0*([0-9]+)"; //$NON-NLS-1$
+    private static String strongsNumber = "strong:([GgHh][0-9]+!?[A-Za-z]*)"; //$NON-NLS-1$
     private static Pattern strongsNumberPattern = Pattern.compile(strongsNumber);
 }

Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsNumber.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsNumber.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsNumber.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -69,13 +69,26 @@
      */
     public StrongsNumber(char language, short strongsNumber) throws BookException
     {
+        this(language, strongsNumber, null);
+    }
+
+    /**
+     * Build an immutable Strong's Number.
+     * If the language is not 'G' or 'H' or the number is invalid, a BookException is thrown.
+     * @param language
+     * @param strongsNumber
+     * @throws BookException
+     */
+    public StrongsNumber(char language, short strongsNumber, String part) throws BookException
+    {
         this.language = language;
         this.strongsNumber = strongsNumber;
+        this.part = part;
         validate();
     }
 
     /**
-     * Return the canonical form of a Strong's Number.
+     * Return the canonical form of a Strong's Number, without the part.
      * @return the strongsNumber
      */
     public String getStrongsNumber()
@@ -86,6 +99,72 @@
         return buf.toString();
     }
 
+    /**
+     * Return the canonical form of a Strong's Number (language, zero-padded number), followed by the part, if any.
+     * @return the language letter, the number formatted by ZERO_PAD and then the part when it is not null
+     */
+    public String getFullStrongsNumber()
+    {
+        StringBuffer buf = new StringBuffer(5);
+        buf.append(language);
+        buf.append(ZERO_PAD.format(strongsNumber));
+        if (part != null)
+        {
+            buf.append(part);
+        }
+        return buf.toString();
+    }
+
+    /**
+     * @return true if the Strong's number is for Greek (language code 'G')
+     */
+    public boolean isGreek()
+    {
+        return language == 'G';
+    }
+
+    /**
+     * @return true if the Strong's number is for Hebrew (language code 'H')
+     */
+    public boolean isHebrew()
+    {
+        return language == 'H';
+    }
+
+    /**
+     * @return true if this Strong's number is identified by a sub part, that is, the part is not null
+     */
+    public boolean isPart()
+    {
+        return part != null;
+    }
+
+    /**
+     * Validates the number portion of this StrongsNumber.
+     * Hebrew Strong's numbers are in the range of: 1-8674
+     * Greek Strong's numbers are in the range of: 1-5624 (but not 1418, 2717, 3203-3302, 4452)
+     * @return true if the Strong's number is in range.
+     */
+    public boolean isValid()
+    {
+        if (language == 'H' && (strongsNumber < 1 || strongsNumber > 8674))
+        {
+            return false;
+        }
+
+        if (language == 'G'
+            && (strongsNumber < 1
+                            || strongsNumber > 5624
+                            || strongsNumber == 1418
+                            || strongsNumber == 2717
+                            || (strongsNumber >= 3203 && strongsNumber <= 3302)
+                            || strongsNumber == 4452))
+        {
+            return false;
+        }
+        return true;
+    }
+
     /* (non-Javadoc)
      * @see java.lang.Object#hashCode()
      */
@@ -155,6 +234,9 @@
 
         // Get the number after the G or H
         strongsNumber = Short.parseShort(m.group(2));
+
+        // FYI: OSIS refers to what follows a ! as a grain
+        part = m.group(3);
     }
 
     private void validate() throws BookException
@@ -163,24 +245,6 @@
         {
             throw new BookException(UserMsg.STRONGS_ERROR_NUMBER, new Object[] { toString() });
         }
-
-        // Greek Strong's numbers are in the range of: 1-8674
-        if (language == 'H' && (strongsNumber < 1 || strongsNumber > 8674))
-        {
-            throw new BookException(UserMsg.STRONGS_ERROR_NUMBER, new Object[] { toString() });
-        }
-
-        // Greek Strong's numbers are in the range of: 1-5624 (but not 1418, 2717, 3203-3302, 4452)
-        if (language == 'G'
-            && (strongsNumber < 0
-                            || strongsNumber > 5624
-                            || strongsNumber == 1418
-                            || strongsNumber == 2717
-                            || (strongsNumber >= 3203 || strongsNumber <= 3302)
-                            || strongsNumber == 4452))
-        {
-            throw new BookException(UserMsg.STRONGS_ERROR_NUMBER, new Object[] { toString() });
-        }
     }
 
     /**
@@ -194,8 +258,13 @@
     private short strongsNumber;
 
     /**
-     * The pattern of an acceptable strongs number.
+     * The part if any.
      */
-    private static final Pattern STRONGS_PATTERN = Pattern.compile("([GgHh])([0-9]+)"); //$NON-NLS-1$
+    private String part;
+
+    /**
+     * The pattern of an acceptable Strong's number.
+     */
+    private static final Pattern STRONGS_PATTERN = Pattern.compile("([GgHh])0*([1-9][0-9]*)!?([A-Za-z]+)?"); //$NON-NLS-1$
     private static final DecimalFormat ZERO_PAD = new DecimalFormat("0000"); //$NON-NLS-1$
 }

Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -82,7 +82,7 @@
     public static final String   LATEST_INDEX_VERSION = "Latest.Index.Version";               //$NON-NLS-1$
     public static final String   LUCENE_VERSION       = "Lucene.Version";                     //$NON-NLS-1$
     public static final float    INDEX_VERSION_1_1    = 1.1f;
-    public static final float    INDEX_VERSION_1_2    = 1.1f;
+    public static final float    INDEX_VERSION_1_2    = 1.2f;
 
     private static final Logger  log                  = Logger.getLogger(IndexMetadata.class);
     private static IndexMetadata myInstance;

Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.properties
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.properties	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.properties	2008-05-18 02:15:02 UTC (rev 1865)
@@ -1,4 +1,4 @@
 
-Installed.Index.Version=1.1
+Installed.Index.Version=1.2
 Latest.Index.Version=1.2
 Lucene.Version=2.3

Deleted: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyAnalyzer.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyAnalyzer.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -1,46 +0,0 @@
-/**
- * Distribution License:
- * JSword is free software; you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License, version 2.1 as published by
- * the Free Software Foundation. This program is distributed in the hope
- * that it will be useful, but WITHOUT ANY WARRANTY; without even the
- * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU Lesser General Public License for more details.
- *
- * The License is available on the internet at:
- *       http://www.gnu.org/copyleft/lgpl.html
- * or by writing to:
- *      Free Software Foundation, Inc.
- *      59 Temple Place - Suite 330
- *      Boston, MA 02111-1307, USA
- *
- * Copyright: 2007
- *     The copyright to this program is held by it's authors.
- *
- * ID: $Id$
- */
-package org.crosswire.jsword.index.lucene;
-
-import java.io.Reader;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.KeywordTokenizer;
-import org.apache.lucene.analysis.TokenStream;
-
-/**
- * A specialized analyzer that normalizes Strong's Numbers.
- *
- * @see gnu.lgpl.License for license details.
- *      The copyright to this program is held by it's authors.
- * @author DM Smith [dmsmith555 at yahoo dot com]
- */
-public class KeyAnalyzer extends Analyzer
-{
-    /* (non-Javadoc)
-     * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader)
-     */
-    public TokenStream tokenStream(String fieldName, Reader reader)
-    {
-        return new KeyFilter(new KeywordTokenizer(reader));
-    }
-}

Deleted: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyFilter.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyFilter.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyFilter.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -1,55 +0,0 @@
-/**
- * Distribution License:
- * JSword is free software; you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License, version 2.1 as published by
- * the Free Software Foundation. This program is distributed in the hope
- * that it will be useful, but WITHOUT ANY WARRANTY; without even the
- * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU Lesser General Public License for more details.
- *
- * The License is available on the internet at:
- *       http://www.gnu.org/copyleft/lgpl.html
- * or by writing to:
- *      Free Software Foundation, Inc.
- *      59 Temple Place - Suite 330
- *      Boston, MA 02111-1307, USA
- *
- * Copyright: 2007
- *     The copyright to this program is held by it's authors.
- *
- * ID: $Id$
- */
-package org.crosswire.jsword.index.lucene;
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-
-/**
- * A KeyFilter normalizes Key.
- *
- * @see gnu.lgpl.License for license details.
- *      The copyright to this program is held by it's authors.
- * @author DM Smith [dmsmith555 at yahoo dot com]
- */
-public class KeyFilter extends TokenFilter
-{
-    /**
-     * Construct filtering <i>in</i>.
-     */
-    public KeyFilter(TokenStream in)
-    {
-      super(in);
-    }
-
-    /* (non-Javadoc)
-     * @see org.apache.lucene.analysis.TokenStream#next()
-     */
-    public final Token next() throws IOException
-    {
-        // TODO(DMS): actually normalize
-        return input.next();
-    }
-}

Deleted: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -1,76 +0,0 @@
-/**
- * Distribution License:
- * JSword is free software; you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License, version 2.1 as published by
- * the Free Software Foundation. This program is distributed in the hope
- * that it will be useful, but WITHOUT ANY WARRANTY; without even the
- * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU Lesser General Public License for more details.
- *
- * The License is available on the internet at:
- *       http://www.gnu.org/copyleft/lgpl.html
- * or by writing to:
- *      Free Software Foundation, Inc.
- *      59 Temple Place - Suite 330
- *      Boston, MA 02111-1307, USA
- *
- * Copyright: 2005
- *     The copyright to this program is held by it's authors.
- *
- * ID: $Id:LuceneIndex.java 984 2006-01-23 14:18:33 -0500 (Mon, 23 Jan 2006) dmsmith $
- */
-package org.crosswire.jsword.index.lucene;
-
-import java.io.Reader;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
-import org.apache.lucene.analysis.SimpleAnalyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.crosswire.jsword.index.lucene.analysis.AnalyzerFactory;
-
-/**
- * A specialized analyzer for Books that analyzes different fields differently.
- * Uses AnalyzerFactory for InstalledIndexVersion > 1.1
- * @see gnu.lgpl.License for license details.
- *      The copyright to this program is held by it's authors.
- * @author DM Smith [dmsmith555 at yahoo dot com]
- */
-public class LuceneAnalyzer extends Analyzer
-{
-
-    public LuceneAnalyzer()
-    {
-        this(AnalyzerFactory.DEFAULT_ID);
-    }
-
-    public LuceneAnalyzer(String naturalLanguageID)
-    {
-        // The default analysis
-        analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
-
-        if (IndexMetadata.instance().getInstalledIndexVersion() > IndexMetadata.INDEX_VERSION_1_1)
-        {
-            // Content is analyzed using natural language analyzer
-            // (stemming, stopword etc)
-            Analyzer myNaturalLanguageAnalyzer = AnalyzerFactory.getInstance().createAnalyzer(naturalLanguageID);
-            analyzer.addAnalyzer(LuceneIndex.FIELD_BODY, myNaturalLanguageAnalyzer);
-        }
-
-        // Keywords are normalized to osisIDs
-        analyzer.addAnalyzer(LuceneIndex.FIELD_KEY, new KeyAnalyzer());
-
-        // Strong's Numbers are normalized to a consistent representation
-        analyzer.addAnalyzer(LuceneIndex.FIELD_STRONG, new StrongsNumberAnalyzer());
-
-        // XRefs are normalized from ranges into a list of osisIDs
-        analyzer.addAnalyzer(LuceneIndex.FIELD_XREF, new XRefAnalyzer());
-    }
-
-    public TokenStream tokenStream(String fieldName, Reader reader)
-    {
-        return analyzer.tokenStream(fieldName, reader);
-    }
-
-    private PerFieldAnalyzerWrapper analyzer;
-}

Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneIndex.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneIndex.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneIndex.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -56,6 +56,7 @@
 import org.crosswire.jsword.book.OSISUtil;
 import org.crosswire.jsword.index.AbstractIndex;
 import org.crosswire.jsword.index.IndexStatus;
+import org.crosswire.jsword.index.lucene.analysis.LuceneAnalyzer;
 import org.crosswire.jsword.index.search.SearchModifier;
 import org.crosswire.jsword.passage.AbstractPassage;
 import org.crosswire.jsword.passage.Key;
@@ -74,7 +75,40 @@
  */
 public class LuceneIndex extends AbstractIndex implements Activatable
 {
+    /* The following fields are named the same as Sword in the hopes of
+     * sharing indexes.
+     */
     /**
+     * The Lucene field for the osisID
+     */
+    public static final String FIELD_KEY = "key"; //$NON-NLS-1$
+
+    /**
+     * The Lucene field for the text contents
+     */
+    public static final String FIELD_BODY = "content"; //$NON-NLS-1$
+
+    /**
+     * The Lucene field for the strong numbers
+     */
+    public static final String FIELD_STRONG = "strong"; //$NON-NLS-1$
+
+    /**
+     * The Lucene field for headings
+     */
+    public static final String FIELD_HEADING = "heading"; //$NON-NLS-1$
+
+    /**
+     * The Lucene field for cross references
+     */
+    public static final String FIELD_XREF = "xref"; //$NON-NLS-1$
+
+    /**
+     * The Lucene field for the notes
+     */
+    public static final String FIELD_NOTE = "note"; //$NON-NLS-1$
+
+    /**
      * Read an existing index and use it.
      * @throws BookException If we fail to read the index files
      */
@@ -119,8 +153,7 @@
 
         IndexStatus finalStatus = IndexStatus.UNDONE;
 
-        String bookLang = book.getLanguage().getName();
-        Analyzer analyzer = new LuceneAnalyzer(bookLang);
+        Analyzer analyzer = new LuceneAnalyzer(book);
 
         List errors = new ArrayList();
         File tempPath = new File(path + '.' + IndexStatus.CREATING.toString());
@@ -207,8 +240,7 @@
         {
             try
             {
-                String bookLang = book.getLanguage().getName();
-                Analyzer analyzer = new LuceneAnalyzer(bookLang);
+                Analyzer analyzer = new LuceneAnalyzer(book);
 
                 QueryParser parser = new QueryParser(LuceneIndex.FIELD_BODY, analyzer);
                 parser.setAllowLeadingWildcard(true);
@@ -479,40 +511,7 @@
      */
     private static final Logger log = Logger.getLogger(LuceneIndex.class);
 
-    /* The following fields are named the same as Sword in the hopes of
-     * sharing indexes.
-     */
     /**
-     * The Lucene field for the osisID
-     */
-    protected static final String FIELD_KEY = "key"; //$NON-NLS-1$
-
-    /**
-     * The Lucene field for the text contents
-     */
-    protected static final String FIELD_BODY = "content"; //$NON-NLS-1$
-
-    /**
-     * The Lucene field for the strong numbers
-     */
-    protected static final String FIELD_STRONG = "strong"; //$NON-NLS-1$
-
-    /**
-     * The Lucene field for headings
-     */
-    protected static final String FIELD_HEADING = "heading"; //$NON-NLS-1$
-
-    /**
-     * The Lucene field for cross references
-     */
-    protected static final String FIELD_XREF = "xref"; //$NON-NLS-1$
-
-    /**
-     * The Lucene field for the notes
-     */
-    protected static final String FIELD_NOTE = "note"; //$NON-NLS-1$
-
-    /**
      * The Book that we are indexing
      */
     protected Book book;

Deleted: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberAnalyzer.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberAnalyzer.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -1,46 +0,0 @@
-/**
- * Distribution License:
- * JSword is free software; you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License, version 2.1 as published by
- * the Free Software Foundation. This program is distributed in the hope
- * that it will be useful, but WITHOUT ANY WARRANTY; without even the
- * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU Lesser General Public License for more details.
- *
- * The License is available on the internet at:
- *       http://www.gnu.org/copyleft/lgpl.html
- * or by writing to:
- *      Free Software Foundation, Inc.
- *      59 Temple Place - Suite 330
- *      Boston, MA 02111-1307, USA
- *
- * Copyright: 2007
- *     The copyright to this program is held by it's authors.
- *
- * ID: $Id$
- */
-package org.crosswire.jsword.index.lucene;
-
-import java.io.Reader;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
-
-/**
- * A specialized analyzer that normalizes JSword keys.
- *
- * @see gnu.lgpl.License for license details.
- *      The copyright to this program is held by it's authors.
- * @author DM Smith [dmsmith555 at yahoo dot com]
- */
-public class StrongsNumberAnalyzer extends Analyzer
-{
-    /* (non-Javadoc)
-     * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader)
-     */
-    public TokenStream tokenStream(String fieldName, Reader reader)
-    {
-        return new StrongsNumberFilter(new WhitespaceTokenizer(reader));
-    }
-}

Deleted: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberFilter.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberFilter.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberFilter.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -1,76 +0,0 @@
-/**
- * Distribution License:
- * JSword is free software; you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License, version 2.1 as published by
- * the Free Software Foundation. This program is distributed in the hope
- * that it will be useful, but WITHOUT ANY WARRANTY; without even the
- * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU Lesser General Public License for more details.
- *
- * The License is available on the internet at:
- *       http://www.gnu.org/copyleft/lgpl.html
- * or by writing to:
- *      Free Software Foundation, Inc.
- *      59 Temple Place - Suite 330
- *      Boston, MA 02111-1307, USA
- *
- * Copyright: 2007
- *     The copyright to this program is held by it's authors.
- *
- * ID: $Id$
- */
-package org.crosswire.jsword.index.lucene;
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.crosswire.jsword.book.BookException;
-import org.crosswire.jsword.book.DataPolice;
-import org.crosswire.jsword.book.study.StrongsNumber;
-
-/**
- * A StrongsNumberFilter normalizes Strong's Numbers.
- *
- * @see gnu.lgpl.License for license details.
- *      The copyright to this program is held by it's authors.
- * @author DM Smith [dmsmith555 at yahoo dot com]
- */
-public class StrongsNumberFilter extends TokenFilter
-{
-    /**
-     * Construct filtering <i>in</i>.
-     */
-    public StrongsNumberFilter(TokenStream in)
-    {
-      super(in);
-    }
-
-    /* (non-Javadoc)
-     * @see org.apache.lucene.analysis.TokenStream#next()
-     */
-    public final Token next() throws IOException
-    {
-        Token token = input.next();
-        if (token == null)
-        {
-            return null;
-        }
-
-        try
-        {
-            String s = new StrongsNumber(token.termText()).getStrongsNumber();
-            if (!s.equals(token.termText()))
-            {
-                token.setTermText(s);
-            }
-        }
-        catch (BookException e)
-        {
-            DataPolice.report(e.getDetailedMessage());
-        }
-
-        return token;
-    }
-}

Deleted: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefAnalyzer.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefAnalyzer.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -1,46 +0,0 @@
-/**
- * Distribution License:
- * JSword is free software; you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License, version 2.1 as published by
- * the Free Software Foundation. This program is distributed in the hope
- * that it will be useful, but WITHOUT ANY WARRANTY; without even the
- * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU Lesser General Public License for more details.
- *
- * The License is available on the internet at:
- *       http://www.gnu.org/copyleft/lgpl.html
- * or by writing to:
- *      Free Software Foundation, Inc.
- *      59 Temple Place - Suite 330
- *      Boston, MA 02111-1307, USA
- *
- * Copyright: 2007
- *     The copyright to this program is held by it's authors.
- *
- * ID: $Id$
- */
-package org.crosswire.jsword.index.lucene;
-
-import java.io.Reader;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
-
-/**
- * A specialized analyzer that normalizes Strong's Numbers.
- *
- * @see gnu.lgpl.License for license details.
- *      The copyright to this program is held by it's authors.
- * @author DM Smith [dmsmith555 at yahoo dot com]
- */
-public class XRefAnalyzer extends Analyzer
-{
-    /* (non-Javadoc)
-     * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader)
-     */
-    public TokenStream tokenStream(String fieldName, Reader reader)
-    {
-        return new KeyFilter(new WhitespaceTokenizer(reader));
-    }
-}

Deleted: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefFilter.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefFilter.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefFilter.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -1,55 +0,0 @@
-/**
- * Distribution License:
- * JSword is free software; you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License, version 2.1 as published by
- * the Free Software Foundation. This program is distributed in the hope
- * that it will be useful, but WITHOUT ANY WARRANTY; without even the
- * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU Lesser General Public License for more details.
- *
- * The License is available on the internet at:
- *       http://www.gnu.org/copyleft/lgpl.html
- * or by writing to:
- *      Free Software Foundation, Inc.
- *      59 Temple Place - Suite 330
- *      Boston, MA 02111-1307, USA
- *
- * Copyright: 2007
- *     The copyright to this program is held by it's authors.
- *
- * ID: $Id$
- */
-package org.crosswire.jsword.index.lucene;
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-
-/**
- * A KeyFilter normalizes OSISrefs.
- *
- * @see gnu.lgpl.License for license details.
- *      The copyright to this program is held by it's authors.
- * @author DM Smith [dmsmith555 at yahoo dot com]
- */
-public class XRefFilter extends TokenFilter
-{
-    /**
-     * Construct filtering <i>in</i>.
-     */
-    public XRefFilter(TokenStream in)
-    {
-      super(in);
-    }
-
-    /* (non-Javadoc)
-     * @see org.apache.lucene.analysis.TokenStream#next()
-     */
-    public final Token next() throws IOException
-    {
-        // TODO(DMS): actually normalize
-        return input.next();
-    }
-}

Deleted: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractAnalyzer.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractAnalyzer.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -1,88 +0,0 @@
-/**
- * Distribution License:
- * JSword is free software; you can redistribute it and/or modify it under
- * the terms of the GNU Lesser General Public License, version 2.1 as published by
- * the Free Software Foundation. This program is distributed in the hope
- * that it will be useful, but WITHOUT ANY WARRANTY; without even the
- * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU Lesser General Public License for more details.
- *
- * The License is available on the internet at:
- *       http://www.gnu.org/copyleft/lgpl.html
- * or by writing to:
- *      Free Software Foundation, Inc.
- *      59 Temple Place - Suite 330
- *      Boston, MA 02111-1307, USA
- *
- * Copyright: 2007
- *     The copyright to this program is held by it's authors.
- *
- * ID: $Id:  $
- */
-package org.crosswire.jsword.index.lucene.analysis;
-
-import java.util.Set;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.StopFilter;
-
-/**
- * Base class for Analyzers. 
- * Note: All analyzers configured in AnalyzerFactory.properties should be of this type
- *
- * @see gnu.lgpl.License for license details.<br>
- *      The copyright to this program is held by it's authors.
- * @author sijo cherian [sijocherian at yahoo dot com]
- */
-public abstract class AbstractAnalyzer extends Analyzer
-{
-
-    public AbstractAnalyzer()
-    {
-        doStopWords = false;
-        doStemming = true;
-        naturalLanguage = null;
-    }
-
-    public void setDoStopWords(boolean doIt)
-    {
-        doStopWords = doIt;
-    }
-
-    public boolean getDoStopWords()
-    {
-        return doStopWords;
-    }
-
-    public void setStopWords(String[] stopWords)
-    {
-        stopSet = StopFilter.makeStopSet(stopWords);
-    }
-
-    public void setDoStemming(boolean stemming)
-    {
-        doStemming = stemming;
-    }
-
-    public void setNaturalLanguage(String lang)
-    {
-        naturalLanguage = lang;
-    }
-
-    public String getNaturalLanguage()
-    {
-        return naturalLanguage;
-    }
-
-    protected Set     stopSet;
-
-    // for turning on/off stopword removal during analysis
-    protected boolean doStopWords;
-
-    // for turning on/off stemming
-    protected boolean doStemming;
-
-    // Natural language of text that is being analyzed (optional parameter)
-    protected String  naturalLanguage;
-
-}

Copied: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractBookAnalyzer.java (from rev 1853, trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractAnalyzer.java)
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractBookAnalyzer.java	                        (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractBookAnalyzer.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -0,0 +1,117 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ *       http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ *      Free Software Foundation, Inc.
+ *      59 Temple Place - Suite 330
+ *      Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ *     The copyright to this program is held by it's authors.
+ *
+ * ID: $Id:  $
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.StopFilter;
+import org.crosswire.jsword.book.Book;
+
+/**
+ * Base class for Analyzers. 
+ * Note: All analyzers configured in AnalyzerFactory.properties should be of this type
+ *
+ * @see gnu.lgpl.License for license details.<br>
+ *      The copyright to this program is held by it's authors.
+ * @author sijo cherian [sijocherian at yahoo dot com]
+ */
+public abstract class AbstractBookAnalyzer extends Analyzer
+{
+    /** Create an analyzer that is not tied to any book. */
+    public AbstractBookAnalyzer()
+    {
+        this(null);
+    }
+
+    /**
+     * Create an analyzer for a book, with stop word removal off, stemming on
+     * and no natural language set.
+     * @param book the book for which analysis is performed
+     */
+    public AbstractBookAnalyzer(Book book)
+    {
+        this.naturalLanguage = null;
+        this.doStemming = true;
+        this.doStopWords = false;
+        this.book = book;
+    }
+
+    /** @return the book for which analysis is being performed. */
+    public Book getBook()
+    {
+        return this.book;
+    }
+
+    /** @param newBook the book for which analysis is being performed. */
+    public void setBook(Book newBook)
+    {
+        this.book = newBook;
+    }
+
+    /** @return true when stop word removal is turned on. */
+    public boolean getDoStopWords()
+    {
+        return this.doStopWords;
+    }
+
+    /** @param doIt true to turn stop word removal on, false to turn it off. */
+    public void setDoStopWords(boolean doIt)
+    {
+        this.doStopWords = doIt;
+    }
+
+    /** @param stopWords the words that StopFilter turns into the stop set. */
+    public void setStopWords(String[] stopWords)
+    {
+        this.stopSet = StopFilter.makeStopSet(stopWords);
+    }
+
+    /** @param stemming true to turn stemming on, false to turn it off. */
+    public void setDoStemming(boolean stemming)
+    {
+        this.doStemming = stemming;
+    }
+
+    /** @return the natural language of the text that is being analyzed. */
+    public String getNaturalLanguage()
+    {
+        return this.naturalLanguage;
+    }
+
+    /** @param lang the natural language of the text that is being analyzed. */
+    public void setNaturalLanguage(String lang)
+    {
+        this.naturalLanguage = lang;
+    }
+
+    /** The book against which analysis is performed. */
+    protected Book    book;
+    /** The stop set built by setStopWords. */
+    protected Set     stopSet;
+    /** For turning on/off stop word removal during analysis. */
+    protected boolean doStopWords;
+    /** For turning on/off stemming. */
+    protected boolean doStemming;
+    /** Natural language of text that is being analyzed (optional parameter). */
+    protected String  naturalLanguage;
+}

Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractBookTokenFilter.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractBookTokenFilter.java	                        (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractBookTokenFilter.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -0,0 +1,76 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ *       http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ *      Free Software Foundation, Inc.
+ *      59 Temple Place - Suite 330
+ *      Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2008
+ *     The copyright to this program is held by it's authors.
+ *
+ * ID: $Id: org.eclipse.jdt.ui.prefs 1178 2006-11-06 12:48:02Z dmsmith $
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.crosswire.jsword.book.Book;
+
+/**
+ * An AbstractBookTokenFilter ties a Lucene TokenFilter to a Book.
+ *
+ * @see gnu.lgpl.License for license details.<br>
+ *      The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class AbstractBookTokenFilter extends TokenFilter
+{
+
+    /** The book to which this filter is tied, or null when it is tied to none. */
+    private Book book;
+
+    /**
+     * Build a filter over a token stream without tying it to a Book.
+     * @param input the token stream to filter
+     */
+    public AbstractBookTokenFilter(TokenStream input)
+    {
+        this(null, input);
+    }
+
+    /**
+     * Build a filter over a token stream and tie it to a Book.
+     * @param book the book to tie this filter to
+     * @param input the token stream to filter
+     */
+    public AbstractBookTokenFilter(Book book, TokenStream input)
+    {
+        super(input);
+        this.book = book;
+    }
+
+    /**
+     * @param book the book to tie this filter to
+     */
+    public void setBook(Book book)
+    {
+        this.book = book;
+    }
+
+    /**
+     * @return the book this filter is tied to
+     */
+    public Book getBook()
+    {
+        return this.book;
+    }
+}

Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactory.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactory.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactory.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -27,6 +27,7 @@
 import org.crosswire.common.util.ClassUtil;
 import org.crosswire.common.util.Logger;
 import org.crosswire.common.util.ResourceUtil;
+import org.crosswire.jsword.book.Book;
 
 /**
  * A factory creating the appropriate Analyzer for natural language analysis of text for Lucene 
@@ -45,9 +46,10 @@
  */
 public class AnalyzerFactory
 {
-    public AbstractAnalyzer createAnalyzer(String lang)
+    public AbstractBookAnalyzer createAnalyzer(Book book)
     {
-        AbstractAnalyzer newObject = null;
+        AbstractBookAnalyzer newObject = null;
+        String lang = book == null ? null : book.getLanguage().getName();
         if (lang != null)
         {
             String adjustLang = lang;
@@ -67,7 +69,7 @@
                 {
                     Class impl = ClassUtil.forName(aClass);
 
-                    newObject = (AbstractAnalyzer) impl.newInstance();
+                    newObject = (AbstractBookAnalyzer) impl.newInstance();
                 }
                 catch (ClassNotFoundException e)
                 {
@@ -90,6 +92,7 @@
         }
 
         // Configure the analyzer
+        newObject.setBook(book);
         newObject.setDoStemming(getDefaultStemmingProperty());
         newObject.setDoStopWords(getDefaultStopWordProperty());
         newObject.setNaturalLanguage(lang);

Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ChineseLuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ChineseLuceneAnalyzer.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ChineseLuceneAnalyzer.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -38,7 +38,7 @@
  *      The copyright to this program is held by it's authors.
  * @author Sijo Cherian [sijocherian at yahoo dot com]
  */
-public class ChineseLuceneAnalyzer extends AbstractAnalyzer
+public class ChineseLuceneAnalyzer extends AbstractBookAnalyzer
 {
     public ChineseLuceneAnalyzer()
     {

Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ConfigurableSnowballAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ConfigurableSnowballAnalyzer.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ConfigurableSnowballAnalyzer.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -64,7 +64,7 @@
  *      The copyright to this program is held by it's authors.
  * @author sijo cherian [sijocherian at yahoo dot com]
  */
-public class ConfigurableSnowballAnalyzer extends AbstractAnalyzer
+public class ConfigurableSnowballAnalyzer extends AbstractBookAnalyzer
 {
     public ConfigurableSnowballAnalyzer()
     {

Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/CzechLuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/CzechLuceneAnalyzer.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/CzechLuceneAnalyzer.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -35,7 +35,7 @@
  *      The copyright to this program is held by it's authors.
  * @author Sijo Cherian [sijocherian at yahoo dot com]
  */
-public class CzechLuceneAnalyzer extends AbstractAnalyzer
+public class CzechLuceneAnalyzer extends AbstractBookAnalyzer
 {
     public CzechLuceneAnalyzer()
     {

Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/EnglishLuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/EnglishLuceneAnalyzer.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/EnglishLuceneAnalyzer.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -39,7 +39,7 @@
  *      The copyright to this program is held by it's authors.
  * @author sijo cherian [sijocherian at yahoo dot com]
  */
-public class EnglishLuceneAnalyzer extends AbstractAnalyzer
+public class EnglishLuceneAnalyzer extends AbstractBookAnalyzer
 {
 
     public EnglishLuceneAnalyzer()

Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/GermanLuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/GermanLuceneAnalyzer.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/GermanLuceneAnalyzer.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -36,7 +36,7 @@
  *      The copyright to this program is held by it's authors.
  * @author Sijo Cherian [sijocherian at yahoo dot com]
  */
-public class GermanLuceneAnalyzer extends AbstractAnalyzer
+public class GermanLuceneAnalyzer extends AbstractBookAnalyzer
 {
 
     public TokenStream tokenStream(String fieldName, Reader reader)

Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/GreekLuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/GreekLuceneAnalyzer.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/GreekLuceneAnalyzer.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -35,7 +35,7 @@
  *      The copyright to this program is held by it's authors.
  * @author Sijo Cherian [sijocherian at yahoo dot com]
  */
-public class GreekLuceneAnalyzer extends AbstractAnalyzer
+public class GreekLuceneAnalyzer extends AbstractBookAnalyzer
 {
     public GreekLuceneAnalyzer()
     {

Copied: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/KeyAnalyzer.java (from rev 1853, trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyAnalyzer.java)
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/KeyAnalyzer.java	                        (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/KeyAnalyzer.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -0,0 +1,61 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ *       http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ *      Free Software Foundation, Inc.
+ *      59 Temple Place - Suite 330
+ *      Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ *     The copyright to this program is held by it's authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.KeywordTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.crosswire.jsword.book.Book;
+
+/**
+ * A specialized analyzer that normalizes JSword keys.
+ *
+ * @see gnu.lgpl.License for license details.
+ *      The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class KeyAnalyzer extends AbstractBookAnalyzer
+{
+    /** Construct a KeyAnalyzer that is not tied to a book. */
+    public KeyAnalyzer()
+    {
+    }
+
+    /**
+     * Construct a KeyAnalyzer tied to a book.
+     * @param book the book to which this analyzer and its KeyFilters are tied
+     */
+    public KeyAnalyzer(Book book)
+    {
+        super(book); // set by the base constructor; no overridable setBook call from here
+    }
+
+    /**
+     * Run the reader through a KeywordTokenizer and a KeyFilter; fieldName is not used.
+     * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader)
+     */
+    public TokenStream tokenStream(String fieldName, Reader reader)
+    {
+        return new KeyFilter(getBook(), new KeywordTokenizer(reader));
+    }
+}

Copied: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/KeyFilter.java (from rev 1853, trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyFilter.java)
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/KeyFilter.java	                        (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/KeyFilter.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -0,0 +1,66 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ *       http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ *      Free Software Foundation, Inc.
+ *      59 Temple Place - Suite 330
+ *      Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ *     The copyright to this program is held by it's authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.crosswire.jsword.book.Book;
+
+/**
+ * A KeyFilter normalizes Key.
+ *
+ * @see gnu.lgpl.License for license details.
+ *      The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class KeyFilter extends AbstractBookTokenFilter
+{
+    /**
+     * Create a KeyFilter over a token stream, with no Book attached.
+     * @param in the token stream to filter
+     */
+    public KeyFilter(TokenStream in)
+    {
+        super(null, in);
+    }
+
+    /**
+     * Create a KeyFilter over a token stream for a given Book.
+     * @param book the book to which this filter is tied
+     * @param in the token stream to filter
+     */
+    public KeyFilter(Book book, TokenStream in)
+    {
+        super(book, in);
+    }
+
+    /* Keys are not normalized yet, so every token of the input is handed on as it comes.
+     * @see org.apache.lucene.analysis.TokenStream#next(org.apache.lucene.analysis.Token)
+     */
+    public final Token next(Token result) throws IOException
+    {
+        // TODO(DMS): actually normalize
+        return input.next(result);
+    }
+}

Copied: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/LuceneAnalyzer.java (from rev 1853, trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java)
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/LuceneAnalyzer.java	                        (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/LuceneAnalyzer.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -0,0 +1,75 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ *       http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ *      Free Software Foundation, Inc.
+ *      59 Temple Place - Suite 330
+ *      Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2005
+ *     The copyright to this program is held by it's authors.
+ *
+ * ID: $Id:LuceneIndex.java 984 2006-01-23 14:18:33 -0500 (Mon, 23 Jan 2006) dmsmith $
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.crosswire.jsword.book.Book;
+import org.crosswire.jsword.index.lucene.IndexMetadata;
+import org.crosswire.jsword.index.lucene.LuceneIndex;
+
+/**
+ * A specialized analyzer for Books that analyzes different fields differently.
+ * This is book specific since it is possible that each book has specialized search requirements.
+ * 
+ * Uses AnalyzerFactory for InstalledIndexVersion > 1.1
+ * @see gnu.lgpl.License for license details.
+ *      The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class LuceneAnalyzer extends Analyzer
+{
+    /** The per-field wrapper to which tokenStream delegates. */
+    private PerFieldAnalyzerWrapper analyzer;
+
+    /**
+     * Register the analysis to use for each indexed field; SimpleAnalyzer is the default.
+     * @param book the book passed to AnalyzerFactory to create the body analyzer
+     */
+    public LuceneAnalyzer(Book book)
+    {
+        PerFieldAnalyzerWrapper perField = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
+        // Above index version 1.1 content gets natural language analysis (stemming, stopword etc)
+        if (IndexMetadata.instance().getInstalledIndexVersion() > IndexMetadata.INDEX_VERSION_1_1)
+        {
+            perField.addAnalyzer(LuceneIndex.FIELD_BODY, AnalyzerFactory.getInstance().createAnalyzer(book));
+        }
+
+        // Keys are meant to be normalized to osisIDs
+        perField.addAnalyzer(LuceneIndex.FIELD_KEY, new KeyAnalyzer());
+        // Strong's Numbers are normalized to a consistent representation
+        perField.addAnalyzer(LuceneIndex.FIELD_STRONG, new StrongsNumberAnalyzer());
+        // XRefs are meant to be normalized from ranges into a list of osisIDs
+        perField.addAnalyzer(LuceneIndex.FIELD_XREF, new XRefAnalyzer());
+        analyzer = perField;
+    }
+
+    /** Delegate to the per-field wrapper built by the constructor. */
+    public TokenStream tokenStream(String fieldName, Reader reader)
+    {
+        return analyzer.tokenStream(fieldName, reader);
+    }
+}

Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/SimpleLuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/SimpleLuceneAnalyzer.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/SimpleLuceneAnalyzer.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -43,7 +43,7 @@
  *      The copyright to this program is held by it's authors.
  * @author Sijo Cherian [sijocherian at yahoo dot com]
  */
-public class SimpleLuceneAnalyzer extends AbstractAnalyzer
+public class SimpleLuceneAnalyzer extends AbstractBookAnalyzer
 {
 
     public SimpleLuceneAnalyzer()

Copied: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/StrongsNumberAnalyzer.java (from rev 1853, trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberAnalyzer.java)
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/StrongsNumberAnalyzer.java	                        (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/StrongsNumberAnalyzer.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -0,0 +1,61 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ *       http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ *      Free Software Foundation, Inc.
+ *      59 Temple Place - Suite 330
+ *      Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ *     The copyright to this program is held by it's authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.crosswire.jsword.book.Book;
+
+/**
+ * A specialized analyzer that normalizes Strong's Numbers.
+ *
+ * @see gnu.lgpl.License for license details.
+ *      The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class StrongsNumberAnalyzer extends AbstractBookAnalyzer
+{
+    /** Construct a StrongsNumberAnalyzer that is not tied to a book. */
+    public StrongsNumberAnalyzer()
+    {
+    }
+
+    /**
+     * Construct a StrongsNumberAnalyzer tied to a book.
+     * @param book the book to which this analyzer and its StrongsNumberFilters are tied
+     */
+    public StrongsNumberAnalyzer(Book book)
+    {
+        super(book); // set by the base constructor; no overridable setBook call from here
+    }
+
+    /**
+     * Run the reader through a WhitespaceTokenizer and a StrongsNumberFilter; fieldName is not used.
+     * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader)
+     */
+    public TokenStream tokenStream(String fieldName, Reader reader)
+    {
+        return new StrongsNumberFilter(getBook(), new WhitespaceTokenizer(reader));
+    }
+}

Copied: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/StrongsNumberFilter.java (from rev 1853, trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberFilter.java)
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/StrongsNumberFilter.java	                        (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/StrongsNumberFilter.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -0,0 +1,110 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ *       http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ *      Free Software Foundation, Inc.
+ *      59 Temple Place - Suite 330
+ *      Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ *     The copyright to this program is held by it's authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.crosswire.jsword.book.Book;
+import org.crosswire.jsword.book.BookException;
+import org.crosswire.jsword.book.DataPolice;
+import org.crosswire.jsword.book.study.StrongsNumber;
+
+/**
+ * A StrongsNumberFilter normalizes Strong's Numbers.
+ *
+ * @see gnu.lgpl.License for license details.
+ *      The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class StrongsNumberFilter extends AbstractBookTokenFilter
+{
+    /**
+     * Construct a StrongsNumberFilter not tied to a Book.
+     * @param in the input TokenStream
+     */
+    public StrongsNumberFilter(TokenStream in)
+    {
+        this(null, in);
+    }
+
+    /**
+     * Construct a StrongsNumberFilter tied to a Book.
+     * @param book the book to which this TokenFilter is tied
+     * @param in the input TokenStream
+     */
+    public StrongsNumberFilter(Book book, TokenStream in)
+    {
+        super(book, in);
+    }
+
+    /**
+     * Normalize the next Strong's Number. A number that is a part (suffixed with '!a' or 'a',
+     * where 'a' is 1 or more letters) yields two tokens: one without the suffix, then the whole.
+     * @param result a token the input may reuse; the token the input returns is the one used
+     * @return the next token, or null when the input has no more tokens
+     * @see org.apache.lucene.analysis.TokenStream#next(org.apache.lucene.analysis.Token)
+     */
+    public final Token next(Token result) throws IOException
+    {
+        // The previous number was a part: emit its token again, now with the whole number.
+        if (lastToken != null)
+        {
+            Token whole = lastToken;
+            lastToken = null;
+            String full = number.getFullStrongsNumber();
+            whole.setTermBuffer(full.toCharArray(), 0, full.length());
+            return whole;
+        }
+        // The input may hand back a token other than result, so work on what it returns.
+        Token token = input.next(result);
+        if (token == null)
+        {
+            return null;
+        }
+        try
+        {
+            String tokenText = new String(token.termBuffer(), 0, token.termLength());
+            number = new StrongsNumber(tokenText);
+            String s = number.getStrongsNumber();
+            if (!s.equals(tokenText))
+            {
+                token.setTermBuffer(s.toCharArray(), 0, s.length());
+            }
+            // Keep a part around so that the next call can emit the whole number.
+            if (number.isPart())
+            {
+                lastToken = token;
+            }
+        }
+        catch (BookException e)
+        {
+            DataPolice.report(e.getDetailedMessage());
+        }
+        return token;
+    }
+
+    // Carried between calls: the token of a part still to be emitted whole, and its number.
+    private Token lastToken;
+    private StrongsNumber number;
+}

Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ThaiLuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ThaiLuceneAnalyzer.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ThaiLuceneAnalyzer.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -36,7 +36,7 @@
  *      The copyright to this program is held by it's authors.
  * @author sijo cherian [sijocherian at yahoo dot com]
  */
-public class ThaiLuceneAnalyzer extends AbstractAnalyzer
+public class ThaiLuceneAnalyzer extends AbstractBookAnalyzer
 {
 
     public TokenStream tokenStream(String fieldName, Reader reader)

Copied: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/XRefAnalyzer.java (from rev 1853, trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefAnalyzer.java)
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/XRefAnalyzer.java	                        (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/XRefAnalyzer.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -0,0 +1,61 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ *       http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ *      Free Software Foundation, Inc.
+ *      59 Temple Place - Suite 330
+ *      Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ *     The copyright to this program is held by its authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.crosswire.jsword.book.Book;
+
+/**
+ * An analyzer used for cross-references (XRef): the input is split on
+ * whitespace and the resulting tokens are passed through a KeyFilter.
+ *
+ * @see gnu.lgpl.License for license details.
+ *      The copyright to this program is held by its authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class XRefAnalyzer extends AbstractBookAnalyzer
+{
+    /** Create an XRefAnalyzer without setting a book. */
+    public XRefAnalyzer()
+    {
+    }
+
+    /** Create an XRefAnalyzer and hand it the given book via setBook. */
+    public XRefAnalyzer(Book book)
+    {
+        setBook(book);
+    }
+
+    /* (non-Javadoc)
+     * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader)
+     */
+    public TokenStream tokenStream(String fieldName, Reader reader)
+    {
+        // fieldName is not consulted: every field gets the same pipeline.
+        Book owner = getBook();
+        TokenStream words = new WhitespaceTokenizer(reader);
+        return new KeyFilter(owner, words);
+    }
+}

Copied: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/XRefFilter.java (from rev 1853, trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefFilter.java)
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/XRefFilter.java	                        (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/XRefFilter.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -0,0 +1,65 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ *       http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ *      Free Software Foundation, Inc.
+ *      59 Temple Place - Suite 330
+ *      Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ *     The copyright to this program is held by its authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.crosswire.jsword.book.Book;
+
+/**
+ * A token filter for cross-references (XRef). No normalization is done yet:
+ * each token read from the input stream is handed back as received.
+ *
+ * @see gnu.lgpl.License for license details.
+ *      The copyright to this program is held by its authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class XRefFilter extends AbstractBookTokenFilter
+{
+    /** Construct filtering <i>in</i>, passing null as the book. */
+    public XRefFilter(TokenStream in)
+    {
+        this(null, in);
+    }
+
+    /**
+     * Construct an XRefFilter tied to a Book.
+     * @param book the book to which this TokenFilter is tied.
+     * @param in the input TokenStream
+     */
+    public XRefFilter(Book book, TokenStream in)
+    {
+        super(book, in);
+    }
+
+    /* (non-Javadoc)
+     * @see org.apache.lucene.analysis.TokenStream#next(org.apache.lucene.analysis.Token)
+     */
+    public final Token next(Token result) throws IOException
+    {
+        // TODO(DMS): actually normalize
+        Token unchanged = input.next(result);
+        return unchanged;
+    }
+}

Modified: trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactoryTest.java
===================================================================
--- trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactoryTest.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactoryTest.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -59,18 +59,16 @@
      */
     public void testCreateAnalyzer()
     {
-        Analyzer myAnalyzer = AnalyzerFactory.getInstance().createAnalyzer(""); //$NON-NLS-1$
+        Analyzer myAnalyzer = AnalyzerFactory.getInstance().createAnalyzer(null);
         assertTrue(myAnalyzer!=null);
         
         myAnalyzer = AnalyzerFactory.getInstance().createAnalyzer(null);
         assertTrue(myAnalyzer!=null);
-        myAnalyzer = AnalyzerFactory.getInstance().createAnalyzer("Unknown"); //$NON-NLS-1$
-        assertTrue(myAnalyzer!=null);
     }
 
     public void testEngStemming() throws ParseException
     {
-        AbstractAnalyzer myAnalyzer = new EnglishLuceneAnalyzer();
+        AbstractBookAnalyzer myAnalyzer = new EnglishLuceneAnalyzer();
         
         QueryParser parser = new QueryParser(field, myAnalyzer);
         
@@ -105,7 +103,7 @@
 
         System.out.println(query.toString());
     }
-    
+/*    
     public void testLatin1Language() throws ParseException {
         Analyzer myAnalyzer = AnalyzerFactory.getInstance().createAnalyzer("Latin"); //$NON-NLS-1$
         
@@ -124,6 +122,6 @@
         
 
     }
-    
+*/
     protected static final String field = "content"; //$NON-NLS-1$
 }

Modified: trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ChineseLuceneAnalyzerTest.java
===================================================================
--- trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ChineseLuceneAnalyzerTest.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ChineseLuceneAnalyzerTest.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -62,6 +62,6 @@
     }
 
     protected static final String field = "content"; //$NON-NLS-1$
-    private AbstractAnalyzer myAnalyzer;
+    private AbstractBookAnalyzer myAnalyzer;
     private QueryParser parser;
 }

Modified: trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ConfigurableSnowballAnalyzerTest.java
===================================================================
--- trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ConfigurableSnowballAnalyzerTest.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ConfigurableSnowballAnalyzerTest.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -141,6 +141,6 @@
         
     }      
     protected static final String field = "content"; //$NON-NLS-1$
-    private AbstractAnalyzer myAnalyzer;
+    private AbstractBookAnalyzer myAnalyzer;
     private QueryParser parser;    
 }

Modified: trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/EnglishLuceneAnalyzerTest.java
===================================================================
--- trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/EnglishLuceneAnalyzerTest.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/EnglishLuceneAnalyzerTest.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -101,6 +101,6 @@
  
 
     protected static final String field = "content"; //$NON-NLS-1$
-    private AbstractAnalyzer myAnalyzer;
+    private AbstractBookAnalyzer myAnalyzer;
     private QueryParser parser;
 }

Modified: trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/GreekLuceneAnalyzerTest.java
===================================================================
--- trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/GreekLuceneAnalyzerTest.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/GreekLuceneAnalyzerTest.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -64,6 +64,6 @@
     }
 
     protected static final String field = "content"; //$NON-NLS-1$
-    private AbstractAnalyzer myAnalyzer;
+    private AbstractBookAnalyzer myAnalyzer;
     private QueryParser parser;
 }

Modified: trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ThaiLuceneAnalyzerTest.java
===================================================================
--- trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ThaiLuceneAnalyzerTest.java	2008-05-14 21:55:07 UTC (rev 1864)
+++ trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ThaiLuceneAnalyzerTest.java	2008-05-18 02:15:02 UTC (rev 1865)
@@ -74,6 +74,6 @@
        
     
     protected static final String field = "content"; //$NON-NLS-1$
-    private AbstractAnalyzer myAnalyzer;
+    private AbstractBookAnalyzer myAnalyzer;
     private QueryParser parser;
 }




More information about the jsword-svn mailing list