[jsword-svn] jsword/java/jsword/org/crosswire/jsword/book/search/lucene s

jswordcvs at crosswire.org jswordcvs at crosswire.org
Sun Apr 24 18:22:25 MST 2005


Update of /cvs/jsword/jsword/java/jsword/org/crosswire/jsword/book/search/lucene
In directory www.crosswire.org:/tmp/cvs-serv22949/java/jsword/org/crosswire/jsword/book/search/lucene

Modified Files:
	Msg.java LuceneIndex.java Msg.properties 
Added Files:
	BaseQuery.java ExtendedLuceneSyntax.java AndQuery.java 
	Query.java IndexSearcher.java QueryBuilder.java 
	AndNotQuery.java BlurQuery.java OrQuery.java RangeQuery.java 
	BinaryQuery.java 
Log Message:
Re-implemented the search language to use lucene syntax with range and blur as extensions.

--- NEW FILE: RangeQuery.java ---
package org.crosswire.jsword.book.search.lucene;

import org.crosswire.jsword.book.BookException;
import org.crosswire.jsword.book.search.Index;
import org.crosswire.jsword.passage.Key;
import org.crosswire.jsword.passage.NoSuchKeyException;

/**
 * A range token specifies how a range should be included in the search.
 * It provides a range, a modifier (AND [+] or AND NOT [-]).
 * 
 * <p><table border='1' cellPadding='3' cellSpacing='0'>
 * <tr><td bgColor='white' class='TableRowColor'><font size='-7'>
 *
 * Distribution Licence:<br />
 * JSword is free software; you can redistribute it
 * and/or modify it under the terms of the GNU General Public License,
 * version 2 as published by the Free Software Foundation.<br />
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.<br />
 * The License is available on the internet
 * <a href='http://www.gnu.org/copyleft/gpl.html'>here</a>, or by writing to:
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA<br />
 * The copyright to this program is held by it's authors.
 * </font></td></tr></table>
 * @see gnu.gpl.Licence
 * @author DM Smith [ dmsmith555 at hotmail dot com]
 * @version $Id: RangeQuery.java,v 1.1 2005/04/25 01:22:23 dmsmith Exp $
 */
public class RangeQuery implements Query
{
    /**
     * Create a query for a passage given in textual form.
     *
     * @param theRange the passage text; it is only resolved against an
     * index when {@link #find(Index)} is called
     */
    public RangeQuery(String theRange)
    {
        this.range = theRange;
    }

    /* (non-Javadoc)
     * @see org.crosswire.jsword.book.search.lucene.Query#find(org.crosswire.jsword.book.search.Index)
     */
    public Key find(Index index) throws BookException
    {
        Key passage;
        try
        {
            passage = index.getKey(range);
        }
        catch (NoSuchKeyException ex)
        {
            // Report the offending text back to the user, keeping the cause
            Object[] params = { range };
            throw new BookException(Msg.ILLEGAL_PASSAGE, ex, params);
        }
        return passage;
    }

    /**
     * @return the passage text this query was created with
     */
    public String getRange()
    {
        return this.range;
    }

    /**
     * The unparsed passage text
     */
    private String range;
}

--- NEW FILE: IndexSearcher.java ---
package org.crosswire.jsword.book.search.lucene;

import java.util.Iterator;
import java.util.List;

import org.crosswire.jsword.book.BookException;
import org.crosswire.jsword.book.search.Index;
import org.crosswire.jsword.book.search.SearchRequest;
import org.crosswire.jsword.book.search.Searcher;
import org.crosswire.jsword.book.search.basic.DefaultSearchRequest;
import org.crosswire.jsword.passage.Key;

/**
 * The central interface to all searching.
 *
 * Functionality that I envisage includes:<ul>
 * <li>A simple search syntax that goes something like this.<ul>
 * <li>aaron, moses     (verses containing aaron and moses. Can also use & or +)
 * <li>aaron/moses      (verses containing aaron or moses. Can also use |)
 * <li>aaron - moses    (verses containing aaron but not moses)
 * <li>aaron ~5 , moses (verses with aaron within 5 verses of moses)
 * <li>soundslike aaron (verses with words that sound like aaron. Can also use sl ...)
 * <li>thesaurus happy  (verses with words that mean happy. Can also use th ...)
 * <li>grammar have     (words like has have had and so on. Can also use gr ...)</ul>
 * <li>The ability to add soundslike type extensions.</ul>
 *
 * <p><table border='1' cellPadding='3' cellSpacing='0'>
 * <tr><td bgColor='white' class='TableRowColor'><font size='-7'>
 *
 * Distribution Licence:<br />
 * JSword is free software; you can redistribute it
 * and/or modify it under the terms of the GNU General Public License,
 * version 2 as published by the Free Software Foundation.<br />
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.<br />
 * The License is available on the internet
 * <a href='http://www.gnu.org/copyleft/gpl.html'>here</a>, or by writing to:
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA<br />
 * The copyright to this program is held by it's authors.
 * </font></td></tr></table>
 * @see gnu.gpl.Licence
 * @author Joe Walker [joe at eireneh dot com]
 * @version $Id: IndexSearcher.java,v 1.1 2005/04/25 01:22:23 dmsmith Exp $
 */
public class IndexSearcher implements Searcher
{
    /* (non-Javadoc)
     * @see org.crosswire.jsword.book.search.Searcher#init(org.crosswire.jsword.book.search.Index)
     */
    public void init(Index newindex)
    {
        this.index = newindex;
    }

    /* (non-Javadoc)
     * @see org.crosswire.jsword.book.search.Searcher#search(java.lang.String)
     */
    public Key search(String request) throws BookException
    {
        return search(new DefaultSearchRequest(request));
    }

    /* (non-Javadoc)
     * @see org.crosswire.jsword.book.search.Searcher#search(org.crosswire.jsword.book.search.SearchRequest)
     */
    public Key search(SearchRequest request) throws BookException
    {
        // The modifier is state held by the index for the duration of this
        // search only, so it must be cleared even when the search fails.
        index.setSearchModifier(request.getSearchModifier());
        try
        {
            List output = QueryBuilder.tokenize(request.getRequest());
            return search(output);
        }
        finally
        {
            index.setSearchModifier(null);
        }
    }

    /**
     * Run every query in the list against the index and merge the results.
     * @param matches a List of Query objects, as built by QueryBuilder.tokenize
     * @return The combined matches of all the queries
     * @throws BookException if any of the queries fails
     */
    protected Key search(List matches) throws BookException
    {
        // Searching for null is relied upon to give an empty key to add to
        Key key = index.find(null);
        Iterator iter = matches.iterator();
        while (iter.hasNext())
        {
            Query token = (Query) iter.next();
            key.addAll(token.find(index));
        }
        return key;
    }

    /**
     * Accessor for the index to search.
     * @return The index given to init, or null if init has not been called
     */
    protected Index getIndex()
    {
        return index;
    }

    /**
     * The index
     */
    private Index index;
}

Index: Msg.java
===================================================================
RCS file: /cvs/jsword/jsword/java/jsword/org/crosswire/jsword/book/search/lucene/Msg.java,v
retrieving revision 1.12
retrieving revision 1.13
diff -C2 -d -r1.12 -r1.13
*** Msg.java	24 Mar 2005 03:14:29 -0000	1.12
--- Msg.java	25 Apr 2005 01:22:23 -0000	1.13
***************
*** 39,42 ****
--- 39,43 ----
      static final Msg INSTALL_FAIL = new Msg("LuceneIndex.InstallFailed"); //$NON-NLS-1$
      static final Msg BAD_VERSE = new Msg("LuceneIndex.BadVerse"); //$NON-NLS-1$
+     static final Msg ILLEGAL_PASSAGE = new Msg("RangeQuery.IllegalPassage"); //$NON-NLS-1$
  
      /**

Index: LuceneIndex.java
===================================================================
RCS file: /cvs/jsword/jsword/java/jsword/org/crosswire/jsword/book/search/lucene/LuceneIndex.java,v
retrieving revision 1.15
retrieving revision 1.16
diff -C2 -d -r1.15 -r1.16
*** LuceneIndex.java	5 Apr 2005 22:19:59 -0000	1.15
--- LuceneIndex.java	25 Apr 2005 01:22:23 -0000	1.16
***************
*** 167,171 ****
       * @see org.crosswire.jsword.book.search.Index#findWord(java.lang.String)
       */
!     public Key findWord(String search) throws BookException
      {
          checkActive();
--- 167,171 ----
       * @see org.crosswire.jsword.book.search.Index#findWord(java.lang.String)
       */
!     public Key find(String search) throws BookException
      {
          checkActive();

--- NEW FILE: OrQuery.java ---
package org.crosswire.jsword.book.search.lucene;

import org.crosswire.jsword.book.BookException;
import org.crosswire.jsword.book.search.Index;
import org.crosswire.jsword.passage.Key;

/**
 * An or token specifies that a result needs to be in either the left or the right token.
 * 
 * <p><table border='1' cellPadding='3' cellSpacing='0'>
 * <tr><td bgColor='white' class='TableRowColor'><font size='-7'>
 *
 * Distribution Licence:<br />
 * JSword is free software; you can redistribute it
 * and/or modify it under the terms of the GNU General Public License,
 * version 2 as published by the Free Software Foundation.<br />
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.<br />
 * The License is available on the internet
 * <a href='http://www.gnu.org/copyleft/gpl.html'>here</a>, or by writing to:
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA<br />
 * The copyright to this program is held by it's authors.
 * </font></td></tr></table>
 * @see gnu.gpl.Licence
 * @author DM Smith [ dmsmith555 at hotmail dot com]
 * @version $Id: OrQuery.java,v 1.1 2005/04/25 01:22:23 dmsmith Exp $
 */
public class OrQuery extends BinaryQuery
{
    /**
     * Create a query that unions the results of two other queries.
     *
     * @param theLeftToken the query whose result is extended
     * @param theRightToken the query whose result is added to the left one
     */
    public OrQuery(Query theLeftToken, Query theRightToken)
    {
        super(theLeftToken, theRightToken);
    }

    /* (non-Javadoc)
     * @see org.crosswire.jsword.book.search.lucene.Query#find(org.crosswire.jsword.book.search.Index)
     */
    public Key find(Index index) throws BookException
    {
        // The left result is modified in place and becomes the answer
        Key union = getLeftToken().find(index);
        Key extra = getRightToken().find(index);

        union.addAll(extra);

        return union;
    }
}

--- NEW FILE: ExtendedLuceneSyntax.java ---
package org.crosswire.jsword.book.search.lucene;

import org.crosswire.common.util.StringUtil;
import org.crosswire.jsword.book.search.SearchSyntax;

/**
 *
 * <p><table border='1' cellPadding='3' cellSpacing='0'>
 * <tr><td bgColor='white' class='TableRowColor'><font size='-7'>
 *
 * Distribution Licence:<br />
 * JSword is free software; you can redistribute it
 * and/or modify it under the terms of the GNU General Public License,
 * version 2 as published by the Free Software Foundation.<br />
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.<br />
 * The License is available on the internet
 * <a href='http://www.gnu.org/copyleft/gpl.html'>here</a>, or by writing to:
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA<br />
 * The copyright to this program is held by it's authors.
 * </font></td></tr></table>
 * @see gnu.gpl.Licence
 * @author DM Smith [ dmsmith555 at hotmail dot com]
 * @version $Id: ExtendedLuceneSyntax.java,v 1.1 2005/04/25 01:22:23 dmsmith Exp $
 */
public class ExtendedLuceneSyntax implements SearchSyntax
{
    // The characters used to decorate search text
    private char quote = '"';
    private char plus = '+';
    private char minus = '-';
    private char open = '[';
    private char close = ']';
    private char fuzzy = '~';
    private char wild = '*';

    /**
     * Prefix every word with '+', e.g. "a b" becomes "+a +b".
     * @see org.crosswire.jsword.book.search.SearchSyntax#decorateAllWords(java.lang.String)
     */
    public String decorateAllWords(String queryWords)
    {
        String[] words = queryWords.split(SPACE);
        StringBuffer search = new StringBuffer();
        search.append(plus);
        search.append(StringUtil.join(words, SPACE + plus));
        return search.toString();
    }

    /**
     * Return the words unchanged.
     * @see org.crosswire.jsword.book.search.SearchSyntax#decorateAnyWords(java.lang.String)
     */
    public String decorateAnyWords(String queryWords)
    {
        // Don't need to do anything, this is the default behavior
        return queryWords;
    }

    /**
     * Surround the words with double quotes, e.g. a b becomes "a b".
     * @see org.crosswire.jsword.book.search.SearchSyntax#decoratePhrase(java.lang.String)
     */
    public String decoratePhrase(String queryWords)
    {
        // This performs a best match
        StringBuffer search = new StringBuffer();
        search.append(quote);
        search.append(queryWords);
        search.append(quote);
        return search.toString();
    }

    /**
     * Prefix every word with '-', e.g. "a b" becomes "-a -b".
     * @see org.crosswire.jsword.book.search.SearchSyntax#decorateNotWords(java.lang.String)
     */
    public String decorateNotWords(String queryWords)
    {
        String[] words = queryWords.split(SPACE);
        StringBuffer search = new StringBuffer();
        search.append(minus);
        search.append(StringUtil.join(words, SPACE + minus));
        return search.toString();
    }

    /**
     * Wrap the passage as "+[passage]".
     * @see org.crosswire.jsword.book.search.SearchSyntax#decorateRange(java.lang.String)
     */
    public String decorateRange(String queryWords)
    {
        StringBuffer search = new StringBuffer();
        search.append(plus);
        search.append(open);
        search.append(queryWords);
        search.append(close);
        return search.toString();
    }

    /**
     * Suffix every word with '~', e.g. "a b" becomes "a~ b~".
     * @see org.crosswire.jsword.book.search.SearchSyntax#decorateSpellWords(java.lang.String)
     */
    public String decorateSpellWords(String queryWords)
    {
        String[] words = queryWords.split(SPACE);
        // The operator trails its word, so the separator is "~ " and not " ~",
        // which would have attached it to the front of the following word.
        StringBuffer search = new StringBuffer(StringUtil.join(words, fuzzy + SPACE));
        search.append(fuzzy);
        return search.toString();
    }

    /**
     * Suffix every word with '*', e.g. "a b" becomes "a* b*".
     * @see org.crosswire.jsword.book.search.SearchSyntax#decorateStartWords(java.lang.String)
     */
    public String decorateStartWords(String queryWords)
    {
        String[] words = queryWords.split(SPACE);
        // The operator trails its word, so the separator is "* " and not " *",
        // which would have attached it to the front of the following word.
        StringBuffer search = new StringBuffer(StringUtil.join(words, wild + SPACE));
        search.append(wild);
        return search.toString();
    }

    /**
     * In our parsing we use space quite a lot and this ensures there is only
     * one and that we don't have lots of NON-NLS comments everywhere
     */
    private static final String SPACE = " "; //$NON-NLS-1$
}

--- NEW FILE: BinaryQuery.java ---
package org.crosswire.jsword.book.search.lucene;

/**
 * A binary token has a left token and right token.
 * 
 * <p><table border='1' cellPadding='3' cellSpacing='0'>
 * <tr><td bgColor='white' class='TableRowColor'><font size='-7'>
 *
 * Distribution Licence:<br />
 * JSword is free software; you can redistribute it
 * and/or modify it under the terms of the GNU General Public License,
 * version 2 as published by the Free Software Foundation.<br />
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.<br />
 * The License is available on the internet
 * <a href='http://www.gnu.org/copyleft/gpl.html'>here</a>, or by writing to:
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA<br />
 * The copyright to this program is held by it's authors.
 * </font></td></tr></table>
 * @see gnu.gpl.Licence
 * @author DM Smith [ dmsmith555 at hotmail dot com]
 * @version $Id: BinaryQuery.java,v 1.1 2005/04/25 01:22:23 dmsmith Exp $
 */
public abstract class BinaryQuery implements Query
{
    /**
     * Remember the two operands of this query.
     *
     * @param theLeftToken the left hand operand
     * @param theRightToken the right hand operand
     */
    public BinaryQuery(Query theLeftToken, Query theRightToken)
    {
        this.leftToken = theLeftToken;
        this.rightToken = theRightToken;
    }

    /**
     * @return the left hand operand given at construction
     */
    public Query getLeftToken()
    {
        return this.leftToken;
    }

    /**
     * @return the right hand operand given at construction
     */
    public Query getRightToken()
    {
        return this.rightToken;
    }

    /**
     * The left hand operand
     */
    private Query leftToken;

    /**
     * The right hand operand
     */
    private Query rightToken;
}

Index: Msg.properties
===================================================================
RCS file: /cvs/jsword/jsword/java/jsword/org/crosswire/jsword/book/search/lucene/Msg.properties,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** Msg.properties	24 Mar 2005 03:14:29 -0000	1.5
--- Msg.properties	25 Apr 2005 01:22:23 -0000	1.6
***************
*** 16,17 ****
--- 16,18 ----
  LuceneIndex.InstallFailed=Installation failed
  LuceneIndex.BadVerse=The following verses have errors and could not be indexed\n{0}
+ RangeQuery.IllegalPassage=Syntax Error: Invalid passage "{0}"

--- NEW FILE: BaseQuery.java ---
package org.crosswire.jsword.book.search.lucene;

import org.crosswire.jsword.book.BookException;
import org.crosswire.jsword.book.search.Index;
import org.crosswire.jsword.passage.Key;

/**
 * A base token is the smallest unit of search that the index can perform.
 * 
 * <p><table border='1' cellPadding='3' cellSpacing='0'>
 * <tr><td bgColor='white' class='TableRowColor'><font size='-7'>
 *
 * Distribution Licence:<br />
 * JSword is free software; you can redistribute it
 * and/or modify it under the terms of the GNU General Public License,
 * version 2 as published by the Free Software Foundation.<br />
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.<br />
 * The License is available on the internet
 * <a href='http://www.gnu.org/copyleft/gpl.html'>here</a>, or by writing to:
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA<br />
 * The copyright to this program is held by it's authors.
 * </font></td></tr></table>
 * @see gnu.gpl.Licence
 * @author DM Smith [ dmsmith555 at hotmail dot com]
 * @version $Id: BaseQuery.java,v 1.1 2005/04/25 01:22:23 dmsmith Exp $
 */
public class BaseQuery implements Query
{
    /**
     * Create a query from text that is handed to the index as it stands.
     *
     * @param theQuery the search text
     */
    public BaseQuery(String theQuery)
    {
        this.query = theQuery;
    }

    /* (non-Javadoc)
     * @see org.crosswire.jsword.book.search.lucene.Query#find(org.crosswire.jsword.book.search.Index)
     */
    public Key find(Index index) throws BookException
    {
        // All the work is delegated to the index
        Key found = index.find(query);
        return found;
    }

    /**
     * @return the search text this query was created with
     */
    public String getQuery()
    {
        return this.query;
    }

    /**
     * The search text
     */
    private String query;
}

--- NEW FILE: AndQuery.java ---
package org.crosswire.jsword.book.search.lucene;

import org.crosswire.jsword.book.BookException;
import org.crosswire.jsword.book.search.Index;
import org.crosswire.jsword.passage.Key;

/**
 * An and token specifies that a result needs to be in both the left and the right token.
 * 
 * <p><table border='1' cellPadding='3' cellSpacing='0'>
 * <tr><td bgColor='white' class='TableRowColor'><font size='-7'>
 *
 * Distribution Licence:<br />
 * JSword is free software; you can redistribute it
 * and/or modify it under the terms of the GNU General Public License,
 * version 2 as published by the Free Software Foundation.<br />
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.<br />
 * The License is available on the internet
 * <a href='http://www.gnu.org/copyleft/gpl.html'>here</a>, or by writing to:
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA<br />
 * The copyright to this program is held by it's authors.
 * </font></td></tr></table>
 * @see gnu.gpl.Licence
 * @author DM Smith [ dmsmith555 at hotmail dot com]
 * @version $Id: AndQuery.java,v 1.1 2005/04/25 01:22:23 dmsmith Exp $
 */
public class AndQuery extends BinaryQuery
{
    /**
     * Create a query that intersects the results of two other queries.
     *
     * @param theLeftToken the query whose result is narrowed
     * @param theRightToken the query whose result the left one is limited to
     */
    public AndQuery(Query theLeftToken, Query theRightToken)
    {
        super(theLeftToken, theRightToken);
    }

    /* (non-Javadoc)
     * @see org.crosswire.jsword.book.search.lucene.Query#find(org.crosswire.jsword.book.search.Index)
     */
    public Key find(Index index) throws BookException
    {
        // The left result is modified in place and becomes the answer
        Key common = getLeftToken().find(index);
        Key limit = getRightToken().find(index);

        common.retainAll(limit);

        return common;
    }
}

--- NEW FILE: Query.java ---
package org.crosswire.jsword.book.search.lucene;

import org.crosswire.jsword.book.BookException;
import org.crosswire.jsword.book.search.Index;
import org.crosswire.jsword.passage.Key;

/**
 * The search.Searcher uses a List of Tokens to calculate a search.
 * 
 * <p><table border='1' cellPadding='3' cellSpacing='0'>
 * <tr><td bgColor='white' class='TableRowColor'><font size='-7'>
 *
 * Distribution Licence:<br />
 * JSword is free software; you can redistribute it
 * and/or modify it under the terms of the GNU General Public License,
 * version 2 as published by the Free Software Foundation.<br />
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.<br />
 * The License is available on the internet
 * <a href='http://www.gnu.org/copyleft/gpl.html'>here</a>, or by writing to:
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA<br />
 * The copyright to this program is held by it's authors.
 * </font></td></tr></table>
 * @see gnu.gpl.Licence
 * @author Joe Walker [joe at eireneh dot com]
 * @version $Id: Query.java,v 1.1 2005/04/25 01:22:23 dmsmith Exp $
 */
public interface Query
{
    /**
     * Run this query against an index.
     *
     * @param index the index to search
     * @return the matches for this query
     * @throws BookException if the query could not be carried out
     */
    Key find(Index index) throws BookException;
}

--- NEW FILE: AndNotQuery.java ---
package org.crosswire.jsword.book.search.lucene;

import org.crosswire.jsword.book.BookException;
import org.crosswire.jsword.book.search.Index;
import org.crosswire.jsword.passage.Key;

/**
 * An "And Not" token specifies that a result needs to be in the left
 * but not in the right token.
 * 
 * <p><table border='1' cellPadding='3' cellSpacing='0'>
 * <tr><td bgColor='white' class='TableRowColor'><font size='-7'>
 *
 * Distribution Licence:<br />
 * JSword is free software; you can redistribute it
 * and/or modify it under the terms of the GNU General Public License,
 * version 2 as published by the Free Software Foundation.<br />
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.<br />
 * The License is available on the internet
 * <a href='http://www.gnu.org/copyleft/gpl.html'>here</a>, or by writing to:
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA<br />
 * The copyright to this program is held by it's authors.
 * </font></td></tr></table>
 * @see gnu.gpl.Licence
 * @author DM Smith [ dmsmith555 at hotmail dot com]
 * @version $Id: AndNotQuery.java,v 1.1 2005/04/25 01:22:23 dmsmith Exp $
 */
public class AndNotQuery extends BinaryQuery
{
    /**
     * Create a query that subtracts the result of one query from another.
     *
     * @param theLeftToken the query whose result is reduced
     * @param theRightToken the query whose result is taken away from the left one
     */
    public AndNotQuery(Query theLeftToken, Query theRightToken)
    {
        super(theLeftToken, theRightToken);
    }

    /* (non-Javadoc)
     * @see org.crosswire.jsword.book.search.lucene.Query#find(org.crosswire.jsword.book.search.Index)
     */
    public Key find(Index index) throws BookException
    {
        // The left result is modified in place and becomes the answer
        Key remaining = getLeftToken().find(index);
        Key excluded = getRightToken().find(index);

        remaining.removeAll(excluded);

        return remaining;
    }
}

--- NEW FILE: BlurQuery.java ---
package org.crosswire.jsword.book.search.lucene;

import org.crosswire.jsword.book.BookException;
import org.crosswire.jsword.book.search.Index;
import org.crosswire.jsword.passage.Key;
import org.crosswire.jsword.passage.RestrictionType;

/**
 * A blur token specifies how much to blur the results of the right token.
 * 
 * <p><table border='1' cellPadding='3' cellSpacing='0'>
 * <tr><td bgColor='white' class='TableRowColor'><font size='-7'>
 *
 * Distribution Licence:<br />
 * JSword is free software; you can redistribute it
 * and/or modify it under the terms of the GNU General Public License,
 * version 2 as published by the Free Software Foundation.<br />
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.<br />
 * The License is available on the internet
 * <a href='http://www.gnu.org/copyleft/gpl.html'>here</a>, or by writing to:
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA<br />
 * The copyright to this program is held by it's authors.
 * </font></td></tr></table>
 * @see gnu.gpl.Licence
 * @author DM Smith [ dmsmith555 at hotmail dot com]
 * @version $Id: BlurQuery.java,v 1.1 2005/04/25 01:22:23 dmsmith Exp $
 */
public class BlurQuery extends BinaryQuery
{
    /**
     * Create a query that keeps the left matches which fall within the
     * blurred right matches.
     *
     * @param theLeftToken the query whose result is narrowed
     * @param theRightToken the query whose result is blurred
     * @param theFactor the amount to blur the right result by
     */
    public BlurQuery(Query theLeftToken, Query theRightToken, int theFactor)
    {
        super(theLeftToken, theRightToken);
        this.factor = theFactor;
    }

    /* (non-Javadoc)
     * @see org.crosswire.jsword.book.search.lucene.Query#find(org.crosswire.jsword.book.search.Index)
     */
    public Key find(Index index) throws BookException
    {
        Key hits = getLeftToken().find(index);
        Key nearby = getRightToken().find(index);

        // Widen the right result, then limit the left result to it
        RestrictionType restriction = RestrictionType.getDefaultBlurRestriction();
        nearby.blur(factor, restriction);
        hits.retainAll(nearby);

        return hits;
    }

    /**
     * @return the blur factor given at construction
     */
    public int getFactor()
    {
        return this.factor;
    }

    /**
     * The amount to blur the right result by
     */
    private int factor;
}

--- NEW FILE: QueryBuilder.java ---
package org.crosswire.jsword.book.search.lucene;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A query can have an optional range specifier and an optional blur specifier.
 * The range specifier can be +[range], -[range] or just [range].
 * This must stand at the beginning of the query and may be surrounded by whitespace.
 * The blur specifier is either ~ or ~n, where ~ means adjacent verses,
 * but ~n means to blur by n verses.
 *
 * <p><table border='1' cellPadding='3' cellSpacing='0'>
 * <tr><td bgColor='white' class='TableRowColor'><font size='-7'>
 *
 * Distribution Licence:<br />
 * JSword is free software; you can redistribute it
 * and/or modify it under the terms of the GNU General Public License,
 * version 2 as published by the Free Software Foundation.<br />
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.<br />
 * The License is available on the internet
 * <a href='http://www.gnu.org/copyleft/gpl.html'>here</a>, or by writing to:
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA<br />
 * The copyright to this program is held by it's authors.
 * </font></td></tr></table>
 * @see gnu.gpl.Licence
 * @author Joe Walker [joe at eireneh dot com]
 * @version $Id: QueryBuilder.java,v 1.1 2005/04/25 01:22:23 dmsmith Exp $
 */
public class QueryBuilder
{
    /**
     * Prevent Instantiation
     */
    private QueryBuilder()
    {
    }

    /**
     * Tokenize a query into a list of Query objects.
     * A leading +[range], -[range] or [range] is split off first, then the
     * remainder is split around the first blur specifier, if there is one.
     * @param sought The text to parse
     * @return A List of Query objects, empty if sought is null or empty
     * @throws NumberFormatException if the blur count is too big for an int
     */
    public static List tokenize(String sought)
    {
        List output = new ArrayList();
        if (sought == null || sought.length() == 0)
        {
            return output;
        }

        Query query = null;
        Query range = null;
        String rangeModifier = null;
        // Look for a range +[...], -[...], or [...]
        Matcher rangeMatcher = rangePattern.matcher(sought);
        if (rangeMatcher.find())
        {
            rangeModifier = rangeMatcher.group(1);
            range = new RangeQuery(rangeMatcher.group(2));

            // Leave one whitespace character in front of the remainder, because
            // the blur pattern needs one before the ~. When nothing followed
            // the range there is none to leave, and stepping back would put
            // the closing ] into the remainder.
            int rest = rangeMatcher.end();
            int afterBracket = rangeMatcher.end(2) + 1;
            if (rest > afterBracket)
            {
                rest--;
            }
            sought = sought.substring(rest);
        }

        // Look for a blur ~n
        Matcher blurMatcher = blurPattern.matcher(sought);
        if (blurMatcher.find())
        {
            int blurFactor = 1;
            // Did we have ~ or ~n? The group is empty, not absent, for a plain ~
            // so groupCount(), which only counts the groups in the pattern,
            // cannot tell the two apart.
            String count = blurMatcher.group(1);
            if (count != null && count.length() > 0)
            {
                blurFactor = Integer.parseInt(count);
            }
            Query left = new BaseQuery(sought.substring(0, blurMatcher.start()));
            Query right = new BaseQuery(sought.substring(blurMatcher.end()));
            query = new BlurQuery(left, right, blurFactor);
        }
        else
        {
            query = new BaseQuery(sought);
        }

        if (range == null)
        {
            output.add(query);
        }
        else if (rangeModifier == null || rangeModifier.length() == 0)
        {
            // A range without + or - has an empty modifier: the query and
            // the range are handed back as two separate entries.
            output.add(query);
            output.add(range);
        }
        else if (rangeModifier.charAt(0) == '+')
        {
            output.add(new AndQuery(query, range));
        }
        else
        {
            // AndNot needs to be after what it is restricting
            output.add(new AndNotQuery(query, range));
        }
        return output;
    }

    /**
     * The pattern of a range. This is anything that is
     * contained between a leading [] (but not containing a [ or ]),
     * with a + or - optional prefix,
     * perhaps surrounded by whitespace.
     * Group 1 is the prefix (empty when there is none), group 2 the range.
     */
    private static final Pattern rangePattern = Pattern.compile("^\\s*([-+]?)\\[([^\\[\\]]+)\\]\\s*"); //$NON-NLS-1$

    /**
     * The pattern of a blur. ~, optionally followed by a number, representing the number of verses.
     * It has to have whitespace on both sides. Group 1 is the number (empty when there is none).
     */
    private static final Pattern blurPattern = Pattern.compile("\\s~(\\d*)?\\s"); //$NON-NLS-1$

}



More information about the jsword-svn mailing list