[jsword-svn] jsword/java/jsword/org/crosswire/jsword/book/search/lucene s

jswordcvs at crosswire.org jswordcvs at crosswire.org
Sat Oct 9 14:45:07 MST 2004


Update of /cvs/jsword/jsword/java/jsword/org/crosswire/jsword/book/search/lucene
In directory www.crosswire.org:/tmp/cvs-serv30045/java/jsword/org/crosswire/jsword/book/search/lucene

Modified Files:
	Msg.java LuceneIndex.java Msg.properties 
Added Files:
	LuceneIndexManager.java 
Log Message:
indexing updates

Index: LuceneIndex.java
===================================================================
RCS file: /cvs/jsword/jsword/java/jsword/org/crosswire/jsword/book/search/lucene/LuceneIndex.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** LuceneIndex.java	29 Sep 2004 22:21:24 -0000	1.1
--- LuceneIndex.java	9 Oct 2004 21:45:05 -0000	1.2
***************
*** 21,24 ****
--- 21,25 ----
  import org.crosswire.common.activate.Lock;
  import org.crosswire.common.progress.Job;
+ import org.crosswire.common.progress.JobManager;
  import org.crosswire.common.util.Logger;
  import org.crosswire.common.util.NetUtil;
***************
*** 27,32 ****
  import org.crosswire.jsword.book.BookData;
  import org.crosswire.jsword.book.BookException;
  import org.crosswire.jsword.book.search.Index;
- import org.crosswire.jsword.book.search.IndexManager;
  import org.crosswire.jsword.passage.BibleInfo;
  import org.crosswire.jsword.passage.Key;
--- 28,33 ----
  import org.crosswire.jsword.book.BookData;
  import org.crosswire.jsword.book.BookException;
+ import org.crosswire.jsword.book.IndexStatus;
  import org.crosswire.jsword.book.search.Index;
  import org.crosswire.jsword.passage.BibleInfo;
  import org.crosswire.jsword.passage.Key;
***************
*** 37,41 ****
  import org.crosswire.jsword.passage.Verse;
  import org.crosswire.jsword.passage.VerseFactory;
- import org.crosswire.jsword.util.Project;
  
  /**
--- 38,41 ----
***************
*** 65,99 ****
  public class LuceneIndex implements Index, Activatable
  {
!     /* (non-Javadoc)
!      * @see org.crosswire.jsword.book.search.SearchEngine#init(org.crosswire.jsword.book.Bible, java.net.URL)
       */
!     public void init(Book newBook) throws BookException
      {
          try
          {
!             book = newBook;
  
!             String driverName = book.getBookMetaData().getDriverName();
!             String bookName = book.getBookMetaData().getInitials();
  
!             assert driverName != null;
!             assert bookName != null;
  
!             URL base = Project.instance().getTempScratchSpace(DIR_LUCENE, false);
!             URL driver = NetUtil.lengthenURL(base, driverName);
!             url = NetUtil.lengthenURL(driver, bookName);
  
!             if (isIndexed())
              {
!                 // Opening Lucene indexes is quite quick I think, so we can try
!                 // it to see if it works to report errors that we want to drop
!                 // later
!                 searcher = new IndexSearcher(NetUtil.getAsFile(url).getCanonicalPath());
              }
          }
!         catch (IOException ex)
          {
              throw new BookException(Msg.LUCENE_INIT, ex);
          }
      }
  
--- 65,134 ----
  public class LuceneIndex implements Index, Activatable
  {
!     /**
!      * Read an existing index and use it.
!      * @throws BookException If we fail to read the index files
       */
!     public LuceneIndex(Book book, URL storage) throws BookException
      {
+         this.book = book;
+         this.storage = storage;
+ 
          try
          {
!             // Opening Lucene indexes is quite quick I think, so we can try
!             // it to see if it works to report errors that we want to drop
!             // later
!             searcher = new IndexSearcher(NetUtil.getAsFile(storage).getCanonicalPath());
!         }
!         catch (IOException ex)
!         {
!             throw new BookException(Msg.LUCENE_INIT, ex);
!         }
!     }
  
!     /**
!      * Generate an index to use, telling the job about progress as you go.
!      * @throws BookException If we fail to read the index files
!      */
!     public LuceneIndex(Book book, URL storage, boolean create) throws BookException
!     {
!         assert create;
  
!         this.book = book;
!         this.storage = storage;
  
!         Job job = JobManager.createJob(Msg.INDEX_START.toString(), Thread.currentThread(), false);
  
!         try
!         {
!             synchronized (creating)
              {
!                 book.getBookMetaData().setIndexStatus(IndexStatus.CREATING);
! 
!                 // An index is created by opening an IndexWriter with the
!                 // create argument set to true.
!                 IndexWriter writer = new IndexWriter(NetUtil.getAsFile(storage).getCanonicalPath(), new StandardAnalyzer(), true);
! 
!                 generateSearchIndexImpl(job, writer, book.getGlobalKeyList());
!         
!                 job.setProgress(95, Msg.OPTIMIZING.toString());
!         
!                 writer.optimize();
!                 writer.close();
! 
!                 searcher = new IndexSearcher(NetUtil.getAsFile(storage).getCanonicalPath());
! 
!                 book.getBookMetaData().setIndexStatus(IndexStatus.DONE);
              }
          }
!         catch (Exception ex)
          {
+             job.ignoreTimings();
              throw new BookException(Msg.LUCENE_INIT, ex);
          }
+         finally
+         {
+             job.done();
+         }                
      }
  
***************
*** 147,197 ****
  
      /* (non-Javadoc)
!      * @see org.crosswire.jsword.book.search.SearchEngine#delete()
       */
!     public void delete() throws BookException
      {
-         checkActive();
- 
          try
          {
!             NetUtil.delete(url);
          }
          catch (IOException ex)
          {
!             throw new BookException(Msg.DELETE_FAILED, ex);
          }
      }
  
      /* (non-Javadoc)
!      * @see org.crosswire.jsword.book.search.AbstractIndex#isIndexed()
       */
!     public boolean isIndexed()
      {
!         if (generating)
          {
!             return false;
          }
  
!         URL longer = NetUtil.lengthenURL(url, DIR_SEGMENTS);
!         return NetUtil.isFile(longer);
      }
  
!     /* (non-Javadoc)
!      * @see org.crosswire.jsword.book.search.AbstractIndex#generateSearchIndex(org.crosswire.common.progress.Job)
       */
!     public void generateSearchIndex(Job job) throws IOException, BookException
      {
!         // An index is created by opening an IndexWriter with the
!         // create argument set to true.
!         IndexWriter writer = new IndexWriter(NetUtil.getAsFile(url), new StandardAnalyzer(), true);
! 
!         generateSearchIndexImpl(job, writer, book.getGlobalKeyList());
! 
!         job.setProgress(95, Msg.OPTIMIZING.toString());
! 
!         writer.optimize();
!         writer.close();
! 
!         searcher = new IndexSearcher(NetUtil.getAsFile(url).getCanonicalPath());
      }
  
--- 182,228 ----
  
      /* (non-Javadoc)
!      * @see org.crosswire.jsword.book.search.SearchEngine#activate()
       */
!     public final void activate(Lock lock)
      {
          try
          {
!             searcher = new IndexSearcher(NetUtil.getAsFile(storage).getCanonicalPath());
          }
          catch (IOException ex)
          {
!             log.warn("second load failure", ex); //$NON-NLS-1$
          }
+ 
+         active = true;
      }
  
      /* (non-Javadoc)
!      * @see org.crosswire.jsword.book.search.SearchEngine#deactivate()
       */
!     public final void deactivate(Lock lock)
      {
!         try
          {
!             searcher.close();
!             searcher = null;
!         }
!         catch (IOException ex)
!         {
!             Reporter.informUser(this, ex);
          }
  
!         active = false;
      }
  
!     /**
!      * Helper method so we can quickly activate ourselves on access
       */
!     protected final void checkActive()
      {
!         if (!active)
!         {
!             Activator.activate(this);
!         }
      }
  
***************
*** 248,304 ****
      }
  
-     /* (non-Javadoc)
-      * @see org.crosswire.jsword.book.search.SearchEngine#activate()
-      */
-     public final void activate(Lock lock)
-     {
-         // Load the ascii Passage index
-         if (isIndexed())
-         {
-             try
-             {
-                 searcher = new IndexSearcher(NetUtil.getAsFile(url).getCanonicalPath());
-             }
-             catch (IOException ex)
-             {
-                 log.warn("second load failure", ex); //$NON-NLS-1$
-             }
-         }
-         else
-         {
-             IndexManager.instance().createIndex(this);
-         }
- 
-         active = true;
-     }
- 
-     /* (non-Javadoc)
-      * @see org.crosswire.jsword.book.search.SearchEngine#deactivate()
-      */
-     public final void deactivate(Lock lock)
-     {
-         try
-         {
-             searcher.close();
-             searcher = null;
-         }
-         catch (IOException ex)
-         {
-             Reporter.informUser(this, ex);
-         }
- 
-         active = false;
-     }
- 
      /**
!      * Helper method so we can quickly activate ourselves on access
       */
!     protected final void checkActive()
!     {
!         if (!active)
!         {
!             Activator.activate(this);
!         }
!     }
  
      /**
--- 279,286 ----
      }
  
      /**
!      * A synchronization lock point to prevent us from doing 2 index runs at a time.
       */
!     private static final Object creating = new Object();
  
      /**
***************
*** 313,331 ****
  
      /**
-      * Are we in the middle of generating an index?
-      */
-     private boolean generating = false;
- 
-     /**
-      * The lucene search index directory
-      */
-     protected static final String DIR_LUCENE = "lucene"; //$NON-NLS-1$
- 
-     /**
-      * The segments directory
-      */
-     protected static final String DIR_SEGMENTS = "segments"; //$NON-NLS-1$
- 
-     /**
       * The Lucene field for the verse name
       */
--- 295,298 ----
***************
*** 345,349 ****
       * The location of this index
       */
!     private URL url;
  
      /**
--- 312,316 ----
       * The location of this index
       */
!     private URL storage;
  
      /**

Index: Msg.properties
===================================================================
RCS file: /cvs/jsword/jsword/java/jsword/org/crosswire/jsword/book/search/lucene/Msg.properties,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** Msg.properties	29 Sep 2004 22:21:24 -0000	1.2
--- Msg.properties	9 Oct 2004 21:45:05 -0000	1.3
***************
*** 6,9 ****
--- 6,12 ----
  # It should have no spaces or other punctuation (e.g. _, -, ', ...)
  
+ LuceneIndexManager.TypeIndexGen=Generating index for this work. Search results will be more accurate when index is complete.
+ LuceneIndexManager.Indexing=Indexing Bible Data
+ 
  LuceneIndex.LuceneInit=Failed to initialise Lucene search engine.
  LuceneIndex.SearchFailed=Search failed.

--- NEW FILE: LuceneIndexManager.java ---
package org.crosswire.jsword.book.search.lucene;

import java.io.IOException;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;

import org.crosswire.common.util.Logger;
import org.crosswire.common.util.NetUtil;
import org.crosswire.common.util.Reporter;
import org.crosswire.jsword.book.Book;
import org.crosswire.jsword.book.BookException;
import org.crosswire.jsword.book.IndexStatus;
import org.crosswire.jsword.book.search.Index;
import org.crosswire.jsword.book.search.IndexManager;
import org.crosswire.jsword.util.Project;

/**
 * An IndexManager that creates, caches and deletes Lucene based indexes for
 * Books. Each index lives in its own storage area (see
 * {@link #getStorageArea(Book)}) and every index that has been opened or
 * generated is remembered in a cache that is shared by all instances of this
 * class.
 *
 * <p>The cache is touched both by callers of this class and by the background
 * thread started in {@link #scheduleIndexCreation(Book)}, so every access to
 * it is made while synchronized on the cache itself.
 * 
 * <p><table border='1' cellPadding='3' cellSpacing='0'>
 * <tr><td bgColor='white' class='TableRowColor'><font size='-7'>
 *
 * Distribution Licence:<br />
 * JSword is free software; you can redistribute it
 * and/or modify it under the terms of the GNU General Public License,
 * version 2 as published by the Free Software Foundation.<br />
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.<br />
 * The License is available on the internet
 * <a href='http://www.gnu.org/copyleft/gpl.html'>here</a>, or by writing to:
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA<br />
 * The copyright to this program is held by its authors.
 * </font></td></tr></table>
 * @see gnu.gpl.Licence
 * @author Joe Walker [joe at eireneh dot com]
 * @version $Id: LuceneIndexManager.java,v 1.1 2004/10/09 21:45:05 joe Exp $
 */
public class LuceneIndexManager implements IndexManager
{
    /**
     * Check whether index files are present for a book. The test is whether
     * the "segments" entry inside the book's storage area is reported as a
     * file by NetUtil.
     * @param book The book whose index we are asking about
     * @return true if the segments file was found, false if it was not found
     * or if the storage area could not be determined (the failure is logged)
     */
    public boolean isIndexed(Book book)
    {
        try
        {
            URL storage = getStorageArea(book);
            URL longer = NetUtil.lengthenURL(storage, DIR_SEGMENTS);
            return NetUtil.isFile(longer);
        }
        catch (IOException ex)
        {
            log.error("Failed to find lucene index storage area.", ex); //$NON-NLS-1$
            return false;
        }
    }

    /**
     * Fetch the index for a book, opening it from the book's storage area the
     * first time it is asked for and returning the cached instance afterwards.
     * @param book The book whose index is wanted
     * @return The cached or freshly opened index
     * @throws BookException If the storage area can not be found or the index
     * can not be opened
     * @see org.crosswire.jsword.book.search.IndexManager#getIndex(org.crosswire.jsword.book.Book)
     */
    public Index getIndex(Book book) throws BookException
    {
        try
        {
            // The lookup and the insert must be one atomic step, otherwise
            // two callers (or a caller and the indexing thread) could both
            // miss and open the same index twice, or corrupt the HashMap.
            synchronized (indexes)
            {
                Index reply = (Index) indexes.get(book);
                if (reply == null)
                {
                    URL storage = getStorageArea(book);
                    reply = new LuceneIndex(book, storage);
                    indexes.put(book, reply);
                }

                return reply;
            }
        }
        catch (IOException ex)
        {
            throw new BookException(Msg.LUCENE_INIT, ex);
        }
    }

    /**
     * Mark a book as scheduled for indexing and start a new thread that
     * generates the index and then places it in the cache. This method
     * returns as soon as the thread has been started; any failure in the
     * thread is passed to the Reporter rather than to the caller.
     * @param book The book to generate an index for
     */
    public void scheduleIndexCreation(final Book book)
    {
        book.getBookMetaData().setIndexStatus(IndexStatus.SCHEDULED);

        Thread work = new Thread(new Runnable()
        {
            public void run()
            {
                try
                {
                    URL storage = getStorageArea(book);

                    // Generation is slow, so it is done outside the cache
                    // lock; only the publication of the result is guarded.
                    Index index = new LuceneIndex(book, storage, true);
                    synchronized (indexes)
                    {
                        indexes.put(book, index);
                    }
                }
                catch (Exception ex)
                {
                    // NOTE(review): the book's index status is left as it was
                    // when the failure happened - confirm whether it should
                    // be reset here.
                    Reporter.informUser(LuceneIndexManager.this, ex);
                }
            }
        });
        work.start();
    }

    /**
     * Delete the stored index for a book and forget any cached index object
     * for it, so that a later call to {@link #getIndex(Book)} does not hand
     * back an index whose files have been removed.
     * @param book The book whose index should be removed
     * @throws BookException If the storage area can not be found or deleted
     * @see org.crosswire.jsword.book.search.IndexManager#deleteIndex(org.crosswire.jsword.book.Book)
     */
    public void deleteIndex(Book book) throws BookException
    {
        try
        {
            // TODO(joe): This needs some checks that it isn't being used
            URL storage = getStorageArea(book);

            synchronized (indexes)
            {
                // Drop the cache entry first: even if the delete fails part
                // way through, nobody should be given the old index object.
                indexes.remove(book);
                NetUtil.delete(storage);
            }
        }
        catch (IOException ex)
        {
            throw new BookException(Msg.DELETE_FAILED, ex);
        }
    }

    /**
     * Determine where an index should be stored. The location is the
     * project's "lucene" scratch space, extended first with the name of the
     * book's driver and then with the book's initials.
     * @param book The book to be indexed
     * @return A URL to store stuff in
     * @throws IOException If there is a problem in finding where to store stuff
     */
    protected URL getStorageArea(Book book) throws IOException
    {
        String driverName = book.getBookMetaData().getDriverName();
        String bookName = book.getBookMetaData().getInitials();

        assert driverName != null;
        assert bookName != null;

        URL base = Project.instance().getTempScratchSpace(DIR_LUCENE, false);
        URL driver = NetUtil.lengthenURL(base, driverName);

        return NetUtil.lengthenURL(driver, bookName);
    }

    /**
     * The created indexes, keyed by Book. This is a plain HashMap shared by
     * all instances and by the indexing threads, so all access must be made
     * while synchronized on this object.
     */
    protected static final Map indexes = new HashMap();

    /**
     * The segments directory
     */
    private static final String DIR_SEGMENTS = "segments"; //$NON-NLS-1$

    /**
     * The lucene search index directory
     */
    private static final String DIR_LUCENE = "lucene"; //$NON-NLS-1$

    /**
     * The log stream
     */
    private static final Logger log = Logger.getLogger(LuceneIndexManager.class);
}

Index: Msg.java
===================================================================
RCS file: /cvs/jsword/jsword/java/jsword/org/crosswire/jsword/book/search/lucene/Msg.java,v
retrieving revision 1.9
retrieving revision 1.10
diff -C2 -d -r1.9 -r1.10
*** Msg.java	29 Sep 2004 22:21:24 -0000	1.9
--- Msg.java	9 Oct 2004 21:45:05 -0000	1.10
***************
*** 29,32 ****
--- 29,35 ----
  class Msg extends MsgBase
  {
+     static final Msg TYPE_INDEXGEN = new Msg("LuceneIndexManager.TypeIndexGen"); //$NON-NLS-1$
+     static final Msg INDEX_START = new Msg("LuceneIndexManager.Indexing"); //$NON-NLS-1$
+ 
      static final Msg LUCENE_INIT = new Msg("LuceneIndex.LuceneInit"); //$NON-NLS-1$
      static final Msg SEARCH_FAILED = new Msg("LuceneIndex.SearchFailed"); //$NON-NLS-1$



More information about the jsword-svn mailing list