1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 as published by
5    * the Free Software Foundation. This program is distributed in the hope
6    * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7    * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *       http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * Copyright: 2005
18   *     The copyright to this program is held by its authors.
19   *
20   * ID: $Id:LuceneQueryBuilder.java 984 2006-01-23 14:18:33 -0500 (Mon, 23 Jan 2006) dmsmith $
21   */
22  package org.crosswire.jsword.index.lucene;
23  
24  import java.util.regex.Matcher;
25  import java.util.regex.Pattern;
26  
27  import org.crosswire.jsword.index.query.AndNotQuery;
28  import org.crosswire.jsword.index.query.AndQuery;
29  import org.crosswire.jsword.index.query.BaseQuery;
30  import org.crosswire.jsword.index.query.BlurQuery;
31  import org.crosswire.jsword.index.query.NullQuery;
32  import org.crosswire.jsword.index.query.Query;
33  import org.crosswire.jsword.index.query.QueryBuilder;
34  import org.crosswire.jsword.index.query.RangeQuery;
35  
36  /**
37   * A query can have a optional range specifier and an optional blur specifier.
38   * The range specifier can be +[range], -[range] or just [range]. This must
39   * stand at the beginning of the query and may be surrounded by whitespace. The
40   * blur specifier is either ~ or ~n, where ~ means adjacent verses, but ~n means
41   * to blur by n verses.
42   * 
43   * @see gnu.lgpl.License for license details.<br>
44   *      The copyright to this program is held by it's authors.
45   * @author DM Smith [dmsmith555 at yahoo dot com]
46   */
47  public final class LuceneQueryBuilder implements QueryBuilder {
48      /*
49       * (non-Javadoc)
50       * 
51       * @see
52       * org.crosswire.jsword.index.query.QueryBuilder#parse(java.lang.String)
53       */
54      public Query parse(String aSearch) {
55          Query query = NULL_QUERY;
56  
57          String sought = aSearch;
58          if (sought == null || sought.length() == 0) {
59              return query;
60          }
61  
62          int i = 0;
63  
64          Query range = null;
65          String rangeModifier = "";
66          // Look for a range +[...], -[...], or [...]
67          Matcher rangeMatcher = RANGE_PATTERN.matcher(sought);
68          if (rangeMatcher.find()) {
69              rangeModifier = rangeMatcher.group(1);
70              range = new RangeQuery(rangeMatcher.group(2));
71              sought = sought.substring(rangeMatcher.end());
72          }
73  
74          // Look for a blur ~n
75          Matcher blurMatcher = BLUR_PATTERN.matcher(sought);
76          if (blurMatcher.find()) {
77              // Did we have ~ or ~n?
78              int blurFactor = 1;
79              String blur = blurMatcher.group(1);
80              if (blur.length() > 0) {
81                  blurFactor = Integer.parseInt(blur);
82              }
83              Query left = new BaseQuery(sought.substring(i, blurMatcher.start()));
84              Query right = new BaseQuery(sought.substring(blurMatcher.end()));
85              query = new BlurQuery(left, right, blurFactor);
86          } else if (sought.length() > 0) {
87              query = new BaseQuery(sought);
88          }
89  
90          if (range != null && !NULL_QUERY.equals(query)) {
91              if (rangeModifier.length() == 0 || rangeModifier.charAt(0) == '+') {
92                  query = new AndQuery(range, query);
93              } else {
94                  // AndNot needs to be after what it is restricting
95                  query = new AndNotQuery(query, range);
96              }
97          }
98  
99          return query;
100     }
101 
102     /**
103      * The pattern of a range. This is anything that is contained between a
104      * leading [] (but not containing a [ or ]), with a + or - optional prefix,
105      * perhaps surrounded by whitespace.
106      */
107     private static final Pattern RANGE_PATTERN = Pattern.compile("^\\s*([-+]?)\\[([^\\[\\]]+)\\]\\s*");
108 
109     /**
110      * The pattern of a blur. A '~', optionally followed by a number,
111      * representing the number of verses.
112      */
113     private static final Pattern BLUR_PATTERN = Pattern.compile("\\s~(\\d*)?\\s");
114 
115     /**
116      * A query that returns nothing.
117      */
118     private static final Query NULL_QUERY = new NullQuery();
119 
120 }
121