1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 or later
5    * as published by the Free Software Foundation. This program is distributed
6    * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
7    * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *      http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * © CrossWire Bible Society, 2005 - 2016
18   *
19   */
20  package org.crosswire.jsword.passage;
21  
22  import java.io.IOException;
23  import java.io.Reader;
24  import java.util.Iterator;
25  
26  import org.crosswire.jsword.JSOtherMsg;
27  import org.crosswire.jsword.versification.Versification;
28  import org.crosswire.jsword.versification.system.Versifications;
29  
30  /**
31   * The PassageKeyFactory constructs Passages of the default Passage type.
32   * This allows for tuning the application to specific time/space needs
33   * and it allows for the development of other types of Passages without
34   * needing to change the application.
35   * <p>
36   * It is strongly recommended to use this factory to create passages
37   * if there is no driving need to create them for a specific purpose.
38   * </p>
39   * <p>
40   * Most of the methods take the same arguments:
41   * </p>
42   * <ul>
43   * <li><strong>Versification v11n</strong> - All Passages are created as part
44   * of a Versification. Verses and VerseRanges which make up a Passage, require
45   * a one.</li>
46   * <li><strong>String passageReference</strong> - A string representation for the Passage.
47   * The parser is very lenient, but requires the verses to be a member of the
48   * Versification.</li>
49   * <li><strong>VerseRange basis</strong> - When interpreting a reference, the
50   * prior reference forms the context to understand that reference. For example,
51   * when seeing 11:1, it needs the context of the BibleBook to know what this
52   * chapter and verse belongs. A basis is not needed for OSIS references, as
53   * every verse is fully qualified. But for references from ThML and GBF,
54   * it is highly needed.</li>  
55   * </ul>
56   * <p>
57   * Most of the methods will throw:
58   * </p>
59   * <ul><li><strong>NoSuchKeyException</strong> - Indicating that something
60   * in the string references could not be understood as a verse. The message
61   * of the exception will give the precise reason for the failure.</li></ul>
62   * 
63   * 
64   * @see gnu.lgpl.License The GNU Lesser General Public License for details.
65   * @author Joe Walker
66   * @author DM Smith
67   */
68  public final class PassageKeyFactory {
69      /**
70       * This class implements a Singleton pattern. So the ctor is private
71       */
72      private PassageKeyFactory() {
73      }
74  
75      /**
76       * This PassageKeyFactory is accessed through this instance.
77       * 
78       * @return this PassageKeyFactory
79       */
80      public static PassageKeyFactory instance() {
81          return keyf;
82      }
83  
84      /**
85       * Create an empty list of keys for the v11n
86       * 
87       * @param v11n
88       *            The Versification to which this Passage belongs.
89       * @return an empty Passage
90       */
91      public Key createEmptyKeyList(Versification v11n) {
92          return defaultType.createEmptyPassage(v11n);
93      }
94  
95      /**
96       * Get a Passage containing all the Verses in this Versification.
97       * This differs from {@link org.crosswire.jsword.book.Book#getGlobalKeyList} which gets the
98       * verses in the Book, which may be a small part of the whole.
99       * 
100      * @param v11n
101      *            The Versification to which this Passage belongs.
102      * @return the Passage with all the Verses in the Versification
103      */
104     public Key getGlobalKeyList(Versification v11n) {
105         return new ReadOnlyPassage(KeyUtil.getPassage(v11n.getAllVerses()), true);
106     }
107 
108     /**
109      * Convert the passageReference into a Passage or an empty Passage,
110      * if there is an error. Note, this is not recommended as it throws
111      * away the error.
112      * 
113      * @param v11n
114      *            The Versification to which this Passage belongs.
115      * @param passageReference
116      *            A String containing the text of the Passage
117      * @param basis
118      *           The basis by which to interpret passageReference
119      * @return a new Passage filled with the desired Verses or an empty Passage
120      */
121     public Key getValidKey(Versification v11n, String passageReference, Key basis) {
122         try {
123             return getKey(v11n, passageReference, basis);
124         } catch (NoSuchKeyException e) {
125             return createEmptyKeyList(v11n);
126         }
127     }
128 
129     /**
130      * Convert the passageReference into a Passage or an empty Passage,
131      * if there is an error. Note, this is not recommended as it throws
132      * away the error.
133      * 
134      * @param v11n
135      *            The Versification to which this Passage belongs.
136      * @param passageReference
137      *            A String containing the text for the Passage
138      * @return a new Passage filled with the desired Verses or an empty Passage
139      */
140     public Key getValidKey(Versification v11n, String passageReference) {
141         return getValidKey(v11n, passageReference, null);
142     }
143 
144     /**
145      * Convert the passageReference into a Passage. This is the recommended
146      * form for understanding references in ThML and GBF.
147      * 
148      * @param v11n
149      *            The Versification to which this Passage belongs.
150      * @param passageReference
151      *            A String containing the text for the Passage
152      * @param basis
153      *           The basis by which to interpret passageReference
154      * @return a new Passage filled with the desired Verses
155      * @throws NoSuchKeyException
156      *             If the passageReference has anything that could not be understood as a Verse
157      */
158     public Passage getKey(Versification v11n, String passageReference, Key basis) throws NoSuchKeyException {
159         // since normalization is relatively expensive
160         // don't try it unless it solves a problem.
161         try {
162             return defaultType.createPassage(v11n, passageReference, basis);
163         } catch (NoSuchKeyException e) {
164             try {
165                 return defaultType.createPassage(v11n, normalize(passageReference), basis);
166             } catch (NoSuchKeyException ex) {
167                 // TODO(DM): Parser should allow valid osisRefs!
168                 return defaultType.createPassage(v11n, mungOsisRef(passageReference), basis);
169             }
170         }
171     }
172 
173     /**
174      * Convert the passageReference into a Passage. This is the recommended
175      * form for application constructed references and user input. Both of
176      * these should have a fully qualified first reference.
177      * 
178      * @param v11n
179      *            The Versification to which this Passage belongs.
180      * @param passageReference
181      *            A String containing the text for the Passage
182      * @return a new Passage filled with the desired Verses
183      * @throws NoSuchKeyException
184      *             If the passageReference has anything that could not be understood as a Verse
185      */
186     public Passage getKey(Versification v11n, String passageReference) throws NoSuchKeyException {
187         return getKey(v11n, passageReference, null);
188     }
189 
190     /**
191      * Set the default PassageType
192      * 
193      * @param newDefaultType
194      *            The new default PassageType.
195      */
196     public static void setDefaultType(PassageType newDefaultType) {
197         PassageKeyFactory.defaultType = newDefaultType;
198     }
199 
200     /**
201      * Get the default PassageType.
202      * 
203      * @return The default PassageType.
204      */
205     public static PassageType getDefaultType() {
206         return defaultType;
207     }
208 
209     /**
210      * Set the default PassageType. Must be the ordinal value of one of:
211      * <ul>
212      * <li>PassageType.SPEED
213      * <li>PassageType.WRITE_SPEED
214      * <li>PassageType.SIZE
215      * <li>PassageType.MIX
216      * <li>PassageType.TALLY
217      * </ul>
218      * 
219      * @param newDefaultType
220      *            The new default type.
221      */
222     public static void setDefaultPassage(int newDefaultType) {
223         setDefaultType(PassageType.fromInteger(newDefaultType));
224     }
225 
226     /**
227      * Get the default passage type as the ordinal value of the PassageType.
228      * 
229      * @return default_type The new default type.
230      * @see PassageKeyFactory#setDefaultPassage
231      */
232     public static int getDefaultPassage() {
233         return PassageType.toInteger(defaultType);
234     }
235 
236     /**
237      * Get a new Passage based on another Passage that synchronizes all access
238      * to its members.
239      * 
240      * @param ref
241      *            The passage to synchronize
242      * @return A new synchronized passage that proxies requests to the original
243      */
244     public static Passage getSynchronizedPassage(Passage ref) {
245         return new SynchronizedPassage(ref);
246     }
247 
248     /**
249      * Get a new Passage based on another Passage that synchronizes all access
250      * to its members.
251      * 
252      * @param ref
253      *            The passage to synchronize
254      * @param ignore
255      *            Do we throw up if someone tries to change us
256      * @return A new synchronized passage that proxies requests to the original
257      */
258     public static Passage getReadOnlyPassage(Passage ref, boolean ignore) {
259         return new ReadOnlyPassage(ref, ignore);
260     }
261 
262     /**
263      * Convert us to a binary representation. There are some distinctly
264      * endianist happenings here, but that is OK because we are reading the
265      * stuff we write here just below.
266      * 
267      * @param ref
268      *            The Passage to convert
269      * @return a byte array
270      */
271     static byte[] toBinaryRepresentation(Passage ref) {
272         Versification v11n = ref.getVersification();
273         int maxOrdinal = v11n.maximumOrdinal();
274         // store these locally we use them so often
275         int verses = ref.countVerses();
276         int ranges = ref.countRanges(RestrictionType.NONE);
277 
278         // the size in bytes of teach storage method
279         int bitwiseSize = maxOrdinal / 8;
280         int rangedSize = (ranges * 4) + 1;
281         int distinctSize = (verses * 2) + 1;
282 
283         // if bitwise is equal smallest
284         if (bitwiseSize <= rangedSize && bitwiseSize <= distinctSize) {
285             int arraySize = binarySize(AbstractPassage.METHOD_COUNT) + (maxOrdinal / 8) + 1;
286             byte[] buffer = new byte[arraySize];
287             int index = 0;
288 
289             index += toBinary(buffer, index, AbstractPassage.BITWISE, AbstractPassage.METHOD_COUNT);
290 
291             for (Key aKey : ref) {
292                 Verse verse = (Verse) aKey;
293                 int ord = verse.getOrdinal();
294 
295                 // Which byte should we be altering
296                 int idx0 = (ord / 8) + index;
297 
298                 // Which bit within that byte (0-7)
299                 int bit = (ord % 8) - 1;
300 
301                 buffer[idx0] |= 1 << bit;
302             }
303 
304             return buffer;
305         } else if (distinctSize <= rangedSize) {
306             // if distinct is not bigger than ranged
307             int arraySize = binarySize(AbstractPassage.METHOD_COUNT) + binarySize(maxOrdinal)
308                     + (verses * binarySize(maxOrdinal));
309             byte[] buffer = new byte[arraySize];
310             int index = 0;
311 
312             // write the Passage type and the number of verses. There must be
313             // less than 2**16 verses
314             index += toBinary(buffer, index, AbstractPassage.DISTINCT, AbstractPassage.METHOD_COUNT);
315             index += toBinary(buffer, index, verses, maxOrdinal);
316 
317             // write the verse ordinals in a loop
318             for (Key aKey : ref) {
319                 Verse verse = (Verse) aKey;
320                 int ord = verse.getOrdinal();
321                 index += toBinary(buffer, index, ord, maxOrdinal);
322             }
323 
324             return buffer;
325         } else {
326             // otherwise use ranges
327             int arraySize = binarySize(AbstractPassage.METHOD_COUNT) + binarySize(maxOrdinal / 2)
328                     + (2 * ranges * binarySize(maxOrdinal));
329             byte[] buffer = new byte[arraySize];
330             int index = 0;
331 
332             // write the Passage type and the number of ranges
333             index += toBinary(buffer, index, AbstractPassage.RANGED, AbstractPassage.METHOD_COUNT);
334             index += toBinary(buffer, index, ranges, maxOrdinal / 2);
335 
336             // write the verse ordinals in a loop
337             Iterator<VerseRange> it = ref.rangeIterator(RestrictionType.NONE);
338             while (it.hasNext()) {
339                 VerseRange range = it.next();
340                 index += toBinary(buffer, index, range.getStart().getOrdinal(), maxOrdinal);
341                 index += toBinary(buffer, index, range.getCardinality(), maxOrdinal);
342             }
343 
344             // chop to size
345             return buffer;
346         }
347     }
348 
349     /**
350      * Write out the object to the given ObjectOutputStream
351      * 
352      * @param buffer
353      *            The stream to read our state from
354      * @return The converted Passage
355      * @throws NoSuchKeyException
356      *             If the buffer is invalid
357      */
358     static Passage fromBinaryRepresentation(byte[] buffer) throws NoSuchKeyException {
359         // AV11N(DMS): This is wrong, but toBinaryRepresentation does not write the v11n name
360         Versification rs = Versifications.instance().getVersification("KJV");
361         int maxOrdinal = rs.maximumOrdinal();
362         Passage ref = (Passage) keyf.createEmptyKeyList(rs);
363 
364         // Some speedups
365         AbstractPassage aref = null;
366         if (ref instanceof AbstractPassage) {
367             aref = (AbstractPassage) ref;
368             aref.raiseEventSuppresion();
369             aref.raiseNormalizeProtection();
370         }
371 
372         int[] index = new int[] {
373             0
374         };
375         int type = fromBinary(buffer, index, AbstractPassage.METHOD_COUNT);
376 
377         switch (type) {
378         case AbstractPassage.BITWISE:
379             for (int ord = 1; ord <= maxOrdinal; ord++) {
380                 // Which byte should we be viewing
381                 int idx0 = (ord / 8) + index[0];
382 
383                 // Which bit within that byte (0-7)
384                 int bit = (ord % 8) - 1;
385 
386                 if ((buffer[idx0] & (1 << bit)) != 0) {
387                     ref.add(rs.decodeOrdinal(ord));
388                 }
389             }
390             // index gets left behind here, but we dont care
391             break;
392 
393         case AbstractPassage.DISTINCT:
394             int verses = fromBinary(buffer, index, maxOrdinal);
395             for (int i = 0; i < verses; i++) {
396                 int ord = fromBinary(buffer, index, maxOrdinal);
397                 ref.add(rs.decodeOrdinal(ord));
398             }
399             break;
400 
401         case AbstractPassage.RANGED:
402             int ranges = fromBinary(buffer, index, maxOrdinal / 2);
403             for (int i = 0; i < ranges; i++) {
404                 int ord = fromBinary(buffer, index, maxOrdinal);
405                 int len = fromBinary(buffer, index, maxOrdinal);
406                 ref.add(RestrictionType.NONE.toRange(rs, rs.decodeOrdinal(ord), len));
407             }
408             break;
409 
410         default:
411             throw new NoSuchKeyException(JSOtherMsg.lookupText("Unknown passage type."));
412         }
413 
414         // Some speedups
415         if (aref != null) {
416             aref.lowerEventSuppressionAndTest();
417             aref.lowerNormalizeProtection();
418         }
419 
420         return ref;
421     }
422 
423     /**
424      * Read a passage from a given stream
425      * 
426      * @param in
427      *            The stream to read from
428      * @return a newly built Passage
429      * @throws IOException
430      *             If there was trouble reading the stream
431      * @throws NoSuchKeyException
432      *             if the data was not a valid passage
433      */
434     public static Passage readPassage(Reader in) throws IOException, NoSuchKeyException {
435         // Get any versification. It does not matter. readDescripton will overwrite it.
436         Versification rs = Versifications.instance().getVersification("KJV");
437         Passage ref = (Passage) keyf.createEmptyKeyList(rs);
438         ref.readDescription(in);
439         return ref;
440     }
441 
442     /**
443      * Write to buffer (starting at index) the given number using a set of bytes
444      * as required by the max possible value for the number
445      * 
446      * @param max
447      *            The number to write
448      * @return The number of bytes needed
449      */
450     protected static int binarySize(int max) {
451         // 1 byte (2^8)
452         if (max < 256) {
453             return 1;
454         }
455 
456         // 2 bytes (2^16)
457         if (max < 65536) {
458             return 2;
459         }
460 
461         // 3 bytes (2^24)
462         if (max < 16777216) {
463             return 3;
464         }
465 
466         // 4 bytes (2^32)
467         return 4;
468     }
469 
470     /**
471      * Write to buffer (starting at index) the given number using a set of bytes
472      * as required by the max possible value for the number
473      * 
474      * @param buffer
475      *            Where to write to
476      * @param index
477      *            The offset to start at
478      * @param number
479      *            The number to write
480      * @param max
481      *            The max size
482      * @return The number of bytes written
483      */
484     protected static int toBinary(byte[] buffer, int index, int number, int max) {
485         assert number >= 0 : "No -ve output " + number;
486         assert number <= max : "number " + number + " > max " + max;
487 
488         // 1 byte (2^8)
489         if (max < 256) {
490             buffer[index] = (byte) number;
491             return 1;
492         }
493 
494         // 2 bytes (2^16)
495         if (max < 65536) {
496             buffer[index + 0] = (byte) (number >>> 8);
497             buffer[index + 1] = (byte) (number >>> 0);
498             return 2;
499         }
500 
501         // 3 bytes (2^24)
502         if (max < 16777216) {
503             buffer[index + 0] = (byte) (number >>> 16);
504             buffer[index + 1] = (byte) (number >>> 8);
505             buffer[index + 2] = (byte) (number >>> 0);
506             return 3;
507         }
508 
509         // 4 bytes (2^32)
510         buffer[index + 0] = (byte) (number >>> 24);
511         buffer[index + 1] = (byte) (number >>> 16);
512         buffer[index + 2] = (byte) (number >>> 8);
513         buffer[index + 3] = (byte) (number >>> 0);
514         return 4;
515     }
516 
517     /**
518      * Read and return an int from the buffer (starting at index[0]) using a set
519      * of bytes as required by the max possible value for the number, and
520      * incrementing index[0] by that number of bytes.
521      * 
522      * @param buffer
523      *            The buffer to read from
524      * @param index
525      *            The offset to start at
526      * @param max
527      *            The max number of bytes to read
528      * @return The converted number
529      */
530     protected static int fromBinary(byte[] buffer, int[] index, int max) {
531         // Am I naive in thinking that & 0x000000ff turns int -1 into 255?.
532 
533         // 1 byte (2^8)
534         int b0 = buffer[index[0]++] & 0x000000ff;
535         if (max < 256) {
536             return b0;
537         }
538 
539         // 2 bytes (2^16)
540         int b1 = buffer[index[0]++] & 0x000000ff;
541         if (max < 65536) {
542             return (b0 << 8) + (b1 << 0);
543         }
544 
545         // 3 bytes (2^24)
546         int b2 = buffer[index[0]++] & 0x000000ff;
547         if (max < 16777216) {
548             return (b0 << 16) + (b1 << 8) + (b2 << 0);
549         }
550 
551         // 4 bytes (2^32)
552         int b3 = buffer[index[0]++] & 0x000000ff;
553         return (b0 << 24) + (b1 << 16) + (b2 << 8) + (b3 << 0);
554     }
555 
556     /**
557      * Replace spaces with semi-colons, because the parser expects them.
558      * 
559      * @param passageReference
560      * @return the munged value
561      */
562     private String mungOsisRef(String passageReference) {
563         return passageReference.replace(' ', ';');
564     }
565 
566     /**
567      * The internals of a Passage require that references are separated with a
568      * reference delimiter. However, people and other systems may not be so
569      * stringent. So we want to allow for
570      * "Ge 1:26  3:22  11:7  20:13  31:7, 53  35:7" (which is from Clarke) This
571      * should become "Ge 1:26, 3:22, 11:7, 20:13, 31:7, 53, 35:7" Basically, the
572      * rule of thumb is that if two numbers are found separated by whitespace
573      * then add a comma between them. One note $, and ff are taken to be
574      * numbers. But it is complicated by Book names that are like 1 Cor And by
575      * verse references like Gen 1.2 Gen.1.2 Gen 1 2 which are all equivalent.
576      * So we use a counter when we see a number, if the counter reaches 2 and
577      * then we see a name or a number we emit a reference delimiter.
578      * 
579      * @param passageReference
580      * @return the normalized value
581      */
582     private String normalize(String passageReference) {
583         if (passageReference == null) {
584             return null;
585         }
586 
587         // Note this has a lot in common with AccuracyType.tokenize
588         int size = passageReference.length();
589         StringBuilder buf = new StringBuilder(size * 2);
590 
591         char curChar = ' ';
592         boolean isNumber = false;
593         boolean wasNumberOrMarker = false;
594         boolean isEndMarker = false;
595         boolean isNumberOrMarker = false;
596         int i = 0;
597         while (i < size) {
598             curChar = passageReference.charAt(i);
599 
600             // Determine whether we are starting a number
601             isNumber = Character.isDigit(curChar);
602             isEndMarker = curChar == '$' || (curChar == 'f' && (i + 1 < size ? passageReference.charAt(i + 1) : ' ') == 'f');
603             isNumberOrMarker = isNumber || isEndMarker;
604             // If the last thing we saw was a number and the next thing we see
605             // is another number or a word
606             // then we want to put in a ',' or a ' '
607             if (wasNumberOrMarker) {
608                 if (isNumber) {
609                     buf.append(AbstractPassage.REF_PREF_DELIM);
610                 } else if (isEndMarker) {
611                     buf.append(VerseRange.RANGE_OSIS_DELIM);
612                 } else if (Character.isLetter(curChar)) {
613                     buf.append(' ');
614                 }
615 
616                 // Having handled the condition, we now set it to false
617                 wasNumberOrMarker = false;
618             }
619 
620             if (isNumberOrMarker) {
621                 wasNumberOrMarker = true;
622                 buf.append(curChar);
623                 i++;
624 
625                 // If it started with an 'f' it was also followed by another.
626                 if (curChar == 'f') {
627                     buf.append('f');
628                     i++;
629                 } else if (curChar != '$') {
630                     // If it wasn't an 'f' or a '$' then it was digits
631                     while (i < size) {
632                         curChar = passageReference.charAt(i);
633                         if (!Character.isDigit(curChar)) {
634                             break;
635                         }
636                         buf.append(curChar);
637                         i++;
638                     }
639                 }
640 
641                 // skip all following whitespace, it will be added back in as
642                 // needed
643                 while (i < size && Character.isWhitespace(passageReference.charAt(i))) {
644                     i++;
645                 }
646             } else {
647                 buf.append(curChar);
648                 i++;
649             }
650         }
651 
652         return buf.toString();
653     }
654 
655     /**
656      * The default type
657      */
658     private static PassageType defaultType = PassageType.SPEED;
659 
660     /**
661      * How we create Passages
662      */
663     private static PassageKeyFactory keyf = new PassageKeyFactory();
664 }
665