PassageKeyFactory.java |
1 /** 2 * Distribution License: 3 * JSword is free software; you can redistribute it and/or modify it under 4 * the terms of the GNU Lesser General Public License, version 2.1 or later 5 * as published by the Free Software Foundation. This program is distributed 6 * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even 7 * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 8 * See the GNU Lesser General Public License for more details. 9 * 10 * The License is available on the internet at: 11 * http://www.gnu.org/copyleft/lgpl.html 12 * or by writing to: 13 * Free Software Foundation, Inc. 14 * 59 Temple Place - Suite 330 15 * Boston, MA 02111-1307, USA 16 * 17 * © CrossWire Bible Society, 2005 - 2016 18 * 19 */ 20 package org.crosswire.jsword.passage; 21 22 import java.io.IOException; 23 import java.io.Reader; 24 import java.util.Iterator; 25 26 import org.crosswire.jsword.JSOtherMsg; 27 import org.crosswire.jsword.versification.Versification; 28 import org.crosswire.jsword.versification.system.Versifications; 29 30 /** 31 * The PassageKeyFactory constructs Passages of the default Passage type. 32 * This allows for tuning the application to specific time/space needs 33 * and it allows for the development of other types of Passages without 34 * needing to change the application. 35 * <p> 36 * It is strongly recommended to use this factory to create passages 37 * if there is no driving need to create them for a specific purpose. 38 * </p> 39 * <p> 40 * Most of the methods take the same arguments: 41 * </p> 42 * <ul> 43 * <li><strong>Versification v11n</strong> - All Passages are created as part 44 * of a Versification. Verses and VerseRanges which make up a Passage, require 45 * a one.</li> 46 * <li><strong>String passageReference</strong> - A string representation for the Passage. 47 * The parser is very lenient, but requires the verses to be a member of the 48 * Versification.</li> 49 * <li><strong>VerseRange basis</strong> - When interpreting a reference, the 50 * prior reference forms the context to understand that reference. For example, 51 * when seeing 11:1, it needs the context of the BibleBook to know what this 52 * chapter and verse belongs. A basis is not needed for OSIS references, as 53 * every verse is fully qualified. But for references from ThML and GBF, 54 * it is highly needed.</li> 55 * </ul> 56 * <p> 57 * Most of the methods will throw: 58 * </p> 59 * <ul><li><strong>NoSuchKeyException</strong> - Indicating that something 60 * in the string references could not be understood as a verse. The message 61 * of the exception will give the precise reason for the failure.</li></ul> 62 * 63 * 64 * @see gnu.lgpl.License The GNU Lesser General Public License for details. 65 * @author Joe Walker 66 * @author DM Smith 67 */ 68 public final class PassageKeyFactory { 69 /** 70 * This class implements a Singleton pattern. So the ctor is private 71 */ 72 private PassageKeyFactory() { 73 } 74 75 /** 76 * This PassageKeyFactory is accessed through this instance. 77 * 78 * @return this PassageKeyFactory 79 */ 80 public static PassageKeyFactory instance() { 81 return keyf; 82 } 83 84 /** 85 * Create an empty list of keys for the v11n 86 * 87 * @param v11n 88 * The Versification to which this Passage belongs. 89 * @return an empty Passage 90 */ 91 public Key createEmptyKeyList(Versification v11n) { 92 return defaultType.createEmptyPassage(v11n); 93 } 94 95 /** 96 * Get a Passage containing all the Verses in this Versification. 97 * This differs from {@link org.crosswire.jsword.book.Book#getGlobalKeyList} which gets the 98 * verses in the Book, which may be a small part of the whole. 99 * 100 * @param v11n 101 * The Versification to which this Passage belongs. 102 * @return the Passage with all the Verses in the Versification 103 */ 104 public Key getGlobalKeyList(Versification v11n) { 105 return new ReadOnlyPassage(KeyUtil.getPassage(v11n.getAllVerses()), true); 106 } 107 108 /** 109 * Convert the passageReference into a Passage or an empty Passage, 110 * if there is an error. Note, this is not recommended as it throws 111 * away the error. 112 * 113 * @param v11n 114 * The Versification to which this Passage belongs. 115 * @param passageReference 116 * A String containing the text of the Passage 117 * @param basis 118 * The basis by which to interpret passageReference 119 * @return a new Passage filled with the desired Verses or an empty Passage 120 */ 121 public Key getValidKey(Versification v11n, String passageReference, Key basis) { 122 try { 123 return getKey(v11n, passageReference, basis); 124 } catch (NoSuchKeyException e) { 125 return createEmptyKeyList(v11n); 126 } 127 } 128 129 /** 130 * Convert the passageReference into a Passage or an empty Passage, 131 * if there is an error. Note, this is not recommended as it throws 132 * away the error. 133 * 134 * @param v11n 135 * The Versification to which this Passage belongs. 136 * @param passageReference 137 * A String containing the text for the Passage 138 * @return a new Passage filled with the desired Verses or an empty Passage 139 */ 140 public Key getValidKey(Versification v11n, String passageReference) { 141 return getValidKey(v11n, passageReference, null); 142 } 143 144 /** 145 * Convert the passageReference into a Passage. This is the recommended 146 * form for understanding references in ThML and GBF. 147 * 148 * @param v11n 149 * The Versification to which this Passage belongs. 150 * @param passageReference 151 * A String containing the text for the Passage 152 * @param basis 153 * The basis by which to interpret passageReference 154 * @return a new Passage filled with the desired Verses 155 * @throws NoSuchKeyException 156 * If the passageReference has anything that could not be understood as a Verse 157 */ 158 public Passage getKey(Versification v11n, String passageReference, Key basis) throws NoSuchKeyException { 159 // since normalization is relatively expensive 160 // don't try it unless it solves a problem. 161 try { 162 return defaultType.createPassage(v11n, passageReference, basis); 163 } catch (NoSuchKeyException e) { 164 try { 165 return defaultType.createPassage(v11n, normalize(passageReference), basis); 166 } catch (NoSuchKeyException ex) { 167 // TODO(DM): Parser should allow valid osisRefs! 168 return defaultType.createPassage(v11n, mungOsisRef(passageReference), basis); 169 } 170 } 171 } 172 173 /** 174 * Convert the passageReference into a Passage. This is the recommended 175 * form for application constructed references and user input. Both of 176 * these should have a fully qualified first reference. 177 * 178 * @param v11n 179 * The Versification to which this Passage belongs. 180 * @param passageReference 181 * A String containing the text for the Passage 182 * @return a new Passage filled with the desired Verses 183 * @throws NoSuchKeyException 184 * If the passageReference has anything that could not be understood as a Verse 185 */ 186 public Passage getKey(Versification v11n, String passageReference) throws NoSuchKeyException { 187 return getKey(v11n, passageReference, null); 188 } 189 190 /** 191 * Set the default PassageType 192 * 193 * @param newDefaultType 194 * The new default PassageType. 195 */ 196 public static void setDefaultType(PassageType newDefaultType) { 197 PassageKeyFactory.defaultType = newDefaultType; 198 } 199 200 /** 201 * Get the default PassageType. 202 * 203 * @return The default PassageType. 204 */ 205 public static PassageType getDefaultType() { 206 return defaultType; 207 } 208 209 /** 210 * Set the default PassageType. Must be the ordinal value of one of: 211 * <ul> 212 * <li>PassageType.SPEED 213 * <li>PassageType.WRITE_SPEED 214 * <li>PassageType.SIZE 215 * <li>PassageType.MIX 216 * <li>PassageType.TALLY 217 * </ul> 218 * 219 * @param newDefaultType 220 * The new default type. 221 */ 222 public static void setDefaultPassage(int newDefaultType) { 223 setDefaultType(PassageType.fromInteger(newDefaultType)); 224 } 225 226 /** 227 * Get the default passage type as the ordinal value of the PassageType. 228 * 229 * @return default_type The new default type. 230 * @see PassageKeyFactory#setDefaultPassage 231 */ 232 public static int getDefaultPassage() { 233 return PassageType.toInteger(defaultType); 234 } 235 236 /** 237 * Get a new Passage based on another Passage that synchronizes all access 238 * to its members. 239 * 240 * @param ref 241 * The passage to synchronize 242 * @return A new synchronized passage that proxies requests to the original 243 */ 244 public static Passage getSynchronizedPassage(Passage ref) { 245 return new SynchronizedPassage(ref); 246 } 247 248 /** 249 * Get a new Passage based on another Passage that synchronizes all access 250 * to its members. 251 * 252 * @param ref 253 * The passage to synchronize 254 * @param ignore 255 * Do we throw up if someone tries to change us 256 * @return A new synchronized passage that proxies requests to the original 257 */ 258 public static Passage getReadOnlyPassage(Passage ref, boolean ignore) { 259 return new ReadOnlyPassage(ref, ignore); 260 } 261 262 /** 263 * Convert us to a binary representation. There are some distinctly 264 * endianist happenings here, but that is OK because we are reading the 265 * stuff we write here just below. 266 * 267 * @param ref 268 * The Passage to convert 269 * @return a byte array 270 */ 271 static byte[] toBinaryRepresentation(Passage ref) { 272 Versification v11n = ref.getVersification(); 273 int maxOrdinal = v11n.maximumOrdinal(); 274 // store these locally we use them so often 275 int verses = ref.countVerses(); 276 int ranges = ref.countRanges(RestrictionType.NONE); 277 278 // the size in bytes of teach storage method 279 int bitwiseSize = maxOrdinal / 8; 280 int rangedSize = (ranges * 4) + 1; 281 int distinctSize = (verses * 2) + 1; 282 283 // if bitwise is equal smallest 284 if (bitwiseSize <= rangedSize && bitwiseSize <= distinctSize) { 285 int arraySize = binarySize(AbstractPassage.METHOD_COUNT) + (maxOrdinal / 8) + 1; 286 byte[] buffer = new byte[arraySize]; 287 int index = 0; 288 289 index += toBinary(buffer, index, AbstractPassage.BITWISE, AbstractPassage.METHOD_COUNT); 290 291 for (Key aKey : ref) { 292 Verse verse = (Verse) aKey; 293 int ord = verse.getOrdinal(); 294 295 // Which byte should we be altering 296 int idx0 = (ord / 8) + index; 297 298 // Which bit within that byte (0-7) 299 int bit = (ord % 8) - 1; 300 301 buffer[idx0] |= 1 << bit; 302 } 303 304 return buffer; 305 } else if (distinctSize <= rangedSize) { 306 // if distinct is not bigger than ranged 307 int arraySize = binarySize(AbstractPassage.METHOD_COUNT) + binarySize(maxOrdinal) 308 + (verses * binarySize(maxOrdinal)); 309 byte[] buffer = new byte[arraySize]; 310 int index = 0; 311 312 // write the Passage type and the number of verses. There must be 313 // less than 2**16 verses 314 index += toBinary(buffer, index, AbstractPassage.DISTINCT, AbstractPassage.METHOD_COUNT); 315 index += toBinary(buffer, index, verses, maxOrdinal); 316 317 // write the verse ordinals in a loop 318 for (Key aKey : ref) { 319 Verse verse = (Verse) aKey; 320 int ord = verse.getOrdinal(); 321 index += toBinary(buffer, index, ord, maxOrdinal); 322 } 323 324 return buffer; 325 } else { 326 // otherwise use ranges 327 int arraySize = binarySize(AbstractPassage.METHOD_COUNT) + binarySize(maxOrdinal / 2) 328 + (2 * ranges * binarySize(maxOrdinal)); 329 byte[] buffer = new byte[arraySize]; 330 int index = 0; 331 332 // write the Passage type and the number of ranges 333 index += toBinary(buffer, index, AbstractPassage.RANGED, AbstractPassage.METHOD_COUNT); 334 index += toBinary(buffer, index, ranges, maxOrdinal / 2); 335 336 // write the verse ordinals in a loop 337 Iterator<VerseRange> it = ref.rangeIterator(RestrictionType.NONE); 338 while (it.hasNext()) { 339 VerseRange range = it.next(); 340 index += toBinary(buffer, index, range.getStart().getOrdinal(), maxOrdinal); 341 index += toBinary(buffer, index, range.getCardinality(), maxOrdinal); 342 } 343 344 // chop to size 345 return buffer; 346 } 347 } 348 349 /** 350 * Write out the object to the given ObjectOutputStream 351 * 352 * @param buffer 353 * The stream to read our state from 354 * @return The converted Passage 355 * @throws NoSuchKeyException 356 * If the buffer is invalid 357 */ 358 static Passage fromBinaryRepresentation(byte[] buffer) throws NoSuchKeyException { 359 // AV11N(DMS): This is wrong, but toBinaryRepresentation does not write the v11n name 360 Versification rs = Versifications.instance().getVersification("KJV"); 361 int maxOrdinal = rs.maximumOrdinal(); 362 Passage ref = (Passage) keyf.createEmptyKeyList(rs); 363 364 // Some speedups 365 AbstractPassage aref = null; 366 if (ref instanceof AbstractPassage) { 367 aref = (AbstractPassage) ref; 368 aref.raiseEventSuppresion(); 369 aref.raiseNormalizeProtection(); 370 } 371 372 int[] index = new int[] { 373 0 374 }; 375 int type = fromBinary(buffer, index, AbstractPassage.METHOD_COUNT); 376 377 switch (type) { 378 case AbstractPassage.BITWISE: 379 for (int ord = 1; ord <= maxOrdinal; ord++) { 380 // Which byte should we be viewing 381 int idx0 = (ord / 8) + index[0]; 382 383 // Which bit within that byte (0-7) 384 int bit = (ord % 8) - 1; 385 386 if ((buffer[idx0] & (1 << bit)) != 0) { 387 ref.add(rs.decodeOrdinal(ord)); 388 } 389 } 390 // index gets left behind here, but we dont care 391 break; 392 393 case AbstractPassage.DISTINCT: 394 int verses = fromBinary(buffer, index, maxOrdinal); 395 for (int i = 0; i < verses; i++) { 396 int ord = fromBinary(buffer, index, maxOrdinal); 397 ref.add(rs.decodeOrdinal(ord)); 398 } 399 break; 400 401 case AbstractPassage.RANGED: 402 int ranges = fromBinary(buffer, index, maxOrdinal / 2); 403 for (int i = 0; i < ranges; i++) { 404 int ord = fromBinary(buffer, index, maxOrdinal); 405 int len = fromBinary(buffer, index, maxOrdinal); 406 ref.add(RestrictionType.NONE.toRange(rs, rs.decodeOrdinal(ord), len)); 407 } 408 break; 409 410 default: 411 throw new NoSuchKeyException(JSOtherMsg.lookupText("Unknown passage type.")); 412 } 413 414 // Some speedups 415 if (aref != null) { 416 aref.lowerEventSuppressionAndTest(); 417 aref.lowerNormalizeProtection(); 418 } 419 420 return ref; 421 } 422 423 /** 424 * Read a passage from a given stream 425 * 426 * @param in 427 * The stream to read from 428 * @return a newly built Passage 429 * @throws IOException 430 * If there was trouble reading the stream 431 * @throws NoSuchKeyException 432 * if the data was not a valid passage 433 */ 434 public static Passage readPassage(Reader in) throws IOException, NoSuchKeyException { 435 // Get any versification. It does not matter. readDescripton will overwrite it. 436 Versification rs = Versifications.instance().getVersification("KJV"); 437 Passage ref = (Passage) keyf.createEmptyKeyList(rs); 438 ref.readDescription(in); 439 return ref; 440 } 441 442 /** 443 * Write to buffer (starting at index) the given number using a set of bytes 444 * as required by the max possible value for the number 445 * 446 * @param max 447 * The number to write 448 * @return The number of bytes needed 449 */ 450 protected static int binarySize(int max) { 451 // 1 byte (2^8) 452 if (max < 256) { 453 return 1; 454 } 455 456 // 2 bytes (2^16) 457 if (max < 65536) { 458 return 2; 459 } 460 461 // 3 bytes (2^24) 462 if (max < 16777216) { 463 return 3; 464 } 465 466 // 4 bytes (2^32) 467 return 4; 468 } 469 470 /** 471 * Write to buffer (starting at index) the given number using a set of bytes 472 * as required by the max possible value for the number 473 * 474 * @param buffer 475 * Where to write to 476 * @param index 477 * The offset to start at 478 * @param number 479 * The number to write 480 * @param max 481 * The max size 482 * @return The number of bytes written 483 */ 484 protected static int toBinary(byte[] buffer, int index, int number, int max) { 485 assert number >= 0 : "No -ve output " + number; 486 assert number <= max : "number " + number + " > max " + max; 487 488 // 1 byte (2^8) 489 if (max < 256) { 490 buffer[index] = (byte) number; 491 return 1; 492 } 493 494 // 2 bytes (2^16) 495 if (max < 65536) { 496 buffer[index + 0] = (byte) (number >>> 8); 497 buffer[index + 1] = (byte) (number >>> 0); 498 return 2; 499 } 500 501 // 3 bytes (2^24) 502 if (max < 16777216) { 503 buffer[index + 0] = (byte) (number >>> 16); 504 buffer[index + 1] = (byte) (number >>> 8); 505 buffer[index + 2] = (byte) (number >>> 0); 506 return 3; 507 } 508 509 // 4 bytes (2^32) 510 buffer[index + 0] = (byte) (number >>> 24); 511 buffer[index + 1] = (byte) (number >>> 16); 512 buffer[index + 2] = (byte) (number >>> 8); 513 buffer[index + 3] = (byte) (number >>> 0); 514 return 4; 515 } 516 517 /** 518 * Read and return an int from the buffer (starting at index[0]) using a set 519 * of bytes as required by the max possible value for the number, and 520 * incrementing index[0] by that number of bytes. 521 * 522 * @param buffer 523 * The buffer to read from 524 * @param index 525 * The offset to start at 526 * @param max 527 * The max number of bytes to read 528 * @return The converted number 529 */ 530 protected static int fromBinary(byte[] buffer, int[] index, int max) { 531 // Am I naive in thinking that & 0x000000ff turns int -1 into 255?. 532 533 // 1 byte (2^8) 534 int b0 = buffer[index[0]++] & 0x000000ff; 535 if (max < 256) { 536 return b0; 537 } 538 539 // 2 bytes (2^16) 540 int b1 = buffer[index[0]++] & 0x000000ff; 541 if (max < 65536) { 542 return (b0 << 8) + (b1 << 0); 543 } 544 545 // 3 bytes (2^24) 546 int b2 = buffer[index[0]++] & 0x000000ff; 547 if (max < 16777216) { 548 return (b0 << 16) + (b1 << 8) + (b2 << 0); 549 } 550 551 // 4 bytes (2^32) 552 int b3 = buffer[index[0]++] & 0x000000ff; 553 return (b0 << 24) + (b1 << 16) + (b2 << 8) + (b3 << 0); 554 } 555 556 /** 557 * Replace spaces with semi-colons, because the parser expects them. 558 * 559 * @param passageReference 560 * @return the munged value 561 */ 562 private String mungOsisRef(String passageReference) { 563 return passageReference.replace(' ', ';'); 564 } 565 566 /** 567 * The internals of a Passage require that references are separated with a 568 * reference delimiter. However, people and other systems may not be so 569 * stringent. So we want to allow for 570 * "Ge 1:26 3:22 11:7 20:13 31:7, 53 35:7" (which is from Clarke) This 571 * should become "Ge 1:26, 3:22, 11:7, 20:13, 31:7, 53, 35:7" Basically, the 572 * rule of thumb is that if two numbers are found separated by whitespace 573 * then add a comma between them. One note $, and ff are taken to be 574 * numbers. But it is complicated by Book names that are like 1 Cor And by 575 * verse references like Gen 1.2 Gen.1.2 Gen 1 2 which are all equivalent. 576 * So we use a counter when we see a number, if the counter reaches 2 and 577 * then we see a name or a number we emit a reference delimiter. 578 * 579 * @param passageReference 580 * @return the normalized value 581 */ 582 private String normalize(String passageReference) { 583 if (passageReference == null) { 584 return null; 585 } 586 587 // Note this has a lot in common with AccuracyType.tokenize 588 int size = passageReference.length(); 589 StringBuilder buf = new StringBuilder(size * 2); 590 591 char curChar = ' '; 592 boolean isNumber = false; 593 boolean wasNumberOrMarker = false; 594 boolean isEndMarker = false; 595 boolean isNumberOrMarker = false; 596 int i = 0; 597 while (i < size) { 598 curChar = passageReference.charAt(i); 599 600 // Determine whether we are starting a number 601 isNumber = Character.isDigit(curChar); 602 isEndMarker = curChar == '$' || (curChar == 'f' && (i + 1 < size ? passageReference.charAt(i + 1) : ' ') == 'f'); 603 isNumberOrMarker = isNumber || isEndMarker; 604 // If the last thing we saw was a number and the next thing we see 605 // is another number or a word 606 // then we want to put in a ',' or a ' ' 607 if (wasNumberOrMarker) { 608 if (isNumber) { 609 buf.append(AbstractPassage.REF_PREF_DELIM); 610 } else if (isEndMarker) { 611 buf.append(VerseRange.RANGE_OSIS_DELIM); 612 } else if (Character.isLetter(curChar)) { 613 buf.append(' '); 614 } 615 616 // Having handled the condition, we now set it to false 617 wasNumberOrMarker = false; 618 } 619 620 if (isNumberOrMarker) { 621 wasNumberOrMarker = true; 622 buf.append(curChar); 623 i++; 624 625 // If it started with an 'f' it was also followed by another. 626 if (curChar == 'f') { 627 buf.append('f'); 628 i++; 629 } else if (curChar != '$') { 630 // If it wasn't an 'f' or a '$' then it was digits 631 while (i < size) { 632 curChar = passageReference.charAt(i); 633 if (!Character.isDigit(curChar)) { 634 break; 635 } 636 buf.append(curChar); 637 i++; 638 } 639 } 640 641 // skip all following whitespace, it will be added back in as 642 // needed 643 while (i < size && Character.isWhitespace(passageReference.charAt(i))) { 644 i++; 645 } 646 } else { 647 buf.append(curChar); 648 i++; 649 } 650 } 651 652 return buf.toString(); 653 } 654 655 /** 656 * The default type 657 */ 658 private static PassageType defaultType = PassageType.SPEED; 659 660 /** 661 * How we create Passages 662 */ 663 private static PassageKeyFactory keyf = new PassageKeyFactory(); 664 } 665