Coverage Report - org.crosswire.jsword.book.sword.ZVerseBackend
 
Classes in this File Line Coverage Branch Coverage Complexity
ZVerseBackend
0%
0/154
0%
0/44
5
 
 1  
 /**
 2  
  * Distribution License:
 3  
  * JSword is free software; you can redistribute it and/or modify it under
 4  
  * the terms of the GNU Lesser General Public License, version 2.1 or later
 5  
  * as published by the Free Software Foundation. This program is distributed
 6  
  * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 7  
  * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 8  
  * See the GNU Lesser General Public License for more details.
 9  
  *
 10  
  * The License is available on the internet at:
 11  
  *      http://www.gnu.org/copyleft/lgpl.html
 12  
  * or by writing to:
 13  
  *      Free Software Foundation, Inc.
 14  
  *      59 Temple Place - Suite 330
 15  
  *      Boston, MA 02111-1307, USA
 16  
  *
 17  
  * © CrossWire Bible Society, 2005 - 2016
 18  
  *
 19  
  */
 20  
 package org.crosswire.jsword.book.sword;
 21  
 
 22  
 import java.io.IOException;
 23  
 import java.io.RandomAccessFile;
 24  
 
 25  
 import org.crosswire.common.compress.CompressorType;
 26  
 import org.crosswire.jsword.JSMsg;
 27  
 import org.crosswire.jsword.book.BookException;
 28  
 import org.crosswire.jsword.book.BookMetaData;
 29  
 import org.crosswire.jsword.book.sword.state.OpenFileStateManager;
 30  
 import org.crosswire.jsword.book.sword.state.ZVerseBackendState;
 31  
 import org.crosswire.jsword.passage.BitwisePassage;
 32  
 import org.crosswire.jsword.passage.Key;
 33  
 import org.crosswire.jsword.passage.KeyUtil;
 34  
 import org.crosswire.jsword.passage.RocketPassage;
 35  
 import org.crosswire.jsword.passage.Verse;
 36  
 import org.crosswire.jsword.versification.Testament;
 37  
 import org.crosswire.jsword.versification.Versification;
 38  
 import org.crosswire.jsword.versification.system.Versifications;
 39  
 import org.slf4j.Logger;
 40  
 import org.slf4j.LoggerFactory;
 41  
 
 42  
 /**
 43  
  * A backend to read compressed data verse based files. While the text file
 44  
  * contains data compressed with ZIP or LZSS, it cannot be uncompressed using a
 45  
  * stand alone zip utility, such as WinZip or gzip. The reason for this is that
 46  
  * the data file is a concatenation of blocks of compressed data.
 47  
  * 
 48  
  * <p>
 49  
  * The blocks can either be "b", book (aka testament); "c", chapter or "v",
 50  
  * verse. The choice is a matter of trade offs. The program needs to uncompress
 51  
  * a block into memory. Having it at the book level is very memory expensive.
 52  
  * Having it at the verse level is very disk expensive, but takes the least
 53  
  * amount of memory. The most common is chapter.
 54  
  * </p>
 55  
  * 
 56  
  * <p>
 57  
  * In order to find the data in the text file, we need to find the block. The
 58  
  * first index (idx) is used for this. Each verse is indexed to a tuple (block
 59  
  * number, verse start, verse size). This data allows us to find the correct
 60  
  * block, and to extract the verse from the uncompressed block, but it does not
 61  
  * help us uncompress the block.
 62  
  * </p>
 63  
  * 
 64  
  * <p>
 65  
  * Once the block is known, then the next index (comp) gives the location of the
 66  
  * compressed block, its compressed size and its uncompressed size.
 67  
  * </p>
 68  
  * 
 69  
  * <p>
 70  
  * There are 3 files for each testament, 2 (idx and comp) are indexes into the
 71  
  * third (text) which contains the data. The key into each index is the verse
 72  
  * index within that testament, which is determined by book, chapter and verse
 73  
  * of that key.
 74  
  * </p>
 75  
  * 
 76  
  * <p>
 77  
  * All unsigned numbers are stored 2-complement, little endian.
 78  
  * </p>
 79  
  * <p>
 80  
  * Then proceed as follows, at all times working on the set of files for the
 81  
  * testament in question:
 82  
  * </p>
 83  
  * 
 84  
  * The three files are laid out in the following fashion:
 85  
  * <ul>
 86  
  * <li>The idx file has one entry per verse in the versification. The number
 87  
  * of verses varies by versification and testament. Each entry describes the
 88  
  * compressed block in which it is found, the start of the verse in the
 89  
  * uncompressed block and the length of the verse.
 90  
  * <ul>
 91  
  * <li>Block number - 32-bit/4-bytes - the number of the entry in the comp file.</li>
 92  
  * <li>Verse start - 32 bit/4-bytes - the start of the verse in the uncompressed block in the dat file.</li>
 93  
  * <li>Verse length - 16-bit/2-bytes - the length of the verse in the uncompressed block from the dat file.</li>
 94  
  * </ul>
 95  
  * Algorithm:
 96  
  * <ul>
 97  
  * <li>Given the ordinal value of the verse, seek to the ordinal * 10 and read 10 bytes.</li>
 98  
  * <li>Decode the 10 bytes as Block Number, Verse start and length</li>
 99  
  * </ul>
 100  
  * </li>
 101  
  * <li>The comp file has one entry per block.
 102  
  * Each entry describes the location of a compressed block,
 103  
  * giving its start and size in the next file.
 104  
  * <ul>
 105  
  * <li>Block Start - 32-bit/4-byte - the start of the block in the dat file</li>
 106  
  * <li>Compressed Block Size - 32-bit/4-byte - the size of the compressed block in the dat file</li>
 107  
  * <li>Uncompressed Block Size - 32-bit/4-byte - the size of the block after uncompressing</li>
 108  
  * </ul>
 109  
  * Algorithm:
 110  
  * <ul>
 111  
  * <li>Given a block number, seek to block-index * 12 and read 12 bytes</li>
 112  
  * <li>Decode the 12 bytes as Block Start, Compressed Block Size and Uncompressed Block Size</li>
 113  
  * </ul>
 114  
  * </li>
 115  
  * <li> The dat file is compressed blocks of verses.
 116  
  * <br>
 117  
  * Algorithm:
 118  
  * <ul>
 119  
  * <li>Given the entry from the comp file, seek to the start and read the indicated compressed block size</li>
 120  
  * <li>If the book is enciphered, decipher it.</li>
 121  
  * <li>Uncompress the block, using the uncompressed size as an optimization.</li>
 122  
  * <li>Using the verse start, seek to that location in the uncompressed block and read the indicated verse size.</li>
 123  
  * <li>Convert the bytes to a String using the book's indicated charset.</li>
 124  
  * </ul>
 125  
  * </li>
 126  
  * </ul>
 127  
  * 
 128  
  * @see gnu.lgpl.License The GNU Lesser General Public License for details.
 129  
  * @author Joe Walker
 130  
  * @author DM Smith
 131  
  */
 132  0
 public class ZVerseBackend extends AbstractBackend<ZVerseBackendState> {
 133  
     /**
 134  
      * Creates a backend for a book, remembering how its compressed data is blocked.
 135  
      * @param sbmd the metadata of the book; passed on to the superclass constructor
 136  
      * @param blockType the block type that is later passed along when the file state is obtained
 137  
      */
 138  
     public ZVerseBackend(SwordBookMetaData sbmd, BlockType blockType) {
 139  0
         super(sbmd);
 140  0
         this.blockType = blockType;
 141  0
     }
 142  
 
 143  
     /* This method assumes single keys. It is the responsibility of the caller to provide the iteration. 
 144  
      * 
 145  
      * FIXME: this could be refactored to push the iterations down, but no performance benefit would be gained since we have a manager that keeps the file accesses open
 146  
      * (non-Javadoc)
 147  
      * @see org.crosswire.jsword.book.sword.AbstractBackend#contains(org.crosswire.jsword.passage.Key)
 148  
      */
 149  
     @Override
 150  
     public boolean contains(Key key) {
 151  0
         return getRawTextLength(key) > 0;
 152  
     }
 153  
 
 154  
     /* (non-Javadoc)
 155  
      * @see org.crosswire.jsword.book.sword.AbstractBackend#size(org.crosswire.jsword.passage.Key)
 156  
      */
 157  
     @Override
 158  
     public int getRawTextLength(Key key) {
 159  0
         ZVerseBackendState rafBook = null;
 160  
         try {
 161  0
             rafBook = initState();
 162  
 
 163  0
             String v11nName = getBookMetaData().getProperty(BookMetaData.KEY_VERSIFICATION);
 164  0
             Versification v11n = Versifications.instance().getVersification(v11nName);
 165  0
             Verse verse = KeyUtil.getVerse(key);
 166  
 
 167  0
             int index = verse.getOrdinal();
 168  0
             Testament testament = v11n.getTestament(index);
 169  0
             index = v11n.getTestamentOrdinal(index);
 170  
 
 171  0
             RandomAccessFile idxRaf = rafBook.getIdxRaf(testament);
 172  
 
 173  
             // If Bible does not contain the desired testament, then false
 174  0
             if (idxRaf == null) {
 175  0
                 return 0;
 176  
             }
 177  
 
 178  
             // 10 because the index is 10 bytes long for each verse
 179  0
             byte[] temp = SwordUtil.readRAF(idxRaf, 1L * index * IDX_ENTRY_SIZE, IDX_ENTRY_SIZE);
 180  
 
 181  
             // If the Bible does not contain the desired verse, return nothing.
 182  
             // Some Bibles have different versification, so the requested verse
 183  
             // may not exist.
 184  0
             if (temp == null || temp.length == 0) {
 185  0
                 return 0;
 186  
             }
 187  
 
 188  
             // The data is little endian - extract the verseSize
 189  0
             return SwordUtil.decodeLittleEndian16(temp, 8);
 190  
 
 191  0
         } catch (IOException e) {
 192  0
             return 0;
 193  0
         } catch (BookException e) {
 194  
             // FIXME(CJB): fail silently as before, but i don't think this is
 195  
             // correct behaviour - would cause API changes
 196  0
             log.error("Unable to ascertain key validity", e);
 197  0
             return 0;
 198  
         } finally {
 199  0
             OpenFileStateManager.instance().release(rafBook);
 200  
         }
 201  
     }
 202  
 
 203  
     /* (non-Javadoc)
 204  
      * @see org.crosswire.jsword.book.sword.AbstractBackend#getGlobalKeyList()
 205  
      */
 206  
     @Override
 207  
     public Key getGlobalKeyList() throws BookException {
 208  0
         ZVerseBackendState rafBook = null;
 209  
         try {
 210  0
             rafBook = initState();
 211  
 
 212  0
             String v11nName = getBookMetaData().getProperty(BookMetaData.KEY_VERSIFICATION);
 213  0
             Versification v11n = Versifications.instance().getVersification(v11nName);
 214  
 
 215  0
             Testament[] testaments = new Testament[] {
 216  
                     Testament.OLD, Testament.NEW
 217  
             };
 218  
 
 219  0
             BitwisePassage passage = new RocketPassage(v11n);
 220  0
             passage.raiseEventSuppresion();
 221  0
             passage.raiseNormalizeProtection();
 222  
 
 223  0
             for (Testament currentTestament : testaments) {
 224  0
                 RandomAccessFile idxRaf = rafBook.getIdxRaf(currentTestament);
 225  
 
 226  
                 // If Bible does not contain the desired testament, then false
 227  0
                 if (idxRaf == null) {
 228  
                     // no keys in this testament
 229  0
                     continue;
 230  
                 }
 231  
 
 232  0
                 int maxIndex = v11n.getCount(currentTestament) - 1;
 233  
 
 234  
                 // Read in the whole index, a few hundred Kb at most.
 235  0
                 byte[] temp = SwordUtil.readRAF(idxRaf, 0, IDX_ENTRY_SIZE * maxIndex);
 236  
 
 237  
                 // For each entry of 10 bytes, the length of the verse in bytes
 238  
                 // is in the last 2 bytes. If both bytes are 0, then there is no content.
 239  0
                 for (int ii = 0; ii < temp.length; ii += IDX_ENTRY_SIZE) {
 240  
                     // This can be simplified to temp[ii + 8] == 0 && temp[ii + 9] == 0.
 241  
                     // int verseSize = SwordUtil.decodeLittleEndian16(temp, ii + 8);
 242  
                     // if (verseSize > 0) {
 243  0
                     if (temp[ii + 8] != 0 || temp[ii + 9] != 0) {
 244  0
                         int ordinal = ii / IDX_ENTRY_SIZE;
 245  0
                         passage.addVersifiedOrdinal(v11n.getOrdinal(currentTestament, ordinal));
 246  
                     }
 247  
                 }
 248  
             }
 249  
 
 250  0
             passage.lowerNormalizeProtection();
 251  0
             passage.lowerEventSuppressionAndTest();
 252  
 
 253  0
             return passage;
 254  0
         } catch (IOException e) {
 255  0
             throw new BookException(JSMsg.gettext("Unable to read key list from book."));
 256  
         } finally {
 257  0
             OpenFileStateManager.instance().release(rafBook);
 258  
         }
 259  
     }
 260  
 
 261  
     /* (non-Javadoc)
 262  
      * @see org.crosswire.jsword.book.sword.StatefulFileBackedBackend#initState()
 263  
      */
 264  
     public ZVerseBackendState initState() throws BookException {
 265  0
         return OpenFileStateManager.instance().getZVerseBackendState(getBookMetaData(), blockType);
 266  
     }
 267  
 
 268  
     /* (non-Javadoc)
 269  
      * @see org.crosswire.jsword.book.sword.StatefulFileBackedBackend#readRawContent(org.crosswire.jsword.book.sword.state.OpenFileState, org.crosswire.jsword.passage.Key)
 270  
      */
 271  
     public String readRawContent(ZVerseBackendState rafBook, Key key) throws IOException {
 272  
 
 273  0
         BookMetaData bookMetaData = getBookMetaData();
 274  0
         final String charset = bookMetaData.getBookCharset();
 275  0
         final String compressType = bookMetaData.getProperty(SwordBookMetaData.KEY_COMPRESS_TYPE);
 276  
 
 277  0
         final String v11nName = getBookMetaData().getProperty(BookMetaData.KEY_VERSIFICATION);
 278  0
         final Versification v11n = Versifications.instance().getVersification(v11nName);
 279  0
         Verse verse = KeyUtil.getVerse(key);
 280  
 
 281  0
         int index = verse.getOrdinal();
 282  0
         final Testament testament = v11n.getTestament(index);
 283  0
         index = v11n.getTestamentOrdinal(index);
 284  
         final RandomAccessFile idxRaf;
 285  
         final RandomAccessFile compRaf;
 286  
         final RandomAccessFile textRaf;
 287  
 
 288  0
         idxRaf = rafBook.getIdxRaf(testament);
 289  0
         compRaf = rafBook.getCompRaf(testament);
 290  0
         textRaf = rafBook.getTextRaf(testament);
 291  
 
 292  
         // If Bible does not contain the desired testament, return nothing.
 293  0
         if (idxRaf == null) {
 294  0
             return "";
 295  
         }
 296  
 
 297  
         //dumpIdxRaf(v11n, 0, compRaf);
 298  
         //dumpCompRaf(idxRaf);
 299  
         // 10 because the index is 10 bytes long for each verse
 300  0
         byte[] temp = SwordUtil.readRAF(idxRaf, 1L * index * IDX_ENTRY_SIZE, IDX_ENTRY_SIZE);
 301  
 
 302  
         // If the Bible does not contain the desired verse, return nothing.
 303  
         // Some Bibles have different versification, so the requested verse
 304  
         // may not exist.
 305  0
         if (temp == null || temp.length == 0) {
 306  0
             return "";
 307  
         }
 308  
 
 309  
         // The data is little endian - extract the blockNum, verseStart
 310  
         // and
 311  
         // verseSize
 312  0
         final long blockNum = SwordUtil.decodeLittleEndian32(temp, 0);
 313  0
         final int verseStart = SwordUtil.decodeLittleEndian32(temp, 4);
 314  0
         final int verseSize = SwordUtil.decodeLittleEndian16(temp, 8);
 315  
 
 316  
         // Can we get the data from the cache
 317  0
         byte[] uncompressed = null;
 318  0
         if (blockNum == rafBook.getLastBlockNum() && testament == rafBook.getLastTestament()) {
 319  0
             uncompressed = rafBook.getLastUncompressed();
 320  
         } else {
 321  
             // Then seek using this index into the idx file
 322  0
             temp = SwordUtil.readRAF(compRaf, blockNum * COMP_ENTRY_SIZE, COMP_ENTRY_SIZE);
 323  0
             if (temp == null || temp.length == 0) {
 324  0
                 return "";
 325  
             }
 326  
 
 327  0
             final int blockStart = SwordUtil.decodeLittleEndian32(temp, 0);
 328  0
             final int blockSize = SwordUtil.decodeLittleEndian32(temp, 4);
 329  0
             final int uncompressedSize = SwordUtil.decodeLittleEndian32(temp, 8);
 330  
 
 331  
             // Read from the data file.
 332  0
             final byte[] data = SwordUtil.readRAF(textRaf, blockStart, blockSize);
 333  
 
 334  0
             decipher(data);
 335  
 
 336  0
             uncompressed = CompressorType.fromString(compressType).getCompressor(data).uncompress(uncompressedSize).toByteArray();
 337  
 
 338  
             // cache the uncompressed data for next time
 339  0
             rafBook.setLastBlockNum(blockNum);
 340  0
             rafBook.setLastTestament(testament);
 341  0
             rafBook.setLastUncompressed(uncompressed);
 342  
         }
 343  
 
 344  
         // and cut out the required section.
 345  0
         final byte[] chopped = new byte[verseSize];
 346  0
         System.arraycopy(uncompressed, verseStart, chopped, 0, verseSize);
 347  
 
 348  0
         return SwordUtil.decode(key.getName(), chopped, charset);
 349  
 
 350  
     }
 351  
 
 352  
     /* Not implemented by this backend: every call throws UnsupportedOperationException.
      * (non-Javadoc)
 353  
      * @see org.crosswire.jsword.book.sword.AbstractBackend#setAliasKey(org.crosswire.jsword.passage.Key, org.crosswire.jsword.passage.Key)
 354  
      */
 355  
     public void setAliasKey(ZVerseBackendState rafBook, Key alias, Key source) throws IOException {
 356  0
         throw new UnsupportedOperationException();
 357  
     }
 358  
 
 359  
     /* Not implemented by this backend: every call throws UnsupportedOperationException.
      * (non-Javadoc)
 360  
      * @see org.crosswire.jsword.book.sword.AbstractBackend#setRawText(org.crosswire.jsword.passage.Key, java.lang.String)
 361  
      */
 362  
     public void setRawText(ZVerseBackendState rafBook, Key key, String text) throws BookException, IOException {
 363  0
         throw new UnsupportedOperationException();
 364  
     }
 365  
 
 366  
     /** 
 367  
      * Experimental code.
 368  
      * 
 369  
      * @param v11n
 370  
      * @param ordinalStart
 371  
      * @param raf
 372  
      */
 373  
     public void dumpIdxRaf(Versification v11n, int ordinalStart, RandomAccessFile raf) {
 374  0
         long end = -1;
 375  
         try {
 376  0
             end = raf.length();
 377  0
         } catch (IOException e) {
 378  
             // TODO Auto-generated catch block
 379  0
             e.printStackTrace();
 380  0
         }
 381  
 
 382  0
         int i = ordinalStart;
 383  0
         StringBuilder buf = new StringBuilder();
 384  0
         System.out.println("osisID\tblock\tstart\tsize");
 385  0
         for (long offset = 0; offset < end; offset += IDX_ENTRY_SIZE) {
 386  
             // 10 because the index is 10 bytes long for each verse
 387  0
             byte[] temp = null;
 388  
             try {
 389  0
                 temp = SwordUtil.readRAF(raf, offset, IDX_ENTRY_SIZE);
 390  0
             } catch (IOException e) {
 391  0
                 e.printStackTrace();
 392  0
             }
 393  
 
 394  
             // If the Bible does not contain the desired verse, return nothing.
 395  
             // Some Bibles have different versification, so the requested verse
 396  
             // may not exist.
 397  0
             long blockNum = -1;
 398  0
             int verseStart = -1;
 399  0
             int verseSize = -1;
 400  0
             if (temp != null && temp.length > 0) {
 401  
                 // The data is little endian - extract the blockNum, verseStart and verseSize
 402  0
                 blockNum = SwordUtil.decodeLittleEndian32(temp, 0);
 403  0
                 verseStart = SwordUtil.decodeLittleEndian32(temp, 4);
 404  0
                 verseSize = SwordUtil.decodeLittleEndian16(temp, 8);
 405  
             }
 406  0
             buf.setLength(0);
 407  0
             buf.append(v11n.decodeOrdinal(i++).getOsisID());
 408  0
             buf.append('\t');
 409  0
             buf.append(blockNum);
 410  0
             buf.append('\t');
 411  0
             buf.append(verseStart);
 412  0
             buf.append('\t');
 413  0
             buf.append(verseSize);
 414  0
             System.out.println(buf.toString());
 415  
         }
 416  0
     }
 417  
 
 418  
     /**
 419  
      * Experimental code.
 420  
      * 
 421  
      * @param raf
 422  
      */
 423  
     public void dumpCompRaf(RandomAccessFile raf) {
 424  0
         long end = -1;
 425  
         try {
 426  0
             end = raf.length();
 427  0
         } catch (IOException e) {
 428  
             // TODO Auto-generated catch block
 429  0
             e.printStackTrace();
 430  0
         }
 431  
 
 432  0
         int blockNum = 0;
 433  0
         StringBuilder buf = new StringBuilder();
 434  0
         System.out.println("block\tstart\tsize\tuncompressed");
 435  0
         for (long offset = 0; offset < end; offset += COMP_ENTRY_SIZE) {
 436  
             // 12 because the index is 12 bytes long for each verse
 437  0
             byte[] temp = null;
 438  
             try {
 439  0
                 temp = SwordUtil.readRAF(raf, offset, COMP_ENTRY_SIZE);
 440  0
             } catch (IOException e) {
 441  0
                 e.printStackTrace();
 442  0
             }
 443  
 
 444  
             // If the Bible does not contain the desired verse, return nothing.
 445  
             // Some Bibles have different versification, so the requested verse
 446  
             // may not exist.
 447  0
             int blockStart = -1;
 448  0
             int blockSize = -1;
 449  0
             int uncompressedSize = -1;
 450  0
             if (temp != null && temp.length > 0) {
 451  
                 // The data is little endian - extract the blockNum, verseStar and verseSize
 452  0
                  blockStart = SwordUtil.decodeLittleEndian32(temp, 0);
 453  0
                  blockSize = SwordUtil.decodeLittleEndian32(temp, 4);
 454  0
                  uncompressedSize = SwordUtil.decodeLittleEndian32(temp, 8);
 455  
             }
 456  0
             buf.setLength(0);
 457  0
             buf.append(blockNum);
 458  0
             buf.append('\t');
 459  0
             buf.append(blockStart);
 460  0
             buf.append('\t');
 461  0
             buf.append(blockSize);
 462  0
             buf.append('\t');
 463  0
             buf.append(uncompressedSize);
 464  0
             System.out.println(buf.toString());
 465  
         }
 466  0
     }
 467  
 
 468  
     /**
 469  
      * Whether the book is blocked by Book, Chapter or Verse.
      * Set once in the constructor and passed along when the file state is obtained.
 470  
      */
 471  
     private final BlockType blockType;
 472  
 
 473  
     /**
 474  
      * The size in bytes of one entry in the idx index:
      * block number (4), verse start (4) and verse size (2).
 475  
      */
 476  
     private static final int IDX_ENTRY_SIZE = 10;
 477  
 
 478  
     /**
 479  
      * The size in bytes of one entry in the comp index:
      * block start (4), compressed size (4) and uncompressed size (4).
 480  
      */
 481  
     private static final int COMP_ENTRY_SIZE = 12;
 482  
 
 483  
     /**
 484  
      * The logger for this class.
 485  
      */
 486  0
     private static final Logger log = LoggerFactory.getLogger(ZVerseBackend.class);
 487  
 }