1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 as published by
5    * the Free Software Foundation. This program is distributed in the hope
6    * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7    * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *       http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * Copyright: 2005
18   *     The copyright to this program is held by its authors.
19   *
20   * ID: $Id: ZVerseBackend.java 2230 2012-02-08 00:00:10Z dmsmith $
21   */
22  package org.crosswire.jsword.book.sword;
23  
24  import java.io.File;
25  import java.io.FileNotFoundException;
26  import java.io.IOException;
27  import java.io.RandomAccessFile;
28  import java.net.URI;
29  
30  import org.crosswire.common.activate.Activator;
31  import org.crosswire.common.activate.Lock;
32  import org.crosswire.common.compress.CompressorType;
33  import org.crosswire.common.util.FileUtil;
34  import org.crosswire.common.util.Logger;
35  import org.crosswire.common.util.NetUtil;
36  import org.crosswire.jsword.JSMsg;
37  import org.crosswire.jsword.book.BookException;
38  import org.crosswire.jsword.passage.Key;
39  import org.crosswire.jsword.passage.KeyUtil;
40  import org.crosswire.jsword.passage.Verse;
41  import org.crosswire.jsword.versification.Testament;
42  import org.crosswire.jsword.versification.Versification;
43  import org.crosswire.jsword.versification.system.Versifications;
44  
45  /**
46   * A backend to read compressed data verse based files. While the text file
47   * contains data compressed with ZIP or LZSS, it cannot be uncompressed using a
48   * stand alone zip utility, such as WinZip or gzip. The reason for this is that
49   * the data file is a concatenation of blocks of compressed data.
50   * 
51   * <p>
52   * The blocks can either be "b", book (aka testament); "c", chapter or "v",
53   * verse. The choice is a matter of trade offs. The program needs to uncompress
54   * a block into memory. Having it at the book level is very memory expensive.
55   * Having it at the verse level is very disk expensive, but takes the least
56   * amount of memory. The most common is chapter.
57   * </p>
58   * 
59   * <p>
60   * In order to find the data in the text file, we need to find the block. The
61   * first index (comp) is used for this. Each verse is indexed to a tuple (block
62   * number, verse start, verse size). This data allows us to find the correct
63   * block, and to extract the verse from the uncompressed block, but it does not
64   * help us uncompress the block.
65   * </p>
66   * 
67   * <p>
68   * Once the block is known, then the next index (idx) gives the location of the
69   * compressed block, its compressed size and its uncompressed size.
70   * </p>
71   * 
72   * <p>
73   * There are 3 files for each testament, 2 (comp and idx) are indexes into the
74   * third (text) which contains the data. The key into each index is the verse
75   * index within that testament, which is determined by book, chapter and verse
76   * of that key.
77   * </p>
78   * 
79   * <p>
80   * All numbers are stored in two's complement, little endian.
81   * </p>
82   * <p>
83   * Then proceed as follows, at all times working on the set of files for the
84   * testament in question:
85   * </p>
86   * 
87   * <pre>
88   * in the comp file, seek to the index * 10
89   * read 10 bytes.
90   * the block-index is the first 4 bytes (32-bit number)
91   * the next 4 bytes are the verse's offset within the uncompressed block and the last 2 bytes are its length.
92   * in the idx file seek to block-index * 12
93   * read 12 bytes
94   * the text-block-index is the first 4 bytes
95   * the data-size is the next 4 bytes
96   * the uncompressed-size is the next 4 bytes
97   * in the text file seek to the text-block-index
98   * read data-size bytes
99   * decipher them if they are encrypted
100  * uncompress them (ZIP or LZSS) into a byte array of uncompressed-size bytes
101  * </pre>
102  * 
103  * @see gnu.lgpl.License for license details.<br>
104  *      The copyright to this program is held by its authors.
105  * @author Joe Walker [joe at eireneh dot com]
106  */
107 public class ZVerseBackend extends AbstractBackend {
108     private static final String SUFFIX_COMP = "v";
109     private static final String SUFFIX_INDEX = "s";
110     private static final String SUFFIX_PART1 = "z";
111     private static final String SUFFIX_TEXT = "z";
112 
113     /**
114      * Simple ctor
115      */
116     public ZVerseBackend(SwordBookMetaData sbmd, BlockType blockType) {
117         super(sbmd);
118         this.blockType = blockType;
119     }
120 
121     /* (non-Javadoc)
122      * @see org.crosswire.common.activate.Activatable#activate(org.crosswire.common.activate.Lock)
123      */
124     public final void activate(Lock lock) {
125         try {
126             if (otIdxFile == null) {
127                 URI path = getExpandedDataPath();
128                 String otAllButLast = NetUtil.lengthenURI(path, File.separator + SwordConstants.FILE_OT + '.' + blockType.getIndicator() + SUFFIX_PART1).getPath();
129                 otIdxFile = new File(otAllButLast + SUFFIX_INDEX);
130                 otTextFile = new File(otAllButLast + SUFFIX_TEXT);
131                 otCompFile = new File(otAllButLast + SUFFIX_COMP);
132 
133                 String ntAllButLast = NetUtil.lengthenURI(path, File.separator + SwordConstants.FILE_NT + '.' + blockType.getIndicator() + SUFFIX_PART1).getPath();
134                 ntIdxFile = new File(ntAllButLast + SUFFIX_INDEX);
135                 ntTextFile = new File(ntAllButLast + SUFFIX_TEXT);
136                 ntCompFile = new File(ntAllButLast + SUFFIX_COMP);
137             }
138         } catch (BookException e) {
139             otIdxFile = null;
140             otTextFile = null;
141             otCompFile = null;
142 
143             ntIdxFile = null;
144             ntTextFile = null;
145             ntCompFile = null;
146 
147             return;
148         }
149 
150         if (otIdxFile.canRead()) {
151             try {
152                 otIdxRaf = new RandomAccessFile(otIdxFile, FileUtil.MODE_READ);
153                 otTextRaf = new RandomAccessFile(otTextFile, FileUtil.MODE_READ);
154                 otCompRaf = new RandomAccessFile(otCompFile, FileUtil.MODE_READ);
155             } catch (FileNotFoundException ex) {
156                 assert false : ex;
157                 log.error("Could not open OT", ex);
158                 otIdxRaf = null;
159                 otTextRaf = null;
160                 otCompRaf = null;
161             }
162         }
163 
164         if (ntIdxFile.canRead()) {
165             try {
166                 ntIdxRaf = new RandomAccessFile(ntIdxFile, FileUtil.MODE_READ);
167                 ntTextRaf = new RandomAccessFile(ntTextFile, FileUtil.MODE_READ);
168                 ntCompRaf = new RandomAccessFile(ntCompFile, FileUtil.MODE_READ);
169             } catch (FileNotFoundException ex) {
170                 assert false : ex;
171                 log.error("Could not open NT", ex);
172                 ntIdxRaf = null;
173                 ntTextRaf = null;
174                 ntCompRaf = null;
175             }
176         }
177 
178         active = true;
179     }
180 
181     /* (non-Javadoc)
182      * @see org.crosswire.common.activate.Activatable#deactivate(org.crosswire.common.activate.Lock)
183      */
184     public final void deactivate(Lock lock) {
185         if (ntIdxRaf != null) {
186             try {
187                 ntIdxRaf.close();
188                 ntTextRaf.close();
189                 ntCompRaf.close();
190             } catch (IOException ex) {
191                 log.error("failed to close nt files", ex);
192             } finally {
193                 ntIdxRaf = null;
194                 ntTextRaf = null;
195                 ntCompRaf = null;
196             }
197         }
198 
199         if (otIdxRaf != null) {
200             try {
201                 otIdxRaf.close();
202                 otTextRaf.close();
203                 otCompRaf.close();
204             } catch (IOException ex) {
205                 log.error("failed to close ot files", ex);
206             } finally {
207                 otIdxRaf = null;
208                 otTextRaf = null;
209                 otCompRaf = null;
210             }
211         }
212 
213         active = false;
214     }
215 
216     /* (non-Javadoc)
217      * @see org.crosswire.jsword.book.sword.AbstractBackend#contains(org.crosswire.jsword.passage.Key)
218      */
219     @Override
220     public boolean contains(Key key) {
221         checkActive();
222         Verse verse = KeyUtil.getVerse(key);
223 
224         try {
225             String v11nName = getBookMetaData().getProperty(ConfigEntryType.VERSIFICATION).toString();
226             Versification v11n = Versifications.instance().getVersification(v11nName);
227             int index = v11n.getOrdinal(verse);
228             Testament testament = v11n.getTestament(index);
229             index = v11n.getTestamentOrdinal(index);
230             RandomAccessFile compRaf = otCompRaf;
231             if (testament == Testament.NEW) {
232                 compRaf = ntCompRaf;
233             }
234 
235             // If Bible does not contain the desired testament, then false
236             if (compRaf == null) {
237                 return false;
238             }
239 
240             // 10 because the index is 10 bytes long for each verse
241             byte[] temp = SwordUtil.readRAF(compRaf, 1L * index * COMP_ENTRY_SIZE, COMP_ENTRY_SIZE);
242 
243             // If the Bible does not contain the desired verse, return nothing.
244             // Some Bibles have different versification, so the requested verse may not exist.
245             if (temp == null || temp.length == 0) {
246                 return false;
247             }
248 
249             // The data is little endian - extract the blockNum, verseStart and verseSize
250             int verseSize = SwordUtil.decodeLittleEndian16(temp, 8);
251 
252             return verseSize > 0;
253 
254         } catch (IOException e) {
255             return false;
256         }
257     }
258 
259     /* (non-Javadoc)
260      * @see org.crosswire.jsword.book.sword.AbstractBackend#getRawText(org.crosswire.jsword.passage.Key)
261      */
262     @Override
263     public String getRawText(Key key) throws BookException {
264         checkActive();
265 
266         SwordBookMetaData sbmd = getBookMetaData();
267         String charset = sbmd.getBookCharset();
268         String compressType = (String) sbmd.getProperty(ConfigEntryType.COMPRESS_TYPE);
269 
270         Verse verse = KeyUtil.getVerse(key);
271 
272         try {
273             String v11nName = getBookMetaData().getProperty(ConfigEntryType.VERSIFICATION).toString();
274             Versification v11n = Versifications.instance().getVersification(v11nName);
275             int index = v11n.getOrdinal(verse);
276             Testament testament = v11n.getTestament(index);
277             index = v11n.getTestamentOrdinal(index);
278             RandomAccessFile compRaf = otCompRaf;
279             RandomAccessFile idxRaf = otIdxRaf;
280             RandomAccessFile textRaf = otTextRaf;
281             if (testament == Testament.NEW) {
282                 compRaf = ntCompRaf;
283                 idxRaf = ntIdxRaf;
284                 textRaf = ntTextRaf;
285             }
286 
287             // If Bible does not contain the desired testament, return nothing.
288             if (compRaf == null) {
289                 return "";
290             }
291 
292             // 10 because the index is 10 bytes long for each verse
293             byte[] temp = SwordUtil.readRAF(compRaf, 1L * index * COMP_ENTRY_SIZE, COMP_ENTRY_SIZE);
294 
295             // If the Bible does not contain the desired verse, return nothing.
296             // Some Bibles have different versification, so the requested verse may not exist.
297             if (temp == null || temp.length == 0) {
298                 return "";
299             }
300 
301             // The data is little endian - extract the blockNum, verseStart
302             // and
303             // verseSize
304             long blockNum = SwordUtil.decodeLittleEndian32(temp, 0);
305             int verseStart = SwordUtil.decodeLittleEndian32(temp, 4);
306             int verseSize = SwordUtil.decodeLittleEndian16(temp, 8);
307 
308             // Can we get the data from the cache
309             byte[] uncompressed = null;
310             if (blockNum == lastBlockNum && testament == lastTestament) {
311                 uncompressed = lastUncompressed;
312             } else {
313                 // Then seek using this index into the idx file
314                 temp = SwordUtil.readRAF(idxRaf, blockNum * IDX_ENTRY_SIZE, IDX_ENTRY_SIZE);
315                 if (temp == null || temp.length == 0) {
316                     return "";
317                 }
318 
319                 int blockStart = SwordUtil.decodeLittleEndian32(temp, 0);
320                 int blockSize = SwordUtil.decodeLittleEndian32(temp, 4);
321                 int uncompressedSize = SwordUtil.decodeLittleEndian32(temp, 8);
322 
323                 // Read from the data file.
324                 byte[] data = SwordUtil.readRAF(textRaf, blockStart, blockSize);
325 
326                 decipher(data);
327 
328                 uncompressed = CompressorType.fromString(compressType).getCompressor(data).uncompress(uncompressedSize).toByteArray();
329 
330                 // cache the uncompressed data for next time
331                 lastBlockNum = blockNum;
332                 lastTestament = testament;
333                 lastUncompressed = uncompressed;
334             }
335 
336             // and cut out the required section.
337             byte[] chopped = new byte[verseSize];
338             System.arraycopy(uncompressed, verseStart, chopped, 0, verseSize);
339 
340             return SwordUtil.decode(key.getName(), chopped, charset);
341         } catch (IOException e) {
342             // TRANSLATOR: Common error condition: The file could not be read. There can be many reasons.
343             // {0} is a placeholder for the file.
344             throw new BookException(JSMsg.gettext("Error reading {0}", verse.getName()), e);
345         }
346     }
347 
348     /* (non-Javadoc)
349      * @see org.crosswire.jsword.book.sword.AbstractBackend#setAliasKey(org.crosswire.jsword.passage.Key, org.crosswire.jsword.passage.Key)
350      */
351     @Override
352     public void setAliasKey(Key alias, Key source) throws IOException {
353         throw new UnsupportedOperationException();
354     }
355 
356     /* (non-Javadoc)
357      * @see org.crosswire.jsword.book.sword.AbstractBackend#setRawText(org.crosswire.jsword.passage.Key, java.lang.String)
358      */
359     @Override
360     public void setRawText(Key key, String text) throws BookException, IOException {
361         throw new UnsupportedOperationException();
362     }
363 
364     /**
365      * Helper method so we can quickly activate ourselves on access
366      */
367     protected final void checkActive() {
368         if (!active) {
369             Activator.activate(this);
370         }
371     }
372 
373     /**
374      * Whether the book is blocked by Book, Chapter or Verse.
375      */
376     private BlockType blockType;
377 
378     /**
379      *
380      */
381     private Testament lastTestament;
382 
383     /**
384      *
385      */
386     private long lastBlockNum = -1;
387 
388     /**
389      *
390      */
391     private byte[] lastUncompressed;
392 
393     /**
394      * Are we active
395      */
396     private boolean active;
397 
398     /**
399      * The index random access files
400      */
401     private RandomAccessFile otIdxRaf;
402     private RandomAccessFile ntIdxRaf;
403 
404     /**
405      * The data random access files
406      */
407     private RandomAccessFile otTextRaf;
408     private RandomAccessFile ntTextRaf;
409 
410     /**
411      * The compressed random access files
412      */
413     private RandomAccessFile otCompRaf;
414     private RandomAccessFile ntCompRaf;
415 
416     /**
417      * The index random access files
418      */
419     private File otIdxFile;
420     private File ntIdxFile;
421 
422     /**
423      * The data random access files
424      */
425     private File otTextFile;
426     private File ntTextFile;
427 
428     /**
429      * The compressed random access files
430      */
431     private File otCompFile;
432     private File ntCompFile;
433 
434     /**
435      * How many bytes in the comp index?
436      */
437     private static final int COMP_ENTRY_SIZE = 10;
438 
439     /**
440      * How many bytes in the idx index?
441      */
442     private static final int IDX_ENTRY_SIZE = 12;
443 
444     /**
445      * The log stream
446      */
447     private static final Logger log = Logger.getLogger(ZVerseBackend.class);
448 }
449