| ZVerseBackend.java |
1 /**
2 * Distribution License:
3 * JSword is free software; you can redistribute it and/or modify it under
4 * the terms of the GNU Lesser General Public License, version 2.1 as published by
5 * the Free Software Foundation. This program is distributed in the hope
6 * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8 * See the GNU Lesser General Public License for more details.
9 *
10 * The License is available on the internet at:
11 * http://www.gnu.org/copyleft/lgpl.html
12 * or by writing to:
13 * Free Software Foundation, Inc.
14 * 59 Temple Place - Suite 330
15 * Boston, MA 02111-1307, USA
16 *
17 * Copyright: 2005
18 * The copyright to this program is held by it's authors.
19 *
20 * ID: $Id: ZVerseBackend.java 2230 2012-02-08 00:00:10Z dmsmith $
21 */
22 package org.crosswire.jsword.book.sword;
23
24 import java.io.File;
25 import java.io.FileNotFoundException;
26 import java.io.IOException;
27 import java.io.RandomAccessFile;
28 import java.net.URI;
29
30 import org.crosswire.common.activate.Activator;
31 import org.crosswire.common.activate.Lock;
32 import org.crosswire.common.compress.CompressorType;
33 import org.crosswire.common.util.FileUtil;
34 import org.crosswire.common.util.Logger;
35 import org.crosswire.common.util.NetUtil;
36 import org.crosswire.jsword.JSMsg;
37 import org.crosswire.jsword.book.BookException;
38 import org.crosswire.jsword.passage.Key;
39 import org.crosswire.jsword.passage.KeyUtil;
40 import org.crosswire.jsword.passage.Verse;
41 import org.crosswire.jsword.versification.Testament;
42 import org.crosswire.jsword.versification.Versification;
43 import org.crosswire.jsword.versification.system.Versifications;
44
45 /**
46 * A backend to read compressed, verse-based data files. While the text file
47 * contains data compressed with ZIP or LZSS, it cannot be uncompressed using a
48 * stand alone zip utility, such as WinZip or gzip. The reason for this is that
49 * the data file is a concatenation of blocks of compressed data.
50 *
51 * <p>
52 * The blocks can either be "b", book (aka testament); "c", chapter or "v",
53 * verse. The choice is a matter of trade offs. The program needs to uncompress
54 * a block into memory. Having it at the book level is very memory expensive.
55 * Having it at the verse level is very disk expensive, but takes the least
56 * amount of memory. The most common is chapter.
57 * </p>
58 *
59 * <p>
60 * In order to find the data in the text file, we need to find the block. The
61 * first index (comp) is used for this. Each verse is indexed to a tuple (block
62 * number, verse start, verse size). This data allows us to find the correct
63 * block, and to extract the verse from the uncompressed block, but it does not
64 * help us uncompress the block.
65 * </p>
66 *
67 * <p>
68 * Once the block is known, then the next index (idx) gives the location of the
69 * compressed block, its compressed size and its uncompressed size.
70 * </p>
71 *
72 * <p>
73 * There are 3 files for each testament, 2 (comp and idx) are indexes into the
74 * third (text) which contains the data. The key into each index is the verse
75 * index within that testament, which is determined by book, chapter and verse
76 * of that key.
77 * </p>
78 *
79 * <p>
80 * All numbers are stored as two's complement, little endian.
81 * </p>
82 * <p>
83 * Then proceed as follows, at all times working on the set of files for the
84 * testament in question:
85 * </p>
86 *
87 * <pre>
88 * in the comp file, seek to the index * 10
89 * read 10 bytes.
90 * the block-index is the first 4 bytes (32-bit number)
91 * the next 4 bytes are the verse offset within the uncompressed block, and the last 2 bytes are the verse length.
92 * in the idx file seek to block-index * 12
93 * read 12 bytes
94 * the text-block-index is the first 4 bytes
95 * the data-size is the next 4 bytes
96 * the uncompressed-size is the next 4 bytes
97 * in the text file seek to the text-block-index
98 * read data-size bytes
99 * decipher them if they are encrypted
100 * uncompress them (ZIP or LZSS) into a byte array of uncompressed-size bytes
101 * </pre>
102 *
103 * @see gnu.lgpl.License for license details.<br>
104 * The copyright to this program is held by its authors.
105 * @author Joe Walker [joe at eireneh dot com]
106 */
107 public class ZVerseBackend extends AbstractBackend {
108 private static final String SUFFIX_COMP = "v";
109 private static final String SUFFIX_INDEX = "s";
110 private static final String SUFFIX_PART1 = "z";
111 private static final String SUFFIX_TEXT = "z";
112
113 /**
114 * Simple ctor
115 */
116 public ZVerseBackend(SwordBookMetaData sbmd, BlockType blockType) {
117 super(sbmd);
118 this.blockType = blockType;
119 }
120
121 /* (non-Javadoc)
122 * @see org.crosswire.common.activate.Activatable#activate(org.crosswire.common.activate.Lock)
123 */
124 public final void activate(Lock lock) {
125 try {
126 if (otIdxFile == null) {
127 URI path = getExpandedDataPath();
128 String otAllButLast = NetUtil.lengthenURI(path, File.separator + SwordConstants.FILE_OT + '.' + blockType.getIndicator() + SUFFIX_PART1).getPath();
129 otIdxFile = new File(otAllButLast + SUFFIX_INDEX);
130 otTextFile = new File(otAllButLast + SUFFIX_TEXT);
131 otCompFile = new File(otAllButLast + SUFFIX_COMP);
132
133 String ntAllButLast = NetUtil.lengthenURI(path, File.separator + SwordConstants.FILE_NT + '.' + blockType.getIndicator() + SUFFIX_PART1).getPath();
134 ntIdxFile = new File(ntAllButLast + SUFFIX_INDEX);
135 ntTextFile = new File(ntAllButLast + SUFFIX_TEXT);
136 ntCompFile = new File(ntAllButLast + SUFFIX_COMP);
137 }
138 } catch (BookException e) {
139 otIdxFile = null;
140 otTextFile = null;
141 otCompFile = null;
142
143 ntIdxFile = null;
144 ntTextFile = null;
145 ntCompFile = null;
146
147 return;
148 }
149
150 if (otIdxFile.canRead()) {
151 try {
152 otIdxRaf = new RandomAccessFile(otIdxFile, FileUtil.MODE_READ);
153 otTextRaf = new RandomAccessFile(otTextFile, FileUtil.MODE_READ);
154 otCompRaf = new RandomAccessFile(otCompFile, FileUtil.MODE_READ);
155 } catch (FileNotFoundException ex) {
156 assert false : ex;
157 log.error("Could not open OT", ex);
158 otIdxRaf = null;
159 otTextRaf = null;
160 otCompRaf = null;
161 }
162 }
163
164 if (ntIdxFile.canRead()) {
165 try {
166 ntIdxRaf = new RandomAccessFile(ntIdxFile, FileUtil.MODE_READ);
167 ntTextRaf = new RandomAccessFile(ntTextFile, FileUtil.MODE_READ);
168 ntCompRaf = new RandomAccessFile(ntCompFile, FileUtil.MODE_READ);
169 } catch (FileNotFoundException ex) {
170 assert false : ex;
171 log.error("Could not open NT", ex);
172 ntIdxRaf = null;
173 ntTextRaf = null;
174 ntCompRaf = null;
175 }
176 }
177
178 active = true;
179 }
180
181 /* (non-Javadoc)
182 * @see org.crosswire.common.activate.Activatable#deactivate(org.crosswire.common.activate.Lock)
183 */
184 public final void deactivate(Lock lock) {
185 if (ntIdxRaf != null) {
186 try {
187 ntIdxRaf.close();
188 ntTextRaf.close();
189 ntCompRaf.close();
190 } catch (IOException ex) {
191 log.error("failed to close nt files", ex);
192 } finally {
193 ntIdxRaf = null;
194 ntTextRaf = null;
195 ntCompRaf = null;
196 }
197 }
198
199 if (otIdxRaf != null) {
200 try {
201 otIdxRaf.close();
202 otTextRaf.close();
203 otCompRaf.close();
204 } catch (IOException ex) {
205 log.error("failed to close ot files", ex);
206 } finally {
207 otIdxRaf = null;
208 otTextRaf = null;
209 otCompRaf = null;
210 }
211 }
212
213 active = false;
214 }
215
216 /* (non-Javadoc)
217 * @see org.crosswire.jsword.book.sword.AbstractBackend#contains(org.crosswire.jsword.passage.Key)
218 */
219 @Override
220 public boolean contains(Key key) {
221 checkActive();
222 Verse verse = KeyUtil.getVerse(key);
223
224 try {
225 String v11nName = getBookMetaData().getProperty(ConfigEntryType.VERSIFICATION).toString();
226 Versification v11n = Versifications.instance().getVersification(v11nName);
227 int index = v11n.getOrdinal(verse);
228 Testament testament = v11n.getTestament(index);
229 index = v11n.getTestamentOrdinal(index);
230 RandomAccessFile compRaf = otCompRaf;
231 if (testament == Testament.NEW) {
232 compRaf = ntCompRaf;
233 }
234
235 // If Bible does not contain the desired testament, then false
236 if (compRaf == null) {
237 return false;
238 }
239
240 // 10 because the index is 10 bytes long for each verse
241 byte[] temp = SwordUtil.readRAF(compRaf, 1L * index * COMP_ENTRY_SIZE, COMP_ENTRY_SIZE);
242
243 // If the Bible does not contain the desired verse, return nothing.
244 // Some Bibles have different versification, so the requested verse may not exist.
245 if (temp == null || temp.length == 0) {
246 return false;
247 }
248
249 // The data is little endian - extract the blockNum, verseStart and verseSize
250 int verseSize = SwordUtil.decodeLittleEndian16(temp, 8);
251
252 return verseSize > 0;
253
254 } catch (IOException e) {
255 return false;
256 }
257 }
258
259 /* (non-Javadoc)
260 * @see org.crosswire.jsword.book.sword.AbstractBackend#getRawText(org.crosswire.jsword.passage.Key)
261 */
262 @Override
263 public String getRawText(Key key) throws BookException {
264 checkActive();
265
266 SwordBookMetaData sbmd = getBookMetaData();
267 String charset = sbmd.getBookCharset();
268 String compressType = (String) sbmd.getProperty(ConfigEntryType.COMPRESS_TYPE);
269
270 Verse verse = KeyUtil.getVerse(key);
271
272 try {
273 String v11nName = getBookMetaData().getProperty(ConfigEntryType.VERSIFICATION).toString();
274 Versification v11n = Versifications.instance().getVersification(v11nName);
275 int index = v11n.getOrdinal(verse);
276 Testament testament = v11n.getTestament(index);
277 index = v11n.getTestamentOrdinal(index);
278 RandomAccessFile compRaf = otCompRaf;
279 RandomAccessFile idxRaf = otIdxRaf;
280 RandomAccessFile textRaf = otTextRaf;
281 if (testament == Testament.NEW) {
282 compRaf = ntCompRaf;
283 idxRaf = ntIdxRaf;
284 textRaf = ntTextRaf;
285 }
286
287 // If Bible does not contain the desired testament, return nothing.
288 if (compRaf == null) {
289 return "";
290 }
291
292 // 10 because the index is 10 bytes long for each verse
293 byte[] temp = SwordUtil.readRAF(compRaf, 1L * index * COMP_ENTRY_SIZE, COMP_ENTRY_SIZE);
294
295 // If the Bible does not contain the desired verse, return nothing.
296 // Some Bibles have different versification, so the requested verse may not exist.
297 if (temp == null || temp.length == 0) {
298 return "";
299 }
300
301 // The data is little endian - extract the blockNum, verseStart
302 // and
303 // verseSize
304 long blockNum = SwordUtil.decodeLittleEndian32(temp, 0);
305 int verseStart = SwordUtil.decodeLittleEndian32(temp, 4);
306 int verseSize = SwordUtil.decodeLittleEndian16(temp, 8);
307
308 // Can we get the data from the cache
309 byte[] uncompressed = null;
310 if (blockNum == lastBlockNum && testament == lastTestament) {
311 uncompressed = lastUncompressed;
312 } else {
313 // Then seek using this index into the idx file
314 temp = SwordUtil.readRAF(idxRaf, blockNum * IDX_ENTRY_SIZE, IDX_ENTRY_SIZE);
315 if (temp == null || temp.length == 0) {
316 return "";
317 }
318
319 int blockStart = SwordUtil.decodeLittleEndian32(temp, 0);
320 int blockSize = SwordUtil.decodeLittleEndian32(temp, 4);
321 int uncompressedSize = SwordUtil.decodeLittleEndian32(temp, 8);
322
323 // Read from the data file.
324 byte[] data = SwordUtil.readRAF(textRaf, blockStart, blockSize);
325
326 decipher(data);
327
328 uncompressed = CompressorType.fromString(compressType).getCompressor(data).uncompress(uncompressedSize).toByteArray();
329
330 // cache the uncompressed data for next time
331 lastBlockNum = blockNum;
332 lastTestament = testament;
333 lastUncompressed = uncompressed;
334 }
335
336 // and cut out the required section.
337 byte[] chopped = new byte[verseSize];
338 System.arraycopy(uncompressed, verseStart, chopped, 0, verseSize);
339
340 return SwordUtil.decode(key.getName(), chopped, charset);
341 } catch (IOException e) {
342 // TRANSLATOR: Common error condition: The file could not be read. There can be many reasons.
343 // {0} is a placeholder for the file.
344 throw new BookException(JSMsg.gettext("Error reading {0}", verse.getName()), e);
345 }
346 }
347
348 /* (non-Javadoc)
349 * @see org.crosswire.jsword.book.sword.AbstractBackend#setAliasKey(org.crosswire.jsword.passage.Key, org.crosswire.jsword.passage.Key)
350 */
351 @Override
352 public void setAliasKey(Key alias, Key source) throws IOException {
353 throw new UnsupportedOperationException();
354 }
355
356 /* (non-Javadoc)
357 * @see org.crosswire.jsword.book.sword.AbstractBackend#setRawText(org.crosswire.jsword.passage.Key, java.lang.String)
358 */
359 @Override
360 public void setRawText(Key key, String text) throws BookException, IOException {
361 throw new UnsupportedOperationException();
362 }
363
364 /**
365 * Helper method so we can quickly activate ourselves on access
366 */
367 protected final void checkActive() {
368 if (!active) {
369 Activator.activate(this);
370 }
371 }
372
373 /**
374 * Whether the book is blocked by Book, Chapter or Verse.
375 */
376 private BlockType blockType;
377
378 /**
379 *
380 */
381 private Testament lastTestament;
382
383 /**
384 *
385 */
386 private long lastBlockNum = -1;
387
388 /**
389 *
390 */
391 private byte[] lastUncompressed;
392
393 /**
394 * Are we active
395 */
396 private boolean active;
397
398 /**
399 * The index random access files
400 */
401 private RandomAccessFile otIdxRaf;
402 private RandomAccessFile ntIdxRaf;
403
404 /**
405 * The data random access files
406 */
407 private RandomAccessFile otTextRaf;
408 private RandomAccessFile ntTextRaf;
409
410 /**
411 * The compressed random access files
412 */
413 private RandomAccessFile otCompRaf;
414 private RandomAccessFile ntCompRaf;
415
416 /**
417 * The index random access files
418 */
419 private File otIdxFile;
420 private File ntIdxFile;
421
422 /**
423 * The data random access files
424 */
425 private File otTextFile;
426 private File ntTextFile;
427
428 /**
429 * The compressed random access files
430 */
431 private File otCompFile;
432 private File ntCompFile;
433
434 /**
435 * How many bytes in the comp index?
436 */
437 private static final int COMP_ENTRY_SIZE = 10;
438
439 /**
440 * How many bytes in the idx index?
441 */
442 private static final int IDX_ENTRY_SIZE = 12;
443
444 /**
445 * The log stream
446 */
447 private static final Logger log = Logger.getLogger(ZVerseBackend.class);
448 }
449