1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 as published by
5    * the Free Software Foundation. This program is distributed in the hope
6    * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7    * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *       http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * Copyright: 2005
18   *     The copyright to this program is held by its authors.
19   *
20   * ID: $Id: RawLDBackend.java 2230 2012-02-08 00:00:10Z dmsmith $
21   */
22  package org.crosswire.jsword.book.sword;
23  
24  import java.io.File;
25  import java.io.IOException;
26  import java.io.ObjectInputStream;
27  import java.io.RandomAccessFile;
28  import java.io.UnsupportedEncodingException;
29  import java.net.URI;
30  import java.text.DecimalFormat;
31  import java.text.MessageFormat;
32  import java.text.ParseException;
33  import java.util.Calendar;
34  import java.util.Date;
35  import java.util.GregorianCalendar;
36  import java.util.Locale;
37  import java.util.regex.Matcher;
38  import java.util.regex.Pattern;
39  
40  import org.crosswire.common.activate.Activator;
41  import org.crosswire.common.activate.Lock;
42  import org.crosswire.common.icu.DateFormatter;
43  import org.crosswire.common.util.FileUtil;
44  import org.crosswire.common.util.Logger;
45  import org.crosswire.common.util.Reporter;
46  import org.crosswire.common.util.StringUtil;
47  import org.crosswire.jsword.JSMsg;
48  import org.crosswire.jsword.book.BookCategory;
49  import org.crosswire.jsword.book.BookException;
50  import org.crosswire.jsword.book.FeatureType;
51  import org.crosswire.jsword.passage.DefaultLeafKeyList;
52  import org.crosswire.jsword.passage.Key;
53  
54  /**
55   * An implementation AbstractKeyBackend to read RAW format files.
56   * 
57   * @see gnu.lgpl.License for license details.<br>
58   *      The copyright to this program is held by it's authors.
59   * @author Joe Walker [joe at eireneh dot com]
60   * @author DM Smith [dmsmith555 at yahoo dot com]
61   */
62  public class RawLDBackend extends AbstractKeyBackend {
63      /**
64       * Simple ctor
65       * 
66       * @param datasize
67       *            We need to know how many bytes in the size portion of the
68       *            index
69       */
70      public RawLDBackend(SwordBookMetaData sbmd, int datasize) {
71          super(sbmd);
72          this.size = -1;
73          this.datasize = datasize;
74          this.entrysize = OFFSETSIZE + datasize;
75      }
76  
77      /*
78       * (non-Javadoc)
79       * 
80       * @see
81       * org.crosswire.jsword.book.sword.AbstractBackend#getRawText(org.crosswire
82       * .jsword.passage.Key, java.lang.String)
83       */
84      @Override
85      public String getRawText(Key key) throws BookException {
86          String result = getRawText(key.getName());
87          return result;
88      }
89  
90      public String getRawText(String key) throws BookException {
91          if (!checkActive()) {
92              return "";
93          }
94  
95          try {
96              int pos = search(key);
97              if (pos >= 0) {
98                  DataEntry entry = getEntry(key, pos);
99                  if (entry.isLinkEntry()) {
100                     return getRawText(entry.getLinkTarget());
101                 }
102                 return getRawText(entry);
103             }
104             // TRANSLATOR: Error condition: Indicates that something could not be found in the book. {0} is a placeholder for the unknown key.
105             throw new BookException(JSMsg.gettext("Key not found {0}", key));
106         } catch (IOException ex) {
107             // TRANSLATOR: Common error condition: The file could not be read. There can be many reasons.
108             // {0} is a placeholder for the file.
109             throw new BookException(JSMsg.gettext("Error reading {0}", key), ex);
110         }
111     }
112 
113     protected String getRawText(DataEntry entry) {
114         String cipherKeyString = (String) getBookMetaData().getProperty(ConfigEntryType.CIPHER_KEY);
115         try {
116             return entry.getRawText((cipherKeyString != null) ? cipherKeyString.getBytes(getBookMetaData().getBookCharset()) : null);
117         } catch (UnsupportedEncodingException e) {
118             return entry.getRawText(cipherKeyString.getBytes());
119         }
120     }
121 
122     /*
123      * (non-Javadoc)
124      * 
125      * @see org.crosswire.jsword.passage.Key#getCardinality()
126      */
127     public int getCardinality() {
128         if (!checkActive()) {
129             return 0;
130         }
131 
132         if (size == -1) {
133             try {
134                 size = (int) (idxRaf.length() / entrysize);
135             } catch (IOException e) {
136                 size = 0;
137             }
138         }
139         return size;
140     }
141 
142     /*
143      * (non-Javadoc)
144      * 
145      * @see org.crosswire.jsword.passage.Key#get(int)
146      */
147     public Key get(int index) {
148         if (checkActive()) {
149             try {
150                 if (index < getCardinality()) {
151                     DataEntry entry = getEntry(getBookMetaData().getInitials(), index);
152                     String keytitle = internal2external(entry.getKey());
153                     return new DefaultLeafKeyList(keytitle);
154                 }
155             } catch (IOException e) {
156                 // fall through
157             }
158         }
159         throw new ArrayIndexOutOfBoundsException(index);
160     }
161 
162     /*
163      * (non-Javadoc)
164      * 
165      * @see
166      * org.crosswire.jsword.passage.Key#indexOf(org.crosswire.jsword.passage
167      * .Key)
168      */
169     public int indexOf(Key that) {
170         try {
171             return search(that.getName());
172         } catch (IOException e) {
173             return -getCardinality() - 1;
174         }
175     }
176 
177     /*
178      * (non-Javadoc)
179      * 
180      * @see
181      * org.crosswire.common.activate.Activatable#activate(org.crosswire.common
182      * .activate.Lock)
183      */
184     public void activate(Lock lock) {
185         active = false;
186         size = -1;
187         idxFile = null;
188         datFile = null;
189         idxRaf = null;
190         datRaf = null;
191 
192         URI path = null;
193         try {
194             path = getExpandedDataPath();
195         } catch (BookException e) {
196             Reporter.informUser(this, e);
197             return;
198         }
199 
200         try {
201             idxFile = new File(path.getPath() + SwordConstants.EXTENSION_INDEX);
202             datFile = new File(path.getPath() + SwordConstants.EXTENSION_DATA);
203 
204             if (!idxFile.canRead()) {
205                 // TRANSLATOR: Common error condition: The file could not be read. There can be many reasons.
206                 // {0} is a placeholder for the file.
207                 Reporter.informUser(this, new BookException(JSMsg.gettext("Error reading {0}", idxFile.getAbsolutePath())));
208                 return;
209             }
210 
211             if (!datFile.canRead()) {
212                 // TRANSLATOR: Common error condition: The file could not be read. There can be many reasons.
213                 // {0} is a placeholder for the file.
214                 Reporter.informUser(this, new BookException(JSMsg.gettext("Error reading {0}", datFile.getAbsolutePath())));
215                 return;
216             }
217 
218             // Open the files
219             idxRaf = new RandomAccessFile(idxFile, FileUtil.MODE_READ);
220             datRaf = new RandomAccessFile(datFile, FileUtil.MODE_READ);
221         } catch (IOException ex) {
222             log.error("failed to open files", ex);
223             idxRaf = null;
224             datRaf = null;
225             return;
226         }
227 
228         active = true;
229     }
230 
231     /*
232      * (non-Javadoc)
233      * 
234      * @see
235      * org.crosswire.common.activate.Activatable#deactivate(org.crosswire.common
236      * .activate.Lock)
237      */
238     public void deactivate(Lock lock) {
239         size = -1;
240         try {
241             if (idxRaf != null) {
242                 idxRaf.close();
243             }
244             if (datRaf != null) {
245                 datRaf.close();
246             }
247         } catch (IOException ex) {
248             log.error("failed to close files", ex);
249         } finally {
250             idxRaf = null;
251             datRaf = null;
252         }
253 
254         active = false;
255     }
256 
257     /**
258      * Helper method so we can quickly activate ourselves on access
259      */
260     protected boolean checkActive() {
261         if (!isActive()) {
262             Activator.activate(this);
263         }
264         return isActive();
265     }
266 
267     /**
268      * Determine whether we are active.
269      */
270     protected boolean isActive() {
271         return active;
272     }
273 
274     /**
275      * Get the Index (that is offset and size) for an entry.
276      * 
277      * @param entry
278      * @return the index of the entry
279      * @throws IOException
280      */
281     private DataIndex getIndex(long entry) throws IOException {
282         // Read the offset and size for this key from the index
283         byte[] buffer = SwordUtil.readRAF(idxRaf, entry * entrysize, entrysize);
284         int entryOffset = SwordUtil.decodeLittleEndian32(buffer, 0);
285         int entrySize = -1;
286         switch (datasize) {
287         case 2:
288             entrySize = SwordUtil.decodeLittleEndian16(buffer, 4);
289             break;
290         case 4:
291             entrySize = SwordUtil.decodeLittleEndian32(buffer, 4);
292             break;
293         default:
294             assert false : datasize;
295         }
296         return new DataIndex(entryOffset, entrySize);
297     }
298 
299     /**
300      * Get the text for an indexed entry in the book.
301      * 
302      * @param index
303      *            the entry to get
304      * @return the text for the entry.
305      * @throws IOException
306      */
307     private DataEntry getEntry(String reply, int index) throws IOException {
308         DataIndex dataIndex = getIndex(index);
309         // Now read the data file for this key using the offset and size
310         byte[] data = SwordUtil.readRAF(datRaf, dataIndex.getOffset(), dataIndex.getSize());
311 
312         return new DataEntry(reply, data, getBookMetaData().getBookCharset());
313     }
314 
315     /**
316      * Find a matching entry, returning it's index. Otherwise return < 0, such
317      * that (-pos - 1) gives the insertion index.
318      * 
319      * @param key
320      * @return the match
321      * @throws IOException
322      */
323     private int search(String key) throws IOException {
324         if (!checkActive()) {
325             return -1;
326         }
327 
328         String target = external2internal(key);
329 
330         // Initialize to one beyond both ends.
331         int total = getCardinality();
332         // Note: In some dictionaries, the first element is out of order and
333         // represents the title of the work.
334         // So, do the bin search from 1 to end and if not found, check the first
335         // element as a special case.
336         // If that does not match return the position found otherwise.
337         int low = 0;
338         int high = total;
339         int match = -1;
340 
341         while (high - low > 1) {
342             // use >>> to keep mid always in range
343             int mid = (low + high) >>> 1;
344 
345             // Get the key for the item at "mid"
346             int cmp = normalizeForSearch(getEntry(key, mid).getKey()).compareTo(target);
347             if (cmp < 0) {
348                 low = mid;
349             } else if (cmp > 0) {
350                 high = mid;
351             } else {
352                 match = mid;
353                 break;
354             }
355         }
356 
357         // Do we have an exact match?
358         if (match >= 0) {
359             return match;
360         }
361 
362         // Strong's Greek And Hebrew dictionaries have an introductory entry, so
363         // check it for a match.
364         if (normalizeForSearch(getEntry(key, 0).getKey()).compareTo(target) == 0) {
365             return 0;
366         }
367 
368         return -(high + 1);
369     }
370 
371     /**
372      * Convert the supplied key to something that can be understood by the
373      * module.
374      * 
375      * @param externalKey
376      * @return the internal representation of the key.
377      */
378     private String external2internal(String externalKey) {
379         SwordBookMetaData bmd = getBookMetaData();
380         String keytitle = externalKey;
381         if (BookCategory.DAILY_DEVOTIONS.equals(bmd.getBookCategory())) {
382             Calendar greg = new GregorianCalendar();
383             DateFormatter nameDF = DateFormatter.getDateInstance();
384             nameDF.setLenient(true);
385             try {
386                 Date date = nameDF.parse(keytitle);
387                 greg.setTime(date);
388                 Object[] objs = {
389                         Integer.valueOf(1 + greg.get(Calendar.MONTH)), Integer.valueOf(greg.get(Calendar.DATE))
390                 };
391                 return DATE_KEY_FORMAT.format(objs);
392             } catch (ParseException e) {
393                 assert false : e;
394             }
395         } else if (bmd.hasFeature(FeatureType.GREEK_DEFINITIONS) || bmd.hasFeature(FeatureType.HEBREW_DEFINITIONS)) {
396             // Is the string valid?
397             Matcher m = STRONGS_PATTERN.matcher(keytitle);
398             if (!m.matches()) {
399                 return keytitle.toUpperCase(Locale.US);
400             }
401 
402             // NASB has trailing letters!
403             int pos = keytitle.length() - 1;
404             char lastLetter = keytitle.charAt(pos);
405             boolean hasTrailingLetter = Character.isLetter(lastLetter);
406             if (hasTrailingLetter) {
407                 keytitle = keytitle.substring(0, pos);
408                 // And it might be preceded by a !
409                 pos--;
410                 if (pos > 0 && keytitle.charAt(pos) == '!') {
411                     keytitle = keytitle.substring(0, pos);
412                 }
413             }
414 
415             // Get the G or the H.
416             char type = keytitle.charAt(0);
417 
418             // Get the number after the G or H
419             int strongsNumber = Integer.parseInt(keytitle.substring(1));
420             if (bmd.hasFeature(FeatureType.GREEK_DEFINITIONS) && bmd.hasFeature(FeatureType.HEBREW_DEFINITIONS)) {
421                 // The convention is that a Strong's dictionary with both Greek
422                 // and Hebrew have G or H prefix
423                 StringBuilder buf = new StringBuilder();
424                 buf.append(Character.toUpperCase(type));
425                 buf.append(ZERO_4PAD.format(strongsNumber));
426 
427                 // The NAS lexicon has some entries that end in A-Z, but it is
428                 // not preceded by a !
429                 if (hasTrailingLetter && "naslex".equalsIgnoreCase(bmd.getInitials()))
430                 {
431                     buf.append(Character.toUpperCase(lastLetter));
432                 }
433                 return buf.toString();
434             }
435 
436             return ZERO_5PAD.format(strongsNumber);
437         } else {
438             return keytitle.toUpperCase(Locale.US);
439         }
440 
441         return keytitle;
442     }
443 
444     private String internal2external(String internalKey) {
445         SwordBookMetaData bmd = getBookMetaData();
446         String keytitle = internalKey;
447         if (BookCategory.DAILY_DEVOTIONS.equals(bmd.getBookCategory()) && keytitle.length() >= 3) {
448             Calendar greg = new GregorianCalendar();
449             DateFormatter nameDF = DateFormatter.getDateInstance();
450             String[] spec = StringUtil.splitAll(keytitle, '.');
451             greg.set(Calendar.MONTH, Integer.parseInt(spec[0]) - 1);
452             greg.set(Calendar.DATE, Integer.parseInt(spec[1]));
453             keytitle = nameDF.format(greg.getTime());
454         }
455         return keytitle;
456     }
457 
458     private String normalizeForSearch(String internalKey) {
459         SwordBookMetaData bmd = getBookMetaData();
460         String keytitle = internalKey;
461         if (!BookCategory.DAILY_DEVOTIONS.equals(bmd.getBookCategory())) {
462             return keytitle.toUpperCase(Locale.US);
463         }
464 
465         return keytitle;
466     }
467 
468     /**
469      * Serialization support.
470      * 
471      * @param is
472      * @throws IOException
473      * @throws ClassNotFoundException
474      */
475     private void readObject(ObjectInputStream is) throws IOException, ClassNotFoundException {
476         active = false;
477         size = -1;
478         idxFile = null;
479         datFile = null;
480         idxRaf = null;
481         datRaf = null;
482         is.defaultReadObject();
483     }
484 
485     /**
486      * How many bytes in the offset pointers in the index
487      */
488     private static final int OFFSETSIZE = 4;
489 
490     /**
491      * Flags whether there are open files or not
492      */
493     private transient boolean active;
494 
495     /**
496      * The number of bytes in the size count in the index
497      */
498     private int datasize;
499 
500     /**
501      * The number of bytes for each entry in the index: either 6 or 8
502      */
503     private int entrysize;
504 
505     /**
506      * The number of entries in the book.
507      */
508     private transient int size;
509 
510     /**
511      * The index file
512      */
513     private transient File idxFile;
514 
515     /**
516      * The index random access file
517      */
518     private transient RandomAccessFile idxRaf;
519 
520     /**
521      * The data file
522      */
523     private transient File datFile;
524 
525     /**
526      * The data random access file
527      */
528     private transient RandomAccessFile datRaf;
529 
530     /**
531      * Date formatter
532      */
533     private static final MessageFormat DATE_KEY_FORMAT = new MessageFormat("{0,number,00}.{1,number,00}");
534 
535     /**
536      * This is the pattern of a Strong's Number. It begins with a G or H. Is
537      * followed by a number. It can be followed by a ! and a letter or just a
538      * letter.
539      */
540     private static final Pattern STRONGS_PATTERN = Pattern.compile("^([GH])(\\d+)((!)?([a-z])?)$");
541 
542     /**
543      * A means to normalize Strong's Numbers.
544      */
545     private static final DecimalFormat ZERO_5PAD = new DecimalFormat("00000");
546 
547     private static final DecimalFormat ZERO_4PAD = new DecimalFormat("0000");
548 
549     /**
550      * Serialization ID
551      */
552     private static final long serialVersionUID = 818089833394450383L;
553 
554     /**
555      * The log stream
556      */
557     private static final Logger log = Logger.getLogger(RawLDBackend.class);
558 }
559