1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 or later
5    * as published by the Free Software Foundation. This program is distributed
6    * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
7    * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *      http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * © CrossWire Bible Society, 2005 - 2016
18   *
19   */
20  package org.crosswire.jsword.book;
21  
22  import java.util.ArrayList;
23  import java.util.Iterator;
24  import java.util.List;
25  import java.util.Map;
26  import java.util.TreeMap;
27  
28  import org.crosswire.common.diff.Diff;
29  import org.crosswire.common.diff.DiffCleanup;
30  import org.crosswire.common.diff.Difference;
31  import org.crosswire.common.util.Language;
32  import org.crosswire.common.xml.JDOMSAXEventProvider;
33  import org.crosswire.common.xml.SAXEventProvider;
34  import org.crosswire.jsword.passage.Key;
35  import org.crosswire.jsword.passage.KeyUtil;
36  import org.crosswire.jsword.passage.Passage;
37  import org.crosswire.jsword.passage.RestrictionType;
38  import org.crosswire.jsword.passage.Verse;
39  import org.crosswire.jsword.versification.Versification;
40  import org.crosswire.jsword.versification.VersificationsMapper;
41  import org.crosswire.jsword.versification.system.Versifications;
42  import org.jdom2.Content;
43  import org.jdom2.Document;
44  import org.jdom2.Element;
45  import org.jdom2.Namespace;
46  import org.jdom2.Text;
47  
48  /**
49   * BookData is the assembler of the OSIS that is returned by the filters. As
50   * such it puts that into an OSIS document. When several books are supplied, it
51   * gets the data from each and puts it into a parallel or interlinear view.
52   * Note: it is critical that all the books are able to understand the same key.
53   * That does not mean that each has to have content for each key. Missing keys
54   * are represented by empty cells.
55   *
56   * @author Joe Walker
57   * @author DM Smith
58   * @see gnu.lgpl.License The GNU Lesser General Public License for details.
59   */
60  public class BookData implements BookProvider {
61      /**
62       * Create a BookData.
63       * 
64       * @param book the Book to which the data belongs
65       * @param key the Key specifying the data
66       */
67      public BookData(Book book, Key key) {
68          assert book != null;
69          assert key != null;
70  
71          this.key = key;
72  
73          books = new Book[1];
74          books[0] = book;
75      }
76  
77      /**
78       * Create BookData for multiple books.
79       * 
80       * @param books the set of Books to which the data belongs
81       * @param key the Key specifying the data
82       * @param compare when true each pair of adjacent books is to be compared
83       */
84      public BookData(Book[] books, Key key, boolean compare) {
85          assert books != null && books.length > 0;
86          assert key != null;
87  
88          this.books = books.clone();
89          this.key = key;
90          this.comparingBooks = compare;
91      }
92  
93      /**
94       * Accessor for the root OSIS element
95       * 
96       * @return the root of the OSIS document representing this data
97       * @throws BookException if there is any problem with this request
98       */
99      public Element getOsis() throws BookException {
100         if (osis == null) {
101             // TODO(DMS): Determine the proper representation of the OSISWork
102             // name for multiple books.
103             osis = OSISUtil.createOsisFramework(getFirstBook().getBookMetaData());
104             Element text = osis.getChild(OSISUtil.OSIS_ELEMENT_OSISTEXT);
105             Element div = getOsisFragment();
106             text.addContent(div);
107         }
108 
109         return osis;
110     }
111 
112     /**
113      * Accessor for the requested data in OSIS format.
114      * 
115      * @return the fragment of the OSIS document representing this data
116      * @throws BookException if there is any problem with this request
117      */
118     public Element getOsisFragment() throws BookException {
119         if (fragment == null) {
120             fragment = getOsisContent(true);
121         }
122 
123         return fragment;
124     }
125 
126     /**
127      * Accessor for the root OSIS element
128      * 
129      * @param allowGenTitles whether to generate titles
130      * @return the root of the document
131      * @throws BookException if there is any problem with this request
132      */
133     public Element getOsisFragment(boolean allowGenTitles) throws BookException {
134         if (fragment == null) {
135             fragment = getOsisContent(allowGenTitles);
136         }
137 
138         return fragment;
139     }
140 
141     /**
142      * Output the current data as a SAX stream.
143      *
144      * @return A way of posting SAX events
145      * @throws BookException if there is any problem with this request
146      */
147     public SAXEventProvider getSAXEventProvider() throws BookException {
148         // If the fragment is already in a document, then use that.
149         Element frag = getOsisFragment();
150         Document doc = frag.getDocument();
151         if (doc == null) {
152             doc = new Document(frag);
153         }
154         return new JDOMSAXEventProvider(doc);
155     }
156 
157     /**
158      * Who created this data.
159      *
160      * @return Returns the book.
161      */
162     public Book[] getBooks() {
163         return books == null ? null : (Book[]) books.clone();
164     }
165 
166     /**
167      * Get the first book.
168      * 
169      * @return the first or only book
170      */
171     public Book getFirstBook() {
172         return books != null && books.length > 0 ? books[0] : null;
173     }
174 
175     /**
176      * The key used to obtain data from one or more books.
177      *
178      * @return Returns the key.
179      */
180     public Key getKey() {
181         return key;
182     }
183 
184     /**
185      * @return whether the books should be compared.
186      */
187     public boolean isComparingBooks() {
188         return comparingBooks;
189     }
190 
191     private Element getOsisContent(boolean allowGenTitles) throws BookException {
192         Element div = OSISUtil.factory().createDiv();
193 
194         if (books.length == 1) {
195             Iterator<Content> iter = books[0].getOsisIterator(key, false, allowGenTitles);
196             while (iter.hasNext()) {
197                 Content content = iter.next();
198                 div.addContent(content);
199             }
200         } else {
201             Element table = OSISUtil.factory().createTable();
202             Element row = OSISUtil.factory().createRow();
203             Element cell = OSISUtil.factory().createCell();
204 
205             table.addContent(row);
206 
207             Iterator<Content>[] iters = new Iterator[books.length];
208             Passage[] passages = new Passage[books.length];
209             boolean[] showDiffs = new boolean[books.length - 1];
210             boolean doDiffs = false;
211 
212             //iterate through a first time mapping out our data. This enables us to detect a difference in number
213             //of ranges later on and flag it to the user...
214             boolean[] ommittedVerses = new boolean[books.length];
215             int numRangesInMasterPassage = 0;
216             for (int i = 0; i < books.length; i++) {
217                 //although the osis iterator now caters for keys in different versifications
218                 //we are going to want to analyse the resulting key, so let's do the conversion up-front
219                 passages[i] = VersificationsMapper.instance().map(KeyUtil.getPassage(key), getVersification(i));
220 
221                 //iterator takes care of versification differences here...
222                 iters[i] = books[i].getOsisIterator(passages[i], true, true);
223 
224                 if (i == 0) {
225                     //we never omit a verse for the first passage, since we're going to output everything based on that.
226                     ommittedVerses[i] = false;
227                     numRangesInMasterPassage = passages[i].countRanges(RestrictionType.NONE);
228                 } else {
229                     // basically, if we end up with more ranges than we started with, then we're omitting a verse
230                     //somewhere along the lines.
231                     ommittedVerses[i] = passages[i].countRanges(RestrictionType.NONE) > numRangesInMasterPassage;
232                 }
233             }
234 
235 
236             //now read the content and map it out
237             BookVerseContent[] booksContents = new BookVerseContent[books.length];
238             for (int i = 0; i < books.length; i++) {
239                 doDiffs |= addHeaderAndSetShowDiffsState(row, showDiffs, i, ommittedVerses[i]);
240                 booksContents[i] = keyIteratorContentByVerse(
241                         getVersification(i),
242                         iters[i]);
243             }
244 
245             int cellCount = 0;
246             int rowCount = 0;
247 
248             //we iterate through the first book's contents, and match the verses from all the other ones
249             for (Map.Entry<Verse, List<Content>> verseContent : booksContents[0].entrySet()) {
250                 cellCount = 0;
251                 row = OSISUtil.factory().createRow();
252                 String firstText = "";
253 
254                 for (int i = 0; i < books.length; i++) {
255                     Book book = books[i];
256                     cell = OSISUtil.factory().createCell();
257                     Language lang = book.getLanguage();
258                     if (lang != null) {
259                         cell.setAttribute(OSISUtil.OSIS_ATTR_LANG, lang.getCode(), Namespace.XML_NAMESPACE);
260                     }
261 
262                     row.addContent(cell);
263 
264                     StringBuilder newText = new StringBuilder(doDiffs ? 32 : 0);
265 
266                     //get the contents from the mapped verse - key might be null if we had content outside of a verse.
267                     //might be a no-op if it's in the same versification.
268                     Key verseInRelavantBookContents = VersificationsMapper.instance().mapVerse(verseContent.getKey(), getVersification(i));
269 
270                     //key might have several child keys, ie. a verse mapping to a range, or list of verses
271                     Passage passageOfInterest = KeyUtil.getPassage(verseInRelavantBookContents);
272                     Iterator<Key> passageKeys = passageOfInterest.iterator();
273                     while (passageKeys.hasNext()) {
274                         Key singleKey = passageKeys.next();
275                         //TODO(CJB): for performance, we probably want to avoid the instanceof, so either change the
276                         //method signature, or cast directly and be optimistic
277                         if (!(singleKey instanceof Verse)) {
278                             throw new UnsupportedOperationException("Iterating through a passage gives non-verses");
279                         }
280 
281                         List<Content> xmlContent = booksContents[i].get(singleKey);
282 
283                         //if the book simply did not contain that reference (say Greek book, with Gen.1 as a reference)
284                         //then we end up with a key that doesn't exist in the map. Therefore, we need to cope for this.
285                         if (xmlContent == null) {
286                             xmlContent = new ArrayList<Content>(0);
287                         }
288 
289                         addText(doDiffs, newText, xmlContent);
290 
291                         if (doDiffs) {
292                             String thisText = newText.toString();
293                             if (unaccenter != null) {
294                                 thisText = unaccenter.unaccent(thisText);
295                             }
296 
297                             if (i > 0 && showDiffs[i - 1]) {
298                                 List<Difference> diffs = new Diff(firstText, thisText, false).compare();
299                                 DiffCleanup.cleanupSemantic(diffs);
300                                 cell.addContent(OSISUtil.diffToOsis(diffs));
301 
302                                 // Since we used that cell create another
303                                 cell = OSISUtil.factory().createCell();
304                                 lang = book.getLanguage();
305                                 cell.setAttribute(OSISUtil.OSIS_ATTR_LANG, lang.getCode(), Namespace.XML_NAMESPACE);
306                                 row.addContent(cell);
307                             }
308                             if (i == 0) {
309                                 firstText = thisText;
310                             }
311                         }
312 
313                         //TODO(CJB): wrong location - we should record the keys in a set and notify
314                         //when there is a problem
315                         //this should be outside of the loop?
316                         addContentSafely(cell, xmlContent);
317                         cellCount++;
318                     }
319                 }
320 
321                 if (cellCount == 0) {
322                     break;
323                 }
324 
325                 table.addContent(row);
326                 rowCount++;
327             }
328             if (rowCount > 0) {
329                 div.addContent(table);
330             }
331         }
332 
333         return div;
334     }
335 
336     /**
337      * JDOM will throw an exception if we try and add the content to multiple parents.
338      * As a result, we take the opportunity to add it safely, and add a note indicating
339      * this content appears twice.
340      *
341      * @param cell the element to be added
342      * @param xmlContent the collector of content
343      */
344     private void addContentSafely(final Element cell, final List<Content> xmlContent) {
345         Element note = null;
346         for (Content c : xmlContent) {
347             if (c.getParent() == null) {
348                 cell.addContent(c);
349             } else if (note != null) {
350                 note.addContent(c.clone());
351             } else {
352                 //we're in the situation where we have added this already.
353                 //add note. In this case, we wrap the content that has already been applied.
354                 note = appendVersificationNotice(cell, "duplicate");
355                 note.addContent(c.clone());
356             }
357         }
358     }
359 
360     /**
361      * Creates a notice element.
362      *
363      * @param parent the parent to which the notice is added
364      * @param notice the notice fragment to be applied to the sub-type
365      * @return the new element
366      */
367     private Element appendVersificationNotice(Element parent, final String notice) {
368         Element note = OSISUtil.factory().createDiv();
369         note.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.GENERATED_CONTENT);
370         note.setAttribute(OSISUtil.OSIS_ATTR_SUBTYPE, OSISUtil.TYPE_X_PREFIX + notice);
371         parent.addContent(note);
372         return note;
373     }
374 
375     /**
376      * @param i the current position in the array of books
377      * @return the versification of the book.
378      */
379     private Versification getVersification(final int i) {
380         return Versifications.instance().getVersification(
381                 books[i].getBookMetaData().getProperty(BookMetaData.KEY_VERSIFICATION));
382     }
383 
384 
385     /**
386      * We iterate through the content, making sure we key together those bits that belong together.
387      * And separating out each verse.
388      *
389      * @param v11n the versification for the content
390      * @param iter the iterator of OSIS content
391      * @return the verse content for the book
392      * @throws BookException if there is any problem with this request
393      */
394     private BookVerseContent keyIteratorContentByVerse(Versification v11n, final Iterator<Content> iter) throws BookException {
395         BookVerseContent contentsByOsisID = new BookVerseContent();
396 
397         //we will be using this map later to track whi ch keys have been catered for in the order calculation
398         Verse currentVerse = null;
399         Content content;
400 
401         List<Content> contents = new ArrayList<Content>();
402         while (iter.hasNext()) {
403             content = iter.next();
404             if (content instanceof Element && OSISUtil.OSIS_ELEMENT_VERSE.equals(((Element) content).getName())) {
405                 if (currentVerse != null) {
406                     contentsByOsisID.put(currentVerse, contents);
407                     contents = new ArrayList<Content>();
408                 }
409 
410                 currentVerse = OSISUtil.getVerse(v11n, (Element) content);
411 
412                 //if we still have stuff in here, then let's assign it to the previous verse (i.e.
413                 //we might have come across content that legitimately sits in verse 0 for example).
414                 //of perhaps we've somehow come across previous content. Either way, it clearly doesn't
415                 //belong to the current verse.
416                 if (contents.size() > 0) {
417                     Verse previousVerse = new Verse(currentVerse.getVersification(), currentVerse.getOrdinal() - 1);
418                     contentsByOsisID.put(previousVerse, contents);
419                     contents = new ArrayList<Content>();
420                 }
421             }
422 
423             contents.add(content);
424         }
425 
426         //now append what's left into the last verse
427         if (currentVerse != null) {
428             contentsByOsisID.put(currentVerse, contents);
429         }
430 
431         return contentsByOsisID;
432     }
433 
434     /**
435      * @param row           our current OSIS row
436      * @param showDiffs     the array of states as to whether we are showing diffs for this column
437      * @param i             our current place in the state
438      * @param ommittedVerse true to indicate this column will be ommiting a verse
439      * @return true if we are doing diffs
440      */
441     private boolean addHeaderAndSetShowDiffsState(final Element row, final boolean[] showDiffs, final int i, final boolean ommittedVerse) {
442         boolean doDiffs = false;
443         Book book = books[i];
444         Element cell = OSISUtil.factory().createHeaderCell();
445 
446         if (i > 0) {
447             Book firstBook = books[0];
448             BookCategory category = book.getBookCategory();
449 
450             BookCategory prevCategory = firstBook.getBookCategory();
451             String prevName = firstBook.getInitials();
452             showDiffs[i - 1] = comparingBooks && BookCategory.BIBLE.equals(category) && category.equals(prevCategory)
453                     && book.getLanguage().equals(firstBook.getLanguage()) && !book.getInitials().equals(prevName);
454 
455             if (showDiffs[i - 1]) {
456                 doDiffs = true;
457                 StringBuilder buf = new StringBuilder(firstBook.getInitials());
458                 buf.append(" ==> ");
459                 buf.append(book.getInitials());
460 
461                 cell.addContent(OSISUtil.factory().createText(buf.toString()));
462                 row.addContent(cell);
463                 cell = OSISUtil.factory().createHeaderCell();
464             }
465         }
466 
467         final Text text = OSISUtil.factory().createText(book.getInitials());
468         if (ommittedVerse) {
469             Element notice = this.appendVersificationNotice(cell, "omitted-verses");
470             notice.addContent(text);
471         } else {
472             cell.addContent(text);
473         }
474         row.addContent(cell);
475         return doDiffs;
476     }
477 
478     /**
479      * Loops around contents and calls addText for a single element
480      *
481      * @param doDiffs  true for calculating differences
482      * @param newText  the newText buffer used to compare one portion of text to another
483      * @param contents the contents to be added
484      */
485     private void addText(boolean doDiffs, StringBuilder newText, List<Content> contents) {
486         for (Content c : contents) {
487             addText(doDiffs, newText, c);
488         }
489     }
490 
491     /**
492      * Adds the text to the diff buffer
493      *
494      * @param doDiffs true for calculating differences
495      * @param newText the newText buffer used to compare one portion of text to another
496      * @param content the content element to be added
497      */
498     private void addText(boolean doDiffs, StringBuilder newText, Content content) {
499         if (doDiffs) {
500             // if we already have content, let's add a space to avoid chaining words together
501             if (newText.length() != 0) {
502                 newText.append(' ');
503             }
504 
505             if (content instanceof Element) {
506                 newText.append(OSISUtil.getCanonicalText((Element) content));
507             } else if (content instanceof Text) {
508                 newText.append(((Text) content).getText());
509             }
510         }
511     }
512 
513     /**
514      * @param unaccenter the unaccenter to set
515      */
516     public void setUnaccenter(UnAccenter unaccenter) {
517         this.unaccenter = unaccenter;
518     }
519 
520     /**
521      * A temporary holder for a map that links each verse ID to its set of OSIS elements.
522      * Used purely to avoid having too many generic/array notations entangled in the code
523      */
524     class BookVerseContent extends TreeMap<Verse, List<Content>> {
525         /**
526          * Serialization ID
527          */
528         private static final long serialVersionUID = -6508118172314227362L;
529     }
530 
531     /**
532      * What key was used to create this data
533      */
534     private Key key;
535 
536     /**
537      * The books to which the key should be applied.
538      */
539     private Book[] books;
540 
541     /**
542      * Whether the Books should be compared.
543      */
544     private boolean comparingBooks;
545 
546     /**
547      * The complete OSIS container for the element
548      */
549     private Element osis;
550 
551     /**
552      * Just the element
553      */
554     private Element fragment;
555 
556     private UnAccenter unaccenter;
557 }
558