Coverage Report - org.crosswire.jsword.book.OSISUtil
 
Classes in this File Line Coverage Branch Coverage Complexity
OSISUtil
0%
0/278
0%
0/180
2.868
OSISUtil$OSISFactory
0%
0/37
N/A
2.868
 
 1  
 /**
 2  
  * Distribution License:
 3  
  * JSword is free software; you can redistribute it and/or modify it under
 4  
  * the terms of the GNU Lesser General Public License, version 2.1 or later
 5  
  * as published by the Free Software Foundation. This program is distributed
 6  
  * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 7  
  * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 8  
  * See the GNU Lesser General Public License for more details.
 9  
  *
 10  
  * The License is available on the internet at:
 11  
  *      http://www.gnu.org/copyleft/lgpl.html
 12  
  * or by writing to:
 13  
  *      Free Software Foundation, Inc.
 14  
  *      59 Temple Place - Suite 330
 15  
  *      Boston, MA 02111-1307, USA
 16  
  *
 17  
  * © CrossWire Bible Society, 2005 - 2016
 18  
  *
 19  
  */
 20  
 package org.crosswire.jsword.book;
 21  
 
 22  
 import java.util.ArrayList;
 23  
 import java.util.Arrays;
 24  
 import java.util.Collection;
 25  
 import java.util.HashSet;
 26  
 import java.util.Iterator;
 27  
 import java.util.List;
 28  
 import java.util.Set;
 29  
 import java.util.Stack;
 30  
 import java.util.regex.Matcher;
 31  
 import java.util.regex.Pattern;
 32  
 
 33  
 import org.crosswire.common.diff.Difference;
 34  
 import org.crosswire.common.diff.EditType;
 35  
 import org.crosswire.jsword.JSOtherMsg;
 36  
 import org.crosswire.jsword.passage.Key;
 37  
 import org.crosswire.jsword.passage.NoSuchKeyException;
 38  
 import org.crosswire.jsword.passage.NoSuchVerseException;
 39  
 import org.crosswire.jsword.passage.PassageKeyFactory;
 40  
 import org.crosswire.jsword.passage.Verse;
 41  
 import org.crosswire.jsword.passage.VerseFactory;
 42  
 import org.crosswire.jsword.versification.Versification;
 43  
 import org.jdom2.Content;
 44  
 import org.jdom2.Element;
 45  
 import org.jdom2.Parent;
 46  
 import org.jdom2.Text;
 47  
 import org.slf4j.Logger;
 48  
 import org.slf4j.LoggerFactory;
 49  
 
 50  
 /**
 51  
  * Some simple utilities to help working with OSIS classes.
 52  
  * 
 53  
  * @see gnu.lgpl.License The GNU Lesser General Public License for details.
 54  
  * @author Joe Walker
 55  
  */
 56  
 public final class OSISUtil {
 57  
     private static final char SPACE_SEPARATOR = ' ';
 58  
     private static final char MORPH_INFO_SEPARATOR = '@';
 59  
 
 60  
     /**
 61  
      * The following are values for the type attribute on the hi element.
 62  
      */
 63  
     /**
 64  
      * Constant for acrostic highlighting
 65  
      */
 66  
     public static final String HI_ACROSTIC = "acrostic";
 67  
 
 68  
     /**
 69  
      * Constant for rendering bold text
 70  
      */
 71  
     public static final String HI_BOLD = "bold";
 72  
 
 73  
     /**
 74  
      * Constant for rendering emphatic text
 75  
      */
 76  
     public static final String HI_EMPHASIS = "emphasis";
 77  
 
 78  
     /**
 79  
      * Constant for rendering illuminated text.
 80  
      */
 81  
     public static final String HI_ILLUMINATED = "illuminated";
 82  
 
 83  
     /**
 84  
      * Constant for rendering italic text.
 85  
      */
 86  
     public static final String HI_ITALIC = "italic";
 87  
 
 88  
     /**
 89  
      * Constant for rendering strike-through text
 90  
      */
 91  
     public static final String HI_LINETHROUGH = "line-through";
 92  
 
 93  
     /**
 94  
      * Constant for rendering normal text.
 95  
      */
 96  
     public static final String HI_NORMAL = "normal";
 97  
 
 98  
     /**
 99  
      * Constant for rendering small caps
 100  
      */
 101  
     public static final String HI_SMALL_CAPS = "small-caps";
 102  
 
 103  
     /**
 104  
      * Constant for rendering subscripts
 105  
      */
 106  
     public static final String HI_SUB = "sub";
 107  
 
 108  
     /**
 109  
      * Constant for rendering superscripts
 110  
      */
 111  
     public static final String HI_SUPER = "super";
 112  
 
 113  
     /**
 114  
      * Constant for rendering underlined text
 115  
      */
 116  
     public static final String HI_UNDERLINE = "underline";
 117  
 
 118  
     /**
 119  
      * Constant for rendering upper case text
 120  
      */
 121  
     public static final String HI_X_CAPS = "x-caps";
 122  
 
 123  
     /**
 124  
      * Constant for rendering big text
 125  
      */
 126  
     public static final String HI_X_BIG = "x-big";
 127  
 
 128  
     /**
 129  
      * Constant for rendering small text
 130  
      */
 131  
     public static final String HI_X_SMALL = "x-small";
 132  
 
 133  
     /**
 134  
      * Constant for rendering tt text
 135  
      */
 136  
     public static final String HI_X_TT = "x-tt";
 137  
 
 138  
     /**
 139  
      * Constant to help narrow down what we use seg for. In this case the
 140  
      * justify right tag
 141  
      */
 142  
     public static final String SEG_JUSTIFYRIGHT = "text-align: right;";
 143  
 
 144  
     /**
 145  
      * Constant to help narrow down what we use seg for. In this case the
 146  
      * justify left tag
 147  
      */
 148  
     public static final String SEG_JUSTIFYLEFT = "text-align: left;";
 149  
 
 150  
     /**
 151  
      * Constant to help narrow down what we use seg for. In this case the thml
 152  
      * center tag
 153  
      */
 154  
     public static final String SEG_CENTER = "text-align: center;";
 155  
 
 156  
     /**
 157  
      * Constant to help narrow down what we use div for. In this case the thml
 158  
      * pre tag
 159  
      */
 160  
     public static final String DIV_PRE = "x-pre";
 161  
 
 162  
     /**
 163  
      * Constant to help narrow down what we use seg for. In this case the color
 164  
      * tag
 165  
      */
 166  
     public static final String SEG_COLORPREFIX = "color: ";
 167  
 
 168  
     /**
 169  
      * Constant to help narrow down what we use seg for. In this case the
 170  
      * font-size tag
 171  
      */
 172  
     public static final String SEG_SIZEPREFIX = "font-size: ";
 173  
 
 174  
     /**
 175  
      * Constant for x- types
 176  
      */
 177  
     public static final String TYPE_X_PREFIX = "x-";
 178  
 
 179  
     /**
 180  
      * Constant for the study note type
 181  
      */
 182  
     public static final String NOTETYPE_STUDY = "x-StudyNote";
 183  
 
 184  
     /**
 185  
      * Constant for the cross reference note type
 186  
      */
 187  
     public static final String NOTETYPE_REFERENCE = "crossReference";
 188  
 
 189  
     /**
 190  
      * Constant for the variant type segment
 191  
      */
 192  
     public static final String VARIANT_TYPE = "x-variant";
 193  
     public static final String VARIANT_CLASS = "x-";
 194  
 
 195  
     /**
 196  
      * Constant for JSword generated content. Used for type or subType.
 197  
      */
 198  
     public static final String GENERATED_CONTENT = "x-gen";
 199  
 
 200  
     /**
 201  
      * Constant for the pos (part of speech) type.
 202  
      */
 203  
     public static final String POS_TYPE = "x-pos";
 204  
 
 205  
     /**
 206  
      * Constant for the def (dictionary definition) type
 207  
      */
 208  
     public static final String DEF_TYPE = "x-def";
 209  
 
 210  
     /**
 211  
      * Constant for a Strong's numbering lemma
 212  
      */
 213  
     public static final String LEMMA_STRONGS = "strong:";
 214  
     public static final String LEMMA_MISC = "lemma:";
 215  
     public static final String MORPH_ROBINSONS = "robinson:";
 216  
 
 217  
     /**
 218  
      * Constant for Strong's numbering morphology
 219  
      */
 220  
     public static final String MORPH_STRONGS = "x-StrongsMorph:T";
 221  
 
 222  
     /**
 223  
      * Constant to help narrow down what we use "q" for. In this case:
 224  
      * blockquote
 225  
      */
 226  
     public static final String Q_BLOCK = "blockquote";
 227  
 
 228  
     /**
 229  
      * Constant to help narrow down what we use "q" for. In this case: citation
 230  
      */
 231  
     public static final String Q_CITATION = "citation";
 232  
 
 233  
     /**
 234  
      * Constant to help narrow down what we use "q" for. In this case: embedded
 235  
      */
 236  
     public static final String Q_EMBEDDED = "embedded";
 237  
 
 238  
     /**
 239  
      * Constant to help narrow down what we use "list" for.
 240  
      */
 241  
     public static final String LIST_ORDERED = "x-ordered";
 242  
     public static final String LIST_UNORDERED = "x-unordered";
 243  
 
 244  
     /**
 245  
      * Table roles (on table, row and cell elements) can be "data", the default,
 246  
      * or label.
 247  
      */
 248  
     public static final String TABLE_ROLE_LABEL = "label";
 249  
 
 250  
     /**
 251  
      * Possible cell alignments
 252  
      */
 253  
     public static final String CELL_ALIGN_LEFT = "left";
 254  
     public static final String CELL_ALIGN_RIGHT = "right";
 255  
     public static final String CELL_ALIGN_CENTER = "center";
 256  
     public static final String CELL_ALIGN_JUSTIFY = "justify";
 257  
     public static final String CELL_ALIGN_START = "start";
 258  
     public static final String CELL_ALIGN_END = "end";
 259  
 
 260  
     public static final String OSIS_ELEMENT_ABBR = "abbr";
 261  
     public static final String OSIS_ELEMENT_TITLE = "title";
 262  
     public static final String OSIS_ELEMENT_TABLE = "table";
 263  
     public static final String OSIS_ELEMENT_SPEECH = "speech";
 264  
     public static final String OSIS_ELEMENT_SPEAKER = "speaker";
 265  
     public static final String OSIS_ELEMENT_ROW = "row";
 266  
     public static final String OSIS_ELEMENT_REFERENCE = "reference";
 267  
     public static final String OSIS_ELEMENT_NOTE = "note";
 268  
     public static final String OSIS_ELEMENT_NAME = "name";
 269  
     public static final String OSIS_ELEMENT_Q = "q";
 270  
     public static final String OSIS_ELEMENT_LIST = "list";
 271  
     public static final String OSIS_ELEMENT_P = "p";
 272  
     public static final String OSIS_ELEMENT_ITEM = "item";
 273  
     public static final String OSIS_ELEMENT_FIGURE = "figure";
 274  
     public static final String OSIS_ELEMENT_FOREIGN = "foreign";
 275  
     public static final String OSIS_ELEMENT_W = "w";
 276  
     public static final String OSIS_ELEMENT_CHAPTER = "chapter";
 277  
     public static final String OSIS_ELEMENT_VERSE = "verse";
 278  
     public static final String OSIS_ELEMENT_CELL = "cell";
 279  
     public static final String OSIS_ELEMENT_DIV = "div";
 280  
     public static final String OSIS_ELEMENT_OSIS = "osis";
 281  
     public static final String OSIS_ELEMENT_WORK = "work";
 282  
     public static final String OSIS_ELEMENT_HEADER = "header";
 283  
     public static final String OSIS_ELEMENT_OSISTEXT = "osisText";
 284  
     public static final String OSIS_ELEMENT_SEG = "seg";
 285  
     public static final String OSIS_ELEMENT_LG = "lg";
 286  
     public static final String OSIS_ELEMENT_L = "l";
 287  
     public static final String OSIS_ELEMENT_LB = "lb";
 288  
     public static final String OSIS_ELEMENT_HI = "hi";
 289  
 
 290  
     public static final String ATTRIBUTE_TEXT_OSISIDWORK = "osisIDWork";
 291  
     public static final String ATTRIBUTE_WORK_OSISWORK = "osisWork";
 292  
     public static final String OSIS_ATTR_OSISID = "osisID";
 293  
     public static final String OSIS_ATTR_SID = "sID";
 294  
     public static final String OSIS_ATTR_EID = "eID";
 295  
     public static final String ATTRIBUTE_W_LEMMA = "lemma";
 296  
     public static final String ATTRIBUTE_FIGURE_SRC = "src";
 297  
     public static final String ATTRIBUTE_TABLE_BORDER = "border";
 298  
     public static final String ATTRIBUTE_TABLE_ROLE = "role";
 299  
     public static final String ATTRIBUTE_CELL_ALIGN = "align";
 300  
     public static final String ATTRIBUTE_CELL_ROWS = "rows";
 301  
     public static final String ATTRIBUTE_CELL_COLS = "cols";
 302  
     public static final String OSIS_ATTR_TYPE = "type";
 303  
     public static final String OSIS_ATTR_CANONICAL = "canonical";
 304  
     public static final String OSIS_ATTR_SUBTYPE = "subType";
 305  
     public static final String OSIS_ATTR_REF = "osisRef";
 306  
     public static final String OSIS_ATTR_LEVEL = "level";
 307  
     public static final String ATTRIBUTE_SPEAKER_WHO = "who";
 308  
     public static final String ATTRIBUTE_Q_WHO = "who";
 309  
     public static final String ATTRIBUTE_W_MORPH = "morph";
 310  
     public static final String ATTRIBUTE_OSISTEXT_OSISIDWORK = "osisIDWork";
 311  
     // OSIS defines the lang attribute as the one from the xml namespace
 312  
     // Typical usage element.setAttribute(OSISUtil.OSIS_ATTR_LANG, lang,
 313  
     // Namespace.XML_NAMESPACE);
 314  
     public static final String OSIS_ATTR_LANG = "lang";
 315  
     public static final String ATTRIBUTE_DIV_BOOK = "book";
 316  
 
 317  
     /**
 318  
      * Prefix for OSIS IDs that refer to Bibles
 319  
      */
 320  
     private static final String OSISID_PREFIX_BIBLE = "Bible.";
 321  
 
 322  0
     private static final Set<String> EXTRA_BIBLICAL_ELEMENTS = new HashSet<String>(Arrays.asList(new String[] {
 323  
             OSIS_ELEMENT_NOTE, OSIS_ELEMENT_TITLE, OSIS_ELEMENT_REFERENCE
 324  
     }));
 325  
 
 326  
     /**
 327  
      * The log stream
 328  
      */
 329  0
     private static final Logger log = LoggerFactory.getLogger(OSISUtil.class);
 330  
 
 331  
 
 332  
     /**
 333  
      * Prevent instantiation
 334  
      */
 335  0
     private OSISUtil() {
 336  0
     }
 337  
 
 338  0
     private static OSISFactory factory = new OSISFactory();
 339  
 
 340  
     /**
 341  
      * An accessor for the OSISFactory that creates OSIS objects
 342  
      * 
 343  
      * @return the singleton OSISFactory
 344  
      */
 345  
     public static OSISFactory factory() {
 346  0
         return factory;
 347  
     }
 348  
 
 349  
     /**
 350  
      * A generic way of creating empty Elements of various types
 351  
      */
 352  0
     public static class OSISFactory {
 353  
         /**
 354  
         * @return an abbr element
 355  
         */
 356  
         public Element createAbbr() {
 357  0
             return new Element(OSIS_ELEMENT_ABBR);
 358  
         }
 359  
 
 360  
         /**
 361  
          * @return a seg element
 362  
          */
 363  
         public Element createSeg() {
 364  0
             return new Element(OSIS_ELEMENT_SEG);
 365  
         }
 366  
 
 367  
         /**
 368  
          * @return an osisText element
 369  
          */
 370  
         public Element createOsisText() {
 371  0
             return new Element(OSIS_ELEMENT_OSISTEXT);
 372  
         }
 373  
 
 374  
         /**
 375  
          * @return a header element
 376  
          */
 377  
         public Element createHeader() {
 378  0
             return new Element(OSIS_ELEMENT_HEADER);
 379  
         }
 380  
 
 381  
         /**
 382  
          * @return a work element
 383  
          */
 384  
         public Element createWork() {
 385  0
             return new Element(OSIS_ELEMENT_WORK);
 386  
         }
 387  
 
 388  
         /**
 389  
          * @return an osis element
 390  
          */
 391  
         public Element createOsis() {
 392  0
             return new Element(OSIS_ELEMENT_OSIS);
 393  
         }
 394  
 
 395  
         /**
 396  
          * @return a div element
 397  
          */
 398  
         public Element createDiv() {
 399  0
             return new Element(OSIS_ELEMENT_DIV);
 400  
         }
 401  
 
 402  
         /**
 403  
          * @return a cell element
 404  
          */
 405  
         public Element createCell() {
 406  0
             return new Element(OSIS_ELEMENT_CELL);
 407  
         }
 408  
 
 409  
         /**
 410  
          * @return a header cell element (akin to HTML's TH)
 411  
          */
 412  
         public Element createHeaderCell() {
 413  0
             Element ele = new Element(OSIS_ELEMENT_CELL);
 414  0
             ele.setAttribute(ATTRIBUTE_TABLE_ROLE, TABLE_ROLE_LABEL);
 415  0
             ele.setAttribute(ATTRIBUTE_CELL_ALIGN, CELL_ALIGN_CENTER);
 416  0
             return ele;
 417  
         }
 418  
 
 419  
         /**
 420  
          * @return a verse element
 421  
          */
 422  
         public Element createVerse() {
 423  0
             return new Element(OSIS_ELEMENT_VERSE);
 424  
         }
 425  
 
 426  
         /**
 427  
          * @return a w element
 428  
          */
 429  
         public Element createW() {
 430  0
             return new Element(OSIS_ELEMENT_W);
 431  
         }
 432  
 
 433  
         /**
 434  
          * @return a figure element
 435  
          */
 436  
         public Element createFigure() {
 437  0
             return new Element(OSIS_ELEMENT_FIGURE);
 438  
         }
 439  
 
 440  
         /**
 441  
          * @return a foreign element
 442  
          */
 443  
         public Element createForeign() {
 444  0
             return new Element(OSIS_ELEMENT_FOREIGN);
 445  
         }
 446  
 
 447  
         /**
 448  
          * @return an item element
 449  
          */
 450  
         public Element createItem() {
 451  0
             return new Element(OSIS_ELEMENT_ITEM);
 452  
         }
 453  
 
 454  
         /**
 455  
          * @return a p element
 456  
          */
 457  
         public Element createP() {
 458  0
             return new Element(OSIS_ELEMENT_P);
 459  
         }
 460  
 
 461  
         /**
 462  
          * @return a list element
 463  
          */
 464  
         public Element createList() {
 465  0
             return new Element(OSIS_ELEMENT_LIST);
 466  
         }
 467  
 
 468  
         /**
 469  
          * @return a q element
 470  
          */
 471  
         public Element createQ() {
 472  0
             return new Element(OSIS_ELEMENT_Q);
 473  
         }
 474  
 
 475  
         /**
 476  
          * @return a name element
 477  
          */
 478  
         public Element createName() {
 479  0
             return new Element(OSIS_ELEMENT_NAME);
 480  
         }
 481  
 
 482  
         /**
 483  
          * @return a note element
 484  
          */
 485  
         public Element createNote() {
 486  0
             return new Element(OSIS_ELEMENT_NOTE);
 487  
         }
 488  
 
 489  
         /**
 490  
          * @return a reference element
 491  
          */
 492  
         public Element createReference() {
 493  0
             return new Element(OSIS_ELEMENT_REFERENCE);
 494  
         }
 495  
 
 496  
         /**
 497  
          * @return a row element
 498  
          */
 499  
         public Element createRow() {
 500  0
             return new Element(OSIS_ELEMENT_ROW);
 501  
         }
 502  
 
 503  
         /**
 504  
          * @return a speaker element
 505  
          */
 506  
         public Element createSpeaker() {
 507  0
             return new Element(OSIS_ELEMENT_SPEAKER);
 508  
         }
 509  
 
 510  
         /**
 511  
          * @return a speech element
 512  
          */
 513  
         public Element createSpeech() {
 514  0
             return new Element(OSIS_ELEMENT_SPEECH);
 515  
         }
 516  
 
 517  
         /**
 518  
          * @return a table element
 519  
          */
 520  
         public Element createTable() {
 521  0
             return new Element(OSIS_ELEMENT_TABLE);
 522  
         }
 523  
 
 524  
        /**
 525  
         * @return a title element
 526  
         */
 527  
        public Element createTitle() {
 528  0
            return new Element(OSIS_ELEMENT_TITLE);
 529  
        }
 530  
 
 531  
         /**
 532  
          * Create a title marked as generated.
 533  
          * 
 534  
          * @return a generated title element
 535  
          */
 536  
         public Element createGeneratedTitle() {
 537  0
             Element title = new Element(OSIS_ELEMENT_TITLE);
 538  0
             title.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.GENERATED_CONTENT);
 539  0
             return title;
 540  
         }
 541  
 
 542  
         /**
 543  
          * Line Group
 544  
          * 
 545  
          * @return a lg element
 546  
          */
 547  
         public Element createLG() {
 548  0
             return new Element(OSIS_ELEMENT_LG);
 549  
         }
 550  
 
 551  
         /**
 552  
          * Line
 553  
          * 
 554  
          * @return a l element
 555  
          */
 556  
         public Element createL() {
 557  0
             return new Element(OSIS_ELEMENT_L);
 558  
         }
 559  
 
 560  
         /**
 561  
          * Line Break
 562  
          * 
 563  
          * @return a lb element
 564  
          */
 565  
         public Element createLB() {
 566  0
             return new Element(OSIS_ELEMENT_LB);
 567  
         }
 568  
 
 569  
         /**
 570  
          * Highlight
 571  
          * 
 572  
          * @return a hi element
 573  
          */
 574  
         public Element createHI() {
 575  0
             return new Element(OSIS_ELEMENT_HI);
 576  
         }
 577  
 
 578  
         /**
 579  
          * Text
 580  
          * 
 581  
          * @param text the text for this element
 582  
          * @return a text element
 583  
          */
 584  
         public Text createText(String text) {
 585  0
             return new Text(text);
 586  
         }
 587  
     }
 588  
 
 589  
     /**
 590  
      * Dig past the osis and osisText element, if present, to get the meaningful
 591  
      * content of the document.
 592  
      * 
 593  
      * @param root the element from which to get a fragment
 594  
      * @return a fragment
 595  
      */
 596  
     public static List<Content> getFragment(Element root) {
 597  0
         if (root != null) {
 598  0
             Element content = root;
 599  0
             if (OSISUtil.OSIS_ELEMENT_OSIS.equals(root.getName())) {
 600  0
                 content = root.getChild(OSISUtil.OSIS_ELEMENT_OSISTEXT);
 601  
             }
 602  
 
 603  0
             if (OSISUtil.OSIS_ELEMENT_OSISTEXT.equals(root.getName())) {
 604  0
                 content = root.getChild(OSISUtil.OSIS_ELEMENT_DIV);
 605  
             }
 606  
 
 607  
             // At this point we are at something interesting, possibly null.
 608  
             // If this was a semantically valid OSIS document then it is a div.
 609  
             // As long as this node has one child dig deeper.
 610  0
             if (content != null && content.getContentSize() == 1) {
 611  0
                 Content firstChild = content.getContent(0);
 612  0
                 if (firstChild instanceof Element && OSISUtil.OSIS_ELEMENT_DIV.equals(((Element) firstChild).getName())) {
 613  0
                     content = (Element) firstChild;
 614  
                 }
 615  
             }
 616  
 
 617  0
             if (content != null) {
 618  0
                 return content.getContent();
 619  
             }
 620  
         }
 621  0
         return new ArrayList<Content>();
 622  
     }
 623  
 
 624  
     /**
 625  
      * Get the canonical text from an osis document consisting of a single
 626  
      * fragment. The document is assumed to be valid OSIS2.0 XML. While xml
 627  
      * valid is rigidly defined as meaning that an xml parser can validate the
 628  
      * document, it does not mean that the document is valid OSIS. This is a
 629  
      * semantic problem that is not validated. This method assumes that the root
 630  
      * element is also semantically valid.
 631  
      * 
 632  
      * <p>
 633  
      * This means that the top level element's tagname is osis. This can contain
 634  
      * either a osisText or an osisCorpus. If it is an osisCorpus, then it
 635  
      * contains an osisText. However, as a simplification, since JSword
 636  
      * constructs the whole doc for the fragment, osisCorpus can be ignored.
 637  
      * <p>
 638  
      * The osisText element contains a div element that is either a container or
 639  
      * a milestone. Again, JSword is providing the div element and it will be
 640  
      * provided as a container. It is this div that "contains" the actual
 641  
      * fragment.
 642  
      * </p>
 643  
      * <p>
 644  
      * A verse element may either be a container or a milestone. Sword OSIS
 645  
      * books differ in whether they provide the verse element. Most do not. The
 646  
      * few that do are using the container model, but it has been proposed that
 647  
      * milestones are the best practice.
 648  
      * </p>
 649  
      * 
 650  
      * <p>
 651  
      * The fragment may contain elements that are not a part of the original
 652  
      * text. These are things such as notes.
 653  
      * </p>
 654  
      * 
 655  
      * <p>
 656  
      * Milestones require special handling. Beginning milestones elements have
 657  
      * an sID attribute, while ending milestones have an eID with the same value
 658  
      * as the opening. So everything between the start and the corresponding end
 659  
      * is the content of the element. Also, for a given element, say div, they
 660  
      * have to be properly nested as if they were container elements.
 661  
      * </p>
 662  
      * 
 663  
      * @param root
 664  
      *            the whole osis document.
 665  
      * @return The canonical text without markup
 666  
      */
 667  
     public static String getCanonicalText(Element root) {
 668  
         // if someone passes a root element which has text in, we need to check whether it's worth processing.
 669  
         // For example. where you have a non-canonical title being passed in, we deal with this here.
 670  0
         if (!isCanonical(root)) {
 671  
             //no point in continuing...
 672  0
             return "";
 673  
         }
 674  
 
 675  0
         StringBuilder buffer = new StringBuilder();
 676  
 
 677  
         // Dig past osis, osisText, if present, to get to the real content.
 678  0
         List<Content> frag = OSISUtil.getFragment(root);
 679  
 
 680  0
         Iterator<Content> dit = frag.iterator();
 681  0
         String sID = null;
 682  0
         Content data = null;
 683  0
         Element ele = null;
 684  0
         while (dit.hasNext()) {
 685  0
             data = dit.next();
 686  0
             if (data instanceof Element) {
 687  0
                 ele = (Element) data;
 688  0
                 if (!isCanonical(ele)) {
 689  0
                     continue;
 690  
                 }
 691  
 
 692  0
                 if (ele.getName().equals(OSISUtil.OSIS_ELEMENT_VERSE)) {
 693  0
                     sID = ele.getAttributeValue(OSISUtil.OSIS_ATTR_SID);
 694  
                 }
 695  
 
 696  0
                 if (sID != null) {
 697  0
                     getCanonicalContent(ele, sID, dit, buffer);
 698  
                 } else {
 699  0
                     getCanonicalContent(ele, null, ele.getContent().iterator(), buffer);
 700  
                 }
 701  0
             } else if (data instanceof Text) {
 702  
                 // make sure that adjacent text elements are separated by
 703  
                 // whitespace
 704  
                 // TODO(dms): verify that the xml parser does not split words
 705  
                 // containing entities.
 706  0
                 int lastIndex = buffer.length() - 1;
 707  0
                 String text = ((Text) data).getText();
 708  
                 // Ignore empty text nodes and do not add 
 709  0
                 if (text.length() != 0) {
 710  
                     //do not add spaces when within a OSIS seg
 711  0
                     if (lastIndex >= 0 && !Character.isWhitespace(buffer.charAt(lastIndex)) && !Character.isWhitespace(text.charAt(0))) {
 712  0
                         buffer.append(' ');
 713  
                     }
 714  0
                     buffer.append(text);
 715  
                 }
 716  0
             }
 717  
         }
 718  
 
 719  0
         return buffer.toString().trim();
 720  
     }
 721  
 
 722  
     /**
 723  
      * A simplified plain text version of the data in this Element with all the
 724  
      * markup stripped out.
 725  
      * 
 726  
      * @param root
 727  
      *            the whole osis document.
 728  
      * @return The Bible text without markup
 729  
      */
 730  
     public static String getPlainText(Element root) {
 731  
         // Dig past osis, osisText, if present, to get to the real content.
 732  0
         return getTextContent(OSISUtil.getFragment(root));
 733  
     }
 734  
 
 735  
     /**
 736  
      * A space separate string containing Strong's numbers.
 737  
      * 
 738  
      * @param root
 739  
      *            the whole osis document.
 740  
      * @return The Strong's numbers in the text
 741  
      */
 742  
     public static String getStrongsNumbers(Element root) {
 743  0
         return getLexicalInformation(root, false);
 744  
     }
 745  
 
 746  
     /**
 747  
      * A '@' separated list of morphologies and strong numbers
 748  
      * 
 749  
      * @param root the osis element in question
 750  
      * @return the string
 751  
      */
 752  
     public static String getMorphologiesWithStrong(Element root) {
 753  0
         return getLexicalInformation(root, true);
 754  
     }
 755  
 
 756  
     /**
 757  
      * concatenates strong and morphology information together
 758  
      * 
 759  
      * @param root the osis element in question
 760  
      * @param includeMorphology whether to include morphology
 761  
      * @return root of the element
 762  
      */
 763  
     public static String getLexicalInformation(Element root, boolean includeMorphology) {
 764  0
         StringBuilder buffer = new StringBuilder();
 765  
 
 766  0
         for (Content content : getDeepContent(root, OSISUtil.OSIS_ELEMENT_W)) {
 767  0
             Element ele = (Element) content;
 768  0
             String attr = ele.getAttributeValue(OSISUtil.ATTRIBUTE_W_LEMMA);
 769  0
             if (attr != null) {
 770  0
                 Matcher matcher = strongsNumberPattern.matcher(attr);
 771  0
                 while (matcher.find()) {
 772  0
                     String strongsNum = matcher.group(1);
 773  0
                     if (buffer.length() > 0) {
 774  0
                         buffer.append(' ');
 775  
                     }
 776  
 
 777  0
                     if (includeMorphology) {
 778  
                         //if including morphology, we want 1 big field, separated with '@'
 779  0
                         strongsNum = strongsNum.replace(SPACE_SEPARATOR, MORPH_INFO_SEPARATOR);
 780  
                     }
 781  0
                     buffer.append(strongsNum);
 782  
 
 783  0
                     if (includeMorphology) {
 784  
                         //also include morphology if available
 785  0
                         String morph = ele.getAttributeValue(OSISUtil.ATTRIBUTE_W_MORPH);
 786  0
                         if (morph != null && morph.length() != 0) {
 787  0
                             buffer.append(MORPH_INFO_SEPARATOR);
 788  0
                             buffer.append(morph.replace(SPACE_SEPARATOR, MORPH_INFO_SEPARATOR));
 789  
                         }
 790  
                     }
 791  0
                 }
 792  
             }
 793  0
         }
 794  
 
 795  0
         return buffer.toString().trim();
 796  
     }
 797  
 
 798  
     /**
 799  
      * A space separate string containing osisID from the reference element.
 800  
      * We pass book and key because the xref may not be valid and it needs to be reported.
 801  
      *
 802  
      * @param book the book to which the references refer
 803  
      * @param key the verse containing the cross references
 804  
      * @param v11n the versification
 805  
      * @param root the osis element in question
 806  
      * @return The references in the text
 807  
      */
 808  
     public static String getReferences(Book book, Key key, Versification v11n, Element root) {
 809  0
         PassageKeyFactory keyf = PassageKeyFactory.instance();
 810  0
         Key collector = keyf.createEmptyKeyList(v11n);
 811  
 
 812  0
         for (Content content : getDeepContent(root, OSISUtil.OSIS_ELEMENT_REFERENCE)) {
 813  0
             Element ele = (Element) content;
 814  0
             String attr = ele.getAttributeValue(OSISUtil.OSIS_ATTR_REF);
 815  0
             if (attr != null) {
 816  
                 try {
 817  0
                     collector.addAll(keyf.getKey(v11n, attr));
 818  0
                 } catch (NoSuchKeyException e) {
 819  0
                     DataPolice.report(book, key, "Unable to parse: " + attr + " - No such reference:" + e.getMessage());
 820  0
                 }
 821  
             }
 822  0
         }
 823  
 
 824  0
         return collector.getOsisID();
 825  
     }
 826  
 
 827  
     /**
 828  
      * The text of non-reference notes.
 829  
      * 
 830  
      * @param root the whole OSIS document
 831  
      * @return The references in the text
 832  
      */
 833  
     public static String getNotes(Element root) {
 834  0
         StringBuilder buffer = new StringBuilder();
 835  
 
 836  0
         for (Content content : getDeepContent(root, OSISUtil.OSIS_ELEMENT_NOTE)) {
 837  0
             Element ele = (Element) content;
 838  0
             String attr = ele.getAttributeValue(OSISUtil.OSIS_ATTR_TYPE);
 839  0
             if (attr == null || !attr.equals(NOTETYPE_REFERENCE)) {
 840  0
                 if (buffer.length() > 0) {
 841  0
                     buffer.append(' ');
 842  
                 }
 843  0
                 buffer.append(OSISUtil.getTextContent(ele.getContent()));
 844  
             }
 845  0
         }
 846  
 
 847  0
         return buffer.toString();
 848  
     }
 849  
 
 850  
     /**
 851  
      * The text of non-reference notes.
 852  
      * 
 853  
      * @param root the whole OSIS document
 854  
      * @return The references in the text
 855  
      */
 856  
     public static String getHeadings(Element root) {
 857  0
         StringBuilder buffer = new StringBuilder();
 858  
 
 859  0
         for (Content content : getDeepContent(root, OSISUtil.OSIS_ELEMENT_TITLE)) {
 860  0
             Element ele = (Element) content;
 861  
 
 862  0
             if (buffer.length() > 0) {
 863  0
                 buffer.append(' ');
 864  
             }
 865  0
             buffer.append(OSISUtil.getTextContent(ele.getContent()));
 866  0
         }
 867  
 
 868  0
         return buffer.toString();
 869  
     }
 870  
 
 871  
     private static void getCanonicalContent(Element parent, String sID, Iterator<Content> iter, StringBuilder buffer) {
 872  0
         if (!isCanonical(parent)) {
 873  0
             return;
 874  
         }
 875  
 
 876  0
         Content data = null;
 877  0
         Element ele = null;
 878  0
         String eleName = null;
 879  0
         String eID = null;
 880  0
         while (iter.hasNext()) {
 881  0
             data = iter.next();
 882  0
             if (data instanceof Element) {
 883  0
                 ele = (Element) data;
 884  
                 // If the milestoned element is done then quit.
 885  
                 // This should be a eID=, that matches sID, from the same
 886  
                 // element.
 887  0
                 eleName = ele.getName();
 888  0
                 eID = ele.getAttributeValue(OSISUtil.OSIS_ATTR_SID);
 889  0
                 if (eID != null && eID.equals(sID) && eleName.equals(parent.getName())) {
 890  0
                     break;
 891  
                 }
 892  0
                 OSISUtil.getCanonicalContent(ele, sID, ele.getContent().iterator(), buffer);
 893  0
             } else if (data instanceof Text) {
 894  
                 // make sure that adjacent text elements are separated by
 895  
                 // whitespace
 896  
                 // Empty elements also produce whitespace.
 897  
                 // TODO(dms): verify that the xml parser does not split words
 898  
                 // containing entities.
 899  0
                 int lastIndex = buffer.length() - 1;
 900  0
                 String text = ((Text) data).getText();
 901  0
                 if (lastIndex >= 0 && !Character.isWhitespace(buffer.charAt(lastIndex)) && (text.length() == 0 || !Character.isWhitespace(text.charAt(0)))  && !OSIS_ELEMENT_SEG.equals(parent.getName())) {
 902  0
                     buffer.append(' ');
 903  
                 }
 904  0
                 buffer.append(text);
 905  0
             }
 906  
         }
 907  0
     }
 908  
 
 909  
     private static boolean isCanonical(Content content) {
 910  0
         boolean result = true;
 911  0
         if (content instanceof Element) {
 912  0
             Element element = (Element) content;
 913  
 
 914  
             // Ignore extra-biblical text
 915  0
             if (EXTRA_BIBLICAL_ELEMENTS.contains(element.getName())) {
 916  0
                 String canonical = element.getAttributeValue(OSISUtil.OSIS_ATTR_CANONICAL);
 917  0
                 result = Boolean.valueOf(canonical).booleanValue();
 918  
             }
 919  
         }
 920  
 
 921  0
         return result;
 922  
     }
 923  
 
 924  
     private static String getTextContent(List<Content> fragment) {
 925  0
         StringBuilder buffer = new StringBuilder();
 926  
 
 927  0
         for (Content next : fragment) {
 928  0
             recurseElement(next, buffer);
 929  
         }
 930  
 
 931  0
         return buffer.toString();
 932  
     }
 933  
 
 934  
     /**
 935  
      * Find all the instances of elements of type <code>find</code> under the
 936  
      * element <code>div</code>.
 937  
      * 
 938  
      * @param div the element to trawl
 939  
      * @param name the element name to search
 940  
      * @return the collection of matching content
 941  
      */
 942  
     public static Collection<Content> getDeepContent(Element div, String name) {
 943  0
         List<Content> reply = new ArrayList<Content>();
 944  0
         recurseDeepContent(div, name, reply);
 945  0
         return reply;
 946  
     }
 947  
 
 948  
     /**
 949  
      * Walk up the tree from the W to find out what verse we are in.
 950  
      * 
 951  
      * @param v11n the versification
 952  
      * @param ele
 953  
      *            The start point for our verse hunt.
 954  
      * @return The verse we are in
 955  
      * @throws BookException 
 956  
      */
 957  
     public static Verse getVerse(Versification v11n, Element ele) throws BookException {
 958  0
         if (ele.getName().equals(OSIS_ELEMENT_VERSE)) {
 959  
             // If the element is an OSIS Verse then this is fairly easy
 960  0
             String osisid = ele.getAttributeValue(OSIS_ATTR_OSISID);
 961  
 
 962  
             try {
 963  0
                 return VerseFactory.fromString(v11n, osisid);
 964  0
             } catch (NoSuchVerseException ex) {
 965  0
                 throw new BookException(JSOtherMsg.lookupText("OsisID not valid: {0}", osisid), ex);
 966  
             }
 967  
         }
 968  
 
 969  
         // So we just walk up the tree trying to find a verse
 970  0
         Parent parent = ele.getParent();
 971  0
         if (parent instanceof Element) {
 972  0
             return getVerse(v11n, (Element) parent);
 973  
         }
 974  
 
 975  0
         throw new BookException(JSOtherMsg.lookupText("Verse element could not be found"));
 976  
     }
 977  
 
 978  
     /**
 979  
      * Helper method to create the boilerplate headers in an OSIS document from
 980  
      * the current metadata object
 981  
      * 
 982  
      * @param bmd the book's meta data
 983  
      * @return the root of an OSIS document
 984  
      */
 985  
     public static Element createOsisFramework(BookMetaData bmd) {
 986  0
         Element osis = factory().createOsis();
 987  0
         String osisid = bmd.getInitials();
 988  
 
 989  0
         Element work = factory().createWork();
 990  0
         work.setAttribute(ATTRIBUTE_WORK_OSISWORK, osisid);
 991  
 
 992  0
         Element header = factory().createHeader();
 993  0
         header.addContent(work);
 994  
 
 995  0
         Element text = factory().createOsisText();
 996  0
         text.setAttribute(ATTRIBUTE_TEXT_OSISIDWORK, OSISID_PREFIX_BIBLE + osisid);
 997  0
         text.addContent(header);
 998  
 
 999  0
         osis.addContent(text);
 1000  
 
 1001  0
         return osis;
 1002  
     }
 1003  
 
 1004  
     /**
 1005  
      * Convert a Difference list into a pretty HTML report.
 1006  
      * 
 1007  
      * @param diffs
 1008  
      *            List of Difference objects
 1009  
      * @return HTML representation
 1010  
      */
 1011  
     public static List<Content> diffToOsis(List<Difference> diffs) {
 1012  0
         Element div = factory().createDiv();
 1013  
 
 1014  0
         for (int x = 0; x < diffs.size(); x++) {
 1015  0
             Difference diff = diffs.get(x);
 1016  0
             EditType editType = diff.getEditType(); // Mode (delete, equal,
 1017  
                                                     // insert)
 1018  0
             Text text = factory.createText(diff.getText()); // Text of change.
 1019  
 
 1020  0
             if (EditType.DELETE.equals(editType)) {
 1021  0
                 Element hi = factory().createHI();
 1022  0
                 hi.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.HI_LINETHROUGH);
 1023  0
                 hi.addContent(text);
 1024  0
                 div.addContent(hi);
 1025  0
             } else if (EditType.INSERT.equals(editType)) {
 1026  0
                 Element hi = factory().createHI();
 1027  0
                 hi.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.HI_UNDERLINE);
 1028  0
                 hi.addContent(text);
 1029  0
                 div.addContent(hi);
 1030  0
             } else {
 1031  0
                 div.addContent(text);
 1032  
             }
 1033  
         }
 1034  0
         return div.cloneContent();
 1035  
     }
 1036  
 
 1037  
     public static List<Content> rtfToOsis(String rtf) {
 1038  0
         Element div = factory().createDiv();
 1039  0
         Stack<Content> stack = new Stack<Content>();
 1040  0
         stack.push(div);
 1041  
 
 1042  0
         int strlen = rtf.length();
 1043  
 
 1044  0
         StringBuilder text = new StringBuilder(strlen);
 1045  
 
 1046  0
         int i = 0;
 1047  0
         for (i = 0; i < strlen; i++) {
 1048  0
             char curChar = rtf.charAt(i);
 1049  0
             if (curChar != '\\') {
 1050  0
                 text.append(curChar);
 1051  0
                 continue;
 1052  
             }
 1053  
 
 1054  
             // The following are ordered from most to least common
 1055  
             // and when one is a prefix of another, it follows.
 1056  
 
 1057  
             // Used to end all open attributes. Only \qc in our implementation.
 1058  0
             if (rtf.startsWith("\\pard", i)) {
 1059  0
                 Element currentElement = (Element) stack.pop();
 1060  0
                 currentElement.addContent(text.toString());
 1061  0
                 text.delete(0, text.length());
 1062  0
                 stack.clear();
 1063  0
                 stack.push(div);
 1064  0
                 i += (i + 5 < strlen && rtf.charAt(i + 5) == ' ') ? 5 : 4;
 1065  0
                 continue;
 1066  
             }
 1067  
 
 1068  
             // Simulate a paragraph break.
 1069  0
             if (rtf.startsWith("\\par", i)) {
 1070  0
                 Element currentElement = (Element) stack.peek();
 1071  0
                 currentElement.addContent(text.toString());
 1072  0
                 text.delete(0, text.length());
 1073  0
                 currentElement.addContent(OSISUtil.factory.createLB());
 1074  0
                 i += (i + 4 < strlen && rtf.charAt(i + 4) == ' ') ? 4 : 3;
 1075  0
                 continue;
 1076  
             }
 1077  
 
 1078  
             // OSIS does not have the notion of centered text.
 1079  
             // So we define our own
 1080  0
             if (rtf.startsWith("\\qc", i)) {
 1081  0
                 Element centerDiv = OSISUtil.factory.createDiv();
 1082  0
                 centerDiv.setAttribute(OSIS_ATTR_TYPE, "x-center");
 1083  0
                 Element currentElement = (Element) stack.peek();
 1084  0
                 currentElement.addContent(text.toString());
 1085  0
                 text.delete(0, text.length());
 1086  0
                 currentElement.addContent(centerDiv);
 1087  0
                 stack.push(centerDiv);
 1088  
                 // skip following space, if any
 1089  0
                 i += (i + 3 < strlen && rtf.charAt(i + 3) == ' ') ? 3 : 2;
 1090  0
                 continue;
 1091  
             }
 1092  
 
 1093  
             // convert Unicode representations to Unicode
 1094  0
             if (rtf.startsWith("\\u", i)) {
 1095  0
                 StringBuilder buf = new StringBuilder();
 1096  0
                 i += 2;
 1097  0
                 while (i < strlen) {
 1098  0
                     char curDigit = rtf.charAt(i);
 1099  0
                     if (curDigit != '-' && !Character.isDigit(curDigit)) {
 1100  0
                         break;
 1101  
                     }
 1102  0
                     buf.append(curDigit);
 1103  0
                     i++;
 1104  0
                 }
 1105  
                 // At this point:
 1106  
                 // buf contains the numeric representation of the number, 16-bit
 1107  
                 // signed
 1108  
                 // charAt(i) is the substitution character if Unicode is not
 1109  
                 // supported
 1110  0
                 int value = Integer.parseInt(buf.toString());
 1111  0
                 if (value < 0) {
 1112  0
                     value += 65536;
 1113  
                 }
 1114  0
                 text.append((char) value);
 1115  
                 // don't advance since i is on the substitute character.
 1116  0
                 continue;
 1117  
             }
 1118  
 
 1119  
             // close italic and bold
 1120  0
             if (rtf.startsWith("\\i0", i) || rtf.startsWith("\\b0", i)) {
 1121  0
                 Element currentElement = (Element) stack.pop();
 1122  0
                 currentElement.addContent(text.toString());
 1123  0
                 text.delete(0, text.length());
 1124  0
                 i += (i + 3 < strlen && rtf.charAt(i + 3) == ' ') ? 3 : 2;
 1125  0
                 continue;
 1126  
             }
 1127  
 
 1128  
             // Skip escaped whitespace
 1129  0
             if (rtf.startsWith(" ", i) || rtf.startsWith("\n", i)) {
 1130  0
                 i += 1;
 1131  0
                 continue;
 1132  
             }
 1133  
 
 1134  
             // start italic
 1135  0
             if (rtf.startsWith("\\i", i)) {
 1136  0
                 Element hiElement = OSISUtil.factory.createHI();
 1137  0
                 hiElement.setAttribute(OSIS_ATTR_TYPE, HI_ITALIC);
 1138  0
                 Element currentElement = (Element) stack.peek();
 1139  0
                 currentElement.addContent(text.toString());
 1140  0
                 text.delete(0, text.length());
 1141  0
                 currentElement.addContent(hiElement);
 1142  0
                 stack.push(hiElement);
 1143  0
                 i += (i + 2 < strlen && rtf.charAt(i + 2) == ' ') ? 2 : 1;
 1144  0
                 continue;
 1145  
             }
 1146  
 
 1147  
             // start bold
 1148  0
             if (rtf.startsWith("\\b", i)) {
 1149  0
                 Element hiElement = OSISUtil.factory.createHI();
 1150  0
                 hiElement.setAttribute(OSIS_ATTR_TYPE, HI_BOLD);
 1151  0
                 Element currentElement = (Element) stack.peek();
 1152  0
                 currentElement.addContent(text.toString());
 1153  0
                 text.delete(0, text.length());
 1154  0
                 currentElement.addContent(hiElement);
 1155  0
                 stack.push(hiElement);
 1156  0
                 i += (i + 2 < strlen && rtf.charAt(i + 2) == ' ') ? 2 : 1;
 1157  0
                 continue;
 1158  
             }
 1159  
 
 1160  
         }
 1161  
 
 1162  
         // If there is any text that has not been consumed
 1163  0
         if (text.length() > 0) {
 1164  0
             div.addContent(text.toString());
 1165  
         }
 1166  
         // div.addContent(text.toString());
 1167  
         // // If the fragment is already in a document, then use that.
 1168  
         // Document doc = div.getDocument();
 1169  
         // if (doc == null)
 1170  
         // {
 1171  
         // doc = new Document(div);
 1172  
         // }
 1173  
         // SAXEventProvider ep = new JDOMSAXEventProvider(doc);
 1174  
         // ContentHandler osis = new
 1175  
         // PrettySerializingContentHandler(FormatType.CLASSIC_INDENT);
 1176  
         // try
 1177  
         // {
 1178  
         // ep.provideSAXEvents(osis);
 1179  
         // }
 1180  
         // catch (SAXException e)
 1181  
         // {
 1182  
         // e.printStackTrace();
 1183  
         // }
 1184  
         // System.err.println(osis.toString());
 1185  0
         return div.cloneContent();
 1186  
     }
 1187  
 
 1188  
     /**
 1189  
      * Find all the instances of elements of type <code>find</code> under the
 1190  
      * element <code>div</code>. For internal use only.
 1191  
      * 
 1192  
      * @param start the node under which searches occur
 1193  
      * @param name element name to search
 1194  
      * @param reply the list to modify with matching content
 1195  
      */
 1196  
     private static void recurseDeepContent(Element start, String name, List<Content> reply) {
 1197  0
         if (start.getName().equals(name)) {
 1198  0
             reply.add(start);
 1199  
         }
 1200  
 
 1201  
 //        Content data = null;
 1202  0
         Element ele = null;
 1203  0
         for (Content data : start.getContent()) {
 1204  0
             if (data instanceof Element) {
 1205  0
                 ele = (Element) data;
 1206  0
                 recurseDeepContent(ele, name, reply);
 1207  
             }
 1208  
         }
 1209  0
     }
 1210  
 
 1211  
     /**
 1212  
      * If we have a String just add it to the buffer, but if we have an Element
 1213  
      * then try to dig the strings out of it.
 1214  
      * 
 1215  
      * @param sub a sub element or text node
 1216  
      * @param buffer the buffer to build on match
 1217  
      */
 1218  
     private static void recurseElement(Object sub, StringBuilder buffer) {
 1219  0
         if (sub instanceof Text) {
 1220  0
             buffer.append(((Text) sub).getText());
 1221  0
         } else if (sub instanceof Element) {
 1222  0
             recurseChildren((Element) sub, buffer);
 1223  
         } else {
 1224  0
             log.error("unknown type: {}", sub.getClass().getName());
 1225  
         }
 1226  0
     }
 1227  
 
 1228  
     /**
 1229  
      * Helper to extract the Strings from a nest of JDOM elements
 1230  
      * 
 1231  
      * @param ele
 1232  
      *            The JDOM Element to dig into
 1233  
      * @param buffer
 1234  
      *            The place we accumulate strings.
 1235  
      */
 1236  
     private static void recurseChildren(Element ele, StringBuilder buffer) {
 1237  
         // ele is a JDOM Element that might have a getContent() method
 1238  0
         for (Content sub : ele.getContent()) {
 1239  0
             recurseElement(sub, buffer);
 1240  
         }
 1241  0
     }
 1242  
 
 1243  0
     private static String strongsNumber = "strong:([GgHh][0-9]+!?[A-Za-z]*)";
 1244  0
     private static Pattern strongsNumberPattern = Pattern.compile(strongsNumber);
 1245  
 }