1
21 package org.crosswire.jsword.book;
22
23 import java.util.ArrayList;
24 import java.util.Arrays;
25 import java.util.Collection;
26 import java.util.HashSet;
27 import java.util.Iterator;
28 import java.util.List;
29 import java.util.Set;
30 import java.util.Stack;
31 import java.util.regex.Matcher;
32 import java.util.regex.Pattern;
33
34 import org.crosswire.common.diff.Difference;
35 import org.crosswire.common.diff.EditType;
36 import org.crosswire.jsword.JSOtherMsg;
37 import org.crosswire.jsword.passage.Key;
38 import org.crosswire.jsword.passage.NoSuchKeyException;
39 import org.crosswire.jsword.passage.NoSuchVerseException;
40 import org.crosswire.jsword.passage.PassageKeyFactory;
41 import org.crosswire.jsword.passage.Verse;
42 import org.crosswire.jsword.passage.VerseFactory;
43 import org.crosswire.jsword.versification.Versification;
44 import org.jdom2.Content;
45 import org.jdom2.Element;
46 import org.jdom2.Parent;
47 import org.jdom2.Text;
48 import org.slf4j.Logger;
49 import org.slf4j.LoggerFactory;
50
51
58 public final class OSISUtil {
59 private static final char SPACE_SEPARATOR = ' ';
60 private static final char MORPH_INFO_SEPARATOR = '@';
61
62
65
68 public static final String HI_ACROSTIC = "acrostic";
69
70
73 public static final String HI_BOLD = "bold";
74
75
78 public static final String HI_EMPHASIS = "emphasis";
79
80
83 public static final String HI_ILLUMINATED = "illuminated";
84
85
88 public static final String HI_ITALIC = "italic";
89
90
93 public static final String HI_LINETHROUGH = "line-through";
94
95
98 public static final String HI_NORMAL = "normal";
99
100
103 public static final String HI_SMALL_CAPS = "small-caps";
104
105
108 public static final String HI_SUB = "sub";
109
110
113 public static final String HI_SUPER = "super";
114
115
118 public static final String HI_UNDERLINE = "underline";
119
120
123 public static final String HI_X_CAPS = "x-caps";
124
125
128 public static final String HI_X_BIG = "x-big";
129
130
133 public static final String HI_X_SMALL = "x-small";
134
135
138 public static final String HI_X_TT = "x-tt";
139
140
144 public static final String SEG_JUSTIFYRIGHT = "text-align: right;";
145
146
150 public static final String SEG_JUSTIFYLEFT = "text-align: left;";
151
152
156 public static final String SEG_CENTER = "text-align: center;";
157
158
162 public static final String DIV_PRE = "x-pre";
163
164
168 public static final String SEG_COLORPREFIX = "color: ";
169
170
174 public static final String SEG_SIZEPREFIX = "font-size: ";
175
176
179 public static final String TYPE_X_PREFIX = "x-";
180
181
184 public static final String NOTETYPE_STUDY = "x-StudyNote";
185
186
189 public static final String NOTETYPE_REFERENCE = "crossReference";
190
191
194 public static final String VARIANT_TYPE = "x-variant";
195 public static final String VARIANT_CLASS = "x-";
196
197
200 public static final String GENERATED_CONTENT = "x-gen";
201
202
205 public static final String POS_TYPE = "x-pos";
206
207
210 public static final String DEF_TYPE = "x-def";
211
212
215 public static final String LEMMA_STRONGS = "strong:";
216 public static final String MORPH_ROBINSONS = "robinson:";
217
218
221 public static final String MORPH_STRONGS = "x-StrongsMorph:T";
222
223
227 public static final String Q_BLOCK = "blockquote";
228
229
232 public static final String Q_CITATION = "citation";
233
234
237 public static final String Q_EMBEDDED = "embedded";
238
239
242 public static final String LIST_ORDERED = "x-ordered";
243 public static final String LIST_UNORDERED = "x-unordered";
244
245
249 public static final String TABLE_ROLE_LABEL = "label";
250
251
254 public static final String CELL_ALIGN_LEFT = "left";
255 public static final String CELL_ALIGN_RIGHT = "right";
256 public static final String CELL_ALIGN_CENTER = "center";
257 public static final String CELL_ALIGN_JUSTIFY = "justify";
258 public static final String CELL_ALIGN_START = "start";
259 public static final String CELL_ALIGN_END = "end";
260
261 public static final String OSIS_ELEMENT_ABBR = "abbr";
262 public static final String OSIS_ELEMENT_TITLE = "title";
263 public static final String OSIS_ELEMENT_TABLE = "table";
264 public static final String OSIS_ELEMENT_SPEECH = "speech";
265 public static final String OSIS_ELEMENT_SPEAKER = "speaker";
266 public static final String OSIS_ELEMENT_ROW = "row";
267 public static final String OSIS_ELEMENT_REFERENCE = "reference";
268 public static final String OSIS_ELEMENT_NOTE = "note";
269 public static final String OSIS_ELEMENT_NAME = "name";
270 public static final String OSIS_ELEMENT_Q = "q";
271 public static final String OSIS_ELEMENT_LIST = "list";
272 public static final String OSIS_ELEMENT_P = "p";
273 public static final String OSIS_ELEMENT_ITEM = "item";
274 public static final String OSIS_ELEMENT_FIGURE = "figure";
275 public static final String OSIS_ELEMENT_FOREIGN = "foreign";
276 public static final String OSIS_ELEMENT_W = "w";
277 public static final String OSIS_ELEMENT_CHAPTER = "chapter";
278 public static final String OSIS_ELEMENT_VERSE = "verse";
279 public static final String OSIS_ELEMENT_CELL = "cell";
280 public static final String OSIS_ELEMENT_DIV = "div";
281 public static final String OSIS_ELEMENT_OSIS = "osis";
282 public static final String OSIS_ELEMENT_WORK = "work";
283 public static final String OSIS_ELEMENT_HEADER = "header";
284 public static final String OSIS_ELEMENT_OSISTEXT = "osisText";
285 public static final String OSIS_ELEMENT_SEG = "seg";
286 public static final String OSIS_ELEMENT_LG = "lg";
287 public static final String OSIS_ELEMENT_L = "l";
288 public static final String OSIS_ELEMENT_LB = "lb";
289 public static final String OSIS_ELEMENT_HI = "hi";
290
291 public static final String ATTRIBUTE_TEXT_OSISIDWORK = "osisIDWork";
292 public static final String ATTRIBUTE_WORK_OSISWORK = "osisWork";
293 public static final String OSIS_ATTR_OSISID = "osisID";
294 public static final String OSIS_ATTR_SID = "sID";
295 public static final String OSIS_ATTR_EID = "eID";
296 public static final String ATTRIBUTE_W_LEMMA = "lemma";
297 public static final String ATTRIBUTE_FIGURE_SRC = "src";
298 public static final String ATTRIBUTE_TABLE_ROLE = "role";
299 public static final String ATTRIBUTE_CELL_ALIGN = "align";
300 public static final String OSIS_ATTR_TYPE = "type";
301 public static final String OSIS_ATTR_CANONICAL = "canonical";
302 public static final String OSIS_ATTR_SUBTYPE = "subType";
303 public static final String OSIS_ATTR_REF = "osisRef";
304 public static final String OSIS_ATTR_LEVEL = "level";
305 public static final String ATTRIBUTE_SPEAKER_WHO = "who";
306 public static final String ATTRIBUTE_Q_WHO = "who";
307 public static final String ATTRIBUTE_W_MORPH = "morph";
308 public static final String ATTRIBUTE_OSISTEXT_OSISIDWORK = "osisIDWork";
309 public static final String OSIS_ATTR_LANG = "lang";
313 public static final String ATTRIBUTE_DIV_BOOK = "book";
314
315
318 private static final String OSISID_PREFIX_BIBLE = "Bible.";
319
320 private static final Set<String> EXTRA_BIBLICAL_ELEMENTS = new HashSet<String>(Arrays.asList(new String[] {
321 OSIS_ELEMENT_NOTE, OSIS_ELEMENT_TITLE, OSIS_ELEMENT_REFERENCE
322 }));
323
324
327 private static final Logger log = LoggerFactory.getLogger(OSISUtil.class);
328
329
330
/**
 * Not instantiable: every member of this class is static.
 */
private OSISUtil() {
}

/**
 * The one factory instance handed out by {@link #factory()}. It is never
 * reassigned, so it is declared final to keep the shared reference stable.
 */
private static final OSISFactory factory = new OSISFactory();

/**
 * Gives access to the shared element factory.
 *
 * @return the factory instance shared by all callers
 */
public static OSISFactory factory() {
    return factory;
}
344
345
348 public static class OSISFactory {
349
352 public Element createAbbr() {
353 return new Element(OSIS_ELEMENT_ABBR);
354 }
355
356
359 public Element createSeg() {
360 return new Element(OSIS_ELEMENT_SEG);
361 }
362
363
366 public Element createOsisText() {
367 return new Element(OSIS_ELEMENT_OSISTEXT);
368 }
369
370
373 public Element createHeader() {
374 return new Element(OSIS_ELEMENT_HEADER);
375 }
376
377
380 public Element createWork() {
381 return new Element(OSIS_ELEMENT_WORK);
382 }
383
384
387 public Element createOsis() {
388 return new Element(OSIS_ELEMENT_OSIS);
389 }
390
391
394 public Element createDiv() {
395 return new Element(OSIS_ELEMENT_DIV);
396 }
397
398
401 public Element createCell() {
402 return new Element(OSIS_ELEMENT_CELL);
403 }
404
405
408 public Element createHeaderCell() {
409 Element ele = new Element(OSIS_ELEMENT_CELL);
410 ele.setAttribute(ATTRIBUTE_TABLE_ROLE, TABLE_ROLE_LABEL);
411 ele.setAttribute(ATTRIBUTE_CELL_ALIGN, CELL_ALIGN_CENTER);
412 return ele;
413 }
414
415
418 public Element createVerse() {
419 return new Element(OSIS_ELEMENT_VERSE);
420 }
421
422
425 public Element createW() {
426 return new Element(OSIS_ELEMENT_W);
427 }
428
429
432 public Element createFigure() {
433 return new Element(OSIS_ELEMENT_FIGURE);
434 }
435
436
439 public Element createForeign() {
440 return new Element(OSIS_ELEMENT_FOREIGN);
441 }
442
443
446 public Element createItem() {
447 return new Element(OSIS_ELEMENT_ITEM);
448 }
449
450
453 public Element createP() {
454 return new Element(OSIS_ELEMENT_P);
455 }
456
457
460 public Element createList() {
461 return new Element(OSIS_ELEMENT_LIST);
462 }
463
464
467 public Element createQ() {
468 return new Element(OSIS_ELEMENT_Q);
469 }
470
471
474 public Element createName() {
475 return new Element(OSIS_ELEMENT_NAME);
476 }
477
478
481 public Element createNote() {
482 return new Element(OSIS_ELEMENT_NOTE);
483 }
484
485
488 public Element createReference() {
489 return new Element(OSIS_ELEMENT_REFERENCE);
490 }
491
492
495 public Element createRow() {
496 return new Element(OSIS_ELEMENT_ROW);
497 }
498
499
502 public Element createSpeaker() {
503 return new Element(OSIS_ELEMENT_SPEAKER);
504 }
505
506
509 public Element createSpeech() {
510 return new Element(OSIS_ELEMENT_SPEECH);
511 }
512
513
516 public Element createTable() {
517 return new Element(OSIS_ELEMENT_TABLE);
518 }
519
520
523 public Element createTitle() {
524 return new Element(OSIS_ELEMENT_TITLE);
525 }
526
527
530 public Element createGeneratedTitle() {
531 Element title = new Element(OSIS_ELEMENT_TITLE);
532 title.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.GENERATED_CONTENT);
533 return title;
534 }
535
536
539 public Element createLG() {
540 return new Element(OSIS_ELEMENT_LG);
541 }
542
543
546 public Element createL() {
547 return new Element(OSIS_ELEMENT_L);
548 }
549
550
553 public Element createLB() {
554 return new Element(OSIS_ELEMENT_LB);
555 }
556
557
560 public Element createHI() {
561 return new Element(OSIS_ELEMENT_HI);
562 }
563
564
567 public Text createText(String text) {
568 return new Text(text);
569 }
570 }
571
572
578 public static List<Content> getFragment(Element root) {
579 if (root != null) {
580 Element content = root;
581 if (OSISUtil.OSIS_ELEMENT_OSIS.equals(root.getName())) {
582 content = root.getChild(OSISUtil.OSIS_ELEMENT_OSISTEXT);
583 }
584
585 if (OSISUtil.OSIS_ELEMENT_OSISTEXT.equals(root.getName())) {
586 content = root.getChild(OSISUtil.OSIS_ELEMENT_DIV);
587 }
588
589 if (content != null && content.getContentSize() == 1) {
593 Content firstChild = content.getContent(0);
594 if (firstChild instanceof Element && OSISUtil.OSIS_ELEMENT_DIV.equals(((Element) firstChild).getName())) {
595 content = (Element) firstChild;
596 }
597 }
598
599 if (content != null) {
600 return content.getContent();
601 }
602 }
603 return new ArrayList<Content>();
604 }
605
606
649 public static String getCanonicalText(Element root) {
650 StringBuilder buffer = new StringBuilder();
651
652 List<Content> frag = OSISUtil.getFragment(root);
654
655 Iterator<Content> dit = frag.iterator();
656 String sID = null;
657 Content data = null;
658 Element ele = null;
659 while (dit.hasNext()) {
660 data = dit.next();
661 if (data instanceof Element) {
662 ele = (Element) data;
663 if (!isCanonical(ele)) {
664 continue;
665 }
666
667 if (ele.getName().equals(OSISUtil.OSIS_ELEMENT_VERSE)) {
668 sID = ele.getAttributeValue(OSISUtil.OSIS_ATTR_SID);
669 }
670
671 if (sID != null) {
672 getCanonicalContent(ele, sID, dit, buffer);
673 } else {
674 getCanonicalContent(ele, null, ele.getContent().iterator(), buffer);
675 }
676 } else if (data instanceof Text) {
677 int lastIndex = buffer.length() - 1;
682 String text = ((Text) data).getText();
683 if (text.length() != 0) {
685 if (lastIndex >= 0 && !Character.isWhitespace(buffer.charAt(lastIndex)) && !Character.isWhitespace(text.charAt(0))) {
687 buffer.append(' ');
688 }
689 buffer.append(text);
690 }
691 }
692 }
693
694 return buffer.toString().trim();
695 }
696
697
703 public static String getPlainText(Element root) {
704 return getTextContent(OSISUtil.getFragment(root));
706 }
707
708
713 public static String getStrongsNumbers(Element root) {
714 return getLexicalInformation(root, false);
715 }
716
717
722 public static String getMorphologiesWithStrong(Element root) {
723 return getLexicalInformation(root, true);
724 }
725
726
732 public static String getLexicalInformation(Element root, boolean includeMorphology) {
733 StringBuilder buffer = new StringBuilder();
734
735 for (Content content : getDeepContent(root, OSISUtil.OSIS_ELEMENT_W)) {
736 Element ele = (Element) content;
737 String attr = ele.getAttributeValue(OSISUtil.ATTRIBUTE_W_LEMMA);
738 if (attr != null) {
739 Matcher matcher = strongsNumberPattern.matcher(attr);
740 while (matcher.find()) {
741 String strongsNum = matcher.group(1);
742 if (buffer.length() > 0) {
743 buffer.append(' ');
744 }
745
746 if (includeMorphology) {
747 strongsNum = strongsNum.replace(SPACE_SEPARATOR, MORPH_INFO_SEPARATOR);
749 }
750 buffer.append(strongsNum);
751
752 if (includeMorphology) {
753 String morph = ele.getAttributeValue(OSISUtil.ATTRIBUTE_W_MORPH);
755 if (morph != null && morph.length() != 0) {
756 buffer.append(MORPH_INFO_SEPARATOR);
757 buffer.append(morph.replace(SPACE_SEPARATOR, MORPH_INFO_SEPARATOR));
758 }
759 }
760 }
761 }
762 }
763
764 return buffer.toString().trim();
765 }
766
767
772 public static String getReferences(Versification v11n, Element root) {
773 PassageKeyFactory keyf = PassageKeyFactory.instance();
774 Key collector = keyf.createEmptyKeyList(v11n);
775
776 for (Content content : getDeepContent(root, OSISUtil.OSIS_ELEMENT_REFERENCE)) {
777 Element ele = (Element) content;
778 String attr = ele.getAttributeValue(OSISUtil.OSIS_ATTR_REF);
779 if (attr != null) {
780 try {
781 Key key = keyf.getKey(v11n, attr);
782 collector.addAll(key);
783 } catch (NoSuchKeyException e) {
784 log.warn("Unable to parse: {}", attr, e);
785 }
786 }
787 }
788
789 return collector.getOsisID();
790 }
791
792
797 public static String getNotes(Element root) {
798 StringBuilder buffer = new StringBuilder();
799
800 for (Content content : getDeepContent(root, OSISUtil.OSIS_ELEMENT_NOTE)) {
801 Element ele = (Element) content;
802 String attr = ele.getAttributeValue(OSISUtil.OSIS_ATTR_TYPE);
803 if (attr == null || !attr.equals(NOTETYPE_REFERENCE)) {
804 if (buffer.length() > 0) {
805 buffer.append(' ');
806 }
807 buffer.append(OSISUtil.getTextContent(ele.getContent()));
808 }
809 }
810
811 return buffer.toString();
812 }
813
814
819 public static String getHeadings(Element root) {
820 StringBuilder buffer = new StringBuilder();
821
822 for (Content content : getDeepContent(root, OSISUtil.OSIS_ELEMENT_TITLE)) {
823 Element ele = (Element) content;
824
825 if (buffer.length() > 0) {
826 buffer.append(' ');
827 }
828 buffer.append(OSISUtil.getTextContent(ele.getContent()));
829 }
830
831 return buffer.toString();
832 }
833
834 private static void getCanonicalContent(Element parent, String sID, Iterator<Content> iter, StringBuilder buffer) {
835 if (!isCanonical(parent)) {
836 return;
837 }
838
839 Content data = null;
840 Element ele = null;
841 String eleName = null;
842 String eID = null;
843 while (iter.hasNext()) {
844 data = iter.next();
845 if (data instanceof Element) {
846 ele = (Element) data;
847 eleName = ele.getName();
851 eID = ele.getAttributeValue(OSISUtil.OSIS_ATTR_SID);
852 if (eID != null && eID.equals(sID) && eleName.equals(parent.getName())) {
853 break;
854 }
855 OSISUtil.getCanonicalContent(ele, sID, ele.getContent().iterator(), buffer);
856 } else if (data instanceof Text) {
857 int lastIndex = buffer.length() - 1;
863 String text = ((Text) data).getText();
864 if (lastIndex >= 0 && !Character.isWhitespace(buffer.charAt(lastIndex)) && (text.length() == 0 || !Character.isWhitespace(text.charAt(0))) && !OSIS_ELEMENT_SEG.equals(parent.getName())) {
865 buffer.append(' ');
866 }
867 buffer.append(text);
868 }
869 }
870 }
871
872 private static boolean isCanonical(Content content) {
873 boolean result = true;
874 if (content instanceof Element) {
875 Element element = (Element) content;
876
877 if (EXTRA_BIBLICAL_ELEMENTS.contains(element.getName())) {
879 String canonical = element.getAttributeValue(OSISUtil.OSIS_ATTR_CANONICAL);
880 result = Boolean.valueOf(canonical).booleanValue();
881 }
882 }
883
884 return result;
885 }
886
887 private static String getTextContent(List<Content> fragment) {
888 StringBuilder buffer = new StringBuilder();
889
890 for (Content next : fragment) {
891 recurseElement(next, buffer);
892 }
893
894 return buffer.toString();
895 }
896
897
901 public static Collection<Content> getDeepContent(Element div, String name) {
902 List<Content> reply = new ArrayList<Content>();
903 recurseDeepContent(div, name, reply);
904 return reply;
905 }
906
907
914 public static Verse getVerse(Versification v11n, Element ele) throws BookException {
915 if (ele.getName().equals(OSIS_ELEMENT_VERSE)) {
916 String osisid = ele.getAttributeValue(OSIS_ATTR_OSISID);
918
919 try {
920 return VerseFactory.fromString(v11n, osisid);
921 } catch (NoSuchVerseException ex) {
922 throw new BookException(JSOtherMsg.lookupText("OsisID not valid: {0}", osisid), ex);
923 }
924 }
925
926 Parent parent = ele.getParent();
928 if (parent instanceof Element) {
929 return getVerse(v11n, (Element) parent);
930 }
931
932 throw new BookException(JSOtherMsg.lookupText("Verse element could not be found"));
933 }
934
935
939 public static Element createOsisFramework(BookMetaData bmd) {
940 Element osis = factory().createOsis();
941 String osisid = bmd.getInitials();
942
943 Element work = factory().createWork();
944 work.setAttribute(ATTRIBUTE_WORK_OSISWORK, osisid);
945
946 Element header = factory().createHeader();
947 header.addContent(work);
948
949 Element text = factory().createOsisText();
950 text.setAttribute(ATTRIBUTE_TEXT_OSISIDWORK, OSISID_PREFIX_BIBLE + osisid);
951 text.addContent(header);
952
953 osis.addContent(text);
954
955 return osis;
956 }
957
958
965 public static List<Content> diffToOsis(List<Difference> diffs) {
966 Element div = factory().createDiv();
967
968 for (int x = 0; x < diffs.size(); x++) {
969 Difference diff = diffs.get(x);
970 EditType editType = diff.getEditType(); Text text = factory.createText(diff.getText());
974 if (EditType.DELETE.equals(editType)) {
975 Element hi = factory().createHI();
976 hi.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.HI_LINETHROUGH);
977 hi.addContent(text);
978 div.addContent(hi);
979 } else if (EditType.INSERT.equals(editType)) {
980 Element hi = factory().createHI();
981 hi.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.HI_UNDERLINE);
982 hi.addContent(text);
983 div.addContent(hi);
984 } else {
985 div.addContent(text);
986 }
987 }
988 return div.cloneContent();
989 }
990
991 public static List<Content> rtfToOsis(String rtf) {
992 Element div = factory().createDiv();
993 Stack<Content> stack = new Stack<Content>();
994 stack.push(div);
995
996 int strlen = rtf.length();
997
998 StringBuilder text = new StringBuilder(strlen);
999
1000 int i = 0;
1001 for (i = 0; i < strlen; i++) {
1002 char curChar = rtf.charAt(i);
1003 if (curChar != '\\') {
1004 text.append(curChar);
1005 continue;
1006 }
1007
1008
1011 if (rtf.startsWith("\\pard", i)) {
1013 Element currentElement = (Element) stack.pop();
1014 currentElement.addContent(text.toString());
1015 text.delete(0, text.length());
1016 stack.clear();
1017 stack.push(div);
1018 i += (i + 5 < strlen && rtf.charAt(i + 5) == ' ') ? 5 : 4;
1019 continue;
1020 }
1021
1022 if (rtf.startsWith("\\par", i)) {
1024 Element currentElement = (Element) stack.peek();
1025 currentElement.addContent(text.toString());
1026 text.delete(0, text.length());
1027 currentElement.addContent(OSISUtil.factory.createLB());
1028 i += (i + 4 < strlen && rtf.charAt(i + 4) == ' ') ? 4 : 3;
1029 continue;
1030 }
1031
1032 if (rtf.startsWith("\\qc", i)) {
1035 Element centerDiv = OSISUtil.factory.createDiv();
1036 centerDiv.setAttribute(OSIS_ATTR_TYPE, "x-center");
1037 Element currentElement = (Element) stack.peek();
1038 currentElement.addContent(text.toString());
1039 text.delete(0, text.length());
1040 currentElement.addContent(centerDiv);
1041 stack.push(centerDiv);
1042 i += (i + 3 < strlen && rtf.charAt(i + 3) == ' ') ? 3 : 2;
1044 continue;
1045 }
1046
1047 if (rtf.startsWith("\\u", i)) {
1049 StringBuilder buf = new StringBuilder();
1050 i += 2;
1051 while (i < strlen) {
1052 char curDigit = rtf.charAt(i);
1053 if (curDigit != '-' && !Character.isDigit(curDigit)) {
1054 break;
1055 }
1056 buf.append(curDigit);
1057 i++;
1058 }
1059 int value = Integer.parseInt(buf.toString());
1065 if (value < 0) {
1066 value += 65536;
1067 }
1068 text.append((char) value);
1069 continue;
1071 }
1072
1073 if (rtf.startsWith("\\i0", i) || rtf.startsWith("\\b0", i)) {
1075 Element currentElement = (Element) stack.pop();
1076 currentElement.addContent(text.toString());
1077 text.delete(0, text.length());
1078 i += (i + 3 < strlen && rtf.charAt(i + 3) == ' ') ? 3 : 2;
1079 continue;
1080 }
1081
1082 if (rtf.startsWith(" ", i) || rtf.startsWith("\n", i)) {
1084 i += 1;
1085 continue;
1086 }
1087
1088 if (rtf.startsWith("\\i", i)) {
1090 Element hiElement = OSISUtil.factory.createHI();
1091 hiElement.setAttribute(OSIS_ATTR_TYPE, HI_ITALIC);
1092 Element currentElement = (Element) stack.peek();
1093 currentElement.addContent(text.toString());
1094 text.delete(0, text.length());
1095 currentElement.addContent(hiElement);
1096 stack.push(hiElement);
1097 i += (i + 2 < strlen && rtf.charAt(i + 2) == ' ') ? 2 : 1;
1098 continue;
1099 }
1100
1101 if (rtf.startsWith("\\b", i)) {
1103 Element hiElement = OSISUtil.factory.createHI();
1104 hiElement.setAttribute(OSIS_ATTR_TYPE, HI_BOLD);
1105 Element currentElement = (Element) stack.peek();
1106 currentElement.addContent(text.toString());
1107 text.delete(0, text.length());
1108 currentElement.addContent(hiElement);
1109 stack.push(hiElement);
1110 i += (i + 2 < strlen && rtf.charAt(i + 2) == ' ') ? 2 : 1;
1111 continue;
1112 }
1113
1114 }
1115
1116 if (text.length() > 0) {
1118 div.addContent(text.toString());
1119 }
1120 return div.cloneContent();
1140 }
1141
1142
1146 private static void recurseDeepContent(Element start, String name, List<Content> reply) {
1147 if (start.getName().equals(name)) {
1148 reply.add(start);
1149 }
1150
1151 Element ele = null;
1153 for (Content data : start.getContent()) {
1154 if (data instanceof Element) {
1155 ele = (Element) data;
1156 recurseDeepContent(ele, name, reply);
1157 }
1158 }
1159 }
1160
1161
1165 private static void recurseElement(Object sub, StringBuilder buffer) {
1166 if (sub instanceof Text) {
1167 buffer.append(((Text) sub).getText());
1168 } else if (sub instanceof Element) {
1169 recurseChildren((Element) sub, buffer);
1170 } else {
1171 log.error("unknown type: {}", sub.getClass().getName());
1172 }
1173 }
1174
1175
1183 private static void recurseChildren(Element ele, StringBuilder buffer) {
1184 for (Content sub : ele.getContent()) {
1186 recurseElement(sub, buffer);
1187 }
1188 }
1189
1190 private static String strongsNumber = "strong:([GgHh][0-9]+!?[A-Za-z]*)";
1191 private static Pattern strongsNumberPattern = Pattern.compile(strongsNumber);
1192 private static String robinsons = "robinson:([a-zA-Z][-a-zA-Z]*)";
1193 private static Pattern robinsonsPattern = Pattern.compile(robinsons);
1194}
1195