1
20 package org.crosswire.jsword.book;
21
22 import java.util.ArrayList;
23 import java.util.Arrays;
24 import java.util.Collection;
25 import java.util.HashSet;
26 import java.util.Iterator;
27 import java.util.List;
28 import java.util.Set;
29 import java.util.Stack;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32
33 import org.crosswire.common.diff.Difference;
34 import org.crosswire.common.diff.EditType;
35 import org.crosswire.jsword.JSOtherMsg;
36 import org.crosswire.jsword.passage.Key;
37 import org.crosswire.jsword.passage.NoSuchKeyException;
38 import org.crosswire.jsword.passage.NoSuchVerseException;
39 import org.crosswire.jsword.passage.PassageKeyFactory;
40 import org.crosswire.jsword.passage.Verse;
41 import org.crosswire.jsword.passage.VerseFactory;
42 import org.crosswire.jsword.versification.Versification;
43 import org.jdom2.Content;
44 import org.jdom2.Element;
45 import org.jdom2.Parent;
46 import org.jdom2.Text;
47 import org.slf4j.Logger;
48 import org.slf4j.LoggerFactory;
49
50
56 public final class OSISUtil {
57 private static final char SPACE_SEPARATOR = ' ';
58 private static final char MORPH_INFO_SEPARATOR = '@';
59
60
63
66 public static final String HI_ACROSTIC = "acrostic";
67
68
71 public static final String HI_BOLD = "bold";
72
73
76 public static final String HI_EMPHASIS = "emphasis";
77
78
81 public static final String HI_ILLUMINATED = "illuminated";
82
83
86 public static final String HI_ITALIC = "italic";
87
88
91 public static final String HI_LINETHROUGH = "line-through";
92
93
96 public static final String HI_NORMAL = "normal";
97
98
101 public static final String HI_SMALL_CAPS = "small-caps";
102
103
106 public static final String HI_SUB = "sub";
107
108
111 public static final String HI_SUPER = "super";
112
113
116 public static final String HI_UNDERLINE = "underline";
117
118
121 public static final String HI_X_CAPS = "x-caps";
122
123
126 public static final String HI_X_BIG = "x-big";
127
128
131 public static final String HI_X_SMALL = "x-small";
132
133
136 public static final String HI_X_TT = "x-tt";
137
138
142 public static final String SEG_JUSTIFYRIGHT = "text-align: right;";
143
144
148 public static final String SEG_JUSTIFYLEFT = "text-align: left;";
149
150
154 public static final String SEG_CENTER = "text-align: center;";
155
156
160 public static final String DIV_PRE = "x-pre";
161
162
166 public static final String SEG_COLORPREFIX = "color: ";
167
168
172 public static final String SEG_SIZEPREFIX = "font-size: ";
173
174
177 public static final String TYPE_X_PREFIX = "x-";
178
179
182 public static final String NOTETYPE_STUDY = "x-StudyNote";
183
184
187 public static final String NOTETYPE_REFERENCE = "crossReference";
188
189
192 public static final String VARIANT_TYPE = "x-variant";
193 public static final String VARIANT_CLASS = "x-";
194
195
198 public static final String GENERATED_CONTENT = "x-gen";
199
200
203 public static final String POS_TYPE = "x-pos";
204
205
208 public static final String DEF_TYPE = "x-def";
209
210
213 public static final String LEMMA_STRONGS = "strong:";
214 public static final String LEMMA_MISC = "lemma:";
215 public static final String MORPH_ROBINSONS = "robinson:";
216
217
220 public static final String MORPH_STRONGS = "x-StrongsMorph:T";
221
222
226 public static final String Q_BLOCK = "blockquote";
227
228
231 public static final String Q_CITATION = "citation";
232
233
236 public static final String Q_EMBEDDED = "embedded";
237
238
241 public static final String LIST_ORDERED = "x-ordered";
242 public static final String LIST_UNORDERED = "x-unordered";
243
244
248 public static final String TABLE_ROLE_LABEL = "label";
249
250
253 public static final String CELL_ALIGN_LEFT = "left";
254 public static final String CELL_ALIGN_RIGHT = "right";
255 public static final String CELL_ALIGN_CENTER = "center";
256 public static final String CELL_ALIGN_JUSTIFY = "justify";
257 public static final String CELL_ALIGN_START = "start";
258 public static final String CELL_ALIGN_END = "end";
259
260 public static final String OSIS_ELEMENT_ABBR = "abbr";
261 public static final String OSIS_ELEMENT_TITLE = "title";
262 public static final String OSIS_ELEMENT_TABLE = "table";
263 public static final String OSIS_ELEMENT_SPEECH = "speech";
264 public static final String OSIS_ELEMENT_SPEAKER = "speaker";
265 public static final String OSIS_ELEMENT_ROW = "row";
266 public static final String OSIS_ELEMENT_REFERENCE = "reference";
267 public static final String OSIS_ELEMENT_NOTE = "note";
268 public static final String OSIS_ELEMENT_NAME = "name";
269 public static final String OSIS_ELEMENT_Q = "q";
270 public static final String OSIS_ELEMENT_LIST = "list";
271 public static final String OSIS_ELEMENT_P = "p";
272 public static final String OSIS_ELEMENT_ITEM = "item";
273 public static final String OSIS_ELEMENT_FIGURE = "figure";
274 public static final String OSIS_ELEMENT_FOREIGN = "foreign";
275 public static final String OSIS_ELEMENT_W = "w";
276 public static final String OSIS_ELEMENT_CHAPTER = "chapter";
277 public static final String OSIS_ELEMENT_VERSE = "verse";
278 public static final String OSIS_ELEMENT_CELL = "cell";
279 public static final String OSIS_ELEMENT_DIV = "div";
280 public static final String OSIS_ELEMENT_OSIS = "osis";
281 public static final String OSIS_ELEMENT_WORK = "work";
282 public static final String OSIS_ELEMENT_HEADER = "header";
283 public static final String OSIS_ELEMENT_OSISTEXT = "osisText";
284 public static final String OSIS_ELEMENT_SEG = "seg";
285 public static final String OSIS_ELEMENT_LG = "lg";
286 public static final String OSIS_ELEMENT_L = "l";
287 public static final String OSIS_ELEMENT_LB = "lb";
288 public static final String OSIS_ELEMENT_HI = "hi";
289
290 public static final String ATTRIBUTE_TEXT_OSISIDWORK = "osisIDWork";
291 public static final String ATTRIBUTE_WORK_OSISWORK = "osisWork";
292 public static final String OSIS_ATTR_OSISID = "osisID";
293 public static final String OSIS_ATTR_SID = "sID";
294 public static final String OSIS_ATTR_EID = "eID";
295 public static final String ATTRIBUTE_W_LEMMA = "lemma";
296 public static final String ATTRIBUTE_FIGURE_SRC = "src";
297 public static final String ATTRIBUTE_TABLE_BORDER = "border";
298 public static final String ATTRIBUTE_TABLE_ROLE = "role";
299 public static final String ATTRIBUTE_CELL_ALIGN = "align";
300 public static final String ATTRIBUTE_CELL_ROWS = "rows";
301 public static final String ATTRIBUTE_CELL_COLS = "cols";
302 public static final String OSIS_ATTR_TYPE = "type";
303 public static final String OSIS_ATTR_CANONICAL = "canonical";
304 public static final String OSIS_ATTR_SUBTYPE = "subType";
305 public static final String OSIS_ATTR_REF = "osisRef";
306 public static final String OSIS_ATTR_LEVEL = "level";
307 public static final String ATTRIBUTE_SPEAKER_WHO = "who";
308 public static final String ATTRIBUTE_Q_WHO = "who";
309 public static final String ATTRIBUTE_W_MORPH = "morph";
310 public static final String ATTRIBUTE_OSISTEXT_OSISIDWORK = "osisIDWork";
311 public static final String OSIS_ATTR_LANG = "lang";
315 public static final String ATTRIBUTE_DIV_BOOK = "book";
316
317
320 private static final String OSISID_PREFIX_BIBLE = "Bible.";
321
322 private static final Set<String> EXTRA_BIBLICAL_ELEMENTS = new HashSet<String>(Arrays.asList(new String[] {
323 OSIS_ELEMENT_NOTE, OSIS_ELEMENT_TITLE, OSIS_ELEMENT_REFERENCE
324 }));
325
326
329 private static final Logger log = LoggerFactory.getLogger(OSISUtil.class);
330
331
332
/**
 * Not instantiable: this class only offers static members.
 */
private OSISUtil() {
    // nothing to initialise
}
337
338 private static OSISFactory factory = new OSISFactory();
339
340
345 public static OSISFactory factory() {
346 return factory;
347 }
348
349
352 public static class OSISFactory {
353
356 public Element createAbbr() {
357 return new Element(OSIS_ELEMENT_ABBR);
358 }
359
360
363 public Element createSeg() {
364 return new Element(OSIS_ELEMENT_SEG);
365 }
366
367
370 public Element createOsisText() {
371 return new Element(OSIS_ELEMENT_OSISTEXT);
372 }
373
374
377 public Element createHeader() {
378 return new Element(OSIS_ELEMENT_HEADER);
379 }
380
381
384 public Element createWork() {
385 return new Element(OSIS_ELEMENT_WORK);
386 }
387
388
391 public Element createOsis() {
392 return new Element(OSIS_ELEMENT_OSIS);
393 }
394
395
398 public Element createDiv() {
399 return new Element(OSIS_ELEMENT_DIV);
400 }
401
402
405 public Element createCell() {
406 return new Element(OSIS_ELEMENT_CELL);
407 }
408
409
412 public Element createHeaderCell() {
413 Element ele = new Element(OSIS_ELEMENT_CELL);
414 ele.setAttribute(ATTRIBUTE_TABLE_ROLE, TABLE_ROLE_LABEL);
415 ele.setAttribute(ATTRIBUTE_CELL_ALIGN, CELL_ALIGN_CENTER);
416 return ele;
417 }
418
419
422 public Element createVerse() {
423 return new Element(OSIS_ELEMENT_VERSE);
424 }
425
426
429 public Element createW() {
430 return new Element(OSIS_ELEMENT_W);
431 }
432
433
436 public Element createFigure() {
437 return new Element(OSIS_ELEMENT_FIGURE);
438 }
439
440
443 public Element createForeign() {
444 return new Element(OSIS_ELEMENT_FOREIGN);
445 }
446
447
450 public Element createItem() {
451 return new Element(OSIS_ELEMENT_ITEM);
452 }
453
454
457 public Element createP() {
458 return new Element(OSIS_ELEMENT_P);
459 }
460
461
464 public Element createList() {
465 return new Element(OSIS_ELEMENT_LIST);
466 }
467
468
471 public Element createQ() {
472 return new Element(OSIS_ELEMENT_Q);
473 }
474
475
478 public Element createName() {
479 return new Element(OSIS_ELEMENT_NAME);
480 }
481
482
485 public Element createNote() {
486 return new Element(OSIS_ELEMENT_NOTE);
487 }
488
489
492 public Element createReference() {
493 return new Element(OSIS_ELEMENT_REFERENCE);
494 }
495
496
499 public Element createRow() {
500 return new Element(OSIS_ELEMENT_ROW);
501 }
502
503
506 public Element createSpeaker() {
507 return new Element(OSIS_ELEMENT_SPEAKER);
508 }
509
510
513 public Element createSpeech() {
514 return new Element(OSIS_ELEMENT_SPEECH);
515 }
516
517
520 public Element createTable() {
521 return new Element(OSIS_ELEMENT_TABLE);
522 }
523
524
527 public Element createTitle() {
528 return new Element(OSIS_ELEMENT_TITLE);
529 }
530
531
536 public Element createGeneratedTitle() {
537 Element title = new Element(OSIS_ELEMENT_TITLE);
538 title.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.GENERATED_CONTENT);
539 return title;
540 }
541
542
547 public Element createLG() {
548 return new Element(OSIS_ELEMENT_LG);
549 }
550
551
556 public Element createL() {
557 return new Element(OSIS_ELEMENT_L);
558 }
559
560
565 public Element createLB() {
566 return new Element(OSIS_ELEMENT_LB);
567 }
568
569
574 public Element createHI() {
575 return new Element(OSIS_ELEMENT_HI);
576 }
577
578
584 public Text createText(String text) {
585 return new Text(text);
586 }
587 }
588
589
596 public static List<Content> getFragment(Element root) {
597 if (root != null) {
598 Element content = root;
599 if (OSISUtil.OSIS_ELEMENT_OSIS.equals(root.getName())) {
600 content = root.getChild(OSISUtil.OSIS_ELEMENT_OSISTEXT);
601 }
602
603 if (OSISUtil.OSIS_ELEMENT_OSISTEXT.equals(root.getName())) {
604 content = root.getChild(OSISUtil.OSIS_ELEMENT_DIV);
605 }
606
607 if (content != null && content.getContentSize() == 1) {
611 Content firstChild = content.getContent(0);
612 if (firstChild instanceof Element && OSISUtil.OSIS_ELEMENT_DIV.equals(((Element) firstChild).getName())) {
613 content = (Element) firstChild;
614 }
615 }
616
617 if (content != null) {
618 return content.getContent();
619 }
620 }
621 return new ArrayList<Content>();
622 }
623
624
667 public static String getCanonicalText(Element root) {
668 if (!isCanonical(root)) {
671 return "";
673 }
674
675 StringBuilder buffer = new StringBuilder();
676
677 List<Content> frag = OSISUtil.getFragment(root);
679
680 Iterator<Content> dit = frag.iterator();
681 String sID = null;
682 Content data = null;
683 Element ele = null;
684 while (dit.hasNext()) {
685 data = dit.next();
686 if (data instanceof Element) {
687 ele = (Element) data;
688 if (!isCanonical(ele)) {
689 continue;
690 }
691
692 if (ele.getName().equals(OSISUtil.OSIS_ELEMENT_VERSE)) {
693 sID = ele.getAttributeValue(OSISUtil.OSIS_ATTR_SID);
694 }
695
696 if (sID != null) {
697 getCanonicalContent(ele, sID, dit, buffer);
698 } else {
699 getCanonicalContent(ele, null, ele.getContent().iterator(), buffer);
700 }
701 } else if (data instanceof Text) {
702 int lastIndex = buffer.length() - 1;
707 String text = ((Text) data).getText();
708 if (text.length() != 0) {
710 if (lastIndex >= 0 && !Character.isWhitespace(buffer.charAt(lastIndex)) && !Character.isWhitespace(text.charAt(0))) {
712 buffer.append(' ');
713 }
714 buffer.append(text);
715 }
716 }
717 }
718
719 return buffer.toString().trim();
720 }
721
722
730 public static String getPlainText(Element root) {
731 return getTextContent(OSISUtil.getFragment(root));
733 }
734
735
742 public static String getStrongsNumbers(Element root) {
743 return getLexicalInformation(root, false);
744 }
745
746
752 public static String getMorphologiesWithStrong(Element root) {
753 return getLexicalInformation(root, true);
754 }
755
756
763 public static String getLexicalInformation(Element root, boolean includeMorphology) {
764 StringBuilder buffer = new StringBuilder();
765
766 for (Content content : getDeepContent(root, OSISUtil.OSIS_ELEMENT_W)) {
767 Element ele = (Element) content;
768 String attr = ele.getAttributeValue(OSISUtil.ATTRIBUTE_W_LEMMA);
769 if (attr != null) {
770 Matcher matcher = strongsNumberPattern.matcher(attr);
771 while (matcher.find()) {
772 String strongsNum = matcher.group(1);
773 if (buffer.length() > 0) {
774 buffer.append(' ');
775 }
776
777 if (includeMorphology) {
778 strongsNum = strongsNum.replace(SPACE_SEPARATOR, MORPH_INFO_SEPARATOR);
780 }
781 buffer.append(strongsNum);
782
783 if (includeMorphology) {
784 String morph = ele.getAttributeValue(OSISUtil.ATTRIBUTE_W_MORPH);
786 if (morph != null && morph.length() != 0) {
787 buffer.append(MORPH_INFO_SEPARATOR);
788 buffer.append(morph.replace(SPACE_SEPARATOR, MORPH_INFO_SEPARATOR));
789 }
790 }
791 }
792 }
793 }
794
795 return buffer.toString().trim();
796 }
797
798
808 public static String getReferences(Book book, Key key, Versification v11n, Element root) {
809 PassageKeyFactory keyf = PassageKeyFactory.instance();
810 Key collector = keyf.createEmptyKeyList(v11n);
811
812 for (Content content : getDeepContent(root, OSISUtil.OSIS_ELEMENT_REFERENCE)) {
813 Element ele = (Element) content;
814 String attr = ele.getAttributeValue(OSISUtil.OSIS_ATTR_REF);
815 if (attr != null) {
816 try {
817 collector.addAll(keyf.getKey(v11n, attr));
818 } catch (NoSuchKeyException e) {
819 DataPolice.report(book, key, "Unable to parse: " + attr + " - No such reference:" + e.getMessage());
820 }
821 }
822 }
823
824 return collector.getOsisID();
825 }
826
827
833 public static String getNotes(Element root) {
834 StringBuilder buffer = new StringBuilder();
835
836 for (Content content : getDeepContent(root, OSISUtil.OSIS_ELEMENT_NOTE)) {
837 Element ele = (Element) content;
838 String attr = ele.getAttributeValue(OSISUtil.OSIS_ATTR_TYPE);
839 if (attr == null || !attr.equals(NOTETYPE_REFERENCE)) {
840 if (buffer.length() > 0) {
841 buffer.append(' ');
842 }
843 buffer.append(OSISUtil.getTextContent(ele.getContent()));
844 }
845 }
846
847 return buffer.toString();
848 }
849
850
856 public static String getHeadings(Element root) {
857 StringBuilder buffer = new StringBuilder();
858
859 for (Content content : getDeepContent(root, OSISUtil.OSIS_ELEMENT_TITLE)) {
860 Element ele = (Element) content;
861
862 if (buffer.length() > 0) {
863 buffer.append(' ');
864 }
865 buffer.append(OSISUtil.getTextContent(ele.getContent()));
866 }
867
868 return buffer.toString();
869 }
870
871 private static void getCanonicalContent(Element parent, String sID, Iterator<Content> iter, StringBuilder buffer) {
872 if (!isCanonical(parent)) {
873 return;
874 }
875
876 Content data = null;
877 Element ele = null;
878 String eleName = null;
879 String eID = null;
880 while (iter.hasNext()) {
881 data = iter.next();
882 if (data instanceof Element) {
883 ele = (Element) data;
884 eleName = ele.getName();
888 eID = ele.getAttributeValue(OSISUtil.OSIS_ATTR_SID);
889 if (eID != null && eID.equals(sID) && eleName.equals(parent.getName())) {
890 break;
891 }
892 OSISUtil.getCanonicalContent(ele, sID, ele.getContent().iterator(), buffer);
893 } else if (data instanceof Text) {
894 int lastIndex = buffer.length() - 1;
900 String text = ((Text) data).getText();
901 if (lastIndex >= 0 && !Character.isWhitespace(buffer.charAt(lastIndex)) && (text.length() == 0 || !Character.isWhitespace(text.charAt(0))) && !OSIS_ELEMENT_SEG.equals(parent.getName())) {
902 buffer.append(' ');
903 }
904 buffer.append(text);
905 }
906 }
907 }
908
909 private static boolean isCanonical(Content content) {
910 boolean result = true;
911 if (content instanceof Element) {
912 Element element = (Element) content;
913
914 if (EXTRA_BIBLICAL_ELEMENTS.contains(element.getName())) {
916 String canonical = element.getAttributeValue(OSISUtil.OSIS_ATTR_CANONICAL);
917 result = Boolean.valueOf(canonical).booleanValue();
918 }
919 }
920
921 return result;
922 }
923
924 private static String getTextContent(List<Content> fragment) {
925 StringBuilder buffer = new StringBuilder();
926
927 for (Content next : fragment) {
928 recurseElement(next, buffer);
929 }
930
931 return buffer.toString();
932 }
933
934
942 public static Collection<Content> getDeepContent(Element div, String name) {
943 List<Content> reply = new ArrayList<Content>();
944 recurseDeepContent(div, name, reply);
945 return reply;
946 }
947
948
957 public static Verse getVerse(Versification v11n, Element ele) throws BookException {
958 if (ele.getName().equals(OSIS_ELEMENT_VERSE)) {
959 String osisid = ele.getAttributeValue(OSIS_ATTR_OSISID);
961
962 try {
963 return VerseFactory.fromString(v11n, osisid);
964 } catch (NoSuchVerseException ex) {
965 throw new BookException(JSOtherMsg.lookupText("OsisID not valid: {0}", osisid), ex);
966 }
967 }
968
969 Parent parent = ele.getParent();
971 if (parent instanceof Element) {
972 return getVerse(v11n, (Element) parent);
973 }
974
975 throw new BookException(JSOtherMsg.lookupText("Verse element could not be found"));
976 }
977
978
985 public static Element createOsisFramework(BookMetaData bmd) {
986 Element osis = factory().createOsis();
987 String osisid = bmd.getInitials();
988
989 Element work = factory().createWork();
990 work.setAttribute(ATTRIBUTE_WORK_OSISWORK, osisid);
991
992 Element header = factory().createHeader();
993 header.addContent(work);
994
995 Element text = factory().createOsisText();
996 text.setAttribute(ATTRIBUTE_TEXT_OSISIDWORK, OSISID_PREFIX_BIBLE + osisid);
997 text.addContent(header);
998
999 osis.addContent(text);
1000
1001 return osis;
1002 }
1003
1004
1011 public static List<Content> diffToOsis(List<Difference> diffs) {
1012 Element div = factory().createDiv();
1013
1014 for (int x = 0; x < diffs.size(); x++) {
1015 Difference diff = diffs.get(x);
1016 EditType editType = diff.getEditType(); Text text = factory.createText(diff.getText());
1020 if (EditType.DELETE.equals(editType)) {
1021 Element hi = factory().createHI();
1022 hi.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.HI_LINETHROUGH);
1023 hi.addContent(text);
1024 div.addContent(hi);
1025 } else if (EditType.INSERT.equals(editType)) {
1026 Element hi = factory().createHI();
1027 hi.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.HI_UNDERLINE);
1028 hi.addContent(text);
1029 div.addContent(hi);
1030 } else {
1031 div.addContent(text);
1032 }
1033 }
1034 return div.cloneContent();
1035 }
1036
1037 public static List<Content> rtfToOsis(String rtf) {
1038 Element div = factory().createDiv();
1039 Stack<Content> stack = new Stack<Content>();
1040 stack.push(div);
1041
1042 int strlen = rtf.length();
1043
1044 StringBuilder text = new StringBuilder(strlen);
1045
1046 int i = 0;
1047 for (i = 0; i < strlen; i++) {
1048 char curChar = rtf.charAt(i);
1049 if (curChar != '\\') {
1050 text.append(curChar);
1051 continue;
1052 }
1053
1054
1057 if (rtf.startsWith("\\pard", i)) {
1059 Element currentElement = (Element) stack.pop();
1060 currentElement.addContent(text.toString());
1061 text.delete(0, text.length());
1062 stack.clear();
1063 stack.push(div);
1064 i += (i + 5 < strlen && rtf.charAt(i + 5) == ' ') ? 5 : 4;
1065 continue;
1066 }
1067
1068 if (rtf.startsWith("\\par", i)) {
1070 Element currentElement = (Element) stack.peek();
1071 currentElement.addContent(text.toString());
1072 text.delete(0, text.length());
1073 currentElement.addContent(OSISUtil.factory.createLB());
1074 i += (i + 4 < strlen && rtf.charAt(i + 4) == ' ') ? 4 : 3;
1075 continue;
1076 }
1077
1078 if (rtf.startsWith("\\qc", i)) {
1081 Element centerDiv = OSISUtil.factory.createDiv();
1082 centerDiv.setAttribute(OSIS_ATTR_TYPE, "x-center");
1083 Element currentElement = (Element) stack.peek();
1084 currentElement.addContent(text.toString());
1085 text.delete(0, text.length());
1086 currentElement.addContent(centerDiv);
1087 stack.push(centerDiv);
1088 i += (i + 3 < strlen && rtf.charAt(i + 3) == ' ') ? 3 : 2;
1090 continue;
1091 }
1092
1093 if (rtf.startsWith("\\u", i)) {
1095 StringBuilder buf = new StringBuilder();
1096 i += 2;
1097 while (i < strlen) {
1098 char curDigit = rtf.charAt(i);
1099 if (curDigit != '-' && !Character.isDigit(curDigit)) {
1100 break;
1101 }
1102 buf.append(curDigit);
1103 i++;
1104 }
1105 int value = Integer.parseInt(buf.toString());
1111 if (value < 0) {
1112 value += 65536;
1113 }
1114 text.append((char) value);
1115 continue;
1117 }
1118
1119 if (rtf.startsWith("\\i0", i) || rtf.startsWith("\\b0", i)) {
1121 Element currentElement = (Element) stack.pop();
1122 currentElement.addContent(text.toString());
1123 text.delete(0, text.length());
1124 i += (i + 3 < strlen && rtf.charAt(i + 3) == ' ') ? 3 : 2;
1125 continue;
1126 }
1127
1128 if (rtf.startsWith(" ", i) || rtf.startsWith("\n", i)) {
1130 i += 1;
1131 continue;
1132 }
1133
1134 if (rtf.startsWith("\\i", i)) {
1136 Element hiElement = OSISUtil.factory.createHI();
1137 hiElement.setAttribute(OSIS_ATTR_TYPE, HI_ITALIC);
1138 Element currentElement = (Element) stack.peek();
1139 currentElement.addContent(text.toString());
1140 text.delete(0, text.length());
1141 currentElement.addContent(hiElement);
1142 stack.push(hiElement);
1143 i += (i + 2 < strlen && rtf.charAt(i + 2) == ' ') ? 2 : 1;
1144 continue;
1145 }
1146
1147 if (rtf.startsWith("\\b", i)) {
1149 Element hiElement = OSISUtil.factory.createHI();
1150 hiElement.setAttribute(OSIS_ATTR_TYPE, HI_BOLD);
1151 Element currentElement = (Element) stack.peek();
1152 currentElement.addContent(text.toString());
1153 text.delete(0, text.length());
1154 currentElement.addContent(hiElement);
1155 stack.push(hiElement);
1156 i += (i + 2 < strlen && rtf.charAt(i + 2) == ' ') ? 2 : 1;
1157 continue;
1158 }
1159
1160 }
1161
1162 if (text.length() > 0) {
1164 div.addContent(text.toString());
1165 }
1166 return div.cloneContent();
1186 }
1187
1188
1196 private static void recurseDeepContent(Element start, String name, List<Content> reply) {
1197 if (start.getName().equals(name)) {
1198 reply.add(start);
1199 }
1200
1201 Element ele = null;
1203 for (Content data : start.getContent()) {
1204 if (data instanceof Element) {
1205 ele = (Element) data;
1206 recurseDeepContent(ele, name, reply);
1207 }
1208 }
1209 }
1210
1211
1218 private static void recurseElement(Object sub, StringBuilder buffer) {
1219 if (sub instanceof Text) {
1220 buffer.append(((Text) sub).getText());
1221 } else if (sub instanceof Element) {
1222 recurseChildren((Element) sub, buffer);
1223 } else {
1224 log.error("unknown type: {}", sub.getClass().getName());
1225 }
1226 }
1227
1228
1236 private static void recurseChildren(Element ele, StringBuilder buffer) {
1237 for (Content sub : ele.getContent()) {
1239 recurseElement(sub, buffer);
1240 }
1241 }
1242
1243 private static String strongsNumber = "strong:([GgHh][0-9]+!?[A-Za-z]*)";
1244 private static Pattern strongsNumberPattern = Pattern.compile(strongsNumber);
1245}
1246