1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
|
19 | |
|
20 | |
package org.crosswire.jsword.book; |
21 | |
|
22 | |
import java.util.ArrayList; |
23 | |
import java.util.Arrays; |
24 | |
import java.util.Collection; |
25 | |
import java.util.HashSet; |
26 | |
import java.util.Iterator; |
27 | |
import java.util.List; |
28 | |
import java.util.Set; |
29 | |
import java.util.Stack; |
30 | |
import java.util.regex.Matcher; |
31 | |
import java.util.regex.Pattern; |
32 | |
|
33 | |
import org.crosswire.common.diff.Difference; |
34 | |
import org.crosswire.common.diff.EditType; |
35 | |
import org.crosswire.jsword.JSOtherMsg; |
36 | |
import org.crosswire.jsword.passage.Key; |
37 | |
import org.crosswire.jsword.passage.NoSuchKeyException; |
38 | |
import org.crosswire.jsword.passage.NoSuchVerseException; |
39 | |
import org.crosswire.jsword.passage.PassageKeyFactory; |
40 | |
import org.crosswire.jsword.passage.Verse; |
41 | |
import org.crosswire.jsword.passage.VerseFactory; |
42 | |
import org.crosswire.jsword.versification.Versification; |
43 | |
import org.jdom2.Content; |
44 | |
import org.jdom2.Element; |
45 | |
import org.jdom2.Parent; |
46 | |
import org.jdom2.Text; |
47 | |
import org.slf4j.Logger; |
48 | |
import org.slf4j.LoggerFactory; |
49 | |
|
50 | |
|
51 | |
|
52 | |
|
53 | |
|
54 | |
|
55 | |
|
56 | |
public final class OSISUtil { |
57 | |
private static final char SPACE_SEPARATOR = ' '; |
58 | |
private static final char MORPH_INFO_SEPARATOR = '@'; |
59 | |
|
60 | |
|
61 | |
|
62 | |
|
63 | |
|
64 | |
|
65 | |
|
66 | |
public static final String HI_ACROSTIC = "acrostic"; |
67 | |
|
68 | |
|
69 | |
|
70 | |
|
71 | |
public static final String HI_BOLD = "bold"; |
72 | |
|
73 | |
|
74 | |
|
75 | |
|
76 | |
public static final String HI_EMPHASIS = "emphasis"; |
77 | |
|
78 | |
|
79 | |
|
80 | |
|
81 | |
public static final String HI_ILLUMINATED = "illuminated"; |
82 | |
|
83 | |
|
84 | |
|
85 | |
|
86 | |
public static final String HI_ITALIC = "italic"; |
87 | |
|
88 | |
|
89 | |
|
90 | |
|
91 | |
public static final String HI_LINETHROUGH = "line-through"; |
92 | |
|
93 | |
|
94 | |
|
95 | |
|
96 | |
public static final String HI_NORMAL = "normal"; |
97 | |
|
98 | |
|
99 | |
|
100 | |
|
101 | |
public static final String HI_SMALL_CAPS = "small-caps"; |
102 | |
|
103 | |
|
104 | |
|
105 | |
|
106 | |
public static final String HI_SUB = "sub"; |
107 | |
|
108 | |
|
109 | |
|
110 | |
|
111 | |
public static final String HI_SUPER = "super"; |
112 | |
|
113 | |
|
114 | |
|
115 | |
|
116 | |
public static final String HI_UNDERLINE = "underline"; |
117 | |
|
118 | |
|
119 | |
|
120 | |
|
121 | |
public static final String HI_X_CAPS = "x-caps"; |
122 | |
|
123 | |
|
124 | |
|
125 | |
|
126 | |
public static final String HI_X_BIG = "x-big"; |
127 | |
|
128 | |
|
129 | |
|
130 | |
|
131 | |
public static final String HI_X_SMALL = "x-small"; |
132 | |
|
133 | |
|
134 | |
|
135 | |
|
136 | |
public static final String HI_X_TT = "x-tt"; |
137 | |
|
138 | |
|
139 | |
|
140 | |
|
141 | |
|
142 | |
public static final String SEG_JUSTIFYRIGHT = "text-align: right;"; |
143 | |
|
144 | |
|
145 | |
|
146 | |
|
147 | |
|
148 | |
public static final String SEG_JUSTIFYLEFT = "text-align: left;"; |
149 | |
|
150 | |
|
151 | |
|
152 | |
|
153 | |
|
154 | |
public static final String SEG_CENTER = "text-align: center;"; |
155 | |
|
156 | |
|
157 | |
|
158 | |
|
159 | |
|
160 | |
public static final String DIV_PRE = "x-pre"; |
161 | |
|
162 | |
|
163 | |
|
164 | |
|
165 | |
|
166 | |
public static final String SEG_COLORPREFIX = "color: "; |
167 | |
|
168 | |
|
169 | |
|
170 | |
|
171 | |
|
172 | |
public static final String SEG_SIZEPREFIX = "font-size: "; |
173 | |
|
174 | |
|
175 | |
|
176 | |
|
177 | |
public static final String TYPE_X_PREFIX = "x-"; |
178 | |
|
179 | |
|
180 | |
|
181 | |
|
182 | |
public static final String NOTETYPE_STUDY = "x-StudyNote"; |
183 | |
|
184 | |
|
185 | |
|
186 | |
|
187 | |
public static final String NOTETYPE_REFERENCE = "crossReference"; |
188 | |
|
189 | |
|
190 | |
|
191 | |
|
192 | |
public static final String VARIANT_TYPE = "x-variant"; |
193 | |
public static final String VARIANT_CLASS = "x-"; |
194 | |
|
195 | |
|
196 | |
|
197 | |
|
198 | |
public static final String GENERATED_CONTENT = "x-gen"; |
199 | |
|
200 | |
|
201 | |
|
202 | |
|
203 | |
public static final String POS_TYPE = "x-pos"; |
204 | |
|
205 | |
|
206 | |
|
207 | |
|
208 | |
public static final String DEF_TYPE = "x-def"; |
209 | |
|
210 | |
|
211 | |
|
212 | |
|
213 | |
public static final String LEMMA_STRONGS = "strong:"; |
214 | |
public static final String LEMMA_MISC = "lemma:"; |
215 | |
public static final String MORPH_ROBINSONS = "robinson:"; |
216 | |
|
217 | |
|
218 | |
|
219 | |
|
220 | |
public static final String MORPH_STRONGS = "x-StrongsMorph:T"; |
221 | |
|
222 | |
|
223 | |
|
224 | |
|
225 | |
|
226 | |
public static final String Q_BLOCK = "blockquote"; |
227 | |
|
228 | |
|
229 | |
|
230 | |
|
231 | |
public static final String Q_CITATION = "citation"; |
232 | |
|
233 | |
|
234 | |
|
235 | |
|
236 | |
public static final String Q_EMBEDDED = "embedded"; |
237 | |
|
238 | |
|
239 | |
|
240 | |
|
241 | |
public static final String LIST_ORDERED = "x-ordered"; |
242 | |
public static final String LIST_UNORDERED = "x-unordered"; |
243 | |
|
244 | |
|
245 | |
|
246 | |
|
247 | |
|
248 | |
public static final String TABLE_ROLE_LABEL = "label"; |
249 | |
|
250 | |
|
251 | |
|
252 | |
|
253 | |
public static final String CELL_ALIGN_LEFT = "left"; |
254 | |
public static final String CELL_ALIGN_RIGHT = "right"; |
255 | |
public static final String CELL_ALIGN_CENTER = "center"; |
256 | |
public static final String CELL_ALIGN_JUSTIFY = "justify"; |
257 | |
public static final String CELL_ALIGN_START = "start"; |
258 | |
public static final String CELL_ALIGN_END = "end"; |
259 | |
|
260 | |
public static final String OSIS_ELEMENT_ABBR = "abbr"; |
261 | |
public static final String OSIS_ELEMENT_TITLE = "title"; |
262 | |
public static final String OSIS_ELEMENT_TABLE = "table"; |
263 | |
public static final String OSIS_ELEMENT_SPEECH = "speech"; |
264 | |
public static final String OSIS_ELEMENT_SPEAKER = "speaker"; |
265 | |
public static final String OSIS_ELEMENT_ROW = "row"; |
266 | |
public static final String OSIS_ELEMENT_REFERENCE = "reference"; |
267 | |
public static final String OSIS_ELEMENT_NOTE = "note"; |
268 | |
public static final String OSIS_ELEMENT_NAME = "name"; |
269 | |
public static final String OSIS_ELEMENT_Q = "q"; |
270 | |
public static final String OSIS_ELEMENT_LIST = "list"; |
271 | |
public static final String OSIS_ELEMENT_P = "p"; |
272 | |
public static final String OSIS_ELEMENT_ITEM = "item"; |
273 | |
public static final String OSIS_ELEMENT_FIGURE = "figure"; |
274 | |
public static final String OSIS_ELEMENT_FOREIGN = "foreign"; |
275 | |
public static final String OSIS_ELEMENT_W = "w"; |
276 | |
public static final String OSIS_ELEMENT_CHAPTER = "chapter"; |
277 | |
public static final String OSIS_ELEMENT_VERSE = "verse"; |
278 | |
public static final String OSIS_ELEMENT_CELL = "cell"; |
279 | |
public static final String OSIS_ELEMENT_DIV = "div"; |
280 | |
public static final String OSIS_ELEMENT_OSIS = "osis"; |
281 | |
public static final String OSIS_ELEMENT_WORK = "work"; |
282 | |
public static final String OSIS_ELEMENT_HEADER = "header"; |
283 | |
public static final String OSIS_ELEMENT_OSISTEXT = "osisText"; |
284 | |
public static final String OSIS_ELEMENT_SEG = "seg"; |
285 | |
public static final String OSIS_ELEMENT_LG = "lg"; |
286 | |
public static final String OSIS_ELEMENT_L = "l"; |
287 | |
public static final String OSIS_ELEMENT_LB = "lb"; |
288 | |
public static final String OSIS_ELEMENT_HI = "hi"; |
289 | |
|
290 | |
public static final String ATTRIBUTE_TEXT_OSISIDWORK = "osisIDWork"; |
291 | |
public static final String ATTRIBUTE_WORK_OSISWORK = "osisWork"; |
292 | |
public static final String OSIS_ATTR_OSISID = "osisID"; |
293 | |
public static final String OSIS_ATTR_SID = "sID"; |
294 | |
public static final String OSIS_ATTR_EID = "eID"; |
295 | |
public static final String ATTRIBUTE_W_LEMMA = "lemma"; |
296 | |
public static final String ATTRIBUTE_FIGURE_SRC = "src"; |
297 | |
public static final String ATTRIBUTE_TABLE_BORDER = "border"; |
298 | |
public static final String ATTRIBUTE_TABLE_ROLE = "role"; |
299 | |
public static final String ATTRIBUTE_CELL_ALIGN = "align"; |
300 | |
public static final String ATTRIBUTE_CELL_ROWS = "rows"; |
301 | |
public static final String ATTRIBUTE_CELL_COLS = "cols"; |
302 | |
public static final String OSIS_ATTR_TYPE = "type"; |
303 | |
public static final String OSIS_ATTR_CANONICAL = "canonical"; |
304 | |
public static final String OSIS_ATTR_SUBTYPE = "subType"; |
305 | |
public static final String OSIS_ATTR_REF = "osisRef"; |
306 | |
public static final String OSIS_ATTR_LEVEL = "level"; |
307 | |
public static final String ATTRIBUTE_SPEAKER_WHO = "who"; |
308 | |
public static final String ATTRIBUTE_Q_WHO = "who"; |
309 | |
public static final String ATTRIBUTE_W_MORPH = "morph"; |
310 | |
public static final String ATTRIBUTE_OSISTEXT_OSISIDWORK = "osisIDWork"; |
311 | |
|
312 | |
|
313 | |
|
314 | |
public static final String OSIS_ATTR_LANG = "lang"; |
315 | |
public static final String ATTRIBUTE_DIV_BOOK = "book"; |
316 | |
|
317 | |
|
318 | |
|
319 | |
|
320 | |
private static final String OSISID_PREFIX_BIBLE = "Bible."; |
321 | |
|
322 | 0 | private static final Set<String> EXTRA_BIBLICAL_ELEMENTS = new HashSet<String>(Arrays.asList(new String[] { |
323 | |
OSIS_ELEMENT_NOTE, OSIS_ELEMENT_TITLE, OSIS_ELEMENT_REFERENCE |
324 | |
})); |
325 | |
|
326 | |
|
327 | |
|
328 | |
|
329 | 0 | private static final Logger log = LoggerFactory.getLogger(OSISUtil.class); |
330 | |
|
331 | |
|
332 | |
|
333 | |
|
334 | |
|
/**
 * Prevents instantiation; every member of this class is static.
 */
private OSISUtil() {
}

/**
 * The one factory instance shared by all callers. It is never reassigned,
 * so it is declared final to make that guarantee explicit.
 */
private static final OSISFactory factory = new OSISFactory();

/**
 * Gives access to the shared element factory.
 *
 * @return the same {@link OSISFactory} instance on every call
 */
public static OSISFactory factory() {
    return factory;
}
348 | |
|
349 | |
|
350 | |
|
351 | |
|
352 | 0 | public static class OSISFactory { |
353 | |
|
354 | |
|
355 | |
|
356 | |
public Element createAbbr() { |
357 | 0 | return new Element(OSIS_ELEMENT_ABBR); |
358 | |
} |
359 | |
|
360 | |
|
361 | |
|
362 | |
|
363 | |
public Element createSeg() { |
364 | 0 | return new Element(OSIS_ELEMENT_SEG); |
365 | |
} |
366 | |
|
367 | |
|
368 | |
|
369 | |
|
370 | |
public Element createOsisText() { |
371 | 0 | return new Element(OSIS_ELEMENT_OSISTEXT); |
372 | |
} |
373 | |
|
374 | |
|
375 | |
|
376 | |
|
377 | |
public Element createHeader() { |
378 | 0 | return new Element(OSIS_ELEMENT_HEADER); |
379 | |
} |
380 | |
|
381 | |
|
382 | |
|
383 | |
|
384 | |
public Element createWork() { |
385 | 0 | return new Element(OSIS_ELEMENT_WORK); |
386 | |
} |
387 | |
|
388 | |
|
389 | |
|
390 | |
|
391 | |
public Element createOsis() { |
392 | 0 | return new Element(OSIS_ELEMENT_OSIS); |
393 | |
} |
394 | |
|
395 | |
|
396 | |
|
397 | |
|
398 | |
public Element createDiv() { |
399 | 0 | return new Element(OSIS_ELEMENT_DIV); |
400 | |
} |
401 | |
|
402 | |
|
403 | |
|
404 | |
|
405 | |
public Element createCell() { |
406 | 0 | return new Element(OSIS_ELEMENT_CELL); |
407 | |
} |
408 | |
|
409 | |
|
410 | |
|
411 | |
|
412 | |
public Element createHeaderCell() { |
413 | 0 | Element ele = new Element(OSIS_ELEMENT_CELL); |
414 | 0 | ele.setAttribute(ATTRIBUTE_TABLE_ROLE, TABLE_ROLE_LABEL); |
415 | 0 | ele.setAttribute(ATTRIBUTE_CELL_ALIGN, CELL_ALIGN_CENTER); |
416 | 0 | return ele; |
417 | |
} |
418 | |
|
419 | |
|
420 | |
|
421 | |
|
422 | |
public Element createVerse() { |
423 | 0 | return new Element(OSIS_ELEMENT_VERSE); |
424 | |
} |
425 | |
|
426 | |
|
427 | |
|
428 | |
|
429 | |
public Element createW() { |
430 | 0 | return new Element(OSIS_ELEMENT_W); |
431 | |
} |
432 | |
|
433 | |
|
434 | |
|
435 | |
|
436 | |
public Element createFigure() { |
437 | 0 | return new Element(OSIS_ELEMENT_FIGURE); |
438 | |
} |
439 | |
|
440 | |
|
441 | |
|
442 | |
|
443 | |
public Element createForeign() { |
444 | 0 | return new Element(OSIS_ELEMENT_FOREIGN); |
445 | |
} |
446 | |
|
447 | |
|
448 | |
|
449 | |
|
450 | |
public Element createItem() { |
451 | 0 | return new Element(OSIS_ELEMENT_ITEM); |
452 | |
} |
453 | |
|
454 | |
|
455 | |
|
456 | |
|
457 | |
public Element createP() { |
458 | 0 | return new Element(OSIS_ELEMENT_P); |
459 | |
} |
460 | |
|
461 | |
|
462 | |
|
463 | |
|
464 | |
public Element createList() { |
465 | 0 | return new Element(OSIS_ELEMENT_LIST); |
466 | |
} |
467 | |
|
468 | |
|
469 | |
|
470 | |
|
471 | |
public Element createQ() { |
472 | 0 | return new Element(OSIS_ELEMENT_Q); |
473 | |
} |
474 | |
|
475 | |
|
476 | |
|
477 | |
|
478 | |
public Element createName() { |
479 | 0 | return new Element(OSIS_ELEMENT_NAME); |
480 | |
} |
481 | |
|
482 | |
|
483 | |
|
484 | |
|
485 | |
public Element createNote() { |
486 | 0 | return new Element(OSIS_ELEMENT_NOTE); |
487 | |
} |
488 | |
|
489 | |
|
490 | |
|
491 | |
|
492 | |
public Element createReference() { |
493 | 0 | return new Element(OSIS_ELEMENT_REFERENCE); |
494 | |
} |
495 | |
|
496 | |
|
497 | |
|
498 | |
|
499 | |
public Element createRow() { |
500 | 0 | return new Element(OSIS_ELEMENT_ROW); |
501 | |
} |
502 | |
|
503 | |
|
504 | |
|
505 | |
|
506 | |
public Element createSpeaker() { |
507 | 0 | return new Element(OSIS_ELEMENT_SPEAKER); |
508 | |
} |
509 | |
|
510 | |
|
511 | |
|
512 | |
|
513 | |
public Element createSpeech() { |
514 | 0 | return new Element(OSIS_ELEMENT_SPEECH); |
515 | |
} |
516 | |
|
517 | |
|
518 | |
|
519 | |
|
520 | |
public Element createTable() { |
521 | 0 | return new Element(OSIS_ELEMENT_TABLE); |
522 | |
} |
523 | |
|
524 | |
|
525 | |
|
526 | |
|
527 | |
public Element createTitle() { |
528 | 0 | return new Element(OSIS_ELEMENT_TITLE); |
529 | |
} |
530 | |
|
531 | |
|
532 | |
|
533 | |
|
534 | |
|
535 | |
|
536 | |
public Element createGeneratedTitle() { |
537 | 0 | Element title = new Element(OSIS_ELEMENT_TITLE); |
538 | 0 | title.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.GENERATED_CONTENT); |
539 | 0 | return title; |
540 | |
} |
541 | |
|
542 | |
|
543 | |
|
544 | |
|
545 | |
|
546 | |
|
547 | |
public Element createLG() { |
548 | 0 | return new Element(OSIS_ELEMENT_LG); |
549 | |
} |
550 | |
|
551 | |
|
552 | |
|
553 | |
|
554 | |
|
555 | |
|
556 | |
public Element createL() { |
557 | 0 | return new Element(OSIS_ELEMENT_L); |
558 | |
} |
559 | |
|
560 | |
|
561 | |
|
562 | |
|
563 | |
|
564 | |
|
565 | |
public Element createLB() { |
566 | 0 | return new Element(OSIS_ELEMENT_LB); |
567 | |
} |
568 | |
|
569 | |
|
570 | |
|
571 | |
|
572 | |
|
573 | |
|
574 | |
public Element createHI() { |
575 | 0 | return new Element(OSIS_ELEMENT_HI); |
576 | |
} |
577 | |
|
578 | |
|
579 | |
|
580 | |
|
581 | |
|
582 | |
|
583 | |
|
584 | |
public Text createText(String text) { |
585 | 0 | return new Text(text); |
586 | |
} |
587 | |
} |
588 | |
|
589 | |
|
590 | |
|
591 | |
|
592 | |
|
593 | |
|
594 | |
|
595 | |
|
596 | |
public static List<Content> getFragment(Element root) { |
597 | 0 | if (root != null) { |
598 | 0 | Element content = root; |
599 | 0 | if (OSISUtil.OSIS_ELEMENT_OSIS.equals(root.getName())) { |
600 | 0 | content = root.getChild(OSISUtil.OSIS_ELEMENT_OSISTEXT); |
601 | |
} |
602 | |
|
603 | 0 | if (OSISUtil.OSIS_ELEMENT_OSISTEXT.equals(root.getName())) { |
604 | 0 | content = root.getChild(OSISUtil.OSIS_ELEMENT_DIV); |
605 | |
} |
606 | |
|
607 | |
|
608 | |
|
609 | |
|
610 | 0 | if (content != null && content.getContentSize() == 1) { |
611 | 0 | Content firstChild = content.getContent(0); |
612 | 0 | if (firstChild instanceof Element && OSISUtil.OSIS_ELEMENT_DIV.equals(((Element) firstChild).getName())) { |
613 | 0 | content = (Element) firstChild; |
614 | |
} |
615 | |
} |
616 | |
|
617 | 0 | if (content != null) { |
618 | 0 | return content.getContent(); |
619 | |
} |
620 | |
} |
621 | 0 | return new ArrayList<Content>(); |
622 | |
} |
623 | |
|
624 | |
|
625 | |
|
626 | |
|
627 | |
|
628 | |
|
629 | |
|
630 | |
|
631 | |
|
632 | |
|
633 | |
|
634 | |
|
635 | |
|
636 | |
|
637 | |
|
638 | |
|
639 | |
|
640 | |
|
641 | |
|
642 | |
|
643 | |
|
644 | |
|
645 | |
|
646 | |
|
647 | |
|
648 | |
|
649 | |
|
650 | |
|
651 | |
|
652 | |
|
653 | |
|
654 | |
|
655 | |
|
656 | |
|
657 | |
|
658 | |
|
659 | |
|
660 | |
|
661 | |
|
662 | |
|
663 | |
|
664 | |
|
665 | |
|
666 | |
|
667 | |
public static String getCanonicalText(Element root) { |
668 | |
|
669 | |
|
670 | 0 | if (!isCanonical(root)) { |
671 | |
|
672 | 0 | return ""; |
673 | |
} |
674 | |
|
675 | 0 | StringBuilder buffer = new StringBuilder(); |
676 | |
|
677 | |
|
678 | 0 | List<Content> frag = OSISUtil.getFragment(root); |
679 | |
|
680 | 0 | Iterator<Content> dit = frag.iterator(); |
681 | 0 | String sID = null; |
682 | 0 | Content data = null; |
683 | 0 | Element ele = null; |
684 | 0 | while (dit.hasNext()) { |
685 | 0 | data = dit.next(); |
686 | 0 | if (data instanceof Element) { |
687 | 0 | ele = (Element) data; |
688 | 0 | if (!isCanonical(ele)) { |
689 | 0 | continue; |
690 | |
} |
691 | |
|
692 | 0 | if (ele.getName().equals(OSISUtil.OSIS_ELEMENT_VERSE)) { |
693 | 0 | sID = ele.getAttributeValue(OSISUtil.OSIS_ATTR_SID); |
694 | |
} |
695 | |
|
696 | 0 | if (sID != null) { |
697 | 0 | getCanonicalContent(ele, sID, dit, buffer); |
698 | |
} else { |
699 | 0 | getCanonicalContent(ele, null, ele.getContent().iterator(), buffer); |
700 | |
} |
701 | 0 | } else if (data instanceof Text) { |
702 | |
|
703 | |
|
704 | |
|
705 | |
|
706 | 0 | int lastIndex = buffer.length() - 1; |
707 | 0 | String text = ((Text) data).getText(); |
708 | |
|
709 | 0 | if (text.length() != 0) { |
710 | |
|
711 | 0 | if (lastIndex >= 0 && !Character.isWhitespace(buffer.charAt(lastIndex)) && !Character.isWhitespace(text.charAt(0))) { |
712 | 0 | buffer.append(' '); |
713 | |
} |
714 | 0 | buffer.append(text); |
715 | |
} |
716 | 0 | } |
717 | |
} |
718 | |
|
719 | 0 | return buffer.toString().trim(); |
720 | |
} |
721 | |
|
722 | |
|
723 | |
|
724 | |
|
725 | |
|
726 | |
|
727 | |
|
728 | |
|
729 | |
|
730 | |
public static String getPlainText(Element root) { |
731 | |
|
732 | 0 | return getTextContent(OSISUtil.getFragment(root)); |
733 | |
} |
734 | |
|
735 | |
|
736 | |
|
737 | |
|
738 | |
|
739 | |
|
740 | |
|
741 | |
|
742 | |
public static String getStrongsNumbers(Element root) { |
743 | 0 | return getLexicalInformation(root, false); |
744 | |
} |
745 | |
|
746 | |
|
747 | |
|
748 | |
|
749 | |
|
750 | |
|
751 | |
|
752 | |
public static String getMorphologiesWithStrong(Element root) { |
753 | 0 | return getLexicalInformation(root, true); |
754 | |
} |
755 | |
|
756 | |
|
757 | |
|
758 | |
|
759 | |
|
760 | |
|
761 | |
|
762 | |
|
763 | |
public static String getLexicalInformation(Element root, boolean includeMorphology) { |
764 | 0 | StringBuilder buffer = new StringBuilder(); |
765 | |
|
766 | 0 | for (Content content : getDeepContent(root, OSISUtil.OSIS_ELEMENT_W)) { |
767 | 0 | Element ele = (Element) content; |
768 | 0 | String attr = ele.getAttributeValue(OSISUtil.ATTRIBUTE_W_LEMMA); |
769 | 0 | if (attr != null) { |
770 | 0 | Matcher matcher = strongsNumberPattern.matcher(attr); |
771 | 0 | while (matcher.find()) { |
772 | 0 | String strongsNum = matcher.group(1); |
773 | 0 | if (buffer.length() > 0) { |
774 | 0 | buffer.append(' '); |
775 | |
} |
776 | |
|
777 | 0 | if (includeMorphology) { |
778 | |
|
779 | 0 | strongsNum = strongsNum.replace(SPACE_SEPARATOR, MORPH_INFO_SEPARATOR); |
780 | |
} |
781 | 0 | buffer.append(strongsNum); |
782 | |
|
783 | 0 | if (includeMorphology) { |
784 | |
|
785 | 0 | String morph = ele.getAttributeValue(OSISUtil.ATTRIBUTE_W_MORPH); |
786 | 0 | if (morph != null && morph.length() != 0) { |
787 | 0 | buffer.append(MORPH_INFO_SEPARATOR); |
788 | 0 | buffer.append(morph.replace(SPACE_SEPARATOR, MORPH_INFO_SEPARATOR)); |
789 | |
} |
790 | |
} |
791 | 0 | } |
792 | |
} |
793 | 0 | } |
794 | |
|
795 | 0 | return buffer.toString().trim(); |
796 | |
} |
797 | |
|
798 | |
|
799 | |
|
800 | |
|
801 | |
|
802 | |
|
803 | |
|
804 | |
|
805 | |
|
806 | |
|
807 | |
|
808 | |
public static String getReferences(Book book, Key key, Versification v11n, Element root) { |
809 | 0 | PassageKeyFactory keyf = PassageKeyFactory.instance(); |
810 | 0 | Key collector = keyf.createEmptyKeyList(v11n); |
811 | |
|
812 | 0 | for (Content content : getDeepContent(root, OSISUtil.OSIS_ELEMENT_REFERENCE)) { |
813 | 0 | Element ele = (Element) content; |
814 | 0 | String attr = ele.getAttributeValue(OSISUtil.OSIS_ATTR_REF); |
815 | 0 | if (attr != null) { |
816 | |
try { |
817 | 0 | collector.addAll(keyf.getKey(v11n, attr)); |
818 | 0 | } catch (NoSuchKeyException e) { |
819 | 0 | DataPolice.report(book, key, "Unable to parse: " + attr + " - No such reference:" + e.getMessage()); |
820 | 0 | } |
821 | |
} |
822 | 0 | } |
823 | |
|
824 | 0 | return collector.getOsisID(); |
825 | |
} |
826 | |
|
827 | |
|
828 | |
|
829 | |
|
830 | |
|
831 | |
|
832 | |
|
833 | |
public static String getNotes(Element root) { |
834 | 0 | StringBuilder buffer = new StringBuilder(); |
835 | |
|
836 | 0 | for (Content content : getDeepContent(root, OSISUtil.OSIS_ELEMENT_NOTE)) { |
837 | 0 | Element ele = (Element) content; |
838 | 0 | String attr = ele.getAttributeValue(OSISUtil.OSIS_ATTR_TYPE); |
839 | 0 | if (attr == null || !attr.equals(NOTETYPE_REFERENCE)) { |
840 | 0 | if (buffer.length() > 0) { |
841 | 0 | buffer.append(' '); |
842 | |
} |
843 | 0 | buffer.append(OSISUtil.getTextContent(ele.getContent())); |
844 | |
} |
845 | 0 | } |
846 | |
|
847 | 0 | return buffer.toString(); |
848 | |
} |
849 | |
|
850 | |
|
851 | |
|
852 | |
|
853 | |
|
854 | |
|
855 | |
|
856 | |
public static String getHeadings(Element root) { |
857 | 0 | StringBuilder buffer = new StringBuilder(); |
858 | |
|
859 | 0 | for (Content content : getDeepContent(root, OSISUtil.OSIS_ELEMENT_TITLE)) { |
860 | 0 | Element ele = (Element) content; |
861 | |
|
862 | 0 | if (buffer.length() > 0) { |
863 | 0 | buffer.append(' '); |
864 | |
} |
865 | 0 | buffer.append(OSISUtil.getTextContent(ele.getContent())); |
866 | 0 | } |
867 | |
|
868 | 0 | return buffer.toString(); |
869 | |
} |
870 | |
|
871 | |
private static void getCanonicalContent(Element parent, String sID, Iterator<Content> iter, StringBuilder buffer) { |
872 | 0 | if (!isCanonical(parent)) { |
873 | 0 | return; |
874 | |
} |
875 | |
|
876 | 0 | Content data = null; |
877 | 0 | Element ele = null; |
878 | 0 | String eleName = null; |
879 | 0 | String eID = null; |
880 | 0 | while (iter.hasNext()) { |
881 | 0 | data = iter.next(); |
882 | 0 | if (data instanceof Element) { |
883 | 0 | ele = (Element) data; |
884 | |
|
885 | |
|
886 | |
|
887 | 0 | eleName = ele.getName(); |
888 | 0 | eID = ele.getAttributeValue(OSISUtil.OSIS_ATTR_SID); |
889 | 0 | if (eID != null && eID.equals(sID) && eleName.equals(parent.getName())) { |
890 | 0 | break; |
891 | |
} |
892 | 0 | OSISUtil.getCanonicalContent(ele, sID, ele.getContent().iterator(), buffer); |
893 | 0 | } else if (data instanceof Text) { |
894 | |
|
895 | |
|
896 | |
|
897 | |
|
898 | |
|
899 | 0 | int lastIndex = buffer.length() - 1; |
900 | 0 | String text = ((Text) data).getText(); |
901 | 0 | if (lastIndex >= 0 && !Character.isWhitespace(buffer.charAt(lastIndex)) && (text.length() == 0 || !Character.isWhitespace(text.charAt(0))) && !OSIS_ELEMENT_SEG.equals(parent.getName())) { |
902 | 0 | buffer.append(' '); |
903 | |
} |
904 | 0 | buffer.append(text); |
905 | 0 | } |
906 | |
} |
907 | 0 | } |
908 | |
|
909 | |
private static boolean isCanonical(Content content) { |
910 | 0 | boolean result = true; |
911 | 0 | if (content instanceof Element) { |
912 | 0 | Element element = (Element) content; |
913 | |
|
914 | |
|
915 | 0 | if (EXTRA_BIBLICAL_ELEMENTS.contains(element.getName())) { |
916 | 0 | String canonical = element.getAttributeValue(OSISUtil.OSIS_ATTR_CANONICAL); |
917 | 0 | result = Boolean.valueOf(canonical).booleanValue(); |
918 | |
} |
919 | |
} |
920 | |
|
921 | 0 | return result; |
922 | |
} |
923 | |
|
924 | |
private static String getTextContent(List<Content> fragment) { |
925 | 0 | StringBuilder buffer = new StringBuilder(); |
926 | |
|
927 | 0 | for (Content next : fragment) { |
928 | 0 | recurseElement(next, buffer); |
929 | |
} |
930 | |
|
931 | 0 | return buffer.toString(); |
932 | |
} |
933 | |
|
934 | |
|
935 | |
|
936 | |
|
937 | |
|
938 | |
|
939 | |
|
940 | |
|
941 | |
|
942 | |
public static Collection<Content> getDeepContent(Element div, String name) { |
943 | 0 | List<Content> reply = new ArrayList<Content>(); |
944 | 0 | recurseDeepContent(div, name, reply); |
945 | 0 | return reply; |
946 | |
} |
947 | |
|
948 | |
|
949 | |
|
950 | |
|
951 | |
|
952 | |
|
953 | |
|
954 | |
|
955 | |
|
956 | |
|
957 | |
public static Verse getVerse(Versification v11n, Element ele) throws BookException { |
958 | 0 | if (ele.getName().equals(OSIS_ELEMENT_VERSE)) { |
959 | |
|
960 | 0 | String osisid = ele.getAttributeValue(OSIS_ATTR_OSISID); |
961 | |
|
962 | |
try { |
963 | 0 | return VerseFactory.fromString(v11n, osisid); |
964 | 0 | } catch (NoSuchVerseException ex) { |
965 | 0 | throw new BookException(JSOtherMsg.lookupText("OsisID not valid: {0}", osisid), ex); |
966 | |
} |
967 | |
} |
968 | |
|
969 | |
|
970 | 0 | Parent parent = ele.getParent(); |
971 | 0 | if (parent instanceof Element) { |
972 | 0 | return getVerse(v11n, (Element) parent); |
973 | |
} |
974 | |
|
975 | 0 | throw new BookException(JSOtherMsg.lookupText("Verse element could not be found")); |
976 | |
} |
977 | |
|
978 | |
|
979 | |
|
980 | |
|
981 | |
|
982 | |
|
983 | |
|
984 | |
|
985 | |
public static Element createOsisFramework(BookMetaData bmd) { |
986 | 0 | Element osis = factory().createOsis(); |
987 | 0 | String osisid = bmd.getInitials(); |
988 | |
|
989 | 0 | Element work = factory().createWork(); |
990 | 0 | work.setAttribute(ATTRIBUTE_WORK_OSISWORK, osisid); |
991 | |
|
992 | 0 | Element header = factory().createHeader(); |
993 | 0 | header.addContent(work); |
994 | |
|
995 | 0 | Element text = factory().createOsisText(); |
996 | 0 | text.setAttribute(ATTRIBUTE_TEXT_OSISIDWORK, OSISID_PREFIX_BIBLE + osisid); |
997 | 0 | text.addContent(header); |
998 | |
|
999 | 0 | osis.addContent(text); |
1000 | |
|
1001 | 0 | return osis; |
1002 | |
} |
1003 | |
|
1004 | |
|
1005 | |
|
1006 | |
|
1007 | |
|
1008 | |
|
1009 | |
|
1010 | |
|
1011 | |
public static List<Content> diffToOsis(List<Difference> diffs) { |
1012 | 0 | Element div = factory().createDiv(); |
1013 | |
|
1014 | 0 | for (int x = 0; x < diffs.size(); x++) { |
1015 | 0 | Difference diff = diffs.get(x); |
1016 | 0 | EditType editType = diff.getEditType(); |
1017 | |
|
1018 | 0 | Text text = factory.createText(diff.getText()); |
1019 | |
|
1020 | 0 | if (EditType.DELETE.equals(editType)) { |
1021 | 0 | Element hi = factory().createHI(); |
1022 | 0 | hi.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.HI_LINETHROUGH); |
1023 | 0 | hi.addContent(text); |
1024 | 0 | div.addContent(hi); |
1025 | 0 | } else if (EditType.INSERT.equals(editType)) { |
1026 | 0 | Element hi = factory().createHI(); |
1027 | 0 | hi.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.HI_UNDERLINE); |
1028 | 0 | hi.addContent(text); |
1029 | 0 | div.addContent(hi); |
1030 | 0 | } else { |
1031 | 0 | div.addContent(text); |
1032 | |
} |
1033 | |
} |
1034 | 0 | return div.cloneContent(); |
1035 | |
} |
1036 | |
|
1037 | |
public static List<Content> rtfToOsis(String rtf) { |
1038 | 0 | Element div = factory().createDiv(); |
1039 | 0 | Stack<Content> stack = new Stack<Content>(); |
1040 | 0 | stack.push(div); |
1041 | |
|
1042 | 0 | int strlen = rtf.length(); |
1043 | |
|
1044 | 0 | StringBuilder text = new StringBuilder(strlen); |
1045 | |
|
1046 | 0 | int i = 0; |
1047 | 0 | for (i = 0; i < strlen; i++) { |
1048 | 0 | char curChar = rtf.charAt(i); |
1049 | 0 | if (curChar != '\\') { |
1050 | 0 | text.append(curChar); |
1051 | 0 | continue; |
1052 | |
} |
1053 | |
|
1054 | |
|
1055 | |
|
1056 | |
|
1057 | |
|
1058 | 0 | if (rtf.startsWith("\\pard", i)) { |
1059 | 0 | Element currentElement = (Element) stack.pop(); |
1060 | 0 | currentElement.addContent(text.toString()); |
1061 | 0 | text.delete(0, text.length()); |
1062 | 0 | stack.clear(); |
1063 | 0 | stack.push(div); |
1064 | 0 | i += (i + 5 < strlen && rtf.charAt(i + 5) == ' ') ? 5 : 4; |
1065 | 0 | continue; |
1066 | |
} |
1067 | |
|
1068 | |
|
1069 | 0 | if (rtf.startsWith("\\par", i)) { |
1070 | 0 | Element currentElement = (Element) stack.peek(); |
1071 | 0 | currentElement.addContent(text.toString()); |
1072 | 0 | text.delete(0, text.length()); |
1073 | 0 | currentElement.addContent(OSISUtil.factory.createLB()); |
1074 | 0 | i += (i + 4 < strlen && rtf.charAt(i + 4) == ' ') ? 4 : 3; |
1075 | 0 | continue; |
1076 | |
} |
1077 | |
|
1078 | |
|
1079 | |
|
1080 | 0 | if (rtf.startsWith("\\qc", i)) { |
1081 | 0 | Element centerDiv = OSISUtil.factory.createDiv(); |
1082 | 0 | centerDiv.setAttribute(OSIS_ATTR_TYPE, "x-center"); |
1083 | 0 | Element currentElement = (Element) stack.peek(); |
1084 | 0 | currentElement.addContent(text.toString()); |
1085 | 0 | text.delete(0, text.length()); |
1086 | 0 | currentElement.addContent(centerDiv); |
1087 | 0 | stack.push(centerDiv); |
1088 | |
|
1089 | 0 | i += (i + 3 < strlen && rtf.charAt(i + 3) == ' ') ? 3 : 2; |
1090 | 0 | continue; |
1091 | |
} |
1092 | |
|
1093 | |
|
1094 | 0 | if (rtf.startsWith("\\u", i)) { |
1095 | 0 | StringBuilder buf = new StringBuilder(); |
1096 | 0 | i += 2; |
1097 | 0 | while (i < strlen) { |
1098 | 0 | char curDigit = rtf.charAt(i); |
1099 | 0 | if (curDigit != '-' && !Character.isDigit(curDigit)) { |
1100 | 0 | break; |
1101 | |
} |
1102 | 0 | buf.append(curDigit); |
1103 | 0 | i++; |
1104 | 0 | } |
1105 | |
|
1106 | |
|
1107 | |
|
1108 | |
|
1109 | |
|
1110 | 0 | int value = Integer.parseInt(buf.toString()); |
1111 | 0 | if (value < 0) { |
1112 | 0 | value += 65536; |
1113 | |
} |
1114 | 0 | text.append((char) value); |
1115 | |
|
1116 | 0 | continue; |
1117 | |
} |
1118 | |
|
1119 | |
|
1120 | 0 | if (rtf.startsWith("\\i0", i) || rtf.startsWith("\\b0", i)) { |
1121 | 0 | Element currentElement = (Element) stack.pop(); |
1122 | 0 | currentElement.addContent(text.toString()); |
1123 | 0 | text.delete(0, text.length()); |
1124 | 0 | i += (i + 3 < strlen && rtf.charAt(i + 3) == ' ') ? 3 : 2; |
1125 | 0 | continue; |
1126 | |
} |
1127 | |
|
1128 | |
|
1129 | 0 | if (rtf.startsWith(" ", i) || rtf.startsWith("\n", i)) { |
1130 | 0 | i += 1; |
1131 | 0 | continue; |
1132 | |
} |
1133 | |
|
1134 | |
|
1135 | 0 | if (rtf.startsWith("\\i", i)) { |
1136 | 0 | Element hiElement = OSISUtil.factory.createHI(); |
1137 | 0 | hiElement.setAttribute(OSIS_ATTR_TYPE, HI_ITALIC); |
1138 | 0 | Element currentElement = (Element) stack.peek(); |
1139 | 0 | currentElement.addContent(text.toString()); |
1140 | 0 | text.delete(0, text.length()); |
1141 | 0 | currentElement.addContent(hiElement); |
1142 | 0 | stack.push(hiElement); |
1143 | 0 | i += (i + 2 < strlen && rtf.charAt(i + 2) == ' ') ? 2 : 1; |
1144 | 0 | continue; |
1145 | |
} |
1146 | |
|
1147 | |
|
1148 | 0 | if (rtf.startsWith("\\b", i)) { |
1149 | 0 | Element hiElement = OSISUtil.factory.createHI(); |
1150 | 0 | hiElement.setAttribute(OSIS_ATTR_TYPE, HI_BOLD); |
1151 | 0 | Element currentElement = (Element) stack.peek(); |
1152 | 0 | currentElement.addContent(text.toString()); |
1153 | 0 | text.delete(0, text.length()); |
1154 | 0 | currentElement.addContent(hiElement); |
1155 | 0 | stack.push(hiElement); |
1156 | 0 | i += (i + 2 < strlen && rtf.charAt(i + 2) == ' ') ? 2 : 1; |
1157 | 0 | continue; |
1158 | |
} |
1159 | |
|
1160 | |
} |
1161 | |
|
1162 | |
|
1163 | 0 | if (text.length() > 0) { |
1164 | 0 | div.addContent(text.toString()); |
1165 | |
} |
1166 | |
|
1167 | |
|
1168 | |
|
1169 | |
|
1170 | |
|
1171 | |
|
1172 | |
|
1173 | |
|
1174 | |
|
1175 | |
|
1176 | |
|
1177 | |
|
1178 | |
|
1179 | |
|
1180 | |
|
1181 | |
|
1182 | |
|
1183 | |
|
1184 | |
|
1185 | 0 | return div.cloneContent(); |
1186 | |
} |
1187 | |
|
1188 | |
|
1189 | |
|
1190 | |
|
1191 | |
|
1192 | |
|
1193 | |
|
1194 | |
|
1195 | |
|
1196 | |
private static void recurseDeepContent(Element start, String name, List<Content> reply) { |
1197 | 0 | if (start.getName().equals(name)) { |
1198 | 0 | reply.add(start); |
1199 | |
} |
1200 | |
|
1201 | |
|
1202 | 0 | Element ele = null; |
1203 | 0 | for (Content data : start.getContent()) { |
1204 | 0 | if (data instanceof Element) { |
1205 | 0 | ele = (Element) data; |
1206 | 0 | recurseDeepContent(ele, name, reply); |
1207 | |
} |
1208 | |
} |
1209 | 0 | } |
1210 | |
|
1211 | |
|
1212 | |
|
1213 | |
|
1214 | |
|
1215 | |
|
1216 | |
|
1217 | |
|
1218 | |
private static void recurseElement(Object sub, StringBuilder buffer) { |
1219 | 0 | if (sub instanceof Text) { |
1220 | 0 | buffer.append(((Text) sub).getText()); |
1221 | 0 | } else if (sub instanceof Element) { |
1222 | 0 | recurseChildren((Element) sub, buffer); |
1223 | |
} else { |
1224 | 0 | log.error("unknown type: {}", sub.getClass().getName()); |
1225 | |
} |
1226 | 0 | } |
1227 | |
|
1228 | |
|
1229 | |
|
1230 | |
|
1231 | |
|
1232 | |
|
1233 | |
|
1234 | |
|
1235 | |
|
1236 | |
private static void recurseChildren(Element ele, StringBuilder buffer) { |
1237 | |
|
1238 | 0 | for (Content sub : ele.getContent()) { |
1239 | 0 | recurseElement(sub, buffer); |
1240 | |
} |
1241 | 0 | } |
1242 | |
|
1243 | 0 | private static String strongsNumber = "strong:([GgHh][0-9]+!?[A-Za-z]*)"; |
1244 | 0 | private static Pattern strongsNumberPattern = Pattern.compile(strongsNumber); |
1245 | |
} |