1
22 package org.crosswire.jsword.book.filter.osis;
23
24 import java.io.IOException;
25 import java.io.StringReader;
26 import java.util.List;
27
28 import org.crosswire.common.util.Logger;
29 import org.crosswire.common.xml.XMLUtil;
30 import org.crosswire.jsword.book.Book;
31 import org.crosswire.jsword.book.DataPolice;
32 import org.crosswire.jsword.book.OSISUtil;
33 import org.crosswire.jsword.book.filter.Filter;
34 import org.crosswire.jsword.passage.Key;
35 import org.jdom.Content;
36 import org.jdom.Document;
37 import org.jdom.Element;
38 import org.jdom.JDOMException;
39 import org.jdom.input.SAXBuilder;
40 import org.xml.sax.InputSource;
41
42
49 public class OSISFilter implements Filter {
50
53 public OSISFilter() {
54 builder = new SAXBuilder();
55 builder.setFastReconfigure(true);
56 }
57
58
61 public List<Content> toOSIS(Book book, Key key, String plain) {
62 Element ele = null;
63 Exception ex = null;
64 String clean = plain;
65
66 if (book.getInitials().startsWith("NET") && plain.endsWith("</div>")) {
69 clean = clean.substring(0, plain.length() - 6);
70 }
71
72 try {
73 ele = parse(clean);
74 } catch (JDOMException e) {
75 ex = e;
76 } catch (IOException e) {
77 ex = e;
78 }
79
80 if (ele == null) {
81 clean = XMLUtil.cleanAllEntities(clean);
82
83 try {
84 ele = parse(clean);
85 } catch (JDOMException e) {
86 ex = e;
87 } catch (IOException e) {
88 ex = e;
89 }
90 }
91
92 if (ex != null) {
93 DataPolice.report(book, key, "Parse " + book.getInitials() + "(" + key.getName() + ") failed: " + ex.getMessage() + "\non: " + plain);
94 ele = cleanTags(book, key, clean);
95 }
96
97 if (ele == null) {
98 ele = OSISUtil.factory().createP();
99 }
100
101 return ele.removeContent();
102 }
103
104 @Override
105 public OSISFilter clone() {
106 OSISFilter clone = null;
107 try {
108 clone = (OSISFilter) super.clone();
109 } catch (CloneNotSupportedException e) {
110 assert false : e;
111 }
112 return clone;
113 }
114
115 private Element cleanTags(Book book, Key key, String plain) {
116 String shawn = XMLUtil.cleanAllTags(plain);
118 Exception ex = null;
119 try {
120 return parse(shawn);
121 } catch (JDOMException e) {
122 ex = e;
123 } catch (IOException e) {
124 ex = e;
125 }
126
127 log.warn("Could not fix " + book.getInitials() + "(" + key.getName() + ") by cleaning tags: " + ex.getMessage());
128
129 return null;
130 }
131
132
136 private Element parse(String plain) throws JDOMException, IOException {
137 StringReader in = new StringReader("<div>" + plain + "</div>");
139 InputSource is = new InputSource(in);
140 Document doc = builder.build(is);
141 Element div = doc.getRootElement();
142
143 return div;
144 }
145
146
149 private static final Logger log = Logger.getLogger(OSISFilter.class);
150
151
154 private SAXBuilder builder;
155 }
156