1
22 package org.crosswire.jsword.book.filter.thml;
23
24 import java.io.IOException;
25 import java.io.StringReader;
26 import java.util.List;
27
28 import javax.xml.parsers.ParserConfigurationException;
29 import javax.xml.parsers.SAXParser;
30 import javax.xml.parsers.SAXParserFactory;
31
32 import org.crosswire.common.util.Logger;
33 import org.crosswire.common.xml.XMLUtil;
34 import org.crosswire.jsword.book.Book;
35 import org.crosswire.jsword.book.OSISUtil;
36 import org.crosswire.jsword.book.filter.Filter;
37 import org.crosswire.jsword.passage.Key;
38 import org.jdom.Content;
39 import org.jdom.Element;
40 import org.xml.sax.InputSource;
41 import org.xml.sax.SAXException;
42 import org.xml.sax.SAXParseException;
43
44
58 public class THMLFilter implements Filter {
59
62 public List<Content> toOSIS(Book book, Key key, String plain) {
63 Element ele = cleanParse(book, key, plain);
64
65 if (ele == null) {
66 if (error instanceof SAXParseException) {
67 SAXParseException spe = (SAXParseException) error;
68 int colNumber = spe.getColumnNumber();
69 int start = Math.max(0, colNumber - 40);
70 int stop = Math.min(finalInput.length(), colNumber + 40);
71 int here = stop - start;
72 log.warn("Could not fix " + book.getInitials() + '(' + key.getName() + ") by "
73 + errorMessage + ": Error here(" + colNumber + ',' + finalInput.length() + ',' + here + "): " + finalInput.substring(start, stop));
74 } else {
75 log.warn("Could not fix " + book.getInitials() + "(" + key.getName() + ") by " + errorMessage + ": " + error.getMessage());
76 }
77 ele = OSISUtil.factory().createP();
78 }
79
80 return ele.removeContent();
81 }
82
83 @Override
84 public THMLFilter clone() {
85 THMLFilter clone = null;
86 try {
87 clone = (THMLFilter) super.clone();
88 } catch (CloneNotSupportedException e) {
89 assert false : e;
90 }
91 return clone;
92 }
93
94 private Element cleanParse(Book book, Key key, String plain) {
95 String clean = XMLUtil.cleanAllEntities(plain);
97 Element ele = parse(book, key, clean, "cleaning entities");
98
99 if (ele == null) {
100 ele = cleanText(book, key, clean);
101 }
102
103 return ele;
104 }
105
106 private Element cleanText(Book book, Key key, String plain) {
107 String clean = XMLUtil.cleanAllCharacters(plain);
109 Element ele = parse(book, key, clean, "cleaning text");
110
111 if (ele == null) {
112 ele = parse(book, key, XMLUtil.closeEmptyTags(clean), "closing empty tags");
113 }
114
115 if (ele == null) {
116 ele = cleanTags(book, key, clean);
117 }
118
119 return ele;
120 }
121
122 private Element cleanTags(Book book, Key key, String plain) {
123 String clean = XMLUtil.cleanAllTags(plain);
125 return parse(book, key, clean, "cleaning tags");
126 }
127
128 private Element parse(Book book, Key key, String plain, String failMessage) {
129 Exception ex = null;
130 StringBuilder buf = new StringBuilder(15 + plain.length());
133 buf.append('<').append(RootTag.TAG_ROOT).append('>').append(plain).append("</").append(RootTag.TAG_ROOT).append('>');
134 finalInput = buf.toString();
135 try {
136 StringReader in = new StringReader(finalInput);
137 InputSource is = new InputSource(in);
138 SAXParserFactory spf = SAXParserFactory.newInstance();
139 SAXParser parser = spf.newSAXParser();
140 CustomHandler handler = new CustomHandler(book, key);
141
142 parser.parse(is, handler);
143 return handler.getRootElement();
144 } catch (SAXParseException e) {
145 ex = e;
146 } catch (SAXException e) {
147 ex = e;
148 } catch (IOException e) {
149 ex = e;
150 } catch (ParserConfigurationException e) {
151 ex = e;
152 } catch (IllegalArgumentException e) {
153 ex = e;
155 } catch (RuntimeException e) {
156 ex = e;
158 }
159
160 errorMessage = failMessage;
161 error = ex;
162 return null;
163 }
164
165 private String errorMessage;
166 private Exception error;
167 private String finalInput;
168
169
172 private static final Logger log = Logger.getLogger(THMLFilter.class);
173 }
174