| PrettySerializingContentHandler.java |
1 /**
2 * Distribution License:
3 * JSword is free software; you can redistribute it and/or modify it under
4 * the terms of the GNU Lesser General Public License, version 2.1 or later
5 * as published by the Free Software Foundation. This program is distributed
6 * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
7 * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8 * See the GNU Lesser General Public License for more details.
9 *
10 * The License is available on the internet at:
11 * http://www.gnu.org/copyleft/lgpl.html
12 * or by writing to:
13 * Free Software Foundation, Inc.
14 * 59 Temple Place - Suite 330
15 * Boston, MA 02111-1307, USA
16 *
17 * © CrossWire Bible Society, 2005 - 2016
18 *
19 */
20 package org.crosswire.common.xml;
21
22 import java.io.IOException;
23 import java.io.StringWriter;
24 import java.io.Writer;
25
26 import org.xml.sax.Attributes;
27 import org.xml.sax.ContentHandler;
28 import org.xml.sax.Locator;
29
30 /**
31 * This class provides for the formatted serialization of a SAX stream to a
32 * <code>Writer</code>.
33 *
34 * @see gnu.lgpl.License The GNU Lesser General Public License for details.
35 * @author DM Smith
36 */
37 public class PrettySerializingContentHandler implements ContentHandler {
38 /**
39 * A formatting serializer that does not add whitespace to the document.
40 * This uses a StringWriter and the toString method will return its content.
41 */
42 public PrettySerializingContentHandler() {
43 this(FormatType.AS_IS);
44 }
45
46 /**
47 * A formatting serializer that adds whitespace to the document according to
48 * the specified <code>FormatType</code>. This uses a StringWriter and the
49 * toString method will return its content.
50 *
51 * @param theFormat
52 * the formatting to use
53 */
54 public PrettySerializingContentHandler(FormatType theFormat) {
55 this(theFormat, null);
56 }
57
58 /**
59 * A formatting serializer that adds whitespace to the document according to
60 * the specified <code>FormatType</code>. As the document is serialized it
61 * is written to the provided <code>Writer</code>.
62 *
63 * @param theFormat
64 * the formatting to use
65 * @param theWriter
66 * the writer to use
67 */
68 public PrettySerializingContentHandler(FormatType theFormat, Writer theWriter) {
69 formatting = theFormat;
70 writer = theWriter == null ? new StringWriter() : theWriter;
71 }
72
73 /*
74 * (non-Javadoc)
75 *
76 * @see java.lang.Object#toString()
77 */
78 @Override
79 public String toString() {
80 return writer.toString();
81 }
82
83 /*
84 * (non-Javadoc)
85 *
86 * @see org.xml.sax.ContentHandler#setDocumentLocator(org.xml.sax.Locator)
87 */
88 public void setDocumentLocator(Locator locator) {
89 }
90
91 /*
92 * (non-Javadoc)
93 *
94 * @see org.xml.sax.ContentHandler#startDocument()
95 */
96 public void startDocument() {
97 // write("<?xml version=\"1.0\"?>");
98 }
99
100 /*
101 * (non-Javadoc)
102 *
103 * @see org.xml.sax.ContentHandler#endDocument()
104 */
105 public void endDocument() {
106 }
107
108 /*
109 * (non-Javadoc)
110 *
111 * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String,
112 * java.lang.String)
113 */
114 public void startPrefixMapping(String prefix, String uri) {
115 }
116
117 /*
118 * (non-Javadoc)
119 *
120 * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String)
121 */
122 public void endPrefixMapping(String prefix) {
123 }
124
125 /*
126 * (non-Javadoc)
127 *
128 * @see org.xml.sax.ContentHandler#startElement(java.lang.String,
129 * java.lang.String, java.lang.String, org.xml.sax.Attributes)
130 */
131 public void startElement(String uri, String localname, String qname, Attributes attrs) {
132 if (depth > 0) {
133 handlePending();
134 }
135
136 write(getTagStart());
137 write(decorateTagName(localname));
138
139 for (int i = 0; i < attrs.getLength(); i++) {
140 write(' ');
141 write(decorateAttributeName(XMLUtil.getAttributeName(attrs, i)));
142 write("='");
143 write(decorateAttributeValue(XMLUtil.escape(attrs.getValue(i))));
144 write('\'');
145 }
146
147 pendingEndTag = true;
148 depth++;
149 }
150
151 /*
152 * (non-Javadoc)
153 *
154 * @see org.xml.sax.ContentHandler#endElement(java.lang.String,
155 * java.lang.String, java.lang.String)
156 */
157 public void endElement(String uri, String localname, String qname) {
158 depth--;
159 // Java cannot display empty tags <tag/> so most of the following is
160 // commented out
161 if (pendingEndTag) {
162 if (formatting.isAnalytic() && depth > 0) {
163 emitWhitespace(depth - 1);
164 }
165 //
166 // // Hack alert JTextPane cannot handle <br/>
167 // if (localname.equalsIgnoreCase("br"))
168 // {
169 write(getTagEnd());
170 // }
171 // else
172 // {
173 // write(getEmptyTagEnd());
174 // }
175 }
176 // else
177 // {
178 if (formatting.isClassic()) {
179 emitWhitespace(depth);
180 }
181
182 write(getEndTagStart());
183
184 write(decorateTagName(localname));
185
186 if (formatting.isAnalytic()) {
187 emitWhitespace(depth);
188 }
189
190 write(getTagEnd());
191 // }
192 pendingEndTag = false;
193 lookingForChars = false;
194 }
195
196 /*
197 * (non-Javadoc)
198 *
199 * @see org.xml.sax.ContentHandler#characters(char[], int, int)
200 */
201 public void characters(char[] chars, int start, int length) {
202 if (!lookingForChars) {
203 handlePending();
204 }
205
206 String s = new String(chars, start, length);
207 write(decorateCharacters(s));
208 lookingForChars = true;
209 }
210
211 /*
212 * (non-Javadoc)
213 *
214 * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
215 */
216 public void ignorableWhitespace(char[] chars, int start, int length) {
217 characters(chars, start, length);
218 }
219
220 /*
221 * (non-Javadoc)
222 *
223 * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String,
224 * java.lang.String)
225 */
226 public void processingInstruction(String target, String data) {
227 handlePending();
228
229 write(getPIStart());
230 write(target);
231 write(' ');
232 write(decorateCharacters(data));
233 write(getPIEnd());
234
235 if (formatting.isMultiline()) {
236 write(getNewline());
237 }
238 }
239
240 /*
241 * (non-Javadoc)
242 *
243 * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String)
244 */
245 public void skippedEntity(String name) {
246 }
247
248 protected String getTagStart() {
249 return "<";
250 }
251
252 protected String getTagEnd() {
253 return ">";
254 }
255
256 protected String getEmptyTagEnd() {
257 return "/>";
258 }
259
260 protected String getEndTagStart() {
261 return "</";
262 }
263
264 protected String getPIStart() {
265 return "<!";
266 }
267
268 protected String getPIEnd() {
269 return "!>";
270 }
271
272 protected String getNewline() {
273 return "\n";
274 }
275
276 protected String decorateTagName(String tagName) {
277 return tagName;
278 }
279
280 protected String decorateAttributeName(String attrName) {
281 return attrName;
282 }
283
284 protected String decorateAttributeValue(String attrValue) {
285 return attrValue;
286 }
287
288 protected String decorateCharacters(String characters) {
289 return characters;
290 }
291
292 protected String decorateIndent(int indentLevel) {
293 return new String(indentation, 0, indentLevel).intern();
294 }
295
296 protected void write(String obj) {
297 try {
298 writer.write(obj);
299 } catch (IOException e) {
300 e.printStackTrace(System.err);
301 }
302 }
303
304 protected void write(char obj) {
305 try {
306 writer.write(obj);
307 } catch (IOException e) {
308 e.printStackTrace(System.err);
309 }
310 }
311
312 private void handlePending() {
313 if (pendingEndTag) {
314 pendingEndTag = false;
315
316 if (formatting.isAnalytic()) {
317 emitWhitespace(depth);
318 }
319
320 write(getTagEnd());
321
322 }
323 if (formatting.isClassic()) {
324 emitWhitespace(depth);
325 }
326 lookingForChars = false;
327 }
328
329 private void emitWhitespace(int indentLevel) {
330 write(getNewline());
331 if (formatting.isIndented()) {
332 write(decorateIndent(indentLevel));
333 }
334 }
335
336 /**
337 * This allows for rapid output of whitespace.
338 */
339 private static char[] indentation = {
340 '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t',
341 '\t', '\t', '\t', '\t', '\t', '\t',
342 };
343
344 /**
345 * The depth is incremented on each startElement and decremented on each
346 * endElement. This is used to output the indentation.
347 */
348 private int depth;
349
350 /**
351 * It is possible that characters(...) will be called for adjacent pieces of
352 * text. Often this is due to entities in the text. This will allow for
353 * these to be joined back together.
354 */
355 private boolean lookingForChars;
356
357 /**
358 * One of the difficulties in SAX parsing is that it does not retain state.
359 * Even for an empty tag, it calls startElement and endElement. This allows
360 * for making empty elements to have the empty tag notation: <tag/>.
361 */
362 private boolean pendingEndTag;
363
364 private FormatType formatting;
365
366 private Writer writer;
367 }
368