| PrettySerializingContentHandler.java |
1 /**
2 * Distribution License:
3 * JSword is free software; you can redistribute it and/or modify it under
4 * the terms of the GNU Lesser General Public License, version 2.1 as published by
5 * the Free Software Foundation. This program is distributed in the hope
6 * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8 * See the GNU Lesser General Public License for more details.
9 *
10 * The License is available on the internet at:
11 * http://www.gnu.org/copyleft/lgpl.html
12 * or by writing to:
13 * Free Software Foundation, Inc.
14 * 59 Temple Place - Suite 330
15 * Boston, MA 02111-1307, USA
16 *
17 * Copyright: 2005
18 * The copyright to this program is held by its authors.
19 *
20 * ID: $Id: PrettySerializingContentHandler.java 2050 2010-12-09 15:31:45Z dmsmith $
21 */
22 package org.crosswire.common.xml;
23
24 import java.io.IOException;
25 import java.io.StringWriter;
26 import java.io.Writer;
27
28 import org.xml.sax.Attributes;
29 import org.xml.sax.ContentHandler;
30 import org.xml.sax.Locator;
31
32 /**
33 * This class provides for the formatted serialization of a SAX stream to a
34 * <code>Writer</code>.
35 *
36 * @see gnu.lgpl.License for license details.<br>
37 * The copyright to this program is held by it's authors.
38 * @author DM Smith [dmsmith555 at yahoo dot com]
39 */
40 public class PrettySerializingContentHandler implements ContentHandler {
41 /**
42 * A formatting serializer that does not add whitespace to the document.
43 * This uses a StringWriter and the toString method will return its content.
44 */
45 public PrettySerializingContentHandler() {
46 this(FormatType.AS_IS);
47 }
48
49 /**
50 * A formatting serializer that adds whitespace to the document according to
51 * the specified <code>FormatType</code>. This uses a StringWriter and the
52 * toString method will return its content.
53 *
54 * @param theFormat
55 * the formatting to use
56 */
57 public PrettySerializingContentHandler(FormatType theFormat) {
58 this(theFormat, null);
59 }
60
61 /**
62 * A formatting serializer that adds whitespace to the document according to
63 * the specified <code>FormatType</code>. As the document is serialized it
64 * is written to the provided <code>Writer</code>.
65 *
66 * @param theFormat
67 * the formatting to use
68 * @param theWriter
69 * the writer to use
70 */
71 public PrettySerializingContentHandler(FormatType theFormat, Writer theWriter) {
72 formatting = theFormat;
73 writer = theWriter == null ? new StringWriter() : theWriter;
74 }
75
76 /*
77 * (non-Javadoc)
78 *
79 * @see java.lang.Object#toString()
80 */
81 @Override
82 public String toString() {
83 return writer.toString();
84 }
85
86 /*
87 * (non-Javadoc)
88 *
89 * @see org.xml.sax.ContentHandler#setDocumentLocator(org.xml.sax.Locator)
90 */
91 public void setDocumentLocator(Locator locator) {
92 }
93
94 /*
95 * (non-Javadoc)
96 *
97 * @see org.xml.sax.ContentHandler#startDocument()
98 */
99 public void startDocument() {
100 // write("<?xml version=\"1.0\"?>");
101 }
102
103 /*
104 * (non-Javadoc)
105 *
106 * @see org.xml.sax.ContentHandler#endDocument()
107 */
108 public void endDocument() {
109 }
110
111 /*
112 * (non-Javadoc)
113 *
114 * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String,
115 * java.lang.String)
116 */
117 public void startPrefixMapping(String prefix, String uri) {
118 }
119
120 /*
121 * (non-Javadoc)
122 *
123 * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String)
124 */
125 public void endPrefixMapping(String prefix) {
126 }
127
128 /*
129 * (non-Javadoc)
130 *
131 * @see org.xml.sax.ContentHandler#startElement(java.lang.String,
132 * java.lang.String, java.lang.String, org.xml.sax.Attributes)
133 */
134 public void startElement(String uri, String localname, String qname, Attributes attrs) {
135 if (depth > 0) {
136 handlePending();
137 }
138
139 write(getTagStart());
140 write(decorateTagName(localname));
141
142 for (int i = 0; i < attrs.getLength(); i++) {
143 write(' ');
144 write(decorateAttributeName(XMLUtil.getAttributeName(attrs, i)));
145 write("='");
146 write(decorateAttributeValue(XMLUtil.escape(attrs.getValue(i))));
147 write('\'');
148 }
149
150 pendingEndTag = true;
151 depth++;
152 }
153
154 /*
155 * (non-Javadoc)
156 *
157 * @see org.xml.sax.ContentHandler#endElement(java.lang.String,
158 * java.lang.String, java.lang.String)
159 */
160 public void endElement(String uri, String localname, String qname) {
161 depth--;
162 // Java cannot display empty tags <tag/> so most of the following is
163 // commented out
164 if (pendingEndTag) {
165 if (formatting.isAnalytic() && depth > 0) {
166 emitWhitespace(depth - 1);
167 }
168 //
169 // // Hack alert JTextPane cannot handle <br/>
170 // if (localname.equalsIgnoreCase("br"))
171 // {
172 write(getTagEnd());
173 // }
174 // else
175 // {
176 // write(getEmptyTagEnd());
177 // }
178 }
179 // else
180 // {
181 if (formatting.isClassic()) {
182 emitWhitespace(depth);
183 }
184
185 write(getEndTagStart());
186
187 write(decorateTagName(localname));
188
189 if (formatting.isAnalytic()) {
190 emitWhitespace(depth);
191 }
192
193 write(getTagEnd());
194 // }
195 pendingEndTag = false;
196 lookingForChars = false;
197 }
198
199 /*
200 * (non-Javadoc)
201 *
202 * @see org.xml.sax.ContentHandler#characters(char[], int, int)
203 */
204 public void characters(char[] chars, int start, int length) {
205 if (!lookingForChars) {
206 handlePending();
207 }
208
209 String s = new String(chars, start, length);
210 write(decorateCharacters(s));
211 lookingForChars = true;
212 }
213
214 /*
215 * (non-Javadoc)
216 *
217 * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
218 */
219 public void ignorableWhitespace(char[] chars, int start, int length) {
220 characters(chars, start, length);
221 }
222
223 /*
224 * (non-Javadoc)
225 *
226 * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String,
227 * java.lang.String)
228 */
229 public void processingInstruction(String target, String data) {
230 handlePending();
231
232 write(getPIStart());
233 write(target);
234 write(' ');
235 write(decorateCharacters(data));
236 write(getPIEnd());
237
238 if (formatting.isMultiline()) {
239 write(getNewline());
240 }
241 }
242
243 /*
244 * (non-Javadoc)
245 *
246 * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String)
247 */
248 public void skippedEntity(String name) {
249 }
250
251 protected String getTagStart() {
252 return "<";
253 }
254
255 protected String getTagEnd() {
256 return ">";
257 }
258
259 protected String getEmptyTagEnd() {
260 return "/>";
261 }
262
263 protected String getEndTagStart() {
264 return "</";
265 }
266
267 protected String getPIStart() {
268 return "<!";
269 }
270
271 protected String getPIEnd() {
272 return "!>";
273 }
274
275 protected String getNewline() {
276 return "\n";
277 }
278
279 protected String decorateTagName(String tagName) {
280 return tagName;
281 }
282
283 protected String decorateAttributeName(String attrName) {
284 return attrName;
285 }
286
287 protected String decorateAttributeValue(String attrValue) {
288 return attrValue;
289 }
290
291 protected String decorateCharacters(String characters) {
292 return characters;
293 }
294
295 protected String decorateIndent(int indentLevel) {
296 return new String(indentation, 0, indentLevel).intern();
297 }
298
299 protected void write(String obj) {
300 try {
301 writer.write(obj);
302 } catch (IOException e) {
303 e.printStackTrace(System.err);
304 }
305 }
306
307 protected void write(char obj) {
308 try {
309 writer.write(obj);
310 } catch (IOException e) {
311 e.printStackTrace(System.err);
312 }
313 }
314
315 private void handlePending() {
316 if (pendingEndTag) {
317 pendingEndTag = false;
318
319 if (formatting.isAnalytic()) {
320 emitWhitespace(depth);
321 }
322
323 write(getTagEnd());
324
325 }
326 if (formatting.isClassic()) {
327 emitWhitespace(depth);
328 }
329 lookingForChars = false;
330 }
331
332 private void emitWhitespace(int indentLevel) {
333 write(getNewline());
334 if (formatting.isIndented()) {
335 write(decorateIndent(indentLevel));
336 }
337 }
338
339 /**
340 * This allows for rapid output of whitespace.
341 */
342 private static char[] indentation = {
343 '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t',
344 '\t', '\t', '\t', '\t', '\t', '\t',
345 };
346
347 /**
348 * The depth is incremented on each startElement and decremented on each
349 * endElement. This is used to output the indentation.
350 */
351 private int depth;
352
353 /**
354 * It is possible that characters(...) will be called for adjacent pieces of
355 * text. Often this is due to entities in the text. This will allow for
356 * these to be joined back together.
357 */
358 private boolean lookingForChars;
359
360 /**
361 * One of the difficulties in SAX parsing is that it does not retain state.
362 * Even for an empty tag, it calls startElement and endElement. This allows
363 * for making empty elements to have the empty tag notation: <tag/>.
364 */
365 private boolean pendingEndTag;
366
367 private FormatType formatting;
368
369 private Writer writer;
370 }
371