1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 as published by
5    * the Free Software Foundation. This program is distributed in the hope
6    * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7    * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *       http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * Copyright: 2005
18   *     The copyright to this program is held by its authors.
19   *
20   * ID: $Id: PrettySerializingContentHandler.java 2050 2010-12-09 15:31:45Z dmsmith $
21   */
22  package org.crosswire.common.xml;
23  
24  import java.io.IOException;
25  import java.io.StringWriter;
26  import java.io.Writer;
27  
28  import org.xml.sax.Attributes;
29  import org.xml.sax.ContentHandler;
30  import org.xml.sax.Locator;
31  
32  /**
33   * This class provides for the formatted serialization of a SAX stream to a
34   * <code>Writer</code>.
35   * 
36   * @see gnu.lgpl.License for license details.<br>
37   *      The copyright to this program is held by it's authors.
38   * @author DM Smith [dmsmith555 at yahoo dot com]
39   */
40  public class PrettySerializingContentHandler implements ContentHandler {
41      /**
42       * A formatting serializer that does not add whitespace to the document.
43       * This uses a StringWriter and the toString method will return its content.
44       */
45      public PrettySerializingContentHandler() {
46          this(FormatType.AS_IS);
47      }
48  
49      /**
50       * A formatting serializer that adds whitespace to the document according to
51       * the specified <code>FormatType</code>. This uses a StringWriter and the
52       * toString method will return its content.
53       * 
54       * @param theFormat
55       *            the formatting to use
56       */
57      public PrettySerializingContentHandler(FormatType theFormat) {
58          this(theFormat, null);
59      }
60  
61      /**
62       * A formatting serializer that adds whitespace to the document according to
63       * the specified <code>FormatType</code>. As the document is serialized it
64       * is written to the provided <code>Writer</code>.
65       * 
66       * @param theFormat
67       *            the formatting to use
68       * @param theWriter
69       *            the writer to use
70       */
71      public PrettySerializingContentHandler(FormatType theFormat, Writer theWriter) {
72          formatting = theFormat;
73          writer = theWriter == null ? new StringWriter() : theWriter;
74      }
75  
76      /*
77       * (non-Javadoc)
78       * 
79       * @see java.lang.Object#toString()
80       */
81      @Override
82      public String toString() {
83          return writer.toString();
84      }
85  
86      /*
87       * (non-Javadoc)
88       * 
89       * @see org.xml.sax.ContentHandler#setDocumentLocator(org.xml.sax.Locator)
90       */
91      public void setDocumentLocator(Locator locator) {
92      }
93  
94      /*
95       * (non-Javadoc)
96       * 
97       * @see org.xml.sax.ContentHandler#startDocument()
98       */
99      public void startDocument() {
100         // write("<?xml version=\"1.0\"?>");
101     }
102 
103     /*
104      * (non-Javadoc)
105      * 
106      * @see org.xml.sax.ContentHandler#endDocument()
107      */
108     public void endDocument() {
109     }
110 
111     /*
112      * (non-Javadoc)
113      * 
114      * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String,
115      * java.lang.String)
116      */
117     public void startPrefixMapping(String prefix, String uri) {
118     }
119 
120     /*
121      * (non-Javadoc)
122      * 
123      * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String)
124      */
125     public void endPrefixMapping(String prefix) {
126     }
127 
128     /*
129      * (non-Javadoc)
130      * 
131      * @see org.xml.sax.ContentHandler#startElement(java.lang.String,
132      * java.lang.String, java.lang.String, org.xml.sax.Attributes)
133      */
134     public void startElement(String uri, String localname, String qname, Attributes attrs) {
135         if (depth > 0) {
136             handlePending();
137         }
138 
139         write(getTagStart());
140         write(decorateTagName(localname));
141 
142         for (int i = 0; i < attrs.getLength(); i++) {
143             write(' ');
144             write(decorateAttributeName(XMLUtil.getAttributeName(attrs, i)));
145             write("='");
146             write(decorateAttributeValue(XMLUtil.escape(attrs.getValue(i))));
147             write('\'');
148         }
149 
150         pendingEndTag = true;
151         depth++;
152     }
153 
154     /*
155      * (non-Javadoc)
156      * 
157      * @see org.xml.sax.ContentHandler#endElement(java.lang.String,
158      * java.lang.String, java.lang.String)
159      */
160     public void endElement(String uri, String localname, String qname) {
161         depth--;
162         // Java cannot display empty tags <tag/> so most of the following is
163         // commented out
164         if (pendingEndTag) {
165             if (formatting.isAnalytic() && depth > 0) {
166                 emitWhitespace(depth - 1);
167             }
168             //
169             // // Hack alert JTextPane cannot handle <br/>
170             //            if (localname.equalsIgnoreCase("br"))
171             // {
172             write(getTagEnd());
173             // }
174             // else
175             // {
176             // write(getEmptyTagEnd());
177             // }
178         }
179         // else
180         // {
181         if (formatting.isClassic()) {
182             emitWhitespace(depth);
183         }
184 
185         write(getEndTagStart());
186 
187         write(decorateTagName(localname));
188 
189         if (formatting.isAnalytic()) {
190             emitWhitespace(depth);
191         }
192 
193         write(getTagEnd());
194         // }
195         pendingEndTag = false;
196         lookingForChars = false;
197     }
198 
199     /*
200      * (non-Javadoc)
201      * 
202      * @see org.xml.sax.ContentHandler#characters(char[], int, int)
203      */
204     public void characters(char[] chars, int start, int length) {
205         if (!lookingForChars) {
206             handlePending();
207         }
208 
209         String s = new String(chars, start, length);
210         write(decorateCharacters(s));
211         lookingForChars = true;
212     }
213 
214     /*
215      * (non-Javadoc)
216      * 
217      * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
218      */
219     public void ignorableWhitespace(char[] chars, int start, int length) {
220         characters(chars, start, length);
221     }
222 
223     /*
224      * (non-Javadoc)
225      * 
226      * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String,
227      * java.lang.String)
228      */
229     public void processingInstruction(String target, String data) {
230         handlePending();
231 
232         write(getPIStart());
233         write(target);
234         write(' ');
235         write(decorateCharacters(data));
236         write(getPIEnd());
237 
238         if (formatting.isMultiline()) {
239             write(getNewline());
240         }
241     }
242 
243     /*
244      * (non-Javadoc)
245      * 
246      * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String)
247      */
248     public void skippedEntity(String name) {
249     }
250 
251     protected String getTagStart() {
252         return "<";
253     }
254 
255     protected String getTagEnd() {
256         return ">";
257     }
258 
259     protected String getEmptyTagEnd() {
260         return "/>";
261     }
262 
263     protected String getEndTagStart() {
264         return "</";
265     }
266 
267     protected String getPIStart() {
268         return "<!";
269     }
270 
271     protected String getPIEnd() {
272         return "!>";
273     }
274 
275     protected String getNewline() {
276         return "\n";
277     }
278 
279     protected String decorateTagName(String tagName) {
280         return tagName;
281     }
282 
283     protected String decorateAttributeName(String attrName) {
284         return attrName;
285     }
286 
287     protected String decorateAttributeValue(String attrValue) {
288         return attrValue;
289     }
290 
291     protected String decorateCharacters(String characters) {
292         return characters;
293     }
294 
295     protected String decorateIndent(int indentLevel) {
296         return new String(indentation, 0, indentLevel).intern();
297     }
298 
299     protected void write(String obj) {
300         try {
301             writer.write(obj);
302         } catch (IOException e) {
303             e.printStackTrace(System.err);
304         }
305     }
306 
307     protected void write(char obj) {
308         try {
309             writer.write(obj);
310         } catch (IOException e) {
311             e.printStackTrace(System.err);
312         }
313     }
314 
315     private void handlePending() {
316         if (pendingEndTag) {
317             pendingEndTag = false;
318 
319             if (formatting.isAnalytic()) {
320                 emitWhitespace(depth);
321             }
322 
323             write(getTagEnd());
324 
325         }
326         if (formatting.isClassic()) {
327             emitWhitespace(depth);
328         }
329         lookingForChars = false;
330     }
331 
332     private void emitWhitespace(int indentLevel) {
333         write(getNewline());
334         if (formatting.isIndented()) {
335             write(decorateIndent(indentLevel));
336         }
337     }
338 
339     /**
340      * This allows for rapid output of whitespace.
341      */
342     private static char[] indentation = {
343             '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t',
344             '\t', '\t', '\t', '\t', '\t', '\t',
345     };
346 
347     /**
348      * The depth is incremented on each startElement and decremented on each
349      * endElement. This is used to output the indentation.
350      */
351     private int depth;
352 
353     /**
354      * It is possible that characters(...) will be called for adjacent pieces of
355      * text. Often this is due to entities in the text. This will allow for
356      * these to be joined back together.
357      */
358     private boolean lookingForChars;
359 
360     /**
361      * One of the difficulties in SAX parsing is that it does not retain state.
362      * Even for an empty tag, it calls startElement and endElement. This allows
363      * for making empty elements to have the empty tag notation: &lt;tag/&gt;.
364      */
365     private boolean pendingEndTag;
366 
367     private FormatType formatting;
368 
369     private Writer writer;
370 }
371