1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 or later
5    * as published by the Free Software Foundation. This program is distributed
6    * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
7    * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *      http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * © CrossWire Bible Society, 2005 - 2016
18   *
19   */
20  package org.crosswire.common.xml;
21  
22  import java.io.IOException;
23  import java.io.StringWriter;
24  import java.io.Writer;
25  
26  import org.xml.sax.Attributes;
27  import org.xml.sax.ContentHandler;
28  import org.xml.sax.Locator;
29  
30  /**
31   * This class provides for the formatted serialization of a SAX stream to a
32   * <code>Writer</code>.
33   * 
34   * @see gnu.lgpl.License The GNU Lesser General Public License for details.
35   * @author DM Smith
36   */
37  public class PrettySerializingContentHandler implements ContentHandler {
38      /**
39       * A formatting serializer that does not add whitespace to the document.
40       * This uses a StringWriter and the toString method will return its content.
41       */
42      public PrettySerializingContentHandler() {
43          this(FormatType.AS_IS);
44      }
45  
46      /**
47       * A formatting serializer that adds whitespace to the document according to
48       * the specified <code>FormatType</code>. This uses a StringWriter and the
49       * toString method will return its content.
50       * 
51       * @param theFormat
52       *            the formatting to use
53       */
54      public PrettySerializingContentHandler(FormatType theFormat) {
55          this(theFormat, null);
56      }
57  
58      /**
59       * A formatting serializer that adds whitespace to the document according to
60       * the specified <code>FormatType</code>. As the document is serialized it
61       * is written to the provided <code>Writer</code>.
62       * 
63       * @param theFormat
64       *            the formatting to use
65       * @param theWriter
66       *            the writer to use
67       */
68      public PrettySerializingContentHandler(FormatType theFormat, Writer theWriter) {
69          formatting = theFormat;
70          writer = theWriter == null ? new StringWriter() : theWriter;
71      }
72  
73      /*
74       * (non-Javadoc)
75       * 
76       * @see java.lang.Object#toString()
77       */
78      @Override
79      public String toString() {
80          return writer.toString();
81      }
82  
83      /*
84       * (non-Javadoc)
85       * 
86       * @see org.xml.sax.ContentHandler#setDocumentLocator(org.xml.sax.Locator)
87       */
88      public void setDocumentLocator(Locator locator) {
89      }
90  
91      /*
92       * (non-Javadoc)
93       * 
94       * @see org.xml.sax.ContentHandler#startDocument()
95       */
96      public void startDocument() {
97          // write("<?xml version=\"1.0\"?>");
98      }
99  
100     /*
101      * (non-Javadoc)
102      * 
103      * @see org.xml.sax.ContentHandler#endDocument()
104      */
105     public void endDocument() {
106     }
107 
108     /*
109      * (non-Javadoc)
110      * 
111      * @see org.xml.sax.ContentHandler#startPrefixMapping(java.lang.String,
112      * java.lang.String)
113      */
114     public void startPrefixMapping(String prefix, String uri) {
115     }
116 
117     /*
118      * (non-Javadoc)
119      * 
120      * @see org.xml.sax.ContentHandler#endPrefixMapping(java.lang.String)
121      */
122     public void endPrefixMapping(String prefix) {
123     }
124 
125     /*
126      * (non-Javadoc)
127      * 
128      * @see org.xml.sax.ContentHandler#startElement(java.lang.String,
129      * java.lang.String, java.lang.String, org.xml.sax.Attributes)
130      */
131     public void startElement(String uri, String localname, String qname, Attributes attrs) {
132         if (depth > 0) {
133             handlePending();
134         }
135 
136         write(getTagStart());
137         write(decorateTagName(localname));
138 
139         for (int i = 0; i < attrs.getLength(); i++) {
140             write(' ');
141             write(decorateAttributeName(XMLUtil.getAttributeName(attrs, i)));
142             write("='");
143             write(decorateAttributeValue(XMLUtil.escape(attrs.getValue(i))));
144             write('\'');
145         }
146 
147         pendingEndTag = true;
148         depth++;
149     }
150 
151     /*
152      * (non-Javadoc)
153      * 
154      * @see org.xml.sax.ContentHandler#endElement(java.lang.String,
155      * java.lang.String, java.lang.String)
156      */
157     public void endElement(String uri, String localname, String qname) {
158         depth--;
159         // Java cannot display empty tags <tag/> so most of the following is
160         // commented out
161         if (pendingEndTag) {
162             if (formatting.isAnalytic() && depth > 0) {
163                 emitWhitespace(depth - 1);
164             }
165             //
166             // // Hack alert JTextPane cannot handle <br/>
167             //            if (localname.equalsIgnoreCase("br"))
168             // {
169             write(getTagEnd());
170             // }
171             // else
172             // {
173             // write(getEmptyTagEnd());
174             // }
175         }
176         // else
177         // {
178         if (formatting.isClassic()) {
179             emitWhitespace(depth);
180         }
181 
182         write(getEndTagStart());
183 
184         write(decorateTagName(localname));
185 
186         if (formatting.isAnalytic()) {
187             emitWhitespace(depth);
188         }
189 
190         write(getTagEnd());
191         // }
192         pendingEndTag = false;
193         lookingForChars = false;
194     }
195 
196     /*
197      * (non-Javadoc)
198      * 
199      * @see org.xml.sax.ContentHandler#characters(char[], int, int)
200      */
201     public void characters(char[] chars, int start, int length) {
202         if (!lookingForChars) {
203             handlePending();
204         }
205 
206         String s = new String(chars, start, length);
207         write(decorateCharacters(s));
208         lookingForChars = true;
209     }
210 
211     /*
212      * (non-Javadoc)
213      * 
214      * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
215      */
216     public void ignorableWhitespace(char[] chars, int start, int length) {
217         characters(chars, start, length);
218     }
219 
220     /*
221      * (non-Javadoc)
222      * 
223      * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String,
224      * java.lang.String)
225      */
226     public void processingInstruction(String target, String data) {
227         handlePending();
228 
229         write(getPIStart());
230         write(target);
231         write(' ');
232         write(decorateCharacters(data));
233         write(getPIEnd());
234 
235         if (formatting.isMultiline()) {
236             write(getNewline());
237         }
238     }
239 
240     /*
241      * (non-Javadoc)
242      * 
243      * @see org.xml.sax.ContentHandler#skippedEntity(java.lang.String)
244      */
245     public void skippedEntity(String name) {
246     }
247 
248     protected String getTagStart() {
249         return "<";
250     }
251 
252     protected String getTagEnd() {
253         return ">";
254     }
255 
256     protected String getEmptyTagEnd() {
257         return "/>";
258     }
259 
260     protected String getEndTagStart() {
261         return "</";
262     }
263 
264     protected String getPIStart() {
265         return "<!";
266     }
267 
268     protected String getPIEnd() {
269         return "!>";
270     }
271 
272     protected String getNewline() {
273         return "\n";
274     }
275 
276     protected String decorateTagName(String tagName) {
277         return tagName;
278     }
279 
280     protected String decorateAttributeName(String attrName) {
281         return attrName;
282     }
283 
284     protected String decorateAttributeValue(String attrValue) {
285         return attrValue;
286     }
287 
288     protected String decorateCharacters(String characters) {
289         return characters;
290     }
291 
292     protected String decorateIndent(int indentLevel) {
293         return new String(indentation, 0, indentLevel).intern();
294     }
295 
296     protected void write(String obj) {
297         try {
298             writer.write(obj);
299         } catch (IOException e) {
300             e.printStackTrace(System.err);
301         }
302     }
303 
304     protected void write(char obj) {
305         try {
306             writer.write(obj);
307         } catch (IOException e) {
308             e.printStackTrace(System.err);
309         }
310     }
311 
312     private void handlePending() {
313         if (pendingEndTag) {
314             pendingEndTag = false;
315 
316             if (formatting.isAnalytic()) {
317                 emitWhitespace(depth);
318             }
319 
320             write(getTagEnd());
321 
322         }
323         if (formatting.isClassic()) {
324             emitWhitespace(depth);
325         }
326         lookingForChars = false;
327     }
328 
329     private void emitWhitespace(int indentLevel) {
330         write(getNewline());
331         if (formatting.isIndented()) {
332             write(decorateIndent(indentLevel));
333         }
334     }
335 
336     /**
337      * This allows for rapid output of whitespace.
338      */
339     private static char[] indentation = {
340             '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t',
341             '\t', '\t', '\t', '\t', '\t', '\t',
342     };
343 
344     /**
345      * The depth is incremented on each startElement and decremented on each
346      * endElement. This is used to output the indentation.
347      */
348     private int depth;
349 
350     /**
351      * It is possible that characters(...) will be called for adjacent pieces of
352      * text. Often this is due to entities in the text. This will allow for
353      * these to be joined back together.
354      */
355     private boolean lookingForChars;
356 
357     /**
358      * One of the difficulties in SAX parsing is that it does not retain state.
359      * Even for an empty tag, it calls startElement and endElement. This allows
360      * for making empty elements to have the empty tag notation: &lt;tag/&gt;.
361      */
362     private boolean pendingEndTag;
363 
364     private FormatType formatting;
365 
366     private Writer writer;
367 }
368