1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 as published by
5    * the Free Software Foundation. This program is distributed in the hope
6    * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7    * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *       http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * Copyright: 2005
18   *     The copyright to this program is held by its authors.
19   *
20   * ID: $Id: DictToOsis.java 2099 2011-03-07 17:13:00Z dmsmith $
21   */
22  package org.crosswire.jsword.examples;
23  
24  import java.io.FileOutputStream;
25  import java.io.IOException;
26  import java.io.OutputStreamWriter;
27  import java.io.Writer;
28  import java.text.FieldPosition;
29  import java.text.MessageFormat;
30  
31  import org.crosswire.common.xml.SAXEventProvider;
32  import org.crosswire.common.xml.XMLProcess;
33  import org.crosswire.common.xml.XMLUtil;
34  import org.crosswire.jsword.book.Book;
35  import org.crosswire.jsword.book.BookData;
36  import org.crosswire.jsword.book.BookException;
37  import org.crosswire.jsword.book.BookMetaData;
38  import org.crosswire.jsword.book.Books;
39  import org.crosswire.jsword.passage.Key;
40  import org.xml.sax.SAXException;
41  
42  /**
43   * Start of a mechanism to extract a Dictionary module to OSIS.
44   * 
45   * @see gnu.lgpl.License for license details.<br>
46   *      The copyright to this program is held by it's authors.
47   * @author DM Smith [dmsmith555 at yahoo dot com]
48   */
49  public class DictToOsis {
50      /**
51       * The name of a Bible to find
52       */
53      private static final String BOOK_NAME = "WebstersDict";
54  
55      /**
56       * @param args
57       */
58      public static void main(String[] args) throws BookException, IOException {
59          new DictToOsis().dump(BOOK_NAME);
60      }
61  
62      public void dump(String name) throws BookException, IOException {
63          Books books = Books.installed();
64          Book book = books.getBook(name);
65          BookMetaData bmd = book.getBookMetaData();
66          StringBuffer buf = new StringBuffer();
67  
68          Key keys = book.getGlobalKeyList();
69  
70          buildDocumentOpen(buf, bmd);
71  
72          // Get a verse iterator
73          for (Key key : keys) {
74              BookData bdata = new BookData(book, key);
75              SAXEventProvider osissep = bdata.getSAXEventProvider();
76              try {
77                  buildEntryOpen(buf, key.getName(), XMLUtil.writeToString(osissep));
78              } catch (SAXException e) {
79                  e.printStackTrace(System.err);
80              }
81          }
82  
83          buildDocumentClose(buf);
84  
85          Writer writer = null;
86          try {
87              writer = new OutputStreamWriter(new FileOutputStream(bmd.getInitials() + ".xml"), "UTF-8");
88              writer.write(buf.toString());
89          } finally {
90              if (writer != null) {
91                  writer.close();
92              }
93          }
94          XMLProcess parser = new XMLProcess();
95          // parser.getFeatures().setFeatureStates("-s", "-f", "-va", "-dv");
96          parser.parse(bmd.getInitials() + ".xml");
97      }
98  
99      private void buildDocumentOpen(StringBuffer buf, BookMetaData bmd) {
100         MessageFormat msgFormat = new MessageFormat(
101                 "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n<osis\n  xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\"\n  xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n  xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace osisCore.2.1.xsd\">\n<osisText osisIDWork=\"{0}\" osisRefWork=\"defaultReferenceScheme\" xml:lang=\"en\">\n  <header>\n    <work osisWork=\"{0}\">\n      <title>{1}</title>\n      <identifier type=\"OSIS\">Dict.{0}</identifier>\n      <refSystem>Dict.{0}</refSystem>\n    </work>\n    <work osisWork=\"defaultReferenceScheme\">\n      <refSystem>Dict.{0}</refSystem>\n    </work>\n  </header>\n<div>\n");
102         msgFormat.format(new Object[] {
103                 bmd.getInitials(), bmd.getName()
104         }, buf, pos);
105     }
106 
107     private void buildDocumentClose(StringBuffer buf) {
108         buf.append("</div>\n</osisText>\n</osis>\n");
109     }
110 
111     private void buildEntryOpen(StringBuffer buf, String entryName, String entryDef) {
112         String tmp = entryName;
113         if (tmp.indexOf(' ') != -1) {
114             tmp = "x";
115         }
116         MessageFormat msgFormat = new MessageFormat(
117                 "<div type=\"entry\" osisID=\"{0}\" canonical=\"true\"><seg type=\"x-form\"><seg type=\"x-orth\">{0}</seg></seg><seg type=\"x-def\">{1}</seg></div>\n");
118         msgFormat.format(new Object[] {
119                 tmp, entryDef
120         }, buf, pos);
121     }
122 
123     private static FieldPosition pos = new FieldPosition(0);
124 }
125