1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 as published by
5    * the Free Software Foundation. This program is distributed in the hope
6    * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7    * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *       http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * Copyright: 2005
 *     The copyright to this program is held by its authors.
19   *
20   * ID: $Id: BibleToOsis.java 2140 2011-04-03 02:07:01Z dmsmith $
21   */
22  package org.crosswire.jsword.examples;
23  
24  import java.io.FileOutputStream;
25  import java.io.IOException;
26  import java.io.OutputStreamWriter;
27  import java.io.Writer;
28  import java.text.FieldPosition;
29  import java.text.MessageFormat;
30  import java.util.regex.Matcher;
31  import java.util.regex.Pattern;
32  
33  import org.crosswire.common.xml.XMLProcess;
34  import org.crosswire.jsword.book.Book;
35  import org.crosswire.jsword.book.BookException;
36  import org.crosswire.jsword.book.BookMetaData;
37  import org.crosswire.jsword.book.Books;
38  import org.crosswire.jsword.passage.Key;
39  import org.crosswire.jsword.passage.NoSuchKeyException;
40  import org.crosswire.jsword.passage.Verse;
41  
42  /**
43   * Start of a mechanism to extract a Bible module to OSIS.
44   * 
45   * @see gnu.lgpl.License for license details.<br>
46   *      The copyright to this program is held by it's authors.
47   * @author DM Smith [dmsmith555 at yahoo dot com]
48   */
49  public class BibleToOsis {
50      /**
51       * The name of a Bible to find
52       */
53      private static final String BIBLE_NAME = "KJV";
54      private static final String BIBLE_RANGE = "Gen-Rev";
55      private static final boolean BY_BOOK = false;
56  
57      /**
58       * @param args
59       */
60      public static void main(String[] args) {
61          try {
62              new BibleToOsis().dump(BIBLE_NAME, BIBLE_RANGE);
63          } catch (Exception e) {
64              e.printStackTrace(System.err);
65          }
66      }
67  
68      public void dump(String name, String range) throws NoSuchKeyException, IOException, BookException {
69          Books books = Books.installed();
70          Book bible = books.getBook(name);
71          BookMetaData bmd = bible.getBookMetaData();
72          String lastBookName = "";
73          int lastChapter = -1;
74          StringBuffer buf = new StringBuffer();
75          boolean inPreVerse = false;
76  
77          Key keys = bible.getKey(range);
78  
79          openOutputFile(bmd.getInitials(), !BY_BOOK);
80          buildDocumentOpen(buf, bmd, range, !BY_BOOK);
81          if (!BY_BOOK) {
82              writeDocument(buf);
83          }
84  
85          // Get a verse iterator
86          for (Key key : keys) {
87              Verse verse = (Verse) key;
88              String raw = bible.getRawText(verse);
89              String osisID = verse.getOsisID();
90              String currentBookName = verse.getBook().getOSIS();
91              int currentChapter = verse.getChapter();
92  
93              boolean newBookFound = !lastBookName.equals(currentBookName);
94  
95              if (newBookFound) {
96                  if (lastBookName.length() > 0) {
97                      if (currentChapter == 1) {
98                          if (inPreVerse) {
99                              buildPreVerseClose(buf);
100                             inPreVerse = false;
101                         }
102                         buildChapterClose(buf);
103                     }
104                     buildBookClose(buf);
105                     buildDocumentClose(buf, BY_BOOK);
106                     openOutputFile(lastBookName, BY_BOOK);
107                     writeDocument(buf);
108                     closeOutputFile(BY_BOOK);
109                 }
110 
111                 buf = new StringBuffer();
112                 buildDocumentOpen(buf, bmd, currentBookName, BY_BOOK);
113                 buildBookOpen(buf, currentBookName);
114             }
115 
116             if (newBookFound || lastChapter != currentChapter) {
117                 if (currentChapter != 1) {
118                     if (inPreVerse) {
119                         buildPreVerseClose(buf);
120                         inPreVerse = false;
121                     }
122                     buildChapterClose(buf);
123                 }
124                 buildChapterOpen(buf, currentBookName, currentChapter);
125             }
126 
127             /* Output the verse */
128 
129             boolean foundPreVerse = false;
130             String preVerseText = "";
131             if (raw.indexOf(preVerseStart) != -1) {
132                 Matcher matcher = preVersePattern.matcher(raw);
133                 StringBuffer rawbuf = new StringBuffer();
134                 if (matcher.find()) {
135                     foundPreVerse = true;
136                     preVerseText = matcher.group(1);
137                     matcher.appendReplacement(rawbuf, "");
138                 }
139                 matcher.appendTail(rawbuf);
140                 raw = rawbuf.toString();
141             }
142 
143             boolean foundPsalmTitle = false;
144             String psalmTitleText = "";
145             if (raw.indexOf(psalmTitleStart) != -1) {
146                 Matcher matcher = psalmTitlePattern.matcher(raw);
147                 StringBuffer rawbuf = new StringBuffer();
148                 if (matcher.find()) {
149                     foundPsalmTitle = true;
150                     psalmTitleText = matcher.group(1);
151                     matcher.appendReplacement(rawbuf, "");
152                 }
153                 matcher.appendTail(rawbuf);
154                 raw = rawbuf.toString();
155             }
156 
157             if (foundPsalmTitle) {
158                 buildPsalmTitle(buf, psalmTitleText);
159             }
160 
161             if (foundPreVerse && !preVerseText.equals(psalmTitleText)) {
162                 if (inPreVerse) {
163                     buildPreVerseClose(buf);
164                 }
165                 buildPreVerseOpen(buf, preVerseText);
166                 inPreVerse = true;
167             }
168 
169             buildVerseOpen(buf, osisID);
170             buf.append(raw);
171             buildVerseClose(buf, osisID);
172 
173             lastChapter = currentChapter;
174             lastBookName = currentBookName;
175         }
176 
177         // Close everything that is open
178         if (inPreVerse) {
179             buildPreVerseClose(buf);
180             inPreVerse = false;
181         }
182 
183         buildChapterClose(buf);
184         buildBookClose(buf);
185         buildDocumentClose(buf, true);
186         openOutputFile(lastBookName, BY_BOOK);
187         writeDocument(buf);
188         closeOutputFile(true);
189     }
190 
191     private void buildDocumentOpen(StringBuffer buf, BookMetaData bmd, String range, boolean force) {
192         if (!force) {
193             return;
194         }
195 
196         MessageFormat msgFormat = new MessageFormat(
197                 "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n<osis\n  xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\"\n  xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n  xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.2.1.1.xsd\">\n<osisText osisIDWork=\"{0}\" osisRefWork=\"defaultReferenceScheme\" xml:lang=\"en\">\n<header>\n  <work osisWork=\"{0}\">\n    <title>{1}</title>\n    <identifier type=\"OSIS\">Bible.{0}</identifier>\n    <scope>{2}</scope>\n    <refSystem>Bible.KJV</refSystem>\n  </work>\n  <work osisWork=\"defaultReferenceScheme\">\n    <refSystem>Bible.KJV</refSystem>\n  </work>\n  <work osisWork=\"strong\">\n    <refSystem>Dict.Strongs</refSystem>\n  </work>\n  <work osisWork=\"robinson\">\n    <refSystem>Dict.Robinsons</refSystem>\n  </work>\n  <work osisWork=\"strongMorph\">\n    <refSystem>Dict.strongMorph</refSystem>\n  </work>\n</header>\n");
198         msgFormat.format(new Object[] {
199                 bmd.getInitials(), bmd.getName(), range
200         }, buf, pos);
201     }
202 
203     private void buildDocumentClose(StringBuffer buf, boolean force) {
204         if (force) {
205             buf.append("</osisText>\n</osis>\n");
206         }
207     }
208 
209     private void buildBookOpen(StringBuffer buf, String bookName) {
210         System.err.println("processing " + bookName);
211         MessageFormat msgFormat = new MessageFormat("<div type=\"book\" osisID=\"{0}\" canonical=\"true\">\n");
212         msgFormat.format(new Object[] {
213             bookName
214         }, buf, pos);
215     }
216 
217     private void buildBookClose(StringBuffer buf) {
218         buf.append("</div>\n");
219     }
220 
221     private void buildChapterClose(StringBuffer buf) {
222         buf.append("</chapter>\n");
223     }
224 
225     private void buildChapterOpen(StringBuffer buf, String bookName, int chapter) {
226         MessageFormat msgFormat = new MessageFormat("<chapter osisID=\"{0}.{1}\" chapterTitle=\"{2} {1}.\">\n");
227         if ("Obad".equals(bookName)
228                 || "Phlm".equals(bookName)
229                 || "2John".equals(bookName)
230                 || "3John".equals(bookName)
231                 || "Jude".equals(bookName))
232         {
233             return;
234         }
235 
236         String chapterName = "CHAPTER";
237         if ("Ps".equals(bookName)) {
238             chapterName = "PSALM";
239         }
240 
241         msgFormat.format(new Object[] {
242                 bookName, Integer.valueOf(chapter), chapterName
243         }, buf, pos);
244     }
245 
246     private void buildPsalmTitle(StringBuffer buf, String psalmTitle) {
247         MessageFormat msgFormat = new MessageFormat("<title type=\"psalm\" canonical=\"true\">{0}</title>");
248         msgFormat.format(new Object[] {
249             psalmTitle
250         }, buf, pos);
251     }
252 
253     // private void buildPsalmAcrostic(StringBuffer buf, String psalmTitle)
254     // {
255     //        MessageFormat msgFormat = new MessageFormat("<title type=\"acrostic\" canonical=\"true\">{0}</title>");
256     // msgFormat.format(new Object[] { psalmTitle }, buf, pos);
257     // }
258 
259     private void buildPreVerseOpen(StringBuffer buf, String preVerse) {
260         MessageFormat msgFormat = new MessageFormat("<div type=\"section\" canonical=\"true\"><title canonical=\"true\">{0}</title>");
261         msgFormat.format(new Object[] {
262             preVerse
263         }, buf, pos);
264     }
265 
266     private void buildPreVerseClose(StringBuffer buf) {
267         buf.append("</div>\n");
268     }
269 
270     private void buildVerseOpen(StringBuffer buf, String osisID) {
271         //        MessageFormat msgFormat = new MessageFormat("<verse sID=\"{0}\" osisID=\"{0}\"/>");
272         MessageFormat msgFormat = new MessageFormat("<verse osisID=\"{0}\">");
273         msgFormat.format(new Object[] {
274             osisID
275         }, buf, pos);
276     }
277 
278     private void buildVerseClose(StringBuffer buf, String osisID) {
279         //        MessageFormat msgFormat = new MessageFormat("<verse eID=\"{0}\"/>");
280         MessageFormat msgFormat = new MessageFormat("</verse>\n");
281         msgFormat.format(new Object[] {
282             osisID
283         }, buf, pos);
284     }
285 
286     private void openOutputFile(String newFilename, boolean open) throws IOException {
287         if (open) {
288             filename = newFilename;
289             writer = new OutputStreamWriter(new FileOutputStream(filename + ".xml"), "UTF-8");
290         }
291     }
292 
293     private void writeDocument(StringBuffer buf) throws IOException {
294         writer.write(buf.toString());
295     }
296 
297     private void closeOutputFile(boolean close) throws IOException {
298         if (close) {
299             writer.close();
300             parse();
301         }
302     }
303 
304     private void parse() {
305         XMLProcess parser = new XMLProcess();
306         parser.getFeatures().setFeatureStates(new String[] {
307                 "-s", "-f", "-va", "-dv"});
308         parser.parse(filename + ".xml");
309     }
310 
311     private static FieldPosition pos = new FieldPosition(0);
312 
313     private static String preVerseStart = "<title subtype=\"x-preverse\" type=\"section\">";
314     private static String preVerseElement = "<title subtype=\"x-preverse\" type=\"section\">(.*?)</title>";
315     private static Pattern preVersePattern = Pattern.compile(preVerseElement);
316     //    private static String preVerseEnd = "</title>";
317     // private static Pattern preVerseStartPattern =
318     // Pattern.compile(preVerseStart);
319     //    private static Pattern preVerseEndPattern = Pattern.compile(preVerseEnd);
320 
321     private static String psalmTitleStart = "<title type=\"psalm\">";
322     private static String psalmTitleElement = "<title type=\"psalm\">(.*?)</title>";
323     private static Pattern psalmTitlePattern = Pattern.compile(psalmTitleElement);
324     //    private static String psalmTitleEnd = "</title>";
325     // private static Pattern psalmTitleStartPattern =
326     // Pattern.compile(psalmTitleStart);
327     // private static Pattern psalmTitleEndPattern =
328     // Pattern.compile(psalmTitleEnd);
329 
330     private Writer writer;
331     private String filename;
332 }
333