1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 or later
5    * as published by the Free Software Foundation. This program is distributed
6    * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
7    * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *      http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * © CrossWire Bible Society, 2005 - 2016
18   *
19   */
20  package org.crosswire.jsword.examples;
21  
22  import java.io.FileOutputStream;
23  import java.io.IOException;
24  import java.io.OutputStreamWriter;
25  import java.io.Writer;
26  import java.text.FieldPosition;
27  import java.text.MessageFormat;
28  import java.util.regex.Matcher;
29  import java.util.regex.Pattern;
30  
31  import org.crosswire.common.xml.XMLProcess;
32  import org.crosswire.jsword.book.Book;
33  import org.crosswire.jsword.book.BookException;
34  import org.crosswire.jsword.book.BookMetaData;
35  import org.crosswire.jsword.book.Books;
36  import org.crosswire.jsword.passage.Key;
37  import org.crosswire.jsword.passage.NoSuchKeyException;
38  import org.crosswire.jsword.passage.Verse;
39  
40  /**
41   * Start of a mechanism to extract a Bible module to OSIS.
42   * 
43   * @see gnu.lgpl.License The GNU Lesser General Public License for details.
44   * @author DM Smith
45   */
46  public class BibleToOsis {
47      /**
48       * The name of a Bible to find
49       */
50      private static final String BIBLE_NAME = "KJV";
51      private static final String BIBLE_RANGE = "Gen-Rev";
52      private static final boolean BY_BOOK = false;
53  
54      /**
55       * @param args
56       */
57      public static void main(String[] args) {
58          Exception error = null;
59          try {
60              new BibleToOsis().dump(BIBLE_NAME, BIBLE_RANGE);
61          } catch (NoSuchKeyException e) {
62              error = e;
63          } catch (BookException e) {
64              error = e;
65          } catch (IOException e) {
66              error = e;
67          }
68          if (error != null) {
69              error.printStackTrace();
70          }
71      }
72  
73      public void dump(String name, String range) throws NoSuchKeyException, IOException, BookException {
74          Books books = Books.installed();
75          Book bible = books.getBook(name);
76          BookMetaData bmd = bible.getBookMetaData();
77          String lastBookName = "";
78          int lastChapter = -1;
79          StringBuffer buf = new StringBuffer();
80          boolean inPreVerse = false;
81  
82          Key keys = bible.getKey(range);
83  
84          openOutputFile(bmd.getInitials(), !BY_BOOK);
85          buildDocumentOpen(buf, bmd, range, !BY_BOOK);
86          if (!BY_BOOK) {
87              writeDocument(buf);
88          }
89  
90          // Get a verse iterator
91          for (Key key : keys) {
92              Verse verse = (Verse) key;
93              String raw = bible.getRawText(verse);
94              String osisID = verse.getOsisID();
95              String currentBookName = verse.getBook().getOSIS();
96              int currentChapter = verse.getChapter();
97  
98              boolean newBookFound = !lastBookName.equals(currentBookName);
99  
100             if (newBookFound) {
101                 if (lastBookName.length() > 0) {
102                     if (currentChapter == 1) {
103                         if (inPreVerse) {
104                             buildPreVerseClose(buf);
105                             inPreVerse = false;
106                         }
107                         buildChapterClose(buf);
108                     }
109                     buildBookClose(buf);
110                     buildDocumentClose(buf, BY_BOOK);
111                     openOutputFile(lastBookName, BY_BOOK);
112                     writeDocument(buf);
113                     closeOutputFile(BY_BOOK);
114                 }
115 
116                 buf = new StringBuffer();
117                 buildDocumentOpen(buf, bmd, currentBookName, BY_BOOK);
118                 buildBookOpen(buf, currentBookName);
119             }
120 
121             if (newBookFound || lastChapter != currentChapter) {
122                 if (currentChapter != 1) {
123                     if (inPreVerse) {
124                         buildPreVerseClose(buf);
125                         inPreVerse = false;
126                     }
127                     buildChapterClose(buf);
128                 }
129                 buildChapterOpen(buf, currentBookName, currentChapter);
130             }
131 
132             /* Output the verse */
133 
134             boolean foundPreVerse = false;
135             String preVerseText = "";
136             if (raw.indexOf(preVerseStart) != -1) {
137                 Matcher matcher = preVersePattern.matcher(raw);
138                 StringBuffer rawbuf = new StringBuffer();
139                 if (matcher.find()) {
140                     foundPreVerse = true;
141                     preVerseText = matcher.group(1);
142                     matcher.appendReplacement(rawbuf, "");
143                 }
144                 matcher.appendTail(rawbuf);
145                 raw = rawbuf.toString();
146             }
147 
148             boolean foundPsalmTitle = false;
149             String psalmTitleText = "";
150             if (raw.indexOf(psalmTitleStart) != -1) {
151                 Matcher matcher = psalmTitlePattern.matcher(raw);
152                 StringBuffer rawbuf = new StringBuffer();
153                 if (matcher.find()) {
154                     foundPsalmTitle = true;
155                     psalmTitleText = matcher.group(1);
156                     matcher.appendReplacement(rawbuf, "");
157                 }
158                 matcher.appendTail(rawbuf);
159                 raw = rawbuf.toString();
160             }
161 
162             if (foundPsalmTitle) {
163                 buildPsalmTitle(buf, psalmTitleText);
164             }
165 
166             if (foundPreVerse && !preVerseText.equals(psalmTitleText)) {
167                 if (inPreVerse) {
168                     buildPreVerseClose(buf);
169                 }
170                 buildPreVerseOpen(buf, preVerseText);
171                 inPreVerse = true;
172             }
173 
174             buildVerseOpen(buf, osisID);
175             buf.append(raw);
176             buildVerseClose(buf, osisID);
177 
178             lastChapter = currentChapter;
179             lastBookName = currentBookName;
180         }
181 
182         // Close everything that is open
183         if (inPreVerse) {
184             buildPreVerseClose(buf);
185             inPreVerse = false;
186         }
187 
188         buildChapterClose(buf);
189         buildBookClose(buf);
190         buildDocumentClose(buf, true);
191         openOutputFile(lastBookName, BY_BOOK);
192         writeDocument(buf);
193         closeOutputFile(true);
194     }
195 
196     private void buildDocumentOpen(StringBuffer buf, BookMetaData bmd, String range, boolean force) {
197         if (!force) {
198             return;
199         }
200 
201         MessageFormat msgFormat = new MessageFormat(
202                 "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n<osis\n  xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\"\n  xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n  xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.2.1.1.xsd\">\n<osisText osisIDWork=\"{0}\" osisRefWork=\"defaultReferenceScheme\" xml:lang=\"en\">\n<header>\n  <work osisWork=\"{0}\">\n    <title>{1}</title>\n    <identifier type=\"OSIS\">Bible.{0}</identifier>\n    <scope>{2}</scope>\n    <refSystem>Bible.KJV</refSystem>\n  </work>\n  <work osisWork=\"defaultReferenceScheme\">\n    <refSystem>Bible.KJV</refSystem>\n  </work>\n  <work osisWork=\"strong\">\n    <refSystem>Dict.Strongs</refSystem>\n  </work>\n  <work osisWork=\"robinson\">\n    <refSystem>Dict.Robinsons</refSystem>\n  </work>\n  <work osisWork=\"strongMorph\">\n    <refSystem>Dict.strongMorph</refSystem>\n  </work>\n</header>\n");
203         msgFormat.format(new Object[] {
204                 bmd.getInitials(), bmd.getName(), range
205         }, buf, pos);
206     }
207 
208     private void buildDocumentClose(StringBuffer buf, boolean force) {
209         if (force) {
210             buf.append("</osisText>\n</osis>\n");
211         }
212     }
213 
214     private void buildBookOpen(StringBuffer buf, String bookName) {
215         System.err.println("processing " + bookName);
216         MessageFormat msgFormat = new MessageFormat("<div type=\"book\" osisID=\"{0}\" canonical=\"true\">\n");
217         msgFormat.format(new Object[] {
218             bookName
219         }, buf, pos);
220     }
221 
222     private void buildBookClose(StringBuffer buf) {
223         buf.append("</div>\n");
224     }
225 
226     private void buildChapterClose(StringBuffer buf) {
227         buf.append("</chapter>\n");
228     }
229 
230     private void buildChapterOpen(StringBuffer buf, String bookName, int chapter) {
231         MessageFormat msgFormat = new MessageFormat("<chapter osisID=\"{0}.{1}\" chapterTitle=\"{2} {1}.\">\n");
232         if ("Obad".equals(bookName)
233                 || "Phlm".equals(bookName)
234                 || "2John".equals(bookName)
235                 || "3John".equals(bookName)
236                 || "Jude".equals(bookName))
237         {
238             return;
239         }
240 
241         String chapterName = "CHAPTER";
242         if ("Ps".equals(bookName)) {
243             chapterName = "PSALM";
244         }
245 
246         msgFormat.format(new Object[] {
247                 bookName, Integer.valueOf(chapter), chapterName
248         }, buf, pos);
249     }
250 
251     private void buildPsalmTitle(StringBuffer buf, String psalmTitle) {
252         MessageFormat msgFormat = new MessageFormat("<title type=\"psalm\" canonical=\"true\">{0}</title>");
253         msgFormat.format(new Object[] {
254             psalmTitle
255         }, buf, pos);
256     }
257 
258     // private void buildPsalmAcrostic(StringBuffer buf, String psalmTitle)
259     // {
260     //        MessageFormat msgFormat = new MessageFormat("<title type=\"acrostic\" canonical=\"true\">{0}</title>");
261     // msgFormat.format(new Object[] { psalmTitle }, buf, pos);
262     // }
263 
264     private void buildPreVerseOpen(StringBuffer buf, String preVerse) {
265         MessageFormat msgFormat = new MessageFormat("<div type=\"section\" canonical=\"true\"><title canonical=\"true\">{0}</title>");
266         msgFormat.format(new Object[] {
267             preVerse
268         }, buf, pos);
269     }
270 
271     private void buildPreVerseClose(StringBuffer buf) {
272         buf.append("</div>\n");
273     }
274 
275     private void buildVerseOpen(StringBuffer buf, String osisID) {
276         //        MessageFormat msgFormat = new MessageFormat("<verse sID=\"{0}\" osisID=\"{0}\"/>");
277         MessageFormat msgFormat = new MessageFormat("<verse osisID=\"{0}\">");
278         msgFormat.format(new Object[] {
279             osisID
280         }, buf, pos);
281     }
282 
283     private void buildVerseClose(StringBuffer buf, String osisID) {
284         //        MessageFormat msgFormat = new MessageFormat("<verse eID=\"{0}\"/>");
285         MessageFormat msgFormat = new MessageFormat("</verse>\n");
286         msgFormat.format(new Object[] {
287             osisID
288         }, buf, pos);
289     }
290 
291     private void openOutputFile(String newFilename, boolean open) throws IOException {
292         if (open) {
293             filename = newFilename;
294             writer = new OutputStreamWriter(new FileOutputStream(filename + ".xml"), "UTF-8");
295         }
296     }
297 
298     private void writeDocument(StringBuffer buf) throws IOException {
299         writer.write(buf.toString());
300     }
301 
302     private void closeOutputFile(boolean close) throws IOException {
303         if (close) {
304             writer.close();
305             parse();
306         }
307     }
308 
309     private void parse() {
310         XMLProcess parser = new XMLProcess();
311         parser.getFeatures().setFeatureStates(new String[] {
312                 "-s", "-f", "-va", "-dv"});
313         parser.parse(filename + ".xml");
314     }
315 
316     private static FieldPosition pos = new FieldPosition(0);
317 
318     private static String preVerseStart = "<title subtype=\"x-preverse\" type=\"section\">";
319     private static String preVerseElement = "<title subtype=\"x-preverse\" type=\"section\">(.*?)</title>";
320     private static Pattern preVersePattern = Pattern.compile(preVerseElement);
321     //    private static String preVerseEnd = "</title>";
322     // private static Pattern preVerseStartPattern =
323     // Pattern.compile(preVerseStart);
324     //    private static Pattern preVerseEndPattern = Pattern.compile(preVerseEnd);
325 
326     private static String psalmTitleStart = "<title type=\"psalm\">";
327     private static String psalmTitleElement = "<title type=\"psalm\">(.*?)</title>";
328     private static Pattern psalmTitlePattern = Pattern.compile(psalmTitleElement);
329     //    private static String psalmTitleEnd = "</title>";
330     // private static Pattern psalmTitleStartPattern =
331     // Pattern.compile(psalmTitleStart);
332     // private static Pattern psalmTitleEndPattern =
333     // Pattern.compile(psalmTitleEnd);
334 
335     private Writer writer;
336     private String filename;
337 }
338