1
22 package org.crosswire.jsword.examples;
23
24 import java.io.FileOutputStream;
25 import java.io.IOException;
26 import java.io.OutputStreamWriter;
27 import java.io.Writer;
28 import java.text.FieldPosition;
29 import java.text.MessageFormat;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32
33 import org.crosswire.common.xml.XMLProcess;
34 import org.crosswire.jsword.book.Book;
35 import org.crosswire.jsword.book.BookException;
36 import org.crosswire.jsword.book.BookMetaData;
37 import org.crosswire.jsword.book.Books;
38 import org.crosswire.jsword.passage.Key;
39 import org.crosswire.jsword.passage.NoSuchKeyException;
40 import org.crosswire.jsword.passage.Verse;
41
42
49 public class BibleToOsis {
50
53 private static final String BIBLE_NAME = "KJV";
54 private static final String BIBLE_RANGE = "Gen-Rev";
55 private static final boolean BY_BOOK = false;
56
57
60 public static void main(String[] args) {
61 try {
62 new BibleToOsis().dump(BIBLE_NAME, BIBLE_RANGE);
63 } catch (Exception e) {
64 e.printStackTrace(System.err);
65 }
66 }
67
68 public void dump(String name, String range) throws NoSuchKeyException, IOException, BookException {
69 Books books = Books.installed();
70 Book bible = books.getBook(name);
71 BookMetaData bmd = bible.getBookMetaData();
72 String lastBookName = "";
73 int lastChapter = -1;
74 StringBuffer buf = new StringBuffer();
75 boolean inPreVerse = false;
76
77 Key keys = bible.getKey(range);
78
79 openOutputFile(bmd.getInitials(), !BY_BOOK);
80 buildDocumentOpen(buf, bmd, range, !BY_BOOK);
81 if (!BY_BOOK) {
82 writeDocument(buf);
83 }
84
85 for (Key key : keys) {
87 Verse verse = (Verse) key;
88 String raw = bible.getRawText(verse);
89 String osisID = verse.getOsisID();
90 String currentBookName = verse.getBook().getOSIS();
91 int currentChapter = verse.getChapter();
92
93 boolean newBookFound = !lastBookName.equals(currentBookName);
94
95 if (newBookFound) {
96 if (lastBookName.length() > 0) {
97 if (currentChapter == 1) {
98 if (inPreVerse) {
99 buildPreVerseClose(buf);
100 inPreVerse = false;
101 }
102 buildChapterClose(buf);
103 }
104 buildBookClose(buf);
105 buildDocumentClose(buf, BY_BOOK);
106 openOutputFile(lastBookName, BY_BOOK);
107 writeDocument(buf);
108 closeOutputFile(BY_BOOK);
109 }
110
111 buf = new StringBuffer();
112 buildDocumentOpen(buf, bmd, currentBookName, BY_BOOK);
113 buildBookOpen(buf, currentBookName);
114 }
115
116 if (newBookFound || lastChapter != currentChapter) {
117 if (currentChapter != 1) {
118 if (inPreVerse) {
119 buildPreVerseClose(buf);
120 inPreVerse = false;
121 }
122 buildChapterClose(buf);
123 }
124 buildChapterOpen(buf, currentBookName, currentChapter);
125 }
126
127
128
129 boolean foundPreVerse = false;
130 String preVerseText = "";
131 if (raw.indexOf(preVerseStart) != -1) {
132 Matcher matcher = preVersePattern.matcher(raw);
133 StringBuffer rawbuf = new StringBuffer();
134 if (matcher.find()) {
135 foundPreVerse = true;
136 preVerseText = matcher.group(1);
137 matcher.appendReplacement(rawbuf, "");
138 }
139 matcher.appendTail(rawbuf);
140 raw = rawbuf.toString();
141 }
142
143 boolean foundPsalmTitle = false;
144 String psalmTitleText = "";
145 if (raw.indexOf(psalmTitleStart) != -1) {
146 Matcher matcher = psalmTitlePattern.matcher(raw);
147 StringBuffer rawbuf = new StringBuffer();
148 if (matcher.find()) {
149 foundPsalmTitle = true;
150 psalmTitleText = matcher.group(1);
151 matcher.appendReplacement(rawbuf, "");
152 }
153 matcher.appendTail(rawbuf);
154 raw = rawbuf.toString();
155 }
156
157 if (foundPsalmTitle) {
158 buildPsalmTitle(buf, psalmTitleText);
159 }
160
161 if (foundPreVerse && !preVerseText.equals(psalmTitleText)) {
162 if (inPreVerse) {
163 buildPreVerseClose(buf);
164 }
165 buildPreVerseOpen(buf, preVerseText);
166 inPreVerse = true;
167 }
168
169 buildVerseOpen(buf, osisID);
170 buf.append(raw);
171 buildVerseClose(buf, osisID);
172
173 lastChapter = currentChapter;
174 lastBookName = currentBookName;
175 }
176
177 if (inPreVerse) {
179 buildPreVerseClose(buf);
180 inPreVerse = false;
181 }
182
183 buildChapterClose(buf);
184 buildBookClose(buf);
185 buildDocumentClose(buf, true);
186 openOutputFile(lastBookName, BY_BOOK);
187 writeDocument(buf);
188 closeOutputFile(true);
189 }
190
191 private void buildDocumentOpen(StringBuffer buf, BookMetaData bmd, String range, boolean force) {
192 if (!force) {
193 return;
194 }
195
196 MessageFormat msgFormat = new MessageFormat(
197 "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n<osis\n xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\"\n xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.2.1.1.xsd\">\n<osisText osisIDWork=\"{0}\" osisRefWork=\"defaultReferenceScheme\" xml:lang=\"en\">\n<header>\n <work osisWork=\"{0}\">\n <title>{1}</title>\n <identifier type=\"OSIS\">Bible.{0}</identifier>\n <scope>{2}</scope>\n <refSystem>Bible.KJV</refSystem>\n </work>\n <work osisWork=\"defaultReferenceScheme\">\n <refSystem>Bible.KJV</refSystem>\n </work>\n <work osisWork=\"strong\">\n <refSystem>Dict.Strongs</refSystem>\n </work>\n <work osisWork=\"robinson\">\n <refSystem>Dict.Robinsons</refSystem>\n </work>\n <work osisWork=\"strongMorph\">\n <refSystem>Dict.strongMorph</refSystem>\n </work>\n</header>\n");
198 msgFormat.format(new Object[] {
199 bmd.getInitials(), bmd.getName(), range
200 }, buf, pos);
201 }
202
203 private void buildDocumentClose(StringBuffer buf, boolean force) {
204 if (force) {
205 buf.append("</osisText>\n</osis>\n");
206 }
207 }
208
209 private void buildBookOpen(StringBuffer buf, String bookName) {
210 System.err.println("processing " + bookName);
211 MessageFormat msgFormat = new MessageFormat("<div type=\"book\" osisID=\"{0}\" canonical=\"true\">\n");
212 msgFormat.format(new Object[] {
213 bookName
214 }, buf, pos);
215 }
216
217 private void buildBookClose(StringBuffer buf) {
218 buf.append("</div>\n");
219 }
220
221 private void buildChapterClose(StringBuffer buf) {
222 buf.append("</chapter>\n");
223 }
224
225 private void buildChapterOpen(StringBuffer buf, String bookName, int chapter) {
226 MessageFormat msgFormat = new MessageFormat("<chapter osisID=\"{0}.{1}\" chapterTitle=\"{2} {1}.\">\n");
227 if ("Obad".equals(bookName)
228 || "Phlm".equals(bookName)
229 || "2John".equals(bookName)
230 || "3John".equals(bookName)
231 || "Jude".equals(bookName))
232 {
233 return;
234 }
235
236 String chapterName = "CHAPTER";
237 if ("Ps".equals(bookName)) {
238 chapterName = "PSALM";
239 }
240
241 msgFormat.format(new Object[] {
242 bookName, Integer.valueOf(chapter), chapterName
243 }, buf, pos);
244 }
245
246 private void buildPsalmTitle(StringBuffer buf, String psalmTitle) {
247 MessageFormat msgFormat = new MessageFormat("<title type=\"psalm\" canonical=\"true\">{0}</title>");
248 msgFormat.format(new Object[] {
249 psalmTitle
250 }, buf, pos);
251 }
252
253
259 private void buildPreVerseOpen(StringBuffer buf, String preVerse) {
260 MessageFormat msgFormat = new MessageFormat("<div type=\"section\" canonical=\"true\"><title canonical=\"true\">{0}</title>");
261 msgFormat.format(new Object[] {
262 preVerse
263 }, buf, pos);
264 }
265
266 private void buildPreVerseClose(StringBuffer buf) {
267 buf.append("</div>\n");
268 }
269
270 private void buildVerseOpen(StringBuffer buf, String osisID) {
271 MessageFormat msgFormat = new MessageFormat("<verse osisID=\"{0}\">");
273 msgFormat.format(new Object[] {
274 osisID
275 }, buf, pos);
276 }
277
278 private void buildVerseClose(StringBuffer buf, String osisID) {
279 MessageFormat msgFormat = new MessageFormat("</verse>\n");
281 msgFormat.format(new Object[] {
282 osisID
283 }, buf, pos);
284 }
285
286 private void openOutputFile(String newFilename, boolean open) throws IOException {
287 if (open) {
288 filename = newFilename;
289 writer = new OutputStreamWriter(new FileOutputStream(filename + ".xml"), "UTF-8");
290 }
291 }
292
293 private void writeDocument(StringBuffer buf) throws IOException {
294 writer.write(buf.toString());
295 }
296
297 private void closeOutputFile(boolean close) throws IOException {
298 if (close) {
299 writer.close();
300 parse();
301 }
302 }
303
304 private void parse() {
305 XMLProcess parser = new XMLProcess();
306 parser.getFeatures().setFeatureStates(new String[] {
307 "-s", "-f", "-va", "-dv"});
308 parser.parse(filename + ".xml");
309 }
310
311 private static FieldPosition pos = new FieldPosition(0);
312
313 private static String preVerseStart = "<title subtype=\"x-preverse\" type=\"section\">";
314 private static String preVerseElement = "<title subtype=\"x-preverse\" type=\"section\">(.*?)</title>";
315 private static Pattern preVersePattern = Pattern.compile(preVerseElement);
316
321 private static String psalmTitleStart = "<title type=\"psalm\">";
322 private static String psalmTitleElement = "<title type=\"psalm\">(.*?)</title>";
323 private static Pattern psalmTitlePattern = Pattern.compile(psalmTitleElement);
324
330 private Writer writer;
331 private String filename;
332 }
333