| GBFFilter.java |
1 /**
2 * Distribution License:
3 * JSword is free software; you can redistribute it and/or modify it under
4 * the terms of the GNU Lesser General Public License, version 2.1 as published by
5 * the Free Software Foundation. This program is distributed in the hope
6 * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8 * See the GNU Lesser General Public License for more details.
9 *
10 * The License is available on the internet at:
11 * http://www.gnu.org/copyleft/lgpl.html
12 * or by writing to:
13 * Free Software Foundation, Inc.
14 * 59 Temple Place - Suite 330
15 * Boston, MA 02111-1307, USA
16 *
17 * Copyright: 2005
18 * The copyright to this program is held by its authors.
19 *
20 * ID: $Id: GBFFilter.java 2221 2012-01-25 21:32:57Z dmsmith $
21 */
22 package org.crosswire.jsword.book.filter.gbf;
23
24 import java.util.ArrayList;
25 import java.util.LinkedList;
26 import java.util.List;
27
28 import org.crosswire.jsword.book.Book;
29 import org.crosswire.jsword.book.DataPolice;
30 import org.crosswire.jsword.book.OSISUtil;
31 import org.crosswire.jsword.book.filter.Filter;
32 import org.crosswire.jsword.passage.Key;
33 import org.jdom.Content;
34 import org.jdom.Element;
35
36 /**
37 * Filter to convert GBF data to OSIS format.
38 *
39 * The best place to go for more information about the GBF spec is:
40 * <a href="http://ebible.org/bible/gbf.htm">http://ebible.org/bible/gbf.htm</a>
41 *
42 * @see gnu.lgpl.License for license details.<br>
43 * The copyright to this program is held by its authors.
44 * @author Joe Walker [joe at eireneh dot com]
45 */
46 public class GBFFilter implements Filter {
47 /* (non-Javadoc)
48 * @see org.crosswire.jsword.book.filter.Filter#toOSIS(org.crosswire.jsword.book.Book, org.crosswire.jsword.passage.Key, java.lang.String)
49 */
50 public List<Content> toOSIS(Book book, Key key, String plain) {
51 Element ele = OSISUtil.factory().createDiv();
52 LinkedList<Content> stack = new LinkedList<Content>();
53 stack.addFirst(ele);
54
55 List<Tag> taglist = parseTags(book, key, plain.trim());
56 while (true) {
57 if (taglist.isEmpty()) {
58 break;
59 }
60
61 Tag tag = taglist.remove(0);
62 tag.updateOsisStack(book, key, stack);
63 }
64
65 stack.removeFirst();
66 return ele.removeContent();
67 }
68
69 @Override
70 public GBFFilter clone() {
71 GBFFilter clone = null;
72 try {
73 clone = (GBFFilter) super.clone();
74 } catch (CloneNotSupportedException e) {
75 assert false : e;
76 }
77 return clone;
78 }
79
80 /**
81 * Turn the string into a list of tags in the order that they appear in the
82 * original string.
83 */
84 private List<Tag> parseTags(Book book, Key key, String aRemains) {
85 String remains = aRemains;
86 List<Tag> taglist = new ArrayList<Tag>();
87
88 // A GBF code is of the form <XY...> or <Xy...>
89 // where the first letter is always capitalized and
90 // the second letter indicates an open or close tag.
91 // Upper letters are open, lower are close.
92 // The ... is optional and represents an argument.
93 // Sometimes the argument is preceded by a space.
94 // In GBF it is legal to have < and > otherwise.
95 // In at least one module, GerLut1545, << ... >> is used for quotes.
96 while (true) {
97 int ltpos = remains.indexOf('<');
98 int gtpos = remains.indexOf('>', ltpos + 1);
99
100 // check whether we have unmatched < and >, or no tags at all
101 // If so then we don't have a tag in the remaining.
102 if (ltpos == -1 || gtpos == -1) {
103 // If the first letter after < is an upper case letter
104 // then report it as a potential problem
105 if (ltpos >= 0
106 && ltpos < remains.length() + 1
107 && Character.isUpperCase(remains.charAt(ltpos + 1)))
108 {
109 DataPolice.report(book, key, "Possible bad GBF tag" + remains);
110 }
111 if (gtpos != -1 && ltpos >= 0) {
112 DataPolice.report(book, key, "Possible bad GBF tag" + remains);
113 }
114 int pos = Math.max(ltpos, gtpos) + 1;
115 // If there were not any <, > or either ended the string
116 // then we only have text.
117 if (pos == 0 || pos == remains.length()) {
118 taglist.add(GBFTagBuilders.getTextTag(remains));
119 break;
120 }
121 taglist.add(GBFTagBuilders.getTextTag(remains.substring(0, pos)));
122 remains = remains.substring(pos);
123 continue;
124 }
125
126 // If the character after the < is not an upper case letter
127 // then we don't have GBF.
128 // So, create a text tag that ends with the found >.
129 // Note that in JST, there are spurious html tags and
130 // this will treat them as valid GBF text.
131 char firstChar = remains.charAt(ltpos + 1);
132 if (!Character.isUpperCase(firstChar)) {
133 taglist.add(GBFTagBuilders.getTextTag(remains.substring(0, gtpos + 1)));
134 remains = remains.substring(gtpos + 1);
135 continue;
136 }
137
138 // generate tags
139 String start = remains.substring(0, ltpos);
140 int strLen = start.length();
141 if (strLen > 0) {
142 int beginIndex = 0;
143 boolean inSepStr = SEPARATORS.indexOf(start.charAt(0)) >= 0;
144 // split words from separators...
145 // e.g., "a b c? e g." -> "a b c", "? ", "e g."
146 // "a b c<tag> e g." -> "a b c", tag, " ", "e g."
147 for (int i = 1; inSepStr && i < strLen; i++) {
148 char currentChar = start.charAt(i);
149 if (!(SEPARATORS.indexOf(currentChar) >= 0)) {
150 taglist.add(GBFTagBuilders.getTextTag(start.substring(beginIndex, i)));
151 beginIndex = i;
152 inSepStr = false;
153 }
154 }
155
156 if (beginIndex < strLen) {
157 taglist.add(GBFTagBuilders.getTextTag(start.substring(beginIndex)));
158 }
159 }
160
161 String tag = remains.substring(ltpos + 1, gtpos);
162 int length = tag.length();
163 if (length > 0) {
164 Tag reply = GBFTagBuilders.getTag(book, key, tag);
165 if (reply != null) {
166 taglist.add(reply);
167 }
168 }
169
170 remains = remains.substring(gtpos + 1);
171 }
172
173 return taglist;
174 }
175
176 private static final String SEPARATORS = " ,:;.?!";
177
178 }
179