The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
gbfosis.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * gbfosis.cpp - GBF to OSIS filter
4  *
5  * $Id: gbfosis.cpp 3808 2020-10-02 13:23:34Z scribe $
6  *
7  * Copyright 2002-2013 CrossWire Bible Society (http://www.crosswire.org)
8  * CrossWire Bible Society
9  * P. O. Box 2528
10  * Tempe, AZ 85280-2528
11  *
12  * This program is free software; you can redistribute it and/or modify it
13  * under the terms of the GNU General Public License as published by the
14  * Free Software Foundation version 2.
15  *
16  * This program is distributed in the hope that it will be useful, but
17  * WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * General Public License for more details.
20  *
21  */
22 
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <stdarg.h>
26 #include <gbfosis.h>
27 #include <swmodule.h>
28 #include <versekey.h>
29 #include <swlog.h>
30 #include <stdarg.h>
31 
32 
34 
35 
37 }
38 
39 
41 }
42 
43 
44 char GBFOSIS::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
45  char token[2048]; //cheesy, we seem to like cheese :)
46  int tokpos = 0;
47  bool intoken = false;
48  bool keepToken = false;
49 
50 // static QuoteStack quoteStack;
51 
52  SWBuf orig = text;
53  SWBuf tmp;
54  SWBuf value;
55 
56  bool suspendTextPassThru = false;
57  bool handled = false;
58  bool newWord = false;
59  bool newText = false;
60  bool lastspace = false;
61 
62  const char *wordStart = text.c_str();
63  const char *wordEnd = NULL;
64 
65  const char *textStart = NULL;
66  const char *textEnd = NULL;
67 
68  SWBuf textNode = "";
69 
70  SWBuf buf;
71 
72  text = "";
73  for (const char* from = orig.c_str(); *from; ++from) {
74  if (*from == '<') { //start of new token detected
75  intoken = true;
76  tokpos = 0;
77  token[0] = 0;
78  token[1] = 0;
79  token[2] = 0;
80  textEnd = from-1; //end of last text node found
81  wordEnd = text.c_str() + text.length();//not good, instead of wordEnd = to!
82 
83  continue;
84  }
85 
86  if (*from == '>') { // process tokens
87  intoken = false;
88  keepToken = false;
89  suspendTextPassThru = false;
90  newWord = true;
91  handled = false;
92 
93  while (wordStart < (text.c_str() + text.length())) { //hack
94  if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1])
95  wordStart++;
96  else break;
97  }
98  while (wordEnd > wordStart) {
99  if (strchr(" ,;:.?!()'\"", *wordEnd))
100  wordEnd--;
101  else break;
102  }
103 
104  // Scripture Reference
105  if (!strncmp(token, "scripRef", 8)) {
106  suspendTextPassThru = true;
107  newText = true;
108  handled = true;
109  }
110  else if (!strncmp(token, "/scripRef", 9)) {
111  tmp = "";
112  tmp.append(textStart, (int)(textEnd - textStart)+1);
113  text += VerseKey::convertToOSIS(tmp.c_str(), key);
114 
115  lastspace = false;
116  suspendTextPassThru = false;
117  handled = true;
118  }
119 
120  // Footnote
121  if (!strcmp(token, "RF") || !strncmp(token, "RF ", 3)) { //the GBFFootnotes filter adds the attribute "swordFootnote", we want to catch that, too
122  // pushString(buf, "<reference work=\"Bible.KJV\" reference=\"");
123  text += "<note type=\"x-StudyNote\">";
124  newText = true;
125  lastspace = false;
126  handled = true;
127  }
128  else if (!strcmp(token, "Rf")) {
129  text += "</note>";
130  lastspace = false;
131  handled = true;
132  }
133  // hebrew titles
134  if (!strcmp(token, "TH")) {
135  text += "<title type=\"psalm\">";
136  newText = true;
137  lastspace = false;
138  handled = true;
139  }
140  else if (!strcmp(token, "Th")) {
141  text += "</title>";
142  lastspace = false;
143  handled = true;
144  }
145  // Italics assume transchange
146  if (!strcmp(token, "FI")) {
147  text += "<transChange type=\"added\">";
148  newText = true;
149  lastspace = false;
150  handled = true;
151  }
152  else if (!strcmp(token, "Fi")) {
153  text += "</transChange>";
154  lastspace = false;
155  handled = true;
156  }
157  // less than
158  if (!strcmp(token, "CT")) {
159  text += "&lt;";
160  newText = true;
161  lastspace = false;
162  handled = true;
163  }
164  // greater than
165  if (!strcmp(token, "CG")) {
166  text += "&gt;";
167  newText = true;
168  lastspace = false;
169  handled = true;
170  }
171  // Paragraph break. For now use empty paragraph element
172  if (!strcmp(token, "CM")) {
173  text += "<milestone type=\"x-p\" />";
174  newText = true;
175  lastspace = false;
176  handled = true;
177  }
178 
179  // Figure
180  else if (!strncmp(token, "img ", 4)) {
181  const char *src = strstr(token, "src");
182  if (!src) // assert we have a src attribute
183  continue;
184 // return false;
185 
186  text += "<figure src=\"";
187  const char *c;
188  for (c = src;((*c) && (*c != '"')); c++);
189 
190 // uncomment for SWORD absolute path logic
191 // if (*(c+1) == '/') {
192 // pushString(buf, "file:");
193 // pushString(buf, module->getConfigEntry("AbsoluteDataPath"));
194 // if (*((*buf)-1) == '/')
195 // c++; // skip '/'
196 // }
197 // end of uncomment for asolute path logic
198 
199  for (c++;((*c) && (*c != '"')); c++) {
200  text += *c;
201  }
202  text += "\" />";
203 
204  lastspace = false;
205  handled = true;
206  }
207 
208  // Strongs numbers
209  else if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs
210  bool divineName = false;
211  value = token+1;
212 
213  // normal strongs number
214  //strstrip(val);
215  if (!strncmp(wordStart, "<w ", 3)) {
216  const char *attStart = strstr(wordStart, "lemma");
217  if (attStart) {
218  attStart += 7;
219 
220  buf = "";
221  buf.appendFormatted("strong:%s ", value.c_str());
222  }
223  else { // no lemma attribute
224  attStart = wordStart + 3;
225 
226  buf = "";
227  buf.appendFormatted(buf, "lemma=\"strong:%s\" ", value.c_str());
228  }
229 
230  text.insert(attStart - text.c_str(), buf);
231  }
232  else { //wordStart doesn't point to an existing <w> attribute!
233  if (!strcmp(value.c_str(), "H03068")) { //divineName
234  buf = "";
235  buf.appendFormatted("<divineName><w lemma=\"strong:%s\">", value.c_str());
236 
237  divineName = true;
238  }
239  else {
240  buf = "";
241  buf.appendFormatted("<w lemma=\"strong:%s\">", value.c_str());
242  }
243 
244  text.insert(wordStart - text.c_str(), buf);
245 
246  if (divineName) {
247  wordStart += 12;
248  text += "</w></divineName>";
249  }
250  else text += "</w>";
251 
252  lastspace = false;
253  }
254  handled = true;
255  }
256 
257  // Morphology
258  else if (*token == 'W' && token[1] == 'T') {
259  if (token[2] == 'G' || token[2] == 'H') { // Strongs
260  value = token+2;
261  }
262  else value = token+1;
263 
264  if (!strncmp(wordStart, "<w ", 3)) {
265  const char *attStart = strstr(wordStart, "morph");
266  if (attStart) { //existing morph attribute, append this one to it
267  attStart += 7;
268  buf = "";
269  buf.appendFormatted("%s:%s ", "robinson", value.c_str());
270  }
271  else { // no lemma attribute
272  attStart = wordStart + 3;
273  buf = "";
274  buf.appendFormatted("morph=\"%s:%s\" ", "robinson", value.c_str());
275  }
276 
277  text.insert(attStart - text.c_str(), buf); //hack, we have to
278  }
279  else { //no existing <w> attribute fond
280  buf = "";
281  buf.appendFormatted("<w morph=\"%s:%s\">", "robinson", value.c_str());
282  text.insert(wordStart - text.c_str(), buf);
283  text += "</w>";
284  lastspace = false;
285 
286  }
287  handled = true;
288  }
289 
290  if (!keepToken) {
291  if (!handled) {
292  SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>");
293 // exit(-1);
294  }
295  if (from[1] && strchr(" ,;.:?!()'\"", from[1])) {
296  if (lastspace) {
297  text--;
298  }
299  }
300  if (newText) {
301  textStart = from+1;
302  newText = false;
303  }
304  continue;
305  }
306 
307  // if not a strongs token, keep token in text
308  text.appendFormatted("<%s>", token);
309 
310  if (newText) {
311  textStart = text.c_str() + text.length();
312  newWord = false;
313  }
314  continue;
315  }
316  if (intoken) {
317  if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) {
318  token[tokpos++] = *from;
319  token[tokpos+2] = 0;
320  }
321  }
322  else {
323  switch (*from) {
324  case '\'':
325  case '\"':
326  case '`':
327 // quoteStack.handleQuote(fromStart, from, &to);
328  text += *from;
329  //from++; //this line removes chars after an apostrophe! Needs fixing.
330  break;
331  default:
332  if (newWord && (*from != ' ')) {
333  wordStart = text.c_str() + text.length();
334  newWord = false;
335 
336  //fix this if required?
337  //memset(to, 0, 10);
338 
339  }
340 
341  if (!suspendTextPassThru) {
342  text += (*from);
343  lastspace = (*from == ' ');
344  }
345  }
346  }
347  }
348 
349  const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key);
350  if (vkey) {
351  SWBuf ref = "";
352  if (vkey->getVerse()) {
353  ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef());
354  }
355 
356  if (ref.length() > 0) {
357 
358  text = ref + text;
359 
360  if (vkey->getVerse()) {
361  VerseKey *tmp = (VerseKey *)vkey->clone();
362  *tmp = *vkey;
363  tmp->setAutoNormalize(false);
364  tmp->setIntros(true);
365 
366  text += "</verse>";
367 
368  *tmp = MAXVERSE;
369  if (*vkey == *tmp) {
370  tmp->setVerse(0);
371 // sprintf(ref, "\t</div>");
372 // pushString(&to, ref);
373  *tmp = MAXCHAPTER;
374  *tmp = MAXVERSE;
375  if (*vkey == *tmp) {
376  tmp->setChapter(0);
377  tmp->setVerse(0);
378 // sprintf(ref, "\t</div>");
379 // pushString(&to, ref);
380 /*
381  if (!quoteStack.empty()) {
382  SWLog::getSystemLog()->logError("popping unclosed quote at end of book");
383  quoteStack.clear();
384  }
385 */
386  }
387  }
388  delete tmp;
389  }
390 // else if (vkey->Chapter()) {
391 // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef());
392 // }
393 // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef());
394  }
395  }
396  return 0;
397 }
398 
399 
401  clear();
402 }
403 
404 
406  while (!quotes.empty()) quotes.pop();
407 }
408 
409 
411  clear();
412 }
413 
414 
415 void QuoteStack::handleQuote(char *buf, char *quotePos, SWBuf &text) {
416 //QuoteInstance(char startChar = '\"', char level = 1, string uniqueID = "", char continueCount = 0) {
417  if (!quotes.empty()) {
418  QuoteInstance last = quotes.top();
419  if (last.startChar == *quotePos) {
420  text += "</quote>";
421  quotes.pop();
422  }
423  else {
424  quotes.push(QuoteInstance(*quotePos, last.level+1));
425  quotes.top().pushStartStream(text);
426  }
427  }
428  else {
429  quotes.push(QuoteInstance(*quotePos));
430  quotes.top().pushStartStream(text);
431  }
432 }
433 
435  text.appendFormatted("<quote level=\"%d\">", level);
436 }
437 
#define SWORD_NAMESPACE_START
Definition: defs.h:39
SWBuf & appendFormatted(const char *format,...)
Definition: swbuf.cpp:81
virtual SWKey * clone() const
Definition: versekey.cpp:278
Definition: swbuf.h:47
unsigned long length() const
Definition: swbuf.h:197
#define MAXVERSE
Definition: versekey.h:43
static SWLog * getSystemLog()
Definition: swlog.cpp:53
virtual void setChapter(int ichapter)
Definition: versekey.cpp:1603
SWText * module
Definition: osis2mod.cpp:105
virtual ~GBFOSIS()
Definition: gbfosis.cpp:40
char processText(SWBuf &text, const SWKey *key=0, const SWModule *module=0)
Definition: gbfosis.cpp:44
static const char * convertToOSIS(const char *inRef, const SWKey *defaultKey)
Definition: versekey.cpp:1887
void insert(unsigned long pos, const char *str, unsigned long start=0, signed long max=-1)
Definition: swbuf.cpp:99
virtual void setIntros(bool val)
Definition: versekey.cpp:1663
return NULL
Definition: regex.c:7953
const char * c_str() const
Definition: swbuf.h:158
SWBuf & append(const char *str, long max=-1)
Definition: swbuf.h:274
virtual ~QuoteStack()
Definition: gbfosis.cpp:410
QuoteInstanceStack quotes
Definition: gbfosis.h:52
virtual void setVerse(int iverse)
Definition: versekey.cpp:1622
virtual int getVerse() const
Definition: versekey.cpp:1534
void clear()
Definition: gbfosis.cpp:405
virtual const char * getOSISRef() const
Definition: versekey.cpp:1810
#define SWDYNAMIC_CAST(className, object)
Definition: defs.h:47
GBFOSIS()
Definition: gbfosis.cpp:36
void handleQuote(char *buf, char *quotePos, SWBuf &text)
Definition: gbfosis.cpp:415
void logError(const char *fmt,...) const
Definition: swlog.cpp:87
void pushStartStream(SWBuf &text)
Definition: gbfosis.cpp:434
#define SWORD_NAMESPACE_END
Definition: defs.h:40
Definition: swkey.h:77
#define MAXCHAPTER
Definition: versekey.h:44
virtual void setAutoNormalize(bool iautonorm)
Definition: versekey.cpp:1648