The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
osisrtf.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  *
3  * osisrtf.cpp - OSIS to RTF filter
4  *
5  * $Id: osisrtf.cpp 3547 2017-12-10 05:06:48Z scribe $ *
6  *
7  * Copyright 2003-2014 CrossWire Bible Society (http://www.crosswire.org)
8  * CrossWire Bible Society
9  * P. O. Box 2528
10  * Tempe, AZ 85280-2528
11  *
12  * This program is free software; you can redistribute it and/or modify it
13  * under the terms of the GNU General Public License as published by the
14  * Free Software Foundation version 2.
15  *
16  * This program is distributed in the hope that it will be useful, but
17  * WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * General Public License for more details.
20  *
21  */
22 
23 #include <stdlib.h>
24 #include <ctype.h>
25 #include <osisrtf.h>
26 #include <utilxml.h>
27 #include <utilstr.h>
28 #include <versekey.h>
29 #include <swmodule.h>
30 #include <stringmgr.h>
31 #include <stack>
32 
34 
35 namespace {
36  class MyUserData : public BasicFilterUserData {
37  public:
40  bool inXRefNote;
42  std::stack<char *> quoteStack;
43  SWBuf w;
45  MyUserData(const SWModule *module, const SWKey *key);
46  ~MyUserData();
47  };
48 
49 
50  MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
51  inXRefNote = false;
52  isBiblicalText = false;
53  suspendLevel = 0;
54  osisQToTick = true; // default
55  if (module) {
56  version = module->getName();
57  isBiblicalText = (!strcmp(module->getType(), "Biblical Texts"));
58  osisQToTick = ((!module->getConfigEntry("OSISqToTick")) || (strcmp(module->getConfigEntry("OSISqToTick"), "false")));
59  }
60  }
61 
62 
64  // Just in case the quotes are not well formed
65  while (!quoteStack.empty()) {
66  char *tagData = quoteStack.top();
67  quoteStack.pop();
68  delete [] tagData;
69  }
70  }
71 
72  static inline void outText(const char *t, SWBuf &o, BasicFilterUserData *u) { if (!u->suspendTextPassThru) o += t; else u->lastSuspendSegment += t; }
73  static inline void outText(char t, SWBuf &o, BasicFilterUserData *u) { if (!u->suspendTextPassThru) o += t; else u->lastSuspendSegment += t; }
74 
75 }
76 
77 
79  setTokenStart("<");
80  setTokenEnd(">");
81 
82  setEscapeStart("&");
83  setEscapeEnd(";");
84 
85  setEscapeStringCaseSensitive(true);
86 
87  addEscapeStringSubstitute("amp", "&");
88  addEscapeStringSubstitute("apos", "'");
89  addEscapeStringSubstitute("lt", "<");
90  addEscapeStringSubstitute("gt", ">");
91  addEscapeStringSubstitute("quot", "\"");
92  // addTokenSubstitute("lg", "{\\par}");
93  // addTokenSubstitute("/lg", "{\\par}");
94 
95  setTokenCaseSensitive(true);
96 }
97 
98 
100  return new MyUserData(module, key);
101 }
102 
103 
104 char OSISRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
105 
106  // preprocess text buffer to escape RTF control codes
107  const char *from;
108  SWBuf orig = text;
109  from = orig.c_str();
110  for (text = ""; *from; from++) { //loop to remove extra spaces
111  switch (*from) {
112  case '{':
113  case '}':
114  case '\\':
115  text += "\\";
116  text += *from;
117  break;
118  default:
119  text += *from;
120  }
121  }
122  text += (char)0;
123 
124  SWBasicFilter::processText(text, key, module); //handle tokens as usual
125 
126  orig = text;
127  from = orig.c_str();
128  for (text = ""; *from; from++) { //loop to remove extra spaces
129  if ((strchr(" \t\n\r", *from))) {
130  while (*(from+1) && (strchr(" \t\n\r", *(from+1)))) {
131  from++;
132  }
133  text += " ";
134  }
135  else {
136  text += *from;
137  }
138  }
139  text += (char)0; // probably not needed, but don't want to remove without investigating (same as above)
140  return 0;
141 }
142 
143 
144 bool OSISRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
145  // manually process if it wasn't a simple substitution
146  MyUserData *u = (MyUserData *)userData;
147  SWBuf scratch;
148  bool sub = (u->suspendTextPassThru) ? substituteToken(scratch, token) : substituteToken(buf, token);
149  if (!sub) {
150  XMLTag tag(token);
151 
152  // <w> tag
153  if (!strcmp(tag.getName(), "w")) {
154 
155  // start <w> tag
156  if ((!tag.isEmpty()) && (!tag.isEndTag())) {
157  outText('{', buf, u);
158  u->w = token;
159  }
160 
161  // end or empty <w> tag
162  else {
163  bool endTag = tag.isEndTag();
164  SWBuf lastText;
165  bool show = true; // to handle unplaced article in kjv2003-- temporary till combined
166 
167  if (endTag) {
168  tag = u->w.c_str();
169  lastText = u->lastTextNode.c_str();
170  }
171  else lastText = "stuff";
172 
173  const char *attrib;
174  const char *val;
175  if ((attrib = tag.getAttribute("xlit"))) {
176  val = strchr(attrib, ':');
177  val = (val) ? (val + 1) : attrib;
178  scratch.setFormatted(" {\\fs15 <%s>}", val);
179  outText(scratch.c_str(), buf, u);
180  }
181  if ((attrib = tag.getAttribute("gloss"))) {
182  val = strchr(attrib, ':');
183  val = (val) ? (val + 1) : attrib;
184  scratch.setFormatted(" {\\fs15 <%s>}", val);
185  outText(scratch.c_str(), buf, u);
186  }
187  if ((attrib = tag.getAttribute("lemma"))) {
188  int count = tag.getAttributePartCount("lemma", ' ');
189  int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
190  do {
191  attrib = tag.getAttribute("lemma", i, ' ');
192  if (i < 0) i = 0; // to handle our -1 condition
193  val = strchr(attrib, ':');
194  val = (val) ? (val + 1) : attrib;
195  const char *val2 = val;
196  if ((strchr("GH", *val)) && (isdigit(val[1])))
197  val2++;
198  if ((!strcmp(val2, "3588")) && (lastText.length() < 1))
199  show = false;
200  else {
201  scratch.setFormatted(" {\\cf3 \\sub <%s>}", val2);
202  outText(scratch.c_str(), buf, u);
203  }
204  } while (++i < count);
205  }
206  if ((attrib = tag.getAttribute("morph")) && (show)) {
207  SWBuf savelemma = tag.getAttribute("savlm");
208  if ((strstr(savelemma.c_str(), "3588")) && (lastText.length() < 1))
209  show = false;
210  if (show) {
211  int count = tag.getAttributePartCount("morph", ' ');
212  int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
213  do {
214  attrib = tag.getAttribute("morph", i, ' ');
215  if (i < 0) i = 0; // to handle our -1 condition
216  val = strchr(attrib, ':');
217  val = (val) ? (val + 1) : attrib;
218  const char *val2 = val;
219  if ((*val == 'T') && (strchr("GH", val[1])) && (isdigit(val[2])))
220  val2+=2;
221  scratch.setFormatted(" {\\cf4 \\sub (%s)}", val2);
222  outText(scratch.c_str(), buf, u);
223  } while (++i < count);
224  }
225  }
226  if ((attrib = tag.getAttribute("POS"))) {
227  val = strchr(attrib, ':');
228  val = (val) ? (val + 1) : attrib;
229  scratch.setFormatted(" {\\fs15 <%s>}", val);
230  outText(scratch.c_str(), buf, u);
231  }
232 
233  if (endTag)
234  outText('}', buf, u);
235  }
236  }
237 
238  // <note> tag
239  else if (!strcmp(tag.getName(), "note")) {
240  if (!tag.isEndTag()) {
241  if (!tag.isEmpty()) {
242  SWBuf type = tag.getAttribute("type");
243 
244  if ((type != "x-strongsMarkup") // leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off
245  && (type != "strongsMarkup") // deprecated
246  ) {
247  SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
248  if (u->vkey) {
249  char ch = ((!strcmp(type.c_str(), "crossReference")) || (!strcmp(type.c_str(), "x-cross-ref"))) ? 'x':'n';
250  scratch.setFormatted("{\\super <a href=\"\">*%c%i.%s</a>} ", ch, u->vkey->getVerse(), footnoteNumber.c_str());
251  outText(scratch.c_str(), buf, u);
252  u->inXRefNote = (ch == 'x');
253  }
254  }
255  u->suspendTextPassThru = (++u->suspendLevel);
256  }
257  }
258  if (tag.isEndTag()) {
259  u->suspendTextPassThru = (--u->suspendLevel);
260  u->inXRefNote = false;
261  }
262  }
263 
264  // <p> paragraph and <lg> linegroup tags
265  else if (!strcmp(tag.getName(), "p") || !strcmp(tag.getName(), "lg")) {
266  if ((!tag.isEndTag()) && (!tag.isEmpty())) { // non-empty start tag
267  outText("{\\fi200\\par}", buf, u);
268  }
269  else if (tag.isEndTag()) { // end tag
270  outText("{\\par}", buf, u);
271  userData->supressAdjacentWhitespace = true;
272  }
273  else { // empty paragraph break marker
274  outText("{\\pard\\par}", buf, u);
275  userData->supressAdjacentWhitespace = true;
276  }
277  }
278 
279  // Milestoned paragraphs, created by osis2mod
280  // <div type="paragraph" sID.../>
281  // <div type="paragraph" eID.../>
282  else if (tag.isEmpty() && !strcmp(tag.getName(), "div") && tag.getAttribute("type") && (!strcmp(tag.getAttribute("type"), "x-p") || !strcmp(tag.getAttribute("type"), "paragraph"))) {
283  // <div type="paragraph" sID... />
284  if (tag.getAttribute("sID")) { // non-empty start tag
285  outText("{\\fi200\\par}", buf, u);
286  }
287  // <div type="paragraph" eID... />
288  else if (tag.getAttribute("eID")) {
289  outText("{\\par}", buf, u);
290  userData->supressAdjacentWhitespace = true;
291  }
292  }
293 
294  // <reference> tag
295  else if (!strcmp(tag.getName(), "reference")) {
296  if (!u->inXRefNote) { // only show these if we're not in an xref note
297  if ((!tag.isEndTag()) && (!tag.isEmpty())) {
298  outText("{<a href=\"\">", buf, u);
299  }
300  else if (tag.isEndTag()) {
301  outText("</a>}", buf, u);
302  }
303  }
304  }
305 
306  // <l> poetry
307  else if (!strcmp(tag.getName(), "l")) {
308  // end line marker
309  if (tag.getAttribute("eID")) {
310  outText("{\\par}", buf, u);
311  }
312  // <l/> without eID or sID
313  // Note: this is improper osis. This should be <lb/>
314  else if (tag.isEmpty() && !tag.getAttribute("sID")) {
315  outText("{\\par}", buf, u);
316  }
317  // end of the line
318  else if (tag.isEndTag()) {
319  outText("{\\par}", buf, u);
320  }
321  }
322 
323  // <milestone type="line"/> or <lb.../>
324  else if ((!strcmp(tag.getName(), "lb") && (!tag.getAttribute("type") || strcmp(tag.getAttribute("type"), "x-optional"))) || ((!strcmp(tag.getName(), "milestone")) && (tag.getAttribute("type")) && (!strcmp(tag.getAttribute("type"), "line")))) {
325  outText("{\\par}", buf, u);
326  userData->supressAdjacentWhitespace = true;
327  }
328 
329  // <title>
330  else if (!strcmp(tag.getName(), "title")) {
331  if ((!tag.isEndTag()) && (!tag.isEmpty())) {
332  outText("{\\par\\i1\\b1 ", buf, u);
333  }
334  else if (tag.isEndTag()) {
335  outText("\\par}", buf, u);
336  }
337  }
338  // <list> - how do we support these better in RTF?
339  else if (!strcmp(tag.getName(), "list")) {
340  if((!tag.isEndTag()) && (!tag.isEmpty())) {
341  outText("\\par\\pard", buf, u);
342  }
343  else if (tag.isEndTag()) {
344  outText("\\par\\pard", buf, u);
345  }
346  }
347 
348  // <item> - support better
349  else if (!strcmp(tag.getName(), "item")) {
350  if((!tag.isEndTag()) && (!tag.isEmpty())) {
351  outText("* ", buf, u);
352  }
353  else if (tag.isEndTag()) {
354  outText("\\par", buf, u);
355  }
356  }
357 
358  // <catchWord> & <rdg> tags (italicize)
359  else if (!strcmp(tag.getName(), "rdg") || !strcmp(tag.getName(), "catchWord")) {
360  if ((!tag.isEndTag()) && (!tag.isEmpty())) {
361  outText("{\\i1 ", buf, u);
362  }
363  else if (tag.isEndTag()) {
364  outText('}', buf, u);
365  }
366  }
367 
368  // <hi>
369  else if (!strcmp(tag.getName(), "hi")) {
370  SWBuf type = tag.getAttribute("type");
371  if ((!tag.isEndTag()) && (!tag.isEmpty())) {
372  if (type == "bold" || type == "b" || type == "x-b")
373  outText("{\\b1 ", buf, u);
374  else // all other types
375  outText("{\\i1 ", buf, u);
376  }
377  else if (tag.isEndTag()) {
378  outText('}', buf, u);
379  }
380  }
381 
382  // <q> quote
383  // Rules for a quote element:
384  // If the tag is empty with an sID or an eID then use whatever it specifies for quoting.
385  // Note: empty elements without sID or eID are ignored.
386  // If the tag is <q> then use it's specifications and push it onto a stack for </q>
387  // If the tag is </q> then use the pushed <q> for specification
388  // If there is a marker attribute, possibly empty, this overrides osisQToTick.
389  // If osisQToTick, then output the marker, using level to determine the type of mark.
390  else if (!strcmp(tag.getName(), "q")) {
391  SWBuf type = tag.getAttribute("type");
392  SWBuf who = tag.getAttribute("who");
393  const char *tmp = tag.getAttribute("level");
394  int level = (tmp) ? atoi(tmp) : 1;
395  tmp = tag.getAttribute("marker");
396  bool hasMark = tmp;
397  SWBuf mark = tmp;
398 
399  // open <q> or <q sID... />
400  if ((!tag.isEmpty() && !tag.isEndTag()) || (tag.isEmpty() && tag.getAttribute("sID"))) {
401  // if <q> then remember it for the </q>
402  if (!tag.isEmpty()) {
403  char *tagData = 0;
404  stdstr(&tagData, tag.toString());
405  u->quoteStack.push(tagData);
406  }
407 
408  // Do this first so quote marks are included as WoC
409  if (who == "Jesus")
410  outText("\\cf6 ", buf, u);
411 
412  // first check to see if we've been given an explicit mark
413  if (hasMark)
414  outText(mark, buf, u);
415  //alternate " and '
416  else if (u->osisQToTick)
417  outText((level % 2) ? '\"' : '\'', buf, u);
418  }
419  // close </q> or <q eID... />
420  else if ((tag.isEndTag()) || (tag.getAttribute("eID"))) {
421  // if it is </q> then pop the stack for the attributes
422  if (tag.isEndTag() && !u->quoteStack.empty()) {
423  char *tagData = u->quoteStack.top();
424  u->quoteStack.pop();
425  XMLTag qTag(tagData);
426  delete [] tagData;
427 
428  type = qTag.getAttribute("type");
429  who = qTag.getAttribute("who");
430  tmp = qTag.getAttribute("level");
431  level = (tmp) ? atoi(tmp) : 1;
432  tmp = qTag.getAttribute("marker");
433  hasMark = tmp;
434  mark = tmp;
435  }
436 
437  // first check to see if we've been given an explicit mark
438  if (hasMark)
439  outText(mark, buf, u);
440  // finally, alternate " and ', if config says we should supply a mark
441  else if (u->osisQToTick)
442  outText((level % 2) ? '\"' : '\'', buf, u);
443 
444  // Do this last so quote marks are included as WoC
445  if (who == "Jesus")
446  outText("\\cf0 ", buf, u);
447  }
448  }
449 
450 
451  // <milestone type="cQuote" marker="x"/>
452  else if (!strcmp(tag.getName(), "milestone") && tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "cQuote")) {
453  const char *tmp = tag.getAttribute("marker");
454  bool hasMark = tmp;
455  SWBuf mark = tmp;
456  tmp = tag.getAttribute("level");
457  int level = (tmp) ? atoi(tmp) : 1;
458 
459  // first check to see if we've been given an explicit mark
460  if (hasMark)
461  outText(mark, buf, u);
462  // finally, alternate " and ', if config says we should supply a mark
463  else if (u->osisQToTick)
464  outText((level % 2) ? '\"' : '\'', buf, u);
465  }
466 
467  // <transChange>
468  else if (!strcmp(tag.getName(), "transChange")) {
469  SWBuf type = tag.getAttribute("type");
470 
471  if ((!tag.isEndTag()) && (!tag.isEmpty())) {
472 
473 // just do all transChange tags this way for now
474 // if (type == "supplied")
475  outText("{\\i1 ", buf, u);
476  }
477  else if (tag.isEndTag()) {
478  outText('}', buf, u);
479  }
480  }
481 
482  // <divineName>
483  else if (!strcmp(tag.getName(), "divineName")) {
484 
485  if ((!tag.isEndTag()) && (!tag.isEmpty())) {
486  outText("{\\scaps ", buf, u);
487  }
488  else if (tag.isEndTag()) {
489  outText("}", buf, u);
490  }
491  }
492 
493  // <div>
494  else if (!strcmp(tag.getName(), "div")) {
495 
496  if ((!tag.isEndTag()) && (!tag.isEmpty())) {
497  outText("\\pard ", buf, u);
498  }
499  else if (tag.isEndTag()) {
500  outText("\\par ", buf, u);
501  }
502  }
503 
504  // image
505  else if (!strcmp(tag.getName(), "figure")) {
506  const char *src = tag.getAttribute("src");
507  if (!src) // assert we have a src attribute
508  return false;
509 
510  char* filepath = new char[strlen(u->module->getConfigEntry("AbsoluteDataPath")) + strlen(token)];
511  *filepath = 0;
512  strcpy(filepath, userData->module->getConfigEntry("AbsoluteDataPath"));
513  strcat(filepath, src);
514 
515 // we do this because BibleCS looks for this EXACT format for an image tag
516  outText("<img src=\"", buf, u);
517  outText(filepath, buf, u);
518  outText("\" />", buf, u);
519 /*
520  char imgc;
521  for (c = filepath + strlen(filepath); c > filepath && *c != '.'; c--);
522  c++;
523  FILE* imgfile;
524  if (stricmp(c, "jpg") || stricmp(c, "jpeg")) {
525  imgfile = fopen(filepath, "r");
526  if (imgfile != NULL) {
527  outText("{\\nonshppict {\\pict\\jpegblip ", buf, u);
528  while (feof(imgfile) != EOF) {
529  scratch.setFormatted("%2x", fgetc(imgfile));
530  outText(scratch.c_str(), buf, u);
531 
532  }
533  fclose(imgfile);
534  outText("}}", buf, u);
535  }
536  }
537  else if (stricmp(c, "png")) {
538  outText("{\\*\\shppict {\\pict\\pngblip ", buf, u);
539 
540  outText("}}", buf, u);
541  }
542 */
543  delete [] filepath;
544  }
545  else {
546  return false; // we still didn't handle token
547  }
548  }
549  return true;
550 }
551 
552 
const char * getName() const
Definition: swmodule.cpp:204
#define SWORD_NAMESPACE_START
Definition: defs.h:39
OSISRTF()
Definition: osisrtf.cpp:78
Definition: swbuf.h:47
unsigned long length() const
Definition: swbuf.h:197
virtual char processText(SWBuf &text, const SWKey *key=0, const SWModule *module=0)
const char * getType() const
Definition: swmodule.cpp:232
virtual const char * getConfigEntry(const char *key) const
Definition: swmodule.cpp:1159
const SWModule * module
Definition: swbasicfilter.h:42
const char * getName() const
Definition: utilxml.h:58
SWText * module
Definition: osis2mod.cpp:105
Definition: utilxml.h:38
const char * toString() const
Definition: utilxml.cpp:285
bool isEmpty() const
Definition: utilxml.h:60
SWORD_NAMESPACE_START char * stdstr(char **ipstr, const char *istr, unsigned int memPadFactor=1)
Definition: utilstr.h:44
const char * c_str() const
Definition: swbuf.h:158
virtual char processText(SWBuf &text, const SWKey *key=0, const SWModule *module=0)
Definition: osisrtf.cpp:104
static void outText(const char *t, SWBuf &o, BasicFilterUserData *u)
const char * getAttribute(const char *attribName, int partNum=-1, char partSplit= '|') const
Definition: utilxml.cpp:230
bool isEndTag(const char *eID=0) const
Definition: utilxml.cpp:323
#define SWORD_NAMESPACE_END
Definition: defs.h:40
SWBuf & setFormatted(const char *format,...)
Definition: swbuf.cpp:50
Definition: swkey.h:77
virtual bool handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData)
Definition: osisrtf.cpp:144
std::stack< char * > quoteStack
Definition: osisrtf.cpp:42
int getAttributePartCount(const char *attribName, char partSplit= '|') const
Definition: utilxml.cpp:218
virtual BasicFilterUserData * createUserData(const SWModule *module, const SWKey *key)
Definition: osisrtf.cpp:99