The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
thmlxhtml.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * thmlxhtml.cpp - ThML to classed XHTML
4  *
5  * $Id: thmlxhtml.cpp 3726 2020-04-26 17:53:51Z scribe $
6  *
7  * Copyright 2011-2013 CrossWire Bible Society (http://www.crosswire.org)
8  * CrossWire Bible Society
9  * P. O. Box 2528
10  * Tempe, AZ 85280-2528
11  *
12  * This program is free software; you can redistribute it and/or modify it
13  * under the terms of the GNU General Public License as published by the
14  * Free Software Foundation version 2.
15  *
16  * This program is distributed in the hope that it will be useful, but
17  * WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * General Public License for more details.
20  *
21  */
22 
23 #include <stdlib.h>
24 #include <thmlxhtml.h>
25 #include <swmodule.h>
26 #include <utilxml.h>
27 #include <utilstr.h>
28 #include <versekey.h>
29 #include <url.h>
30 
32 
33 
// Returns the header text associated with this filter's output.
// The literal below is continued across two source lines with a trailing
// backslash, so the returned string holds only the leading whitespace of the
// second line.
// NOTE(review): this listing has collapsed the original indentation, so the
// exact whitespace byte(s) in the literal should be confirmed against the
// real source file.
34 const char *ThMLXHTML::getHeader() const {
35  return "\
36  ";
37 }
38 
39 
// Constructor body. The signature (listing line 40) is missing from this
// extract; presumably it is MyUserData(const SWModule *module, const SWKey *key)
// -- confirm against the real source file.
// Starts with "not a Bible" / "no open section heading" defaults, then, when a
// module is supplied, records its name and whether it is a Bible text.
41  isBiblicalText = false;
42  secHeadLevel = 0;
// module may be null; in that case the defaults above are kept.
43  if (module) {
44  version = module->getName();
// Only the exact module type string "Biblical Texts" counts as a Bible.
45  isBiblicalText = (!strcmp(module->getType(), "Biblical Texts"));
46  }
47 }
48 
49 
// Constructor body. The signature (listing line 50) is missing from this
// extract; presumably it is ThMLXHTML::ThMLXHTML() -- confirm against the
// real source file.
// Configures the filter: tokens are delimited by '<' ... '>', escapes by
// '&' ... ';', a whitelist of named entities is registered, token matching
// is made case sensitive, and one simple token substitution is installed.
51  setTokenStart("<");
52  setTokenEnd(">");
53 
54  setEscapeStart("&");
55  setEscapeEnd(";");
56 
// NOTE(review): listing lines 57-58 are absent from this extract.
59 
60  addAllowedEscapeString("quot");
// NOTE(review): listing lines 61-63 are absent from this extract.
64 
65  addAllowedEscapeString("nbsp");
66  addAllowedEscapeString("brvbar"); // "¦"
67  addAllowedEscapeString("sect"); // "§"
68  addAllowedEscapeString("copy"); // "©"
69  addAllowedEscapeString("laquo"); // "«"
70  addAllowedEscapeString("reg"); // "®"
71  addAllowedEscapeString("acute"); // "´"
72  addAllowedEscapeString("para"); // "¶"
73  addAllowedEscapeString("raquo"); // "»"
74 
75  addAllowedEscapeString("Aacute"); // "Á"
76  addAllowedEscapeString("Agrave"); // "À"
77  addAllowedEscapeString("Acirc"); // "Â"
78  addAllowedEscapeString("Auml"); // "Ä"
79  addAllowedEscapeString("Atilde"); // "Ã"
80  addAllowedEscapeString("Aring"); // "Å"
81  addAllowedEscapeString("aacute"); // "á"
82  addAllowedEscapeString("agrave"); // "à"
83  addAllowedEscapeString("acirc"); // "â"
84  addAllowedEscapeString("auml"); // "ä"
85  addAllowedEscapeString("atilde"); // "ã"
86  addAllowedEscapeString("aring"); // "å"
87  addAllowedEscapeString("Eacute"); // "É"
88  addAllowedEscapeString("Egrave"); // "È"
89  addAllowedEscapeString("Ecirc"); // "Ê"
90  addAllowedEscapeString("Euml"); // "Ë"
91  addAllowedEscapeString("eacute"); // "é"
92  addAllowedEscapeString("egrave"); // "è"
93  addAllowedEscapeString("ecirc"); // "ê"
94  addAllowedEscapeString("euml"); // "ë"
95  addAllowedEscapeString("Iacute"); // "Í"
96  addAllowedEscapeString("Igrave"); // "Ì"
97  addAllowedEscapeString("Icirc"); // "Î"
98  addAllowedEscapeString("Iuml"); // "Ï"
99  addAllowedEscapeString("iacute"); // "í"
100  addAllowedEscapeString("igrave"); // "ì"
101  addAllowedEscapeString("icirc"); // "î"
102  addAllowedEscapeString("iuml"); // "ï"
103  addAllowedEscapeString("Oacute"); // "Ó"
104  addAllowedEscapeString("Ograve"); // "Ò"
105  addAllowedEscapeString("Ocirc"); // "Ô"
106  addAllowedEscapeString("Ouml"); // "Ö"
107  addAllowedEscapeString("Otilde"); // "Õ"
108  addAllowedEscapeString("oacute"); // "ó"
109  addAllowedEscapeString("ograve"); // "ò"
110  addAllowedEscapeString("ocirc"); // "ô"
111  addAllowedEscapeString("ouml"); // "ö"
112  addAllowedEscapeString("otilde"); // "õ"
113  addAllowedEscapeString("Uacute"); // "Ú"
114  addAllowedEscapeString("Ugrave"); // "Ù"
115  addAllowedEscapeString("Ucirc"); // "Û"
116  addAllowedEscapeString("Uuml"); // "Ü"
117  addAllowedEscapeString("uacute"); // "ú"
118  addAllowedEscapeString("ugrave"); // "ù"
119  addAllowedEscapeString("ucirc"); // "û"
120  addAllowedEscapeString("uuml"); // "ü"
121  addAllowedEscapeString("Yacute"); // "Ý"
122  addAllowedEscapeString("yacute"); // "ý"
123  addAllowedEscapeString("yuml"); // "ÿ"
124 
125  addAllowedEscapeString("deg"); // "°"
126  addAllowedEscapeString("plusmn"); // "±"
127  addAllowedEscapeString("sup2"); // "²"
128  addAllowedEscapeString("sup3"); // "³"
129  addAllowedEscapeString("sup1"); // "¹"
130  addAllowedEscapeString("nbsp"); // non-breaking space; repeats the "nbsp" entry above. NOTE(review): the old comment showed "º", which is "ordm" (registered further down) -- confirm whether another entity was intended here
131  addAllowedEscapeString("pound"); // "£"
132  addAllowedEscapeString("cent"); // "¢"
133  addAllowedEscapeString("frac14"); // "¼"
134  addAllowedEscapeString("frac12"); // "½"
135  addAllowedEscapeString("frac34"); // "¾"
136  addAllowedEscapeString("iquest"); // "¿"
137  addAllowedEscapeString("iexcl"); // "¡"
138  addAllowedEscapeString("ETH"); // "Ð"
139  addAllowedEscapeString("eth"); // "ð"
140  addAllowedEscapeString("THORN"); // "Þ"
141  addAllowedEscapeString("thorn"); // "þ"
142  addAllowedEscapeString("AElig"); // "Æ"
143  addAllowedEscapeString("aelig"); // "æ"
144  addAllowedEscapeString("Oslash"); // "Ø"
145  addAllowedEscapeString("curren"); // "¤"
146  addAllowedEscapeString("Ccedil"); // "Ç"
147  addAllowedEscapeString("ccedil"); // "ç"
148  addAllowedEscapeString("szlig"); // "ß"
149  addAllowedEscapeString("Ntilde"); // "Ñ"
150  addAllowedEscapeString("ntilde"); // "ñ"
151  addAllowedEscapeString("yen"); // "¥"
152  addAllowedEscapeString("not"); // "¬"
153  addAllowedEscapeString("ordf"); // "ª"
154  addAllowedEscapeString("uml"); // "¨"
155  addAllowedEscapeString("shy"); // soft hyphen (U+00AD)
156  addAllowedEscapeString("macr"); // "¯"
157 
158  addAllowedEscapeString("micro"); // "µ"
159  addAllowedEscapeString("middot"); // "·"
160  addAllowedEscapeString("cedil"); // "¸"
161  addAllowedEscapeString("ordm"); // "º"
162  addAllowedEscapeString("times"); // "×"
163  addAllowedEscapeString("divide"); // "÷"
164  addAllowedEscapeString("oslash"); // "ø"
165 
166  setTokenCaseSensitive(true);
// The opening <scripture> substitution is disabled; handleToken() emits
// "<i>"/"</i>" for scripture tags that are not substituted here.
167 // addTokenSubstitute("scripture", "<i> ");
168  addTokenSubstitute("/scripture", "</i> ");
169 
// Footnote markers are rendered without the note's "n" label by default.
170  renderNoteNumbers = false;
171 }
172 
173 
174 bool ThMLXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
175  if (!substituteToken(buf, token)) { // manually process if it wasn't a simple substitution
176  MyUserData *u = (MyUserData *)userData;
177 
178  XMLTag tag(token);
179  if ((!tag.isEndTag()) && (!tag.isEmpty()))
180  u->startTag = tag;
181 
182  if (tag.getName() && !strcmp(tag.getName(), "sync")) {
183  SWBuf value = tag.getAttribute("value");
184  if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "morph")) { //&gt;
185  if(value.length())
186  buf.appendFormatted("<small><em class=\"morph\">(<a href=\"passagestudy.jsp?action=showMorph&type=Greek&value=%s\" class=\"morph\">%s</a>)</em></small>",
187  URL::encode(value.c_str()).c_str(),
188  value.c_str());
189  }
190  else if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "lemma")) { //&gt;
191  if(value.length())
192  // empty "type=" is deliberate.
193  buf.appendFormatted("<small><em class=\"strongs\">&lt;<a href=\"passagestudy.jsp?action=showStrongs&type=&value=%s\" class=\"strongs\">%s</a>&gt;</em></small>",
194  URL::encode(value.c_str()).c_str(),
195  value.c_str());
196  }
197  else if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "Strongs")) {
198  char ch = *value;
199  value<<1;
200  buf.appendFormatted("<small><em class=\"strongs\">&lt;<a href=\"passagestudy.jsp?action=showStrongs&type=%s&value=%s\" class=\"strongs\">",
201  ((ch == 'H') ? "Hebrew" : "Greek"),
202  URL::encode(value.c_str()).c_str());
203  buf += (value.length()) ? value.c_str() : "";
204  buf += "</a>&gt;</em></small>";
205  }
206  else if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "Dict")) {
207  buf += (tag.isEndTag() ? "</b>" : "<b>");
208  }
209 
210  }
211  // <note> tag
212  else if (!strcmp(tag.getName(), "note")) {
213  if (!tag.isEndTag()) {
214  SWBuf type = tag.getAttribute("type");
215 
216  // for backward compatibility
217  if (type == "x-cross-ref") type = "crossReference";
218 
219  SWBuf subType = tag.getAttribute("subType");
220  SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
221  SWBuf noteName = tag.getAttribute("n");
222  SWBuf classExtras = "";
223 
224  if (type.size()) {
225  classExtras.append(" ").append(type);
226  }
227  if (subType.size()) {
228  classExtras.append(" ").append(subType);
229  }
230  if (!tag.isEmpty()) {
231  if (u->vkey) {
232  // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt.
233  char ch = (type == "crossReference" ? 'x':'n');
234  buf.appendFormatted("<a class=\"noteMarker%s\" href=\"passagestudy.jsp?action=showNote&type=%c&value=%s&module=%s&passage=%s\"><small><sup class=\"%c\">*%c%s</sup></small></a>",
235  classExtras.c_str(),
236  ch,
237  URL::encode(footnoteNumber.c_str()).c_str(),
238  URL::encode(u->version.c_str()).c_str(),
239  URL::encode(u->vkey->getText()).c_str(),
240  ch,
241  ch,
242  (renderNoteNumbers ? noteName.c_str() : ""));
243  }
244  else {
245  char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
246  buf.appendFormatted("<a class=\"noteMarker%s\" href=\"passagestudy.jsp?action=showNote&type=%c&value=%s&module=%s&passage=%s\"><small><sup class=\"%c\">*%c%s</sup></small></a>",
247  classExtras.c_str(),
248  ch,
249  URL::encode(footnoteNumber.c_str()).c_str(),
250  URL::encode(u->version.c_str()).c_str(),
251  URL::encode(u->key->getText()).c_str(),
252  ch,
253  ch,
254  (renderNoteNumbers ? noteName.c_str() : ""));
255  }
256  u->suspendTextPassThru = true;
257  }
258  }
259  if (tag.isEndTag()) {
260  u->suspendTextPassThru = false;
261  }
262  }
263  else if (!strcmp(tag.getName(), "scripture")) {
264  buf += (tag.isEndTag() ? "</i>" : "<i>");
265  }
266  // <scripRef> tag
267  else if (!strcmp(tag.getName(), "scripRef")) {
268  if (!tag.isEndTag()) {
269  if (!tag.isEmpty()) {
270  u->suspendTextPassThru = true;
271  }
272  }
273  if (tag.isEndTag()) { // </scripRef>
274  if (!u->isBiblicalText) {
275  SWBuf refList = u->startTag.getAttribute("passage");
276  if (!refList.length())
277  refList = u->lastTextNode;
278  SWBuf version = tag.getAttribute("version");
279 
280  buf.appendFormatted("<a href=\"passagestudy.jsp?action=showRef&type=scripRef&value=%s&module=%s\">",
281  (refList.length()) ? URL::encode(refList.c_str()).c_str() : "",
282  (version.length()) ? URL::encode(version.c_str()).c_str() : "");
283  buf += u->lastTextNode.c_str();
284  buf += "</a>";
285  }
286  else {
287  SWBuf footnoteNumber = u->startTag.getAttribute("swordFootnote");
288  SWBuf noteName = tag.getAttribute("n");
289  if (u->vkey) {
290  // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt.
291  //buf.appendFormatted("<a href=\"noteID=%s.x.%s\"><small><sup>*x</sup></small></a> ", u->vkey->getText(), footnoteNumber.c_str());
292  buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=x&value=%s&module=%s&passage=%s\"><small><sup class=\"x\">*x%s</sup></small></a>",
293  URL::encode(footnoteNumber.c_str()).c_str(),
294  URL::encode(u->version.c_str()).c_str(),
295  URL::encode(u->vkey->getText()).c_str(),
296  (renderNoteNumbers ? noteName.c_str() : ""));
297  }
298  }
299 
300  // let's let text resume to output again
301  u->suspendTextPassThru = false;
302  }
303  }
304  else if (tag.getName() && !strcmp(tag.getName(), "div")) {
305  if (tag.isEndTag() && u->secHeadLevel) {
306  buf += "</h";
307  buf += u->secHeadLevel;
308  buf += ">";
309  u->secHeadLevel = 0;
310  }
311  else if (tag.getAttribute("class")) {
312  if (!stricmp(tag.getAttribute("class"), "sechead")) {
313  u->secHeadLevel = '3';
314  buf += "<h3>";
315  }
316  else if (!stricmp(tag.getAttribute("class"), "title")) {
317  u->secHeadLevel = '2';
318  buf += "<h2>";
319  }
320  else {
321  buf += tag;
322  }
323  }
324  else {
325  buf += tag;
326  }
327  }
328  else if (tag.getName() && (!strcmp(tag.getName(), "img") || !strcmp(tag.getName(), "image"))) {
329  const char *src = strstr(token, "src");
330  if (!src) // assert we have a src attribute
331  return false;
332 
333  const char *c, *d;
334  if (((c = strchr(src+3, '"')) == NULL) ||
335  ((d = strchr( ++c , '"')) == NULL)) // identify endpoints.
336  return false; // abandon hope.
337 
338  SWBuf imagename = "file:";
339  if (*c == '/') // as below, inside for loop.
340  imagename += userData->module->getConfigEntry("AbsoluteDataPath");
341  while (c != d) // move bits into the name.
342  imagename += *(c++);
343 
344  // images become clickable, if the UI supports showImage.
345  buf.appendFormatted("<a href=\"passagestudy.jsp?action=showImage&value=%s&module=%s\"><",
346  URL::encode(imagename.c_str()).c_str(),
347  URL::encode(u->version.c_str()).c_str());
348 
349  for (c = token; *c; c++) {
350  if ((*c == '/') && (*(c+1) == '\0'))
351  continue;
352  if (c == src) {
353  for (;((*c) && (*c != '"')); c++)
354  buf += *c;
355 
356  if (!*c) { c--; continue; }
357 
358  buf += '"';
359  if (*(c+1) == '/') {
360  buf += "file:";
361  buf += userData->module->getConfigEntry("AbsoluteDataPath");
362  if (buf[buf.length()-2] == '/')
363  c++; // skip '/'
364  }
365  continue;
366  }
367  buf += *c;
368  }
369  buf += " border=0 /></a>";
370  }
371  else {
372  buf += '<';
373  /*for (const char *tok = token; *tok; tok++)
374  buf += *tok;*/
375  buf += token;
376  buf += '>';
377  //return false; // we still didn't handle token
378  }
379  }
380  return true;
381 }
382 
383 
const char * getName() const
Definition: swmodule.cpp:204
#define SWORD_NAMESPACE_START
Definition: defs.h:39
SWBuf & appendFormatted(const char *format,...)
Definition: swbuf.cpp:81
void setTokenEnd(const char *tokenEnd)
void addAllowedEscapeString(const char *findString)
Definition: swbuf.h:47
unsigned long length() const
Definition: swbuf.h:197
const char * getType() const
Definition: swmodule.cpp:232
virtual const char * getConfigEntry(const char *key) const
Definition: swmodule.cpp:1159
virtual bool handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData)
Definition: thmlxhtml.cpp:174
virtual const char * getHeader() const
Definition: thmlxhtml.cpp:34
const SWModule * module
Definition: swbasicfilter.h:42
const char * getName() const
Definition: utilxml.h:58
SWText * module
Definition: osis2mod.cpp:105
Definition: utilxml.h:38
void setTokenCaseSensitive(bool val)
void setEscapeStart(const char *escStart)
bool isEmpty() const
Definition: utilxml.h:60
bool substituteToken(SWBuf &buf, const char *token)
int stricmp(const char *s1, const char *s2)
Definition: utilstr.cpp:194
const VerseKey * vkey
Definition: swbasicfilter.h:44
virtual const char * getText() const
Definition: versekey.cpp:1242
virtual const char * getText() const
Definition: swkey.cpp:184
void setTokenStart(const char *tokenStart)
return NULL
Definition: regex.c:7953
const char * c_str() const
Definition: swbuf.h:158
SWBuf & append(const char *str, long max=-1)
Definition: swbuf.h:274
void setPassThruNumericEscapeString(bool val)
unsigned long size() const
Definition: swbuf.h:185
const SWKey * key
Definition: swbasicfilter.h:43
const char * getAttribute(const char *attribName, int partNum=-1, char partSplit= '|') const
Definition: utilxml.cpp:230
bool renderNoteNumbers
Definition: thmlxhtml.h:38
void setEscapeStringCaseSensitive(bool val)
bool isEndTag(const char *eID=0) const
Definition: utilxml.cpp:323
void addTokenSubstitute(const char *findString, const char *replaceString)
MyUserData(const SWModule *module, const SWKey *key)
Definition: thmlxhtml.cpp:40
void setEscapeEnd(const char *escEnd)
#define SWORD_NAMESPACE_END
Definition: defs.h:40
Definition: swkey.h:77
static const SWBuf encode(const char *urlText)
Definition: url.cpp:231