The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
thmlhtmlhref.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  *
3  * thmlhtmlhref.cpp - ThML to HTML filter with hrefs
4  *
5  * $Id: thmlhtmlhref.cpp 3547 2017-12-10 05:06:48Z scribe $
6  *
7  * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org)
8  * CrossWire Bible Society
9  * P. O. Box 2528
10  * Tempe, AZ 85280-2528
11  *
12  * This program is free software; you can redistribute it and/or modify it
13  * under the terms of the GNU General Public License as published by the
14  * Free Software Foundation version 2.
15  *
16  * This program is distributed in the hope that it will be useful, but
17  * WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * General Public License for more details.
20  *
21  */
22 
23 #include <stdlib.h>
24 #include <thmlhtmlhref.h>
25 #include <swmodule.h>
26 #include <utilxml.h>
27 #include <utilstr.h>
28 #include <versekey.h>
29 #include <url.h>
30 
31 
33 
34 
36  isBiblicalText = false;
37  inSecHead = false;
38  if (module) {
39  version = module->getName();
40  isBiblicalText = (!strcmp(module->getType(), "Biblical Texts"));
41  }
42 }
43 
44 
46  setTokenStart("<");
47  setTokenEnd(">");
48 
49  setEscapeStart("&");
50  setEscapeEnd(";");
51 
54 
55  addAllowedEscapeString("quot");
59 
60  addAllowedEscapeString("nbsp");
61  addAllowedEscapeString("brvbar"); // "¦"
62  addAllowedEscapeString("sect"); // "§"
63  addAllowedEscapeString("copy"); // "©"
64  addAllowedEscapeString("laquo"); // "«"
65  addAllowedEscapeString("reg"); // "®"
66  addAllowedEscapeString("acute"); // "´"
67  addAllowedEscapeString("para"); // "¶"
68  addAllowedEscapeString("raquo"); // "»"
69 
70  addAllowedEscapeString("Aacute"); // "Á"
71  addAllowedEscapeString("Agrave"); // "À"
72  addAllowedEscapeString("Acirc"); // "Â"
73  addAllowedEscapeString("Auml"); // "Ä"
74  addAllowedEscapeString("Atilde"); // "Ã"
75  addAllowedEscapeString("Aring"); // "Å"
76  addAllowedEscapeString("aacute"); // "á"
77  addAllowedEscapeString("agrave"); // "à"
78  addAllowedEscapeString("acirc"); // "â"
79  addAllowedEscapeString("auml"); // "ä"
80  addAllowedEscapeString("atilde"); // "ã"
81  addAllowedEscapeString("aring"); // "å"
82  addAllowedEscapeString("Eacute"); // "É"
83  addAllowedEscapeString("Egrave"); // "È"
84  addAllowedEscapeString("Ecirc"); // "Ê"
85  addAllowedEscapeString("Euml"); // "Ë"
86  addAllowedEscapeString("eacute"); // "é"
87  addAllowedEscapeString("egrave"); // "è"
88  addAllowedEscapeString("ecirc"); // "ê"
89  addAllowedEscapeString("euml"); // "ë"
90  addAllowedEscapeString("Iacute"); // "Í"
91  addAllowedEscapeString("Igrave"); // "Ì"
92  addAllowedEscapeString("Icirc"); // "Î"
93  addAllowedEscapeString("Iuml"); // "Ï"
94  addAllowedEscapeString("iacute"); // "í"
95  addAllowedEscapeString("igrave"); // "ì"
96  addAllowedEscapeString("icirc"); // "î"
97  addAllowedEscapeString("iuml"); // "ï"
98  addAllowedEscapeString("Oacute"); // "Ó"
99  addAllowedEscapeString("Ograve"); // "Ò"
100  addAllowedEscapeString("Ocirc"); // "Ô"
101  addAllowedEscapeString("Ouml"); // "Ö"
102  addAllowedEscapeString("Otilde"); // "Õ"
103  addAllowedEscapeString("oacute"); // "ó"
104  addAllowedEscapeString("ograve"); // "ò"
105  addAllowedEscapeString("ocirc"); // "ô"
106  addAllowedEscapeString("ouml"); // "ö"
107  addAllowedEscapeString("otilde"); // "õ"
108  addAllowedEscapeString("Uacute"); // "Ú"
109  addAllowedEscapeString("Ugrave"); // "Ù"
110  addAllowedEscapeString("Ucirc"); // "Û"
111  addAllowedEscapeString("Uuml"); // "Ü"
112  addAllowedEscapeString("uacute"); // "ú"
113  addAllowedEscapeString("ugrave"); // "ù"
114  addAllowedEscapeString("ucirc"); // "û"
115  addAllowedEscapeString("uuml"); // "ü"
116  addAllowedEscapeString("Yacute"); // "Ý"
117  addAllowedEscapeString("yacute"); // "ý"
118  addAllowedEscapeString("yuml"); // "ÿ"
119 
120  addAllowedEscapeString("deg"); // "°"
121  addAllowedEscapeString("plusmn"); // "±"
122  addAllowedEscapeString("sup2"); // "²"
123  addAllowedEscapeString("sup3"); // "³"
124  addAllowedEscapeString("sup1"); // "¹"
125  addAllowedEscapeString("nbsp"); // non-breaking space (repeats the earlier "nbsp" entry; "º" is "ordm", registered further down)
126  addAllowedEscapeString("pound"); // "£"
127  addAllowedEscapeString("cent"); // "¢"
128  addAllowedEscapeString("frac14"); // "¼"
129  addAllowedEscapeString("frac12"); // "½"
130  addAllowedEscapeString("frac34"); // "¾"
131  addAllowedEscapeString("iquest"); // "¿"
132  addAllowedEscapeString("iexcl"); // "¡"
133  addAllowedEscapeString("ETH"); // "Ð"
134  addAllowedEscapeString("eth"); // "ð"
135  addAllowedEscapeString("THORN"); // "Þ"
136  addAllowedEscapeString("thorn"); // "þ"
137  addAllowedEscapeString("AElig"); // "Æ"
138  addAllowedEscapeString("aelig"); // "æ"
139  addAllowedEscapeString("Oslash"); // "Ø"
140  addAllowedEscapeString("curren"); // "¤"
141  addAllowedEscapeString("Ccedil"); // "Ç"
142  addAllowedEscapeString("ccedil"); // "ç"
143  addAllowedEscapeString("szlig"); // "ß"
144  addAllowedEscapeString("Ntilde"); // "Ñ"
145  addAllowedEscapeString("ntilde"); // "ñ"
146  addAllowedEscapeString("yen"); // "¥"
147  addAllowedEscapeString("not"); // "¬"
148  addAllowedEscapeString("ordf"); // "ª"
149  addAllowedEscapeString("uml"); // "¨"
150  addAllowedEscapeString("shy"); // "­"
151  addAllowedEscapeString("macr"); // "¯"
152 
153  addAllowedEscapeString("micro"); // "µ"
154  addAllowedEscapeString("middot"); // "·"
155  addAllowedEscapeString("cedil"); // "¸"
156  addAllowedEscapeString("ordm"); // "º"
157  addAllowedEscapeString("times"); // "×"
158  addAllowedEscapeString("divide"); // "÷"
159  addAllowedEscapeString("oslash"); // "ø"
160 
161  setTokenCaseSensitive(true);
162 // addTokenSubstitute("scripture", "<i> ");
163  addTokenSubstitute("/scripture", "</i> ");
164 
165  renderNoteNumbers = false;
166 }
167 
168 
169 bool ThMLHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
170  if (!substituteToken(buf, token)) { // manually process if it wasn't a simple substitution
171  MyUserData *u = (MyUserData *)userData;
172 
173  XMLTag tag(token);
174  if ((!tag.isEndTag()) && (!tag.isEmpty()))
175  u->startTag = tag;
176 
177  if (tag.getName() && !strcmp(tag.getName(), "sync")) {
178  SWBuf value = tag.getAttribute("value");
179  if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "morph")) { //&gt;
180  if(value.length())
181  buf.appendFormatted("<small><em class=\"morph\">(<a href=\"passagestudy.jsp?action=showMorph&type=Greek&value=%s\" class=\"morph\">%s</a>)</em></small>",
182  URL::encode(value.c_str()).c_str(),
183  value.c_str());
184  }
185  else if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "lemma")) { //&gt;
186  if(value.length())
187  // empty "type=" is deliberate.
188  buf.appendFormatted("<small><em class=\"strongs\">&lt;<a href=\"passagestudy.jsp?action=showStrongs&type=&value=%s\" class=\"strongs\">%s</a>&gt;</em></small>",
189  URL::encode(value.c_str()).c_str(),
190  value.c_str());
191  }
192  else if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "Strongs")) {
193  char ch = *value;
194  value<<1;
195  buf.appendFormatted("<small><em class=\"strongs\">&lt;<a href=\"passagestudy.jsp?action=showStrongs&type=%s&value=%s\" class=\"strongs\">",
196  ((ch == 'H') ? "Hebrew" : "Greek"),
197  URL::encode(value.c_str()).c_str());
198  buf += (value.length()) ? value.c_str() : "";
199  buf += "</a>&gt;</em></small>";
200  }
201  else if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "Dict")) {
202  buf += (tag.isEndTag() ? "</b>" : "<b>");
203  }
204 
205  }
206  // <note> tag
207  else if (!strcmp(tag.getName(), "note")) {
208  if (!tag.isEndTag()) {
209  if (!tag.isEmpty()) {
210  SWBuf type = tag.getAttribute("type");
211  SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
212  SWBuf noteName = tag.getAttribute("n");
213  if (u->vkey) {
214  // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt.
215  char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
216  buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=%c&value=%s&module=%s&passage=%s\"><small><sup class=\"%c\">*%c%s</sup></small></a>",
217  ch,
218  URL::encode(footnoteNumber.c_str()).c_str(),
219  URL::encode(u->version.c_str()).c_str(),
220  URL::encode(u->vkey->getText()).c_str(),
221  ch,
222  ch,
223  (renderNoteNumbers ? URL::encode(noteName.c_str()).c_str() : ""));
224  }
225  else {
226  char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
227  buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=%c&value=%s&module=%s&passage=%s\"><small><sup class=\"%c\">*%c%s</sup></small></a>",
228  ch,
229  URL::encode(footnoteNumber.c_str()).c_str(),
230  URL::encode(u->version.c_str()).c_str(),
231  URL::encode(u->key->getText()).c_str(),
232  ch,
233  ch,
234  (renderNoteNumbers ? URL::encode(noteName.c_str()).c_str() : ""));
235  }
236  u->suspendTextPassThru = true;
237  }
238  }
239  if (tag.isEndTag()) {
240  u->suspendTextPassThru = false;
241  }
242  }
243  else if (!strcmp(tag.getName(), "scripture")) {
244  buf += (tag.isEndTag() ? "</i>" : "<i>");
245  }
246  // <scripRef> tag
247  else if (!strcmp(tag.getName(), "scripRef")) {
248  if (!tag.isEndTag()) {
249  if (!tag.isEmpty()) {
250  u->suspendTextPassThru = true;
251  }
252  }
253  if (tag.isEndTag()) { // </scripRef>
254  if (!u->isBiblicalText) {
255  SWBuf refList = u->startTag.getAttribute("passage");
256  if (!refList.length())
257  refList = u->lastTextNode;
258  SWBuf version = tag.getAttribute("version");
259 
260  buf.appendFormatted("<a href=\"passagestudy.jsp?action=showRef&type=scripRef&value=%s&module=%s\">",
261  (refList.length()) ? URL::encode(refList.c_str()).c_str() : "",
262  (version.length()) ? URL::encode(version.c_str()).c_str() : "");
263  buf += u->lastTextNode.c_str();
264  buf += "</a>";
265  }
266  else {
267  SWBuf footnoteNumber = u->startTag.getAttribute("swordFootnote");
268  SWBuf noteName = tag.getAttribute("n");
269  if (u->vkey) {
270  // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt.
271  //buf.appendFormatted("<a href=\"noteID=%s.x.%s\"><small><sup>*x</sup></small></a> ", u->vkey->getText(), footnoteNumber.c_str());
272  buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=x&value=%s&module=%s&passage=%s\"><small><sup class=\"x\">*x%s</sup></small></a>",
273  URL::encode(footnoteNumber.c_str()).c_str(),
274  URL::encode(u->version.c_str()).c_str(),
275  URL::encode(u->vkey->getText()).c_str(),
276  (renderNoteNumbers ? URL::encode(noteName.c_str()).c_str() : ""));
277  }
278  }
279 
280  // let's let text resume to output again
281  u->suspendTextPassThru = false;
282  }
283  }
284  else if (tag.getName() && !strcmp(tag.getName(), "div")) {
285  if (tag.isEndTag() && u->inSecHead) {
286  buf += "</i></b><br />";
287  u->inSecHead = false;
288  }
289  else if (tag.getAttribute("class")) {
290  if (!stricmp(tag.getAttribute("class"), "sechead")) {
291  u->inSecHead = true;
292  buf += "<br /><b><i>";
293  }
294  else if (!stricmp(tag.getAttribute("class"), "title")) {
295  u->inSecHead = true;
296  buf += "<br /><b><i>";
297  }
298  else {
299  buf += tag;
300  }
301  }
302  else {
303  buf += tag;
304  }
305  }
306  else if (tag.getName() && (!strcmp(tag.getName(), "img") || !strcmp(tag.getName(), "image"))) {
307  const char *src = strstr(token, "src");
308  if (!src) // assert we have a src attribute
309  return false;
310 
311  const char *c, *d;
312  if (((c = strchr(src+3, '"')) == NULL) ||
313  ((d = strchr( ++c , '"')) == NULL)) // identify endpoints.
314  return false; // abandon hope.
315 
316  SWBuf imagename = "file:";
317  if (*c == '/') // as below, inside for loop.
318  imagename += userData->module->getConfigEntry("AbsoluteDataPath");
319  while (c != d) // move bits into the name.
320  imagename += *(c++);
321 
322  // images become clickable, if the UI supports showImage.
323  buf.appendFormatted("<a href=\"passagestudy.jsp?action=showImage&value=%s&module=%s\"><",
324  URL::encode(imagename.c_str()).c_str(),
325  URL::encode(u->version.c_str()).c_str());
326 
327  for (c = token; *c; c++) {
328  if ((*c == '/') && (*(c+1) == '\0'))
329  continue;
330  if (c == src) {
331  for (;((*c) && (*c != '"')); c++)
332  buf += *c;
333 
334  if (!*c) { c--; continue; }
335 
336  buf += '"';
337  if (*(c+1) == '/') {
338  buf += "file:";
339  buf += userData->module->getConfigEntry("AbsoluteDataPath");
340  if (buf[buf.length()-2] == '/')
341  c++; // skip '/'
342  }
343  continue;
344  }
345  buf += *c;
346  }
347  buf += " border=0 /></a>";
348  }
349  else {
350  buf += '<';
351  /*for (const char *tok = token; *tok; tok++)
352  buf += *tok;*/
353  buf += token;
354  buf += '>';
355  //return false; // we still didn't handle token
356  }
357  }
358  return true;
359 }
360 
361 
const char * getName() const
Definition: swmodule.cpp:204
#define SWORD_NAMESPACE_START
Definition: defs.h:39
SWBuf & appendFormatted(const char *format,...)
Definition: swbuf.cpp:81
void setTokenEnd(const char *tokenEnd)
void addAllowedEscapeString(const char *findString)
Definition: swbuf.h:47
unsigned long length() const
Definition: swbuf.h:197
const char * getType() const
Definition: swmodule.cpp:232
virtual const char * getConfigEntry(const char *key) const
Definition: swmodule.cpp:1159
const SWModule * module
Definition: swbasicfilter.h:42
const char * getName() const
Definition: utilxml.h:58
SWText * module
Definition: osis2mod.cpp:105
Definition: utilxml.h:38
void setTokenCaseSensitive(bool val)
void setEscapeStart(const char *escStart)
bool isEmpty() const
Definition: utilxml.h:60
bool substituteToken(SWBuf &buf, const char *token)
int stricmp(const char *s1, const char *s2)
Definition: utilstr.cpp:194
const VerseKey * vkey
Definition: swbasicfilter.h:44
virtual const char * getText() const
Definition: versekey.cpp:1242
virtual const char * getText() const
Definition: swkey.cpp:184
void setTokenStart(const char *tokenStart)
return NULL
Definition: regex.c:7953
MyUserData(const SWModule *module, const SWKey *key)
const char * c_str() const
Definition: swbuf.h:158
void setPassThruNumericEscapeString(bool val)
const SWKey * key
Definition: swbasicfilter.h:43
const char * getAttribute(const char *attribName, int partNum=-1, char partSplit= '|') const
Definition: utilxml.cpp:230
bool renderNoteNumbers
Definition: thmlhtmlhref.h:38
void setEscapeStringCaseSensitive(bool val)
bool isEndTag(const char *eID=0) const
Definition: utilxml.cpp:323
void addTokenSubstitute(const char *findString, const char *replaceString)
virtual bool handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData)
void setEscapeEnd(const char *escEnd)
#define SWORD_NAMESPACE_END
Definition: defs.h:40
Definition: swkey.h:77
static const SWBuf encode(const char *urlText)
Definition: url.cpp:231