The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
teixhtml.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  *
3  * teixhtml.cpp - TEI to XHTML filter
4  *
5  * $Id: teixhtml.cpp 3807 2020-09-27 12:59:54Z scribe $
6  *
7  * Copyright 2012-2013 CrossWire Bible Society (http://www.crosswire.org)
8  * CrossWire Bible Society
9  * P. O. Box 2528
10  * Tempe, AZ 85280-2528
11  *
12  * This program is free software; you can redistribute it and/or modify it
13  * under the terms of the GNU General Public License as published by the
14  * Free Software Foundation version 2.
15  *
16  * This program is distributed in the hope that it will be useful, but
17  * WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * General Public License for more details.
20  *
21  */
22 
23 #include <stdlib.h>
24 #include <ctype.h>
25 #include <teixhtml.h>
26 #include <utilxml.h>
27 #include <swmodule.h>
28 #include <url.h>
29 #include <iostream>
30 
31 
33 
34 
35 const char *TEIXHTML::getHeader() const {
36  // <pos>, <gen>, <case>, <gram>, <number>, <mood>, <pron>, <def> <tr> <orth> <etym> <usg>
37  const static char *header = "\n\
38  .entryFree, .form, .etym, .def, .usg, .quote {display:block;}\n\
39  .pron, .pos, .oVar, .ref, {display:inline}\n\
40  [type=headword] {font-weight:bold; font-variant:small-caps; text-decoration:underline;}\n\
41  [type=derivative] {font-weight:bold; font-variant:small-caps;}\n\
42  [rend=italic] {font-style:italic;}\n\
43  [rend=bold] {font-weight:bold;}\n\
44  [rend=small-caps] {font-variant:small-caps}\n\
45  .pos:before {content: \"Pos.: \"; font-weight:bold;}\n\
46  .pron:before {content:\" \\\\ \";}\n\
47  .pron:after {content:\" \\\\ \";}\n\
48  .etym:before {content:\"Etym.:\"; display:block; font-weight:bold;}\n\
49  .usg:before {content:\"Usg.:\"; display:block; font-weight:bold;}\n\
50  .def:before {content:\"Def.:\" display:block; font-weight:bold;}\n\
51  .quote {background-color:#cfcfdf; padding:0.3em; margin:0.5em; border-width:1px; border-style:solid;}\n\
52  .cit:before {content:\"quote:\" ; display:block; margin-top:0.5em; font-size:small;}\n\
53  .cit {align:center;}\n\
54  .persName:before {content:\" (\"; font-size:small;}\n\
55  .persName:after {content:\") \"; font-size:small;}\n\
56  .persName {font-size:small;}\n\
57  .number {font-style:bold;}\n\
58  .def {font-style:bold;}\n\
59  ";
60  return header;
61 }
62 
63 
64 
66  isBiblicalText = false;
67  if (module) {
68  version = module->getName();
69  isBiblicalText = (!strcmp(module->getType(), "Biblical Texts"));
70  }
71 }
72 
73 
75  setTokenStart("<");
76  setTokenEnd(">");
77 
78  setEscapeStart("&");
79  setEscapeEnd(";");
80 
82 
83  addAllowedEscapeString("quot");
84  addAllowedEscapeString("apos");
88 
90 
91  renderNoteNumbers = false;
92 }
93 
94 bool TEIXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
95  // manually process if it wasn't a simple substitution
96  if (!substituteToken(buf, token)) {
97  MyUserData *u = (MyUserData *)userData;
98  XMLTag tag(token);
99 
100  if (!strcmp(tag.getName(), "p")) {
101  if ((!tag.isEndTag()) && (!tag.isEmpty())) { // non-empty start tag
102  buf += "<!P><br />";
103  }
104  else if (tag.isEndTag()) { // end tag
105  buf += "<!/P><br />";
106  //userData->supressAdjacentWhitespace = true;
107  }
108  else { // empty paragraph break marker
109  buf += "<!P><br />";
110  //userData->supressAdjacentWhitespace = true;
111  }
112  }
113 
114  // <hi>
115  else if (!strcmp(tag.getName(), "hi")) {
116  if ((!tag.isEndTag()) && (!tag.isEmpty())) {
117  SWBuf rend = tag.getAttribute("rend");
118 
119  u->lastHi = rend;
120  if (rend == "italic" || rend == "ital")
121  buf += "<i>";
122  else if (rend == "bold")
123  buf += "<b>";
124  else if (rend == "super" || rend == "sup")
125  buf += "<sup>";
126  else if (rend == "sub")
127  buf += "<sub>";
128  else if (rend == "overline")
129  buf += "<span style=\"text-decoration:overline\">";
130 
131  }
132  else if (tag.isEndTag()) {
133  SWBuf rend = u->lastHi;
134  if (rend == "italic" || rend == "ital")
135  buf += "</i>";
136  else if (rend == "bold")
137  buf += "</b>";
138  else if (rend == "super" || rend == "sup")
139  buf += "</sup>";
140  else if (rend == "sub")
141  buf += "</sub>";
142  else if (rend == "overline")
143  buf += "</span>";
144  }
145  }
146 
147  // <entryFree>
148  else if (!strcmp(tag.getName(), "entryFree")) {
149  if ((!tag.isEndTag()) && (!tag.isEmpty())) {
150  SWBuf n = tag.getAttribute("n");
151  if (n != "") {
152  buf += "<span class=\"entryFree\">";
153  buf += n;
154  buf += "</span>";
155  }
156  }
157  }
158 
159  // <sense>
160  else if (!strcmp(tag.getName(), "sense")) {
161  if ((!tag.isEndTag()) && (!tag.isEmpty())) {
162  SWBuf n = tag.getAttribute("n");
163  buf += "<br/><span class=\"sense";
164  if (n != "") {
165  buf += "\" n=\"";
166  buf += n;
167 
168 
169  }
170  buf += "\">";
171  }
172  else if (tag.isEndTag()) {
173  buf += "</span> ";
174  }
175  }
176 
177  // <div>
178  else if (!strcmp(tag.getName(), "div")) {
179 
180  if ((!tag.isEndTag()) && (!tag.isEmpty())) {
181  buf += "<!P>";
182  }
183  else if (tag.isEndTag()) {
184  }
185  }
186 
187  // <lb.../>
188  else if (!strcmp(tag.getName(), "lb")) {
189  buf += "<br />";
190  }
191 
192  // <pos>, <gen>, <case>, <gram>, <number>, <mood>, <pron>, <def> <tr> <orth> <etym> <usg>
193  else if (!strcmp(tag.getName(), "pos") ||
194  !strcmp(tag.getName(), "gen") ||
195  !strcmp(tag.getName(), "case") ||
196  !strcmp(tag.getName(), "gram") ||
197  !strcmp(tag.getName(), "number") ||
198  !strcmp(tag.getName(), "pron") ||
199  !strcmp(tag.getName(), "def") ||
200  !strcmp(tag.getName(), "tr") ||
201  !strcmp(tag.getName(), "orth") ||
202  !strcmp(tag.getName(), "etym") ||
203  !strcmp(tag.getName(), "usg") ||
204  !strcmp(tag.getName(), "quote")||
205  !strcmp(tag.getName(), "cit")||
206  !strcmp(tag.getName(), "persName")||
207  !strcmp(tag.getName(), "oVar"))
208  {
209  if ((!tag.isEndTag()) && (!tag.isEmpty())) {
210  buf += "<span class=\"";
211  buf += tag.getName();
212  if (tag.getAttribute("type")) {
213  buf += "\" type =\"";
214  buf += tag.getAttribute("type");
215  }
216  if (tag.getAttribute("rend")) {
217  buf += "\" rend =\"";
218  buf += tag.getAttribute("rend");
219  }
220  buf += "\">";
221  }
222  else if (tag.isEndTag()) {
223  buf += "</span>";
224  }
225  }
226 
227  else if (!strcmp(tag.getName(), "ref")) {
228  if (!tag.isEndTag()) {
229  u->suspendTextPassThru = true;
230  SWBuf target;
231  SWBuf work;
232  SWBuf ref;
233 
234  int was_osisref = false;
235  if(tag.getAttribute("osisRef"))
236  {
237  target += tag.getAttribute("osisRef");
238  was_osisref=true;
239  }
240  else if(tag.getAttribute("target"))
241  target += tag.getAttribute("target");
242 
243  if(target.size())
244  {
245  const char* the_ref = strchr(target, ':');
246 
247  if(!the_ref) {
248  // No work
249  ref = target;
250  }
251  else {
252  // Compensate for starting :
253  ref = the_ref + 1;
254 
255  int size = target.size() - ref.size() - 1;
256  work.setSize(size);
257  strncpy(work.getRawData(), target, size);
258  }
259 
260  if(was_osisref)
261  {
262  buf.appendFormatted("<a href=\"passagestudy.jsp?action=showRef&type=scripRef&value=%s&module=%s\">",
263  (ref) ? URL::encode(ref.c_str()).c_str() : "",
264  (work.size()) ? URL::encode(work.c_str()).c_str() : "");
265  }
266  else
267  {
268  // Dictionary link, or something
269  buf.appendFormatted("<a href=\"sword://%s/%s\">",
270  (work.size()) ? URL::encode(work.c_str()).c_str() : u->version.c_str(),
271  (ref) ? URL::encode(ref.c_str()).c_str() : ""
272  );
273  }
274  }
275  else
276  {
277  //std::cout << "TARGET WASN'T\n";
278  }
279 
280  }
281  else {
282  buf += u->lastTextNode.c_str();
283  buf += "</a>";
284 
285  u->suspendTextPassThru = false;
286  }
287  }
288 
289  // <note> tag
290  else if (!strcmp(tag.getName(), "note")) {
291  if (!tag.isEndTag()) {
292  if (!tag.isEmpty()) {
293  u->suspendTextPassThru = true;
294  }
295  }
296  // how does any of this work??? If isEndTag is true, </note>, there will be no attributes.
297  if (tag.isEndTag()) {
298  SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
299  SWBuf noteName = tag.getAttribute("n");
300 
301  buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=n&value=%s&module=%s&passage=%s\"><small><sup class=\"n\">*n%s</sup></small></a>",
302  URL::encode(footnoteNumber.c_str()).c_str(),
303  URL::encode(u->version.c_str()).c_str(),
304  URL::encode(u->key->getText()).c_str(),
305  (renderNoteNumbers ? URL::encode(noteName.c_str()).c_str() : ""));
306 
307  u->suspendTextPassThru = false;
308  }
309  }
310  // <graphic> image tag
311  else if (!strcmp(tag.getName(), "graphic")) {
312  const char *url = tag.getAttribute("url");
313  if (url) { // assert we have a url attribute
314  SWBuf filepath;
315  if (userData->module) {
316  filepath = userData->module->getConfigEntry("AbsoluteDataPath");
317  if ((filepath.size()) && (filepath[filepath.size()-1] != '/') && (url[0] != '/'))
318  filepath += '/';
319  }
320  filepath += url;
321  buf.appendFormatted("<a href=\"passagestudy.jsp?action=showImage&value=%s&module=%s\"><img src=\"file:%s\" border=\"0\" /></a>",
322  URL::encode(filepath.c_str()).c_str(),
323  URL::encode(u->version.c_str()).c_str(),
324  filepath.c_str());
325  u->suspendTextPassThru = false;
326  }
327  }
328  // <table> <row> <cell>
329  else if (!strcmp(tag.getName(), "table")) {
330  if ((!tag.isEndTag()) && (!tag.isEmpty())) {
331  buf += "<table><tbody>\n";
332  }
333  else if (tag.isEndTag()) {
334  buf += "</tbody></table>\n";
335  u->supressAdjacentWhitespace = true;
336  }
337  }
338  else if (!strcmp(tag.getName(), "row")) {
339  if ((!tag.isEndTag()) && (!tag.isEmpty())) {
340  buf += "\t<tr>";
341  }
342  else if (tag.isEndTag()) {
343  buf += "</tr>\n";
344  }
345  }
346  else if (!strcmp(tag.getName(), "cell")) {
347  if ((!tag.isEndTag()) && (!tag.isEmpty())) {
348  buf += "<td>";
349  }
350  else if (tag.isEndTag()) {
351  buf += "</td>";
352  }
353  }
354  // <list> <item>
355  else if (!strcmp(tag.getName(), "list")) {
356  if ((!tag.isEndTag()) && (!tag.isEmpty())) {
357 
358  SWBuf rend = tag.getAttribute("rend");
359 
360  u->lastHi = rend;
361  if (rend == "numbered") {
362  buf += "<ol>\n";
363  }
364  else if (rend == "lettered") {
365  buf += "<ol type=\"A\">\n";
366  }
367  else if (rend == "bulleted") {
368  buf += "<ul>\n";
369  }
370  else {
371  buf += "<ul class=\"list ";
372  buf += rend.c_str();
373  buf += "\">";
374  }
375  }
376  else if (tag.isEndTag()) {
377  SWBuf rend = u->lastHi;
378  if (rend == "numbered") {
379  buf += "</ol>\n>";
380  }
381  else if (rend == "lettered") {
382  buf += "</ol>\n";
383  }
384  else if (rend == "bulleted") {
385  buf += "</ul>\n";
386  }
387  else {
388  buf += "</ul>\n";
389  }
390  u->supressAdjacentWhitespace = true;
391  }
392  }
393  else if (!strcmp(tag.getName(), "item")) {
394  if ((!tag.isEndTag()) && (!tag.isEmpty())) {
395  buf += "<li>";
396  }
397  else if (tag.isEndTag()) {
398  buf += "</li>\n";
399  }
400  }
401  else {
402  return false; // we still didn't handle token
403  }
404 
405 
406  }
407  return true;
408 }
409 
410 
412 
const char * getName() const
Definition: swmodule.cpp:204
#define SWORD_NAMESPACE_START
Definition: defs.h:39
SWBuf & appendFormatted(const char *format,...)
Definition: swbuf.cpp:81
TEIXHTML()
Definition: teixhtml.cpp:74
void setTokenEnd(const char *tokenEnd)
void addAllowedEscapeString(const char *findString)
Definition: swbuf.h:47
const char * getType() const
Definition: swmodule.cpp:232
virtual const char * getConfigEntry(const char *key) const
Definition: swmodule.cpp:1159
MyUserData(const SWModule *module, const SWKey *key)
Definition: teixhtml.cpp:65
const SWModule * module
Definition: swbasicfilter.h:42
const char * getName() const
Definition: utilxml.h:58
SWText * module
Definition: osis2mod.cpp:105
Definition: utilxml.h:38
bool renderNoteNumbers
Definition: teixhtml.h:36
void setTokenCaseSensitive(bool val)
void setEscapeStart(const char *escStart)
bool isEmpty() const
Definition: utilxml.h:60
bool substituteToken(SWBuf &buf, const char *token)
virtual const char * getText() const
Definition: swkey.cpp:184
void setTokenStart(const char *tokenStart)
char * getRawData()
Definition: swbuf.h:379
const char * c_str() const
Definition: swbuf.h:158
virtual bool handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData)
Definition: teixhtml.cpp:94
virtual const char * getHeader() const
Definition: teixhtml.cpp:35
unsigned long size() const
Definition: swbuf.h:185
const SWKey * key
Definition: swbasicfilter.h:43
const char * getAttribute(const char *attribName, int partNum=-1, char partSplit= '|') const
Definition: utilxml.cpp:230
int size
Definition: regex.c:5043
void setEscapeStringCaseSensitive(bool val)
bool isEndTag(const char *eID=0) const
Definition: utilxml.cpp:323
void setEscapeEnd(const char *escEnd)
#define SWORD_NAMESPACE_END
Definition: defs.h:40
Definition: swkey.h:77
static const SWBuf encode(const char *urlText)
Definition: url.cpp:231
void setSize(unsigned long len)
Definition: swbuf.h:255