The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
thmllatex.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * thmllatex.cpp - ThML to classed LaTeX
4  *
5  * $Id: thmllatex.cpp 3547 2017-12-10 05:06:48Z scribe $
6  *
7  * Copyright 2011-2014 CrossWire Bible Society (http://www.crosswire.org)
8  * CrossWire Bible Society
9  * P. O. Box 2528
10  * Tempe, AZ 85280-2528
11  *
12  * This program is free software; you can redistribute it and/or modify it
13  * under the terms of the GNU General Public License as published by the
14  * Free Software Foundation version 2.
15  *
16  * This program is distributed in the hope that it will be useful, but
17  * WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * General Public License for more details.
20  *
21  */
22 
23 #include <stdlib.h>
24 #include <thmllatex.h>
25 #include <swmodule.h>
26 #include <utilxml.h>
27 #include <utilstr.h>
28 #include <versekey.h>
29 #include <url.h>
30 
32 
33 
/**
 * Returns the header string for this filter.
 *
 * The literal below is a backslash-continued string whose only content is the
 * whitespace that precedes the closing quote, so no LaTeX preamble commands
 * are supplied from here.
 *
 * @return pointer to a static string literal; the caller must not free it.
 */
34 const char *ThMLLaTeX::getHeader() const {
35  return "\
36  ";
37 }
38 
39 
41  isBiblicalText = false;
42  inSecHead = false;
43  if (module) {
44  version = module->getName();
45  isBiblicalText = (!strcmp(module->getType(), "Biblical Texts"));
46  }
47 }
48 
49 
51  setTokenStart("<");
52  setTokenEnd(">");
53 
54  setEscapeStart("&");
55  setEscapeEnd(";");
56 
59 
60  addAllowedEscapeString("quot");
64 
65  addAllowedEscapeString("nbsp");
66  addAllowedEscapeString("brvbar"); // "¦"
67  addAllowedEscapeString("sect"); // "§"
68  addAllowedEscapeString("copy"); // "©"
69  addAllowedEscapeString("laquo"); // "«"
70  addAllowedEscapeString("reg"); // "®"
71  addAllowedEscapeString("acute"); // "´"
72  addAllowedEscapeString("para"); // "¶"
73  addAllowedEscapeString("raquo"); // "»"
74 
75  addAllowedEscapeString("Aacute"); // "Á"
76  addAllowedEscapeString("Agrave"); // "À"
77  addAllowedEscapeString("Acirc"); // "Â"
78  addAllowedEscapeString("Auml"); // "Ä"
79  addAllowedEscapeString("Atilde"); // "Ã"
80  addAllowedEscapeString("Aring"); // "Å"
81  addAllowedEscapeString("aacute"); // "á"
82  addAllowedEscapeString("agrave"); // "à"
83  addAllowedEscapeString("acirc"); // "â"
84  addAllowedEscapeString("auml"); // "ä"
85  addAllowedEscapeString("atilde"); // "ã"
86  addAllowedEscapeString("aring"); // "å"
87  addAllowedEscapeString("Eacute"); // "É"
88  addAllowedEscapeString("Egrave"); // "È"
89  addAllowedEscapeString("Ecirc"); // "Ê"
90  addAllowedEscapeString("Euml"); // "Ë"
91  addAllowedEscapeString("eacute"); // "é"
92  addAllowedEscapeString("egrave"); // "è"
93  addAllowedEscapeString("ecirc"); // "ê"
94  addAllowedEscapeString("euml"); // "ë"
95  addAllowedEscapeString("Iacute"); // "Í"
96  addAllowedEscapeString("Igrave"); // "Ì"
97  addAllowedEscapeString("Icirc"); // "Î"
98  addAllowedEscapeString("Iuml"); // "Ï"
99  addAllowedEscapeString("iacute"); // "í"
100  addAllowedEscapeString("igrave"); // "ì"
101  addAllowedEscapeString("icirc"); // "î"
102  addAllowedEscapeString("iuml"); // "ï"
103  addAllowedEscapeString("Oacute"); // "Ó"
104  addAllowedEscapeString("Ograve"); // "Ò"
105  addAllowedEscapeString("Ocirc"); // "Ô"
106  addAllowedEscapeString("Ouml"); // "Ö"
107  addAllowedEscapeString("Otilde"); // "Õ"
108  addAllowedEscapeString("oacute"); // "ó"
109  addAllowedEscapeString("ograve"); // "ò"
110  addAllowedEscapeString("ocirc"); // "ô"
111  addAllowedEscapeString("ouml"); // "ö"
112  addAllowedEscapeString("otilde"); // "õ"
113  addAllowedEscapeString("Uacute"); // "Ú"
114  addAllowedEscapeString("Ugrave"); // "Ù"
115  addAllowedEscapeString("Ucirc"); // "Û"
116  addAllowedEscapeString("Uuml"); // "Ü"
117  addAllowedEscapeString("uacute"); // "ú"
118  addAllowedEscapeString("ugrave"); // "ù"
119  addAllowedEscapeString("ucirc"); // "û"
120  addAllowedEscapeString("uuml"); // "ü"
121  addAllowedEscapeString("Yacute"); // "Ý"
122  addAllowedEscapeString("yacute"); // "ý"
123  addAllowedEscapeString("yuml"); // "ÿ"
124 
125  addAllowedEscapeString("deg"); // "°"
126  addAllowedEscapeString("plusmn"); // "±"
127  addAllowedEscapeString("sup2"); // "²"
128  addAllowedEscapeString("sup3"); // "³"
129  addAllowedEscapeString("sup1"); // "¹"
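130  addAllowedEscapeString("nbsp"); // non-breaking space; duplicates the earlier "nbsp" entry ("º" is "ordm", which is registered separately below)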
131  addAllowedEscapeString("pound"); // "£"
132  addAllowedEscapeString("cent"); // "¢"
133  addAllowedEscapeString("frac14"); // "¼"
134  addAllowedEscapeString("frac12"); // "½"
135  addAllowedEscapeString("frac34"); // "¾"
136  addAllowedEscapeString("iquest"); // "¿"
137  addAllowedEscapeString("iexcl"); // "¡"
138  addAllowedEscapeString("ETH"); // "Ð"
139  addAllowedEscapeString("eth"); // "ð"
140  addAllowedEscapeString("THORN"); // "Þ"
141  addAllowedEscapeString("thorn"); // "þ"
142  addAllowedEscapeString("AElig"); // "Æ"
143  addAllowedEscapeString("aelig"); // "æ"
144  addAllowedEscapeString("Oslash"); // "Ø"
145  addAllowedEscapeString("curren"); // "¤"
146  addAllowedEscapeString("Ccedil"); // "Ç"
147  addAllowedEscapeString("ccedil"); // "ç"
148  addAllowedEscapeString("szlig"); // "ß"
149  addAllowedEscapeString("Ntilde"); // "Ñ"
150  addAllowedEscapeString("ntilde"); // "ñ"
151  addAllowedEscapeString("yen"); // "¥"
152  addAllowedEscapeString("not"); // "¬"
153  addAllowedEscapeString("ordf"); // "ª"
154  addAllowedEscapeString("uml"); // "¨"
155  addAllowedEscapeString("shy"); // "­"
156  addAllowedEscapeString("macr"); // "¯"
157 
158  addAllowedEscapeString("micro"); // "µ"
159  addAllowedEscapeString("middot"); // "·"
160  addAllowedEscapeString("cedil"); // "¸"
161  addAllowedEscapeString("ordm"); // "º"
162  addAllowedEscapeString("times"); // "×"
163  addAllowedEscapeString("divide"); // "÷"
164  addAllowedEscapeString("oslash"); // "ø"
165 
166  setTokenCaseSensitive(true);
167  addTokenSubstitute("scripture", " \\swordquote{ ");
168  addTokenSubstitute("/scripture", "}");
169 
170  renderNoteNumbers = false;
171 }
172 
173 
174 bool ThMLLaTeX::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
175  if (!substituteToken(buf, token)) { // manually process if it wasn't a simple substitution
176  MyUserData *u = (MyUserData *)userData;
177 
178  XMLTag tag(token);
179  if ((!tag.isEndTag()) && (!tag.isEmpty()))
180  u->startTag = tag;
181 
182  if (tag.getName() && !strcmp(tag.getName(), "sync")) {
183  SWBuf value = tag.getAttribute("value");
184  if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "morph")) { //&gt;
185  if (value.length())
186  buf.appendFormatted("\\swordmorph[Greek]{%s}",
187  value.c_str());
188  }
189  else if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "lemma")) { //&gt;
190  if (value.length())
191  // empty "type=" is deliberate.
192  buf.appendFormatted("\\swordmorph[lemma]{%s}",
193  value.c_str());
194  }
195  else if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "Strongs")) {
196  if (!tag.isEndTag()) {
197  char ch = *value;
198  value<<1;
199  buf.appendFormatted("\\swordstrong[%s]{%s}{",
200  ((ch == 'H') ? "Hebrew" : "Greek"),
201  value.c_str());
202  }
203  else { buf += "}"; }
204  }
205 
206  else if (tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "Dict")) {
207  if (!tag.isEndTag()) {
208  buf.appendFormatted("\\sworddict{%s}{",
209  value.c_str());
210  }
211  else { buf += "}"; }
212  }
213 
214  }
215  // <note> tag
216  else if (!strcmp(tag.getName(), "note")) {
217  if (!tag.isEndTag()) {
218  if (!tag.isEmpty()) {
219  SWBuf type = tag.getAttribute("type");
220  SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
221  SWBuf noteName = tag.getAttribute("n");
222  SWBuf footnoteBody = "";
223  if (u->module){
224  footnoteBody += u->module->getEntryAttributes()["Footnote"][footnoteNumber]["body"];
225  }
226  if (u->vkey) {
227  // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt.
228  char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
229  buf.appendFormatted("\\swordfootnote[%c]{%s}{%s}{%s}{%s}{",
230  ch,
231  footnoteNumber.c_str(),
232  u->version.c_str(),
233  u->vkey->getText(),
234  noteName.c_str());
235  }
236  else {
237  char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
238  buf.appendFormatted("\\swordfootnote[%c]{%s}{%s}{%s}{%s}{",
239  ch,
240  footnoteNumber.c_str(),
241  u->version.c_str(),
242  u->key->getText(),
243  noteName.c_str());
244  }
245  u->suspendTextPassThru = true;
246  if (u->module) {
247  buf += u->module->renderText(footnoteBody).c_str();
248  }
249  }
250  }
251  if (tag.isEndTag()) {
252  buf += "}";
253  u->suspendTextPassThru = false;
254  }
255  }
256  else if (!strcmp(tag.getName(), "scripture")) {
257  buf += (tag.isEndTag() ? "\\swordquote" : "}");
258  }
259  // <scripRef> tag
260  else if (!strcmp(tag.getName(), "scripRef")) {
261  if (!tag.isEndTag()) {
262  if (!tag.isEmpty()) {
263  u->suspendTextPassThru = true;
264  }
265  }
266  if (!tag.isEndTag()) { // </scripRef>
267  if (!u->isBiblicalText) {
268  SWBuf refList = u->startTag.getAttribute("passage");
269  if (!refList.length())
270  refList = u->lastTextNode;
271  SWBuf version = tag.getAttribute("version");
272 
273  buf.appendFormatted("\\swordxref{%s}{%s}{",
274  (refList.length()) ? refList.c_str() : "",
275  (version.length()) ? version.c_str() : "");
276  buf += u->lastTextNode.c_str();
277  buf += "}";
278  }
279  else {
280  SWBuf footnoteNumber = u->startTag.getAttribute("swordFootnote");
281  SWBuf noteName = tag.getAttribute("n");
282  SWBuf footnoteBody = "";
283  if (u->module){
284  footnoteBody += u->module->getEntryAttributes()["Footnote"][footnoteNumber]["body"];
285  }
286  if (u->vkey) {
287  // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt.
288  //buf.appendFormatted("<a href=\"noteID=%s.x.%s\"><small><sup>*x</sup></small></a> ", u->vkey->getText(), footnoteNumber.c_str());
289  // char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n');
290  char ch = 'x';
291  buf.appendFormatted("\\swordfootnote[%c]{%s}{%s}{%s}{%s}{",
292  ch,
293  footnoteNumber.c_str(),
294  u->version.c_str(),
295  u->vkey->getText(),
296  (renderNoteNumbers ? noteName.c_str() : ""));
297  if (u->module) {
298  buf += u->module->renderText(footnoteBody).c_str();
299  }
300  }
301  }
302 
303 
304  }
305  else if (tag.isEndTag()){
306  buf +="}";
307  // let's let text resume to output again
308  u->suspendTextPassThru = false;
309  }
310  }
311  else if (tag.getName() && !strcmp(tag.getName(), "div")) {
312 
313  //if (!tag.isEndTag() && u->vkey && !u->vkey->getChapter())
314  // buf += "\\swordsection{book}{";
315  //}
316 
317 
318  if (!tag.isEndTag() && u->inSecHead) {
319  buf += "\\swordsection{sechead}{";
320  u->inSecHead = false;
321  }
322 
323  else if (!tag.isEndTag() && tag.getAttribute("class")) {
324  buf += "\\swordsection{";
325  buf += tag.getAttribute("class");
326  buf += "}{";
327 
328  }
329  else if (!tag.isEndTag()) {
330  buf += "\\swordsection{}{";
331  }
332 
333  else if (tag.isEndTag()) {
334  buf += "}";
335  }
336  }
337  else if (tag.getName() && (!strcmp(tag.getName(), "img") || !strcmp(tag.getName(), "image"))) {
338  const char *src = strstr(token, "src");
339  if (!src) // assert we have a src attribute
340  return false;
341 
342  const char *c, *d;
343  if (((c = strchr(src+3, '"')) == NULL) ||
344  ((d = strchr( ++c , '"')) == NULL)) // identify endpoints.
345  return false; // abandon hope.
346 
347 
348  // images become clickable, if the UI supports showImage.
349  buf +="\\figure{";
350 
351  for (c = token; *c; c++) {
352  if ((*c == '/') && (*(c+1) == '\0'))
353  continue;
354  if (c == src) {
355  for (;((*c) && (*c != '"')); c++)
356  buf += *c;
357 
358  if (!*c) { c--; continue; }
359 
360  buf += '"';
361  if (*(c+1) == '/') {
362  buf += "\\includegraphics{";
363  buf += userData->module->getConfigEntry("AbsoluteDataPath");
364  if (buf[buf.length()-2] == '/')
365  c++; // skip '/'
366  }
367  continue;
368  }
369  buf += *c;
370  }
371  buf += "}}";
372  }
373  else if (tag.getName() && (!strcmp(tag.getName(), "i"))){
374  if (!tag.isEndTag()) {
375  buf += "\\emph{";
376  }
377  else { buf += "}"; }
378  }
379  else if (tag.getName() && (!strcmp(tag.getName(), "br"))){
380  buf += "\\\\";
381 
382  }
383  else {
384  buf += '<';
385  /*for (const char *tok = token; *tok; tok++)
386  buf += *tok;*/
387  buf += token;
388  buf += '>';
389  //return false; // we still didn't handle token
390  }
391  }
392  return true;
393 }
394 
395 
const char * getName() const
Definition: swmodule.cpp:204
#define SWORD_NAMESPACE_START
Definition: defs.h:39
virtual const char * getHeader() const
Definition: thmllatex.cpp:34
SWBuf & appendFormatted(const char *format,...)
Definition: swbuf.cpp:81
void setTokenEnd(const char *tokenEnd)
void addAllowedEscapeString(const char *findString)
Definition: swbuf.h:47
unsigned long length() const
Definition: swbuf.h:197
const char * getType() const
Definition: swmodule.cpp:232
virtual const char * getConfigEntry(const char *key) const
Definition: swmodule.cpp:1159
const SWModule * module
Definition: swbasicfilter.h:42
const char * getName() const
Definition: utilxml.h:58
SWText * module
Definition: osis2mod.cpp:105
Definition: utilxml.h:38
bool renderNoteNumbers
Definition: thmllatex.h:38
void setTokenCaseSensitive(bool val)
void setEscapeStart(const char *escStart)
SWBuf renderText(const char *buf, int len=-1, bool render=true) const
Definition: swmodule.cpp:1038
bool isEmpty() const
Definition: utilxml.h:60
bool substituteToken(SWBuf &buf, const char *token)
const VerseKey * vkey
Definition: swbasicfilter.h:44
virtual const char * getText() const
Definition: versekey.cpp:1242
virtual const char * getText() const
Definition: swkey.cpp:184
void setTokenStart(const char *tokenStart)
virtual bool handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData)
Definition: thmllatex.cpp:174
return NULL
Definition: regex.c:7953
const char * c_str() const
Definition: swbuf.h:158
void setPassThruNumericEscapeString(bool val)
const SWKey * key
Definition: swbasicfilter.h:43
virtual AttributeTypeList & getEntryAttributes() const
Definition: swmodule.h:817
const char * getAttribute(const char *attribName, int partNum=-1, char partSplit= '|') const
Definition: utilxml.cpp:230
void setEscapeStringCaseSensitive(bool val)
bool isEndTag(const char *eID=0) const
Definition: utilxml.cpp:323
void addTokenSubstitute(const char *findString, const char *replaceString)
void setEscapeEnd(const char *escEnd)
#define SWORD_NAMESPACE_END
Definition: defs.h:40
Definition: swkey.h:77
MyUserData(const SWModule *module, const SWKey *key)
Definition: thmllatex.cpp:40