The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
swbasicfilter.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * swbasicfilter.cpp - definition of class SWBasicFilter. An SWFilter
4  * impl that provides some basic methods that
5  * many filters will need and can use as a starting
6  * point.
7  *
8  * $Id: swbasicfilter.cpp 3808 2020-10-02 13:23:34Z scribe $
9  *
10  * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org)
11  * CrossWire Bible Society
12  * P. O. Box 2528
13  * Tempe, AZ 85280-2528
14  *
15  * This program is free software; you can redistribute it and/or modify it
16  * under the terms of the GNU General Public License as published by the
17  * Free Software Foundation version 2.
18  *
19  * This program is distributed in the hope that it will be useful, but
20  * WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  * General Public License for more details.
23  *
24  */
25 
26 #include <stdlib.h>
27 #include <swbasicfilter.h>
28 #include <stdio.h>
29 #include <stdarg.h>
30 #include <utilstr.h>
31 #include <stringmgr.h>
32 #include <versekey.h>
33 #include <map>
34 #include <set>
35 
37 
38 
// Lookup table from a search string to its replacement text; the type of the
// iterators used on p->tokenSubMap and p->escSubMap in this file.
39 typedef std::map<SWBuf, SWBuf> DualStringMap;
// Set of strings; the type of the iterators used on p->escPassSet in this file.
40 typedef std::set<SWBuf> StringSet;
41 
42 
43 // I hate bridge patterns but this isolates std::map from a ton of filters
45 public:
49 };
50 
51 
// Processing-stage flags. Each value is a distinct bit; processText() tests
// them against processStages with '&' and, when a bit is set, calls
// processStage() passing that same constant.
52 const char SWBasicFilter::INITIALIZE = 1;
53 const char SWBasicFilter::PRECHAR = 2;
54 const char SWBasicFilter::POSTCHAR = 4;
55 const char SWBasicFilter::FINALIZE = 8;
56 
57 
59  this->module = module;
60  this->key = key;
61  suspendTextPassThru = false;
63  vkey = 0;
64  SWTRY {
65  vkey = SWDYNAMIC_CAST(const VerseKey, key);
66  }
67  SWCATCH ( ... ) { }
68 }
69 
70 
72 
73  p = new Private;
74 
75  processStages = 0;
76  tokenStart = 0;
77  tokenEnd = 0;
78  escStart = 0;
79  escEnd = 0;
80 
81  setTokenStart("<");
82  setTokenEnd(">");
83  setEscapeStart("&");
84  setEscapeEnd(";");
85 
86  escStringCaseSensitive = false;
87  tokenCaseSensitive = false;
88  passThruUnknownToken = false;
89  passThruUnknownEsc = false;
90  passThruNumericEsc = false;
91 }
92 
93 
95  if (tokenStart)
96  delete [] tokenStart;
97 
98  if (tokenEnd)
99  delete [] tokenEnd;
100 
101  if (escStart)
102  delete [] escStart;
103 
104  if (escEnd)
105  delete [] escEnd;
106 
107  delete p;
108 }
109 
110 
112  passThruUnknownToken = val;
113 }
114 
115 
117  passThruUnknownEsc = val;
118 }
119 
120 
122  passThruUnknownEsc = val;
123 }
124 
125 
127  tokenCaseSensitive = val;
128 }
129 
130 
133 }
134 
135 
136 void SWBasicFilter::addTokenSubstitute(const char *findString, const char *replaceString) {
137  char *buf = 0;
138 
139  if (!tokenCaseSensitive) {
140  stdstr(&buf, findString);
141  toupperstr(buf);
142  p->tokenSubMap[buf] = replaceString;
143  delete [] buf;
144  }
145  else p->tokenSubMap[findString] = replaceString;
146 }
147 
148 
149 void SWBasicFilter::removeTokenSubstitute(const char *findString) {
150  if (p->tokenSubMap.find(findString) != p->tokenSubMap.end()) {
151  p->tokenSubMap.erase( p->tokenSubMap.find(findString) );
152  }
153 }
154 
155 
156 void SWBasicFilter::addAllowedEscapeString(const char *findString) {
157  char *buf = 0;
158 
159  if (!escStringCaseSensitive) {
160  stdstr(&buf, findString);
161  toupperstr(buf);
162  p->escPassSet.insert(StringSet::value_type(buf));
163  delete [] buf;
164  }
165  else p->escPassSet.insert(StringSet::value_type(findString));
166 }
167 
168 
169 void SWBasicFilter::removeAllowedEscapeString(const char *findString) {
170  if (p->escPassSet.find(findString) != p->escPassSet.end()) {
171  p->escPassSet.erase( p->escPassSet.find(findString) );
172  }
173 }
174 
175 
176 void SWBasicFilter::addEscapeStringSubstitute(const char *findString, const char *replaceString) {
177  char *buf = 0;
178 
179  if (!escStringCaseSensitive) {
180  stdstr(&buf, findString);
181  toupperstr(buf);
182  p->escSubMap.insert(DualStringMap::value_type(buf, replaceString));
183  delete [] buf;
184  }
185  else p->escSubMap.insert(DualStringMap::value_type(findString, replaceString));
186 }
187 
188 
189 void SWBasicFilter::removeEscapeStringSubstitute(const char *findString) {
190  if (p->escSubMap.find(findString) != p->escSubMap.end()) {
191  p->escSubMap.erase( p->escSubMap.find(findString) );
192  }
193 }
194 
195 
196 bool SWBasicFilter::substituteToken(SWBuf &buf, const char *token) {
197  DualStringMap::iterator it;
198 
199  if (!tokenCaseSensitive) {
200  char *tmp = 0;
201  stdstr(&tmp, token);
202  toupperstr(tmp);
203  it = p->tokenSubMap.find(tmp);
204  delete [] tmp;
205  } else
206  it = p->tokenSubMap.find(token);
207 
208  if (it != p->tokenSubMap.end()) {
209  buf += it->second.c_str();
210  return true;
211  }
212  return false;
213 }
214 
215 
216 void SWBasicFilter::appendEscapeString(SWBuf &buf, const char *escString) {
217  buf += escStart;
218  buf += escString;
219  buf += escEnd;
220 }
221 
222 
223 bool SWBasicFilter::passAllowedEscapeString(SWBuf &buf, const char *escString) {
224  StringSet::iterator it;
225 
226  if (!escStringCaseSensitive) {
227  char *tmp = 0;
228  stdstr(&tmp, escString);
229  toupperstr(tmp);
230  it = p->escPassSet.find(tmp);
231  delete [] tmp;
232  } else
233  it = p->escPassSet.find(escString);
234 
235  if (it != p->escPassSet.end()) {
236  appendEscapeString(buf, escString);
237  return true;
238  }
239 
240  return false;
241 }
242 
243 
244 bool SWBasicFilter::handleNumericEscapeString(SWBuf &buf, const char *escString) {
245  if (passThruNumericEsc) {
246  appendEscapeString(buf, escString);
247  return true;
248  }
249  return false;
250 }
251 
252 
253 bool SWBasicFilter::substituteEscapeString(SWBuf &buf, const char *escString) {
254  DualStringMap::iterator it;
255 
256  if (*escString == '#') {
257  return handleNumericEscapeString(buf, escString);
258  }
259 
260  if (passAllowedEscapeString(buf, escString)) {
261  return true;
262  }
263 
264  if (!escStringCaseSensitive) {
265  char *tmp = 0;
266  stdstr(&tmp, escString);
267  toupperstr(tmp);
268  it = p->escSubMap.find(tmp);
269  delete [] tmp;
270  } else
271  it = p->escSubMap.find(escString);
272 
273  if (it != p->escSubMap.end()) {
274  buf += it->second.c_str();
275  return true;
276  }
277  return false;
278 }
279 
280 
281 bool SWBasicFilter::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
282  return substituteToken(buf, token);
283 }
284 
285 
286 bool SWBasicFilter::handleEscapeString(SWBuf &buf, const char *escString, BasicFilterUserData *userData) {
287  return substituteEscapeString(buf, escString);
288 }
289 
290 
291 void SWBasicFilter::setEscapeStart(const char *escStart) {
292  stdstr(&(this->escStart), escStart);
293  escStartLen = strlen(escStart);
294 }
295 
296 
297 void SWBasicFilter::setEscapeEnd(const char *escEnd) {
298  stdstr(&(this->escEnd), escEnd);
299  escEndLen = strlen(escEnd);
300 }
301 
302 
303 void SWBasicFilter::setTokenStart(const char *tokenStart) {
304  stdstr(&(this->tokenStart), tokenStart);
305  tokenStartLen = strlen(tokenStart);
306 }
307 
308 
309 void SWBasicFilter::setTokenEnd(const char *tokenEnd) {
310  stdstr(&(this->tokenEnd), tokenEnd);
311  tokenEndLen = strlen(tokenEnd);
312 }
313 
314 
/**
 * Main scanning loop of the filter.
 *
 * Copies the incoming text, clears text, and rebuilds it by walking the copy
 * one character at a time:
 *  - characters between tokenStart and tokenEnd are collected and handed to
 *    handleToken();
 *  - characters between escStart and escEnd are collected and handed to
 *    handleEscapeString();
 *  - all other characters are appended to text, or to
 *    userData->lastSuspendSegment while userData->suspendTextPassThru is set.
 * processStage() is additionally called for every stage whose flag is set in
 * processStages.
 *
 * @param text   in: text to filter; out: filtered result
 * @param key    forwarded to createUserData()
 * @param module forwarded to createUserData()
 * @return always 0
 *
 * The per-call userData object obtained from createUserData() is deleted on
 * both return paths.
 */
315 char SWBasicFilter::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
316  char *from;
     // Collection buffer for the current token or escape name; at most 4090
     // characters are stored (see the append near the end of the loop).
317  char token[4096];
318  int tokpos = 0;
     // intoken is true while collecting either a token or an escape string;
     // inEsc tells the two apart.
319  bool intoken = false;
320  bool inEsc = false;
     // Match positions within the (possibly multi-character) delimiters.
321  int escStartPos = 0, escEndPos = 0;
322  int tokenStartPos = 0, tokenEndPos = 0;
     // Plain text seen since the last token/escape; handed to the handlers
     // through userData->lastTextNode.
323  SWBuf lastTextNode;
324  BasicFilterUserData *userData = createUserData(module, key);
325 
     // Scan a copy of the input; text itself is emptied and becomes the output.
326  SWBuf orig = text;
327  from = orig.getRawData();
328  text = "";
329 
330  if (processStages & INITIALIZE) {
331  if (processStage(INITIALIZE, text, from, userData)) { // processStage handled it all
332  delete userData;
333  return 0;
334  }
335  }
336 
     // The loop stops at the first NUL character in the copy.
337  for (;*from; from++) {
338 
339  if (processStages & PRECHAR) {
340  if (processStage(PRECHAR, text, from, userData)) // processStage handled this char
341  continue;
342  }
343 
     // Token start: once the last character of tokenStart matches, reset the
     // buffer and begin collecting a token. The matched character is consumed.
     // Note: this check is not conditioned on intoken, so it also fires while
     // a token or escape is already being collected and restarts collection.
344  if (*from == tokenStart[tokenStartPos]) {
345  if (tokenStartPos == (tokenStartLen - 1)) {
346  intoken = true;
347  tokpos = 0;
348  token[0] = 0;
349  token[1] = 0;
350  token[2] = 0;
351  inEsc = false;
352  }
353  else tokenStartPos++;
354  continue;
355  }
356 
     // Escape start: same handling as above, but marks the collection as an
     // escape string. Also not conditioned on intoken.
357  if (*from == escStart[escStartPos]) {
358  if (escStartPos == (escStartLen - 1)) {
359  intoken = true;
360  tokpos = 0;
361  token[0] = 0;
362  token[1] = 0;
363  token[2] = 0;
364  inEsc = true;
365  }
366  else escStartPos++;
367  continue;
368  }
369 
     // Escape end: hand the collected name to handleEscapeString(); if it
     // returns false and passThruUnknownEsc is set, re-emit the escape as is.
     // On a partial match of a multi-character escEnd the position is not
     // advanced here and the character falls through to the code below.
370  if (inEsc) {
371  if (*from == escEnd[escEndPos]) {
372  if (escEndPos == (escEndLen - 1)) {
373  intoken = inEsc = false;
374  userData->lastTextNode = lastTextNode;
375 
376  if (!userData->suspendTextPassThru) { //if text through is disabled no tokens should pass, too
377  if ((!handleEscapeString(text, token, userData)) && (passThruUnknownEsc)) {
378  appendEscapeString(text, token);
379  }
380  }
381  escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0;
382  lastTextNode = "";
383  continue;
384  }
385  }
386  }
387 
     // Token end: hand the collected text to handleToken(); if it returns
     // false and passThruUnknownToken is set, re-emit the token with its
     // delimiters. Unlike the escape case, handleToken() is called even while
     // suspendTextPassThru is set. This branch does not test intoken, so a
     // tokenEnd character outside a token also reaches handleToken() with
     // whatever the buffer currently holds.
388  if (!inEsc) {
389  if (*from == tokenEnd[tokenEndPos]) {
390  if (tokenEndPos == (tokenEndLen - 1)) {
391  intoken = false;
392  userData->lastTextNode = lastTextNode;
393  if ((!handleToken(text, token, userData)) && (passThruUnknownToken)) {
394  text += tokenStart;
395  text += token;
396  text += tokenEnd;
397  }
398  escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0;
399  lastTextNode = "";
     // NOTE(review): size(0) presumably empties the suspended-segment buffer
     // once pass-through is active again; confirm against SWBuf.
400  if (!userData->suspendTextPassThru) {
401  userData->lastSuspendSegment.size(0);
402  }
403  continue;
404  }
405  }
406  }
407 
     // Inside a token/escape: append to the buffer. Characters past the 4090
     // limit are dropped. Writing the zero at tokpos+2, together with the
     // three zeros set at collection start, keeps the buffer NUL-terminated.
408  if (intoken) {
409  if (tokpos < 4090) {
410  token[tokpos++] = *from;
411  token[tokpos+2] = 0;
412  }
413  }
     // Plain text: a space is skipped when supressAdjacentWhitespace is set;
     // otherwise the character goes to text (or to lastSuspendSegment while
     // pass-through is suspended) and to lastTextNode. The flag is cleared
     // after every plain-text character.
414  else {
415  if ((!userData->supressAdjacentWhitespace) || (*from != ' ')) {
416  if (!userData->suspendTextPassThru) {
417  text.append(*from);
418  }
419  else userData->lastSuspendSegment.append(*from);
420  lastTextNode.append(*from);
421  }
422  userData->supressAdjacentWhitespace = false;
423  }
424 
     // POSTCHAR is only reached for characters that did not 'continue' above,
     // i.e. not for delimiter characters or PRECHAR-handled characters.
425  if (processStages & POSTCHAR)
426  processStage(POSTCHAR, text, from, userData);
427 
428  }
429 
     // At this point from addresses the terminating NUL of the copy.
430  if (processStages & FINALIZE)
431  processStage(FINALIZE, text, from, userData);
432 
433  delete userData;
434  return 0;
435 }
436 
437 
#define SWORD_NAMESPACE_START
Definition: defs.h:39
static const char PRECHAR
Definition: swbasicfilter.h:97
void removeAllowedEscapeString(const char *findString)
void setTokenEnd(const char *tokenEnd)
void addAllowedEscapeString(const char *findString)
Definition: swbuf.h:47
static const char POSTCHAR
Definition: swbasicfilter.h:98
char * tokenStart
Definition: swbasicfilter.h:64
virtual char processText(SWBuf &text, const SWKey *key=0, const SWModule *module=0)
void setPassThruUnknownToken(bool val)
virtual bool handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData)
const SWModule * module
Definition: swbasicfilter.h:42
bool passThruUnknownEsc
Definition: swbasicfilter.h:77
SWText * module
Definition: osis2mod.cpp:105
bool passThruNumericEsc
Definition: swbasicfilter.h:78
virtual bool handleEscapeString(SWBuf &buf, const char *escString, BasicFilterUserData *userData)
#define SWTRY
Definition: defs.h:57
void setTokenCaseSensitive(bool val)
void setEscapeStart(const char *escStart)
bool passThruUnknownToken
Definition: swbasicfilter.h:76
virtual bool handleNumericEscapeString(SWBuf &buf, const char *escString)
bool substituteToken(SWBuf &buf, const char *token)
const VerseKey * vkey
Definition: swbasicfilter.h:44
void addEscapeStringSubstitute(const char *findString, const char *replaceString)
SWORD_NAMESPACE_START char * stdstr(char **ipstr, const char *istr, unsigned int memPadFactor=1)
Definition: utilstr.h:44
void setTokenStart(const char *tokenStart)
#define SWCATCH(x)
Definition: defs.h:58
static const char FINALIZE
Definition: swbasicfilter.h:99
char * getRawData()
Definition: swbuf.h:379
void removeTokenSubstitute(const char *findString)
const char * c_str() const
Definition: swbuf.h:158
BasicFilterUserData(const SWModule *module, const SWKey *key)
SWBuf & append(const char *str, long max=-1)
Definition: swbuf.h:274
void setPassThruNumericEscapeString(bool val)
void setPassThruUnknownEscapeString(bool val)
virtual ~SWBasicFilter()
bool tokenCaseSensitive
Definition: swbasicfilter.h:75
SWORD_NAMESPACE_START typedef std::map< SWBuf, SWBuf > DualStringMap
virtual bool processStage(char, SWBuf &, char *&, BasicFilterUserData *)
virtual BasicFilterUserData * createUserData(const SWModule *module, const SWKey *key)
Definition: swbasicfilter.h:91
bool substituteEscapeString(SWBuf &buf, const char *escString)
#define SWDYNAMIC_CAST(className, object)
Definition: defs.h:47
unsigned long size() const
Definition: swbuf.h:185
static const char INITIALIZE
Definition: swbasicfilter.h:96
const SWKey * key
Definition: swbasicfilter.h:43
bool escStringCaseSensitive
Definition: swbasicfilter.h:74
DualStringMap tokenSubMap
bool passAllowedEscapeString(SWBuf &buf, const char *escString)
void setEscapeStringCaseSensitive(bool val)
void addTokenSubstitute(const char *findString, const char *replaceString)
char * toupperstr(char *t, unsigned int max=0)
Definition: stringmgr.h:107
void setEscapeEnd(const char *escEnd)
#define SWORD_NAMESPACE_END
Definition: defs.h:40
std::set< SWBuf > StringSet
Definition: swkey.h:77
void removeEscapeStringSubstitute(const char *findString)
Private * p
Definition: swbasicfilter.h:82
void appendEscapeString(SWBuf &buf, const char *escString)