The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ThMLPlain Class Reference

#include <thmlplain.h>

+ Inheritance diagram for ThMLPlain:
+ Collaboration diagram for ThMLPlain:

Public Member Functions

virtual const char * getHeader () const
 
virtual char processText (SWBuf &text, const SWKey *key=0, const SWModule *module=0)
 
 ThMLPlain ()
 

Detailed Description

This filter converts ThML text to plain text.

Definition at line 33 of file thmlplain.h.

Constructor & Destructor Documentation

SWORD_NAMESPACE_START ThMLPlain::ThMLPlain ( )

Definition at line 30 of file thmlplain.cpp.

30  { // empty body: this constructor performs no work of its own
31 }

Member Function Documentation

virtual const char* SWFilter::getHeader ( ) const
inline, virtual, inherited

This method can supply a header associated with the processing done with this filter. A typical example is a suggested CSS style block for classed containers.

Reimplemented in OSISLaTeX, OSISXHTML, ThMLLaTeX, ThMLXHTML, TEIXHTML, GBFLaTeX, and GBFXHTML.

Definition at line 62 of file swfilter.h.

62 { return ""; } // default implementation: supplies an empty header string
char ThMLPlain::processText ( SWBuf & text,
const SWKey * key = 0,
const SWModule * module = 0 
)
virtual

This method processes the text it is given and modifies it in place as appropriate for a particular filter task.

Parameters
text: The text to be filtered/converted.
key: Current key that was used.
module: Current module.
Returns
0

Implements SWFilter.

Definition at line 33 of file thmlplain.cpp.

34 {
35  char token[2048];
36  int tokpos = 0;
37  bool intoken = false;
38  bool ampersand = false;
39 
40  const char *from;
41  SWBuf orig = text;
42  from = orig.c_str();
43  for (text = ""; *from; from++)
44  {
45  if (*from == 10 || *from == 13)
46  from++;
47  if (*from == '<') {
48  intoken = true;
49  tokpos = 0;
50  token[0] = 0;
51  token[1] = 0;
52  token[2] = 0;
53  ampersand = false;
54  continue;
55  }
56  else if (*from == '&') {
57  intoken = true;
58  tokpos = 0;
59  token[0] = 0;
60  token[1] = 0;
61  token[2] = 0;
62  ampersand = true;
63  continue;
64  }
65  if (*from == ';' && ampersand) {
66  intoken = false;
67  ampersand = false;
68 
69  if (!strncmp("nbsp", token, 4)) text += ' ';
70  else if (!strncmp("quot", token, 4)) text += '"';
71  else if (!strncmp("amp", token, 3)) text += '&';
72  else if (!strncmp("lt", token, 2)) text += '<';
73  else if (!strncmp("gt", token, 2)) text += '>';
74  else if (!strncmp("brvbar", token, 6)) text += "¦";
75  else if (!strncmp("sect", token, 4)) text += "§";
76  else if (!strncmp("copy", token, 4)) text += "©";
77  else if (!strncmp("laquo", token, 5)) text += "«";
78  else if (!strncmp("reg", token, 3)) text += "®";
79  else if (!strncmp("acute", token, 5)) text += "´";
80  else if (!strncmp("para", token, 4)) text += "¶";
81  else if (!strncmp("raquo", token, 5)) text += "»";
82 
83  else if (!strncmp("Aacute", token, 6)) text += "Á";
84  else if (!strncmp("Agrave", token, 6)) text += "À";
85  else if (!strncmp("Acirc", token, 5)) text += "Â";
86  else if (!strncmp("Auml", token, 4)) text += "Ä";
87  else if (!strncmp("Atilde", token, 6)) text += "Ã";
88  else if (!strncmp("Aring", token, 5)) text += "Å";
89  else if (!strncmp("aacute", token, 6)) text += "á";
90  else if (!strncmp("agrave", token, 6)) text += "à";
91  else if (!strncmp("acirc", token, 5)) text += "â";
92  else if (!strncmp("auml", token, 4)) text += "ä";
93  else if (!strncmp("atilde", token, 6)) text += "ã";
94  else if (!strncmp("aring", token, 5)) text += "å";
95  else if (!strncmp("Eacute", token, 6)) text += "É";
96  else if (!strncmp("Egrave", token, 6)) text += "È";
97  else if (!strncmp("Ecirc", token, 5)) text += "Ê";
98  else if (!strncmp("Euml", token, 4)) text += "Ë";
99  else if (!strncmp("eacute", token, 6)) text += "é";
100  else if (!strncmp("egrave", token, 6)) text += "è";
101  else if (!strncmp("ecirc", token, 5)) text += "ê";
102  else if (!strncmp("euml", token, 4)) text += "ë";
103  else if (!strncmp("Iacute", token, 6)) text += "Í";
104  else if (!strncmp("Igrave", token, 6)) text += "Ì";
105  else if (!strncmp("Icirc", token, 5)) text += "Î";
106  else if (!strncmp("Iuml", token, 4)) text += "Ï";
107  else if (!strncmp("iacute", token, 6)) text += "í";
108  else if (!strncmp("igrave", token, 6)) text += "ì";
109  else if (!strncmp("icirc", token, 5)) text += "î";
110  else if (!strncmp("iuml", token, 4)) text += "ï";
111  else if (!strncmp("Oacute", token, 6)) text += "Ó";
112  else if (!strncmp("Ograve", token, 6)) text += "Ò";
113  else if (!strncmp("Ocirc", token, 5)) text += "Ô";
114  else if (!strncmp("Ouml", token, 4)) text += "Ö";
115  else if (!strncmp("Otilde", token, 6)) text += "Õ";
116  else if (!strncmp("oacute", token, 6)) text += "ó";
117  else if (!strncmp("ograve", token, 6)) text += "ò";
118  else if (!strncmp("ocirc", token, 5)) text += "ô";
119  else if (!strncmp("ouml", token, 4)) text += "ö";
120  else if (!strncmp("otilde", token, 6)) text += "õ";
121  else if (!strncmp("Uacute", token, 6)) text += "Ú";
122  else if (!strncmp("Ugrave", token, 6)) text += "Ù";
123  else if (!strncmp("Ucirc", token, 5)) text += "Û";
124  else if (!strncmp("Uuml", token, 4)) text += "Ü";
125  else if (!strncmp("uacute", token, 6)) text += "ú";
126  else if (!strncmp("ugrave", token, 6)) text += "ù";
127  else if (!strncmp("ucirc", token, 5)) text += "û";
128  else if (!strncmp("uuml", token, 4)) text += "ü";
129  else if (!strncmp("Yacute", token, 6)) text += "Ý";
130  else if (!strncmp("yacute", token, 6)) text += "ý";
131  else if (!strncmp("yuml", token, 4)) text += "ÿ";
132 
133  else if (!strncmp("deg", token, 3)) text += "°";
134  else if (!strncmp("plusmn", token, 6)) text += "±";
135  else if (!strncmp("sup2", token, 4)) text += "²";
136  else if (!strncmp("sup3", token, 4)) text += "³";
137  else if (!strncmp("sup1", token, 4)) text += "¹";
138  else if (!strncmp("nbsp", token, 4)) text += "º";
139  else if (!strncmp("pound", token, 5)) text += "£";
140  else if (!strncmp("cent", token, 4)) text += "¢";
141  else if (!strncmp("frac14", token, 6)) text += "¼";
142  else if (!strncmp("frac12", token, 6)) text += "½";
143  else if (!strncmp("frac34", token, 6)) text += "¾";
144  else if (!strncmp("iquest", token, 6)) text += "¿";
145  else if (!strncmp("iexcl", token, 5)) text += "¡";
146  else if (!strncmp("ETH", token, 3)) text += "Ð";
147  else if (!strncmp("eth", token, 3)) text += "ð";
148  else if (!strncmp("THORN", token, 5)) text += "Þ";
149  else if (!strncmp("thorn", token, 5)) text += "þ";
150  else if (!strncmp("AElig", token, 5)) text += "Æ";
151  else if (!strncmp("aelig", token, 5)) text += "æ";
152  else if (!strncmp("Oslash", token, 6)) text += "Ø";
153  else if (!strncmp("curren", token, 6)) text += "¤";
154  else if (!strncmp("Ccedil", token, 6)) text += "Ç";
155  else if (!strncmp("ccedil", token, 6)) text += "ç";
156  else if (!strncmp("szlig", token, 5)) text += "ß";
157  else if (!strncmp("Ntilde", token, 6)) text += "Ñ";
158  else if (!strncmp("ntilde", token, 6)) text += "ñ";
159  else if (!strncmp("yen", token, 3)) text += "¥";
160  else if (!strncmp("not", token, 3)) text += "¬";
161  else if (!strncmp("ordf", token, 4)) text += "ª";
162  else if (!strncmp("uml", token, 3)) text += "¨";
163  else if (!strncmp("shy", token, 3)) text += "­";
164  else if (!strncmp("macr", token, 4)) text += "¯";
165  else if (!strncmp("micro", token, 5)) text += "µ";
166  else if (!strncmp("middot", token, 6)) text += "·";
167  else if (!strncmp("cedil", token, 5)) text += "¸";
168  else if (!strncmp("ordm", token, 4)) text += "º";
169  else if (!strncmp("times", token, 5)) text += "×";
170  else if (!strncmp("divide", token, 6)) text += "÷";
171  else if (!strncmp("oslash", token, 6)) text += "ø";
172  continue;
173 
174  }
175  else if (*from == '>' && !ampersand) {
176  intoken = false;
177  // process desired tokens
178  if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) {
179  text += ' ';
180  text += '<';
181  for (unsigned int i = 27; token[i] != '\"'; i++)
182  text += token[i];
183  text += '>';
184  continue;
185  }
186  if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) {
187  text += ' ';
188  text += '(';
189  for (unsigned int i = 25; token[i] != '\"'; i++)
190  text += token[i];
191  text += ')';
192  continue;
193  }
194  if (!strncmp("note", token, 4)) {
195  text += ' ';
196  text += '[';
197  }
198  else if (!strncmp("br", token, 2))
199  text += '\n';
200  else if (!strncmp("/p", token, 2))
201  text += '\n';
202  else if (!strncmp("/note", token, 5)) {
203  text += ']';
204  text += ' ';
205  }
206  continue;
207  }
208  if (intoken) {
209  if (tokpos < 2045) {
210  token[tokpos++] = *from;
211  //TODO: why is this + 2? Are we trying to keep 2 or 3 nulls after the last valid char?
212  // tokpos has been incremented past the last valid token. it should be pointing to null
213  // +1 should give us 2 nulls, but we're +2 here, which actually keeps 3 nulls after the
214  // last valid char. Why are we doing any of this? These were written before SWBuf and should
215  // probably be switched to SWBuf, but perf tests before and after the switch should be run
216  token[tokpos+2] = 0;
217  }
218  }
219  else text += *from;
220  }
221 
222  orig = text;
223  from = orig.c_str();
224  for (text = ""; *from; from++) { //loop to remove extra spaces
225  if ((strchr(" \t\n\r", *from))) {
226  while (*(from+1) && (strchr(" \t\n\r", *(from+1)))) {
227  from++;
228  }
229  text += " ";
230  }
231  else {
232  text += *from;
233  }
234  }
235  text += (char)0;
236 
237  return 0;
238 }
Definition: swbuf.h:47
const char * c_str() const
Definition: swbuf.h:158

The documentation for this class was generated from the following files: