The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
osisstrongs.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * osisstrongs.cpp - SWFilter descendant to hide or show Strong's number
4  * in an OSIS module
5  *
6  * $Id: osisstrongs.cpp 3808 2020-10-02 13:23:34Z scribe $
7  *
8  * Copyright 2003-2013 CrossWire Bible Society (http://www.crosswire.org)
9  * CrossWire Bible Society
10  * P. O. Box 2528
11  * Tempe, AZ 85280-2528
12  *
13  * This program is free software; you can redistribute it and/or modify it
14  * under the terms of the GNU General Public License as published by the
15  * Free Software Foundation version 2.
16  *
17  * This program is distributed in the hope that it will be useful, but
18  * WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  * General Public License for more details.
21  *
22  */
23 
24 #include <stdlib.h>
25 #include <stdio.h>
26 #include <ctype.h>
27 #include <osisstrongs.h>
28 #include <swmodule.h>
29 #include <versekey.h>
30 #include <utilxml.h>
31 
32 
34 
35 namespace {
36 
37  static const char oName[] = "Strong's Numbers";
38  static const char oTip[] = "Toggles Strong's Numbers On and Off if they exist";
39 
40  static const StringList *oValues() {
41  static const SWBuf choices[3] = {"Off", "On", ""};
42  static const StringList oVals(&choices[0], &choices[2]);
43  return &oVals;
44  }
45 }
46 
47 
49 }
50 
51 
53 }
54 
55 
56 char OSISStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
57  SWBuf token;
58  bool intoken = false;
59  int wordNum = 1;
60  char wordstr[11];
61  const char *wordStart = 0;
62  SWBuf page = ""; // some modules include <seg> page info, so we add these to the words
63 
64  const SWBuf orig = text;
65  const char * from = orig.c_str();
66 
67  for (text = ""; *from; ++from) {
68  if (*from == '<') {
69  intoken = true;
70  token = "";
71  continue;
72  }
73  if (*from == '>') { // process tokens
74  intoken = false;
75 
76  // possible page seg --------------------------------
77  if (token.startsWith("seg ")) {
78  XMLTag stag(token);
79  SWBuf type = stag.getAttribute("type");
80  if (type == "page") {
81  SWBuf number = stag.getAttribute("subtype");
82  if (number.length()) {
83  page = number;
84  }
85  }
86  }
87  // ---------------------------------------------------
88 
89  if (token.startsWith("w ")) { // Word
90  XMLTag wtag(token);
91 
92  // always save off lemma if we haven't yet
93  if (!wtag.getAttribute("savlm")) {
94  const char *l = wtag.getAttribute("lemma");
95  if (l) {
96  wtag.setAttribute("savlm", l);
97  }
98  }
99 
100  if (module->isProcessEntryAttributes()) {
101  wordStart = from+1;
102  char gh = 0;
103  const VerseKey *vkey = 0;
104  if (key) {
105  vkey = SWDYNAMIC_CAST(const VerseKey, key);
106  }
107  SWBuf lemma = "";
108  SWBuf morph = "";
109  SWBuf src = "";
110  SWBuf morphClass = "";
111  SWBuf lemmaClass = "";
112 
113  const char *attrib;
114  sprintf(wordstr, "%03d", wordNum);
115 
116  // why is morph entry attribute processing done in here? Well, it's faster. It makes more local sense to place this code in osismorph.
117  // easier to keep lemma and morph in same wordstr number too maybe.
118  if ((attrib = wtag.getAttribute("morph"))) {
119  int count = wtag.getAttributePartCount("morph", ' ');
120  int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
121  do {
122  SWBuf mClass = "";
123  SWBuf mp = "";
124  attrib = wtag.getAttribute("morph", i, ' ');
125  if (i < 0) i = 0; // to handle our -1 condition
126 
127  const char *m = strchr(attrib, ':');
128  if (m) {
129  int len = (int)(m-attrib);
130  mClass.append(attrib, len);
131  attrib += (len+1);
132  }
133  if ((mClass == "x-Robinsons") || (mClass == "x-Robinson") || (mClass == "Robinson")) {
134  mClass = "robinson";
135  }
136  if (i) { morphClass += " "; morph += " "; }
137  mp += attrib;
138  morphClass += mClass;
139  morph += mp;
140  mp.replaceBytes("+", ' ');
141  SWBuf tmp;
142  tmp.setFormatted("Morph.%d", i+1);
143  module->getEntryAttributes()["Word"][wordstr][tmp] = mp;
144  tmp.setFormatted("MorphClass.%d", i+1);
145  module->getEntryAttributes()["Word"][wordstr][tmp] = mClass;
146  } while (++i < count);
147  }
148 
149  if ((attrib = wtag.getAttribute("savlm"))) {
150  int count = wtag.getAttributePartCount("savlm", ' ');
151  int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
152  do {
153  gh = 0;
154  SWBuf lClass = "";
155  SWBuf l = "";
156  attrib = wtag.getAttribute("savlm", i, ' ');
157  if (i < 0) i = 0; // to handle our -1 condition
158 
159  const char *m = strchr(attrib, ':');
160  if (m) {
161  int len = (int)(m-attrib);
162  lClass.append(attrib, len);
163  attrib += (len+1);
164  }
165  if ((lClass == "x-Strongs") || (lClass == "strong") || (lClass == "Strong")) {
166  if (isdigit(attrib[0])) {
167  if (vkey) {
168  gh = vkey->getTestament() ? 'H' : 'G';
169  }
170  }
171  else {
172  gh = *attrib;
173  attrib++;
174  }
175  lClass = "strong";
176  }
177  if (gh) l += gh;
178  l += attrib;
179  if (i) { lemmaClass += " "; lemma += " "; }
180  lemma += l;
181  l.replaceBytes("+", ' ');
182  lemmaClass += lClass;
183  SWBuf tmp;
184  tmp.setFormatted("Lemma.%d", i+1);
185  module->getEntryAttributes()["Word"][wordstr][tmp] = l;
186  tmp.setFormatted("LemmaClass.%d", i+1);
187  module->getEntryAttributes()["Word"][wordstr][tmp] = lClass;
188  } while (++i < count);
189  module->getEntryAttributes()["Word"][wordstr]["PartCount"].setFormatted("%d", count);
190  }
191 
192  if ((attrib = wtag.getAttribute("src"))) {
193  int count = wtag.getAttributePartCount("src", ' ');
194  int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0
195  do {
196  SWBuf mp = "";
197  attrib = wtag.getAttribute("src", i, ' ');
198  if (i < 0) i = 0; // to handle our -1 condition
199 
200  if (i) src += " ";
201  mp += attrib;
202  src += mp;
203  mp.replaceBytes("+", ' ');
204  SWBuf tmp;
205  tmp.setFormatted("Src.%d", i+1);
206  module->getEntryAttributes()["Word"][wordstr][tmp] = mp;
207  } while (++i < count);
208  }
209 
210 
211  if (lemma.length())
212  module->getEntryAttributes()["Word"][wordstr]["Lemma"] = lemma;
213  if (lemmaClass.length())
214  module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = lemmaClass;
215  if (morph.length())
216  module->getEntryAttributes()["Word"][wordstr]["Morph"] = morph;
217  if (morphClass.length())
218  module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = morphClass;
219  if (src.length())
220  module->getEntryAttributes()["Word"][wordstr]["Src"] = src;
221  if (page.length())
222  module->getEntryAttributes()["Word"][wordstr]["Page"] = page;
223 
224  if (wtag.isEmpty()) {
225  int j;
226  for (j = (int)token.length()-1; ((j>0) && (strchr(" /", token[j]))); j--);
227  token.size(j+1);
228  }
229 
230  token += " wn=\"";
231  token += wordstr;
232  token += "\"";
233 
234  if (wtag.isEmpty()) {
235  token += "/";
236  }
237 
238  wordNum++;
239  }
240 
241  // if we won't want strongs, then lets get them out of lemma
242  if (!option) {
243  int count = wtag.getAttributePartCount("lemma", ' ');
244  for (int i = 0; i < count; ++i) {
245  SWBuf a = wtag.getAttribute("lemma", i, ' ');
246  const char *prefix = a.stripPrefix(':');
247  if ((prefix) && (!strcmp(prefix, "x-Strongs") || !strcmp(prefix, "strong") || !strcmp(prefix, "Strong"))) {
248  // remove attribute part
249  wtag.setAttribute("lemma", 0, i, ' ');
250  --i;
251  --count;
252  }
253  }
254 
255 
256  }
257  token = wtag;
258  token.trim();
259  // drop <>
260  token << 1;
261  token--;
262  }
263  if (token.startsWith("/w")) { // Word End
264  if (module->isProcessEntryAttributes()) {
265  if (wordStart) {
266  SWBuf tmp;
267  tmp.append(wordStart, (from-wordStart)-3);
268  sprintf(wordstr, "%03d", wordNum-1);
269  module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp;
270  }
271  }
272  wordStart = 0;
273  }
274 
275  // keep token in text
276  text.append('<');
277  text.append(token);
278  text.append('>');
279 
280  continue;
281  }
282  if (intoken) {
283  token += *from;
284  }
285  else {
286  text.append(*from);
287  }
288  }
289  return 0;
290 }
291 
int page
Definition: imp2vs.cpp:303
#define SWORD_NAMESPACE_START
Definition: defs.h:39
Definition: swbuf.h:47
unsigned long length() const
Definition: swbuf.h:197
const char * setAttribute(const char *attribName, const char *attribValue, int partNum=-1, char partSplit= '|')
Definition: utilxml.cpp:248
SWText * module
Definition: osis2mod.cpp:105
Definition: utilxml.h:38
bool startsWith(const SWBuf &prefix) const
Definition: swbuf.h:486
bool isEmpty() const
Definition: utilxml.h:60
static const StringList * oValues()
const char * c_str() const
Definition: swbuf.h:158
std::list< SWBuf > StringList
Definition: swmodule.cpp:91
SWBuf & append(const char *str, long max=-1)
Definition: swbuf.h:274
static const char oName[]
SWBuf & trim()
Definition: swbuf.h:443
#define SWDYNAMIC_CAST(className, object)
Definition: defs.h:47
SWBuf & replaceBytes(const char *targets, char newByte)
Definition: swbuf.h:467
unsigned long size() const
Definition: swbuf.h:185
const char * stripPrefix(char separator, bool endOfStringAsSeparator=false)
Definition: swbuf.h:457
virtual bool isProcessEntryAttributes() const
Definition: swmodule.h:832
virtual AttributeTypeList & getEntryAttributes() const
Definition: swmodule.h:817
static const char * choices[4]
const char * getAttribute(const char *attribName, int partNum=-1, char partSplit= '|') const
Definition: utilxml.cpp:230
static const char oTip[]
virtual char processText(SWBuf &text, const SWKey *key=0, const SWModule *module=0)
Definition: osisstrongs.cpp:56
#define SWORD_NAMESPACE_END
Definition: defs.h:40
SWBuf & setFormatted(const char *format,...)
Definition: swbuf.cpp:50
Definition: swkey.h:77
virtual ~OSISStrongs()
Definition: osisstrongs.cpp:52
virtual char getTestament() const
Definition: versekey.cpp:1498
int getAttributePartCount(const char *attribName, char partSplit= '|') const
Definition: utilxml.cpp:218