The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
osismorphsegmentation.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * osismorphsegmentation.cpp - SWFilter descendant to toggle splitting of
4  * morphemes (for morpheme segmented Hebrew in
5  * the WLC)
6  *
7  * $Id: osismorphsegmentation.cpp 3828 2020-11-24 23:15:44Z scribe $
8  *
9  * Copyright 2006-2013 CrossWire Bible Society (http://www.crosswire.org)
10  * CrossWire Bible Society
11  * P. O. Box 2528
12  * Tempe, AZ 85280-2528
13  *
14  * This program is free software; you can redistribute it and/or modify it
15  * under the terms of the GNU General Public License as published by the
16  * Free Software Foundation version 2.
17  *
18  * This program is distributed in the hope that it will be useful, but
19  * WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  * General Public License for more details.
22  *
23  */
24 
25 #include <osismorphsegmentation.h>
26 #include <stdlib.h>
27 #include <utilxml.h>
28 #include <swmodule.h>
29 #include <swbuf.h>
30 
31 
33 
34 namespace {
35 
36  static const char oName[] = "Morpheme Segmentation";
37  static const char oTip[] = "Toggles Morpheme Segmentation On and Off, when present";
38 
39  static const StringList *oValues() {
40  static const SWBuf choices[3] = {"Off", "On", ""};
41  static const StringList oVals(&choices[0], &choices[2]);
42  return &oVals;
43  }
44 }
45 
46 
48 }
49 
50 
52 
53 
54 char OSISMorphSegmentation::processText(SWBuf &text, const SWKey * /*key*/, const SWModule *module) {
55  SWBuf token;
56  bool intoken = false;
57  bool hide = false;
58 
59  SWBuf orig( text );
60  const char *from = orig.c_str();
61 
62  XMLTag tag;
63  SWBuf tagText = "";
64  unsigned int morphemeNum = 0;
65  bool inMorpheme = false;
66  SWBuf buf;
67 
68  for (text = ""; *from; ++from) {
69  if (*from == '<') {
70  intoken = true;
71  token = "";
72  continue;
73  }
74 
75  if (*from == '>') { // process tokens
76  intoken = false;
77 
78  if (!strncmp(token.c_str(), "seg ", 4) || !strncmp(token.c_str(), "/seg", 4)) {
79  tag = token;
80 
81  if (!tag.isEndTag() && tag.getAttribute("type") &&
82  ( !strcmp("morph", tag.getAttribute("type"))
83  || !strcmp("x-morph", tag.getAttribute("type")))) { // <seg type="morph"> start tag
84  hide = !option; //only hide if option is Off
85  tagText = "";
86  inMorpheme = true;
87  }
88 
89  if (tag.isEndTag() && inMorpheme) {
90  buf.setFormatted("%.3d", morphemeNum++);
91  module->getEntryAttributes()["Morpheme"][buf]["body"] = tagText;
92  inMorpheme = false;
93  }
94  if (hide) { //hides start and end tags as long as hide is set
95 
96  if (tag.isEndTag()) { // </seg>
97  hide = false;
98  }
99 
100  continue; //leave out the current token
101  }
102  } //end of seg tag handling
103 
104  text.append('<');
105  text.append(token);
106  text.append('>');
107 
108  if (inMorpheme) {
109  tagText.append('<');
110  tagText.append(token);
111  tagText.append('>');
112  }
113 
114  hide = false;
115 
116  continue;
117  } //end of intoken part
118 
119  if (intoken) { //copy token
120  token.append(*from);
121  }
122  else { //copy text which is not inside of a tag
123  text.append(*from);
124  if (inMorpheme) {
125  tagText.append(*from);
126  }
127  }
128  }
129  return 0;
130 }
131 
#define SWORD_NAMESPACE_START
Definition: defs.h:39
Definition: swbuf.h:47
SWText * module
Definition: osis2mod.cpp:105
Definition: utilxml.h:38
virtual char processText(SWBuf &text, const SWKey *key=0, const SWModule *module=0)
static const StringList * oValues()
const char * c_str() const
Definition: swbuf.h:158
std::list< SWBuf > StringList
Definition: swmodule.cpp:91
SWBuf & append(const char *str, long max=-1)
Definition: swbuf.h:274
static const char oName[]
virtual AttributeTypeList & getEntryAttributes() const
Definition: swmodule.h:817
static const char * choices[4]
const char * getAttribute(const char *attribName, int partNum=-1, char partSplit= '|') const
Definition: utilxml.cpp:230
static const char oTip[]
bool isEndTag(const char *eID=0) const
Definition: utilxml.cpp:323
#define SWORD_NAMESPACE_END
Definition: defs.h:40
SWBuf & setFormatted(const char *format,...)
Definition: swbuf.cpp:50
Definition: swkey.h:77