The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
imp2gbs.cpp File Reference
#include <ctype.h>
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <entriesblk.h>
#include <iostream>
#include <treekeyidx.h>
#include <rawgenbook.h>
#include <utilstr.h>
#include <filemgr.h>
#include <utf8greekaccents.h>
#include <stringmgr.h>
+ Include dependency graph for imp2gbs.cpp:

Go to the source code of this file.

Functions

int main (int argc, char **argv)
 
void parseParams (int argc, char **argv)
 
void usage (const char *app)
 
void writeEntry (SWModule *book, SWBuf keyBuffer, SWBuf entBuffer)
 

Variables

bool augEnt = true
 
bool augMod = false
 
UTF8GreekAccents greekAccentsFilter
 
bool greekFilter = false
 
SWBuf inFile
 
int lexLevels = 0
 
SWBuf outPath
 
bool toUpper = false
 

Function Documentation

int main ( int  argc,
char **  argv 
)

Definition at line 222 of file imp2gbs.cpp.

222  {
223  greekAccentsFilter.setOptionValue("Off"); // off = accents off
224  parseParams(argc, argv);
225 
226  // Let's see if we can open our input file
227  FileDesc *fd = FileMgr::getSystemFileMgr()->open(inFile, FileMgr::RDONLY);
228  if (fd->getFd() < 0) {
229  fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], inFile.c_str());
230  exit(-2);
231  }
232 
233  RawGenBook *book;
234 
235  // Do some initialization stuff
236  if (!augMod) {
237  RawGenBook::createModule(outPath);
238  }
239  book = new RawGenBook(outPath);
240 
241  SWBuf lineBuffer;
242  SWBuf keyBuffer;
243  SWBuf entBuffer;
244 
245  bool more = true;
246  do {
247  more = FileMgr::getLine(fd, lineBuffer)!=0;
248  if (lineBuffer.startsWith("$$$")) {
249  if ((keyBuffer.size()) && (entBuffer.size())) {
250  writeEntry(book, keyBuffer, entBuffer);
251  }
252  keyBuffer = lineBuffer;
253  keyBuffer << 3;
254  keyBuffer.trim();
255  entBuffer.size(0);
256  }
257  else {
258  if (keyBuffer.size()) {
259  entBuffer += lineBuffer;
260  entBuffer += "\n";
261  }
262  }
263  } while (more);
264  if ((keyBuffer.size()) && (entBuffer.size())) {
265  writeEntry(book, keyBuffer, entBuffer);
266  }
267 
268  delete book;
269 
270  FileMgr::getSystemFileMgr()->close(fd);
271 
272  return 0;
273 }
static unsigned int RDONLY
Definition: filemgr.h:75
static char createModule(const char *ipath)
Definition: rawgenbook.cpp:191
int getFd()
Definition: filemgr.h:231
bool augMod
Definition: imp2gbs.cpp:59
void writeEntry(SWModule *book, SWBuf keyBuffer, SWBuf entBuffer)
Definition: imp2gbs.cpp:131
virtual void setOptionValue(const char *ival)
Definition: swoptfilter.cpp:52
void parseParams(int argc, char **argv)
Definition: imp2gbs.cpp:80
static char getLine(FileDesc *fDesc, SWBuf &line)
Definition: filemgr.cpp:527
SWBuf inFile
Definition: imp2gbs.cpp:56
SWBuf outPath
Definition: imp2gbs.cpp:55
UTF8GreekAccents greekAccentsFilter
Definition: imp2gbs.cpp:62
static FileMgr * getSystemFileMgr()
Definition: filemgr.cpp:101
void parseParams ( int  argc,
char **  argv 
)

Definition at line 80 of file imp2gbs.cpp.

80  {
81 
82  if (argc < 2) {
83  usage(*argv);
84  }
85 
86  inFile = argv[1];
87 
88  for (int i = 2; i < argc; i++) {
89  if (!strcmp(argv[i], "-o")) {
90  if ((i+1 < argc) && (argv[i+1][0] != '-')) {
91  outPath = argv[i+1];
92  i++;
93  }
94  else usage(*argv);
95  }
96  else if (!strcmp(argv[i], "-U")) {
97  if (StringMgr::hasUTF8Support()) {
98  toUpper = true;
99  }
100  else {
101  fprintf(stderr, "Error: %s. Cannot reliably toUpper without UTF8 support\n\t(recompile with ICU enabled)\n\n", *argv);
102  usage(*argv);
103  }
104  }
105  else if (!strcmp(argv[i], "-g")) {
106  greekFilter = true;
107  }
108  else if (!strcmp(argv[i], "-O")) {
109  augEnt = false;
110  }
111  else if (!strcmp(argv[i], "-a")) {
112  augMod = true;
113  }
114  else if (!strcmp(argv[i], "-l")) {
115  if (i+1 < argc) {
116  lexLevels = atoi(argv[i+1]);
117  i++;
118  }
119  if (!lexLevels) usage(*argv);
120  }
121  }
122  if (!outPath.size()) {
123  outPath = inFile;
124  unsigned int i;
125  for (i = 0; (i < outPath.size() && outPath[i] != '.'); i++);
126  outPath.size(i);
127  }
128 }
static bool hasUTF8Support()
Definition: stringmgr.h:58
bool augEnt
Definition: imp2gbs.cpp:60
bool augMod
Definition: imp2gbs.cpp:59
void usage(const char *app)
Definition: imp2gbs.cpp:65
int lexLevels
Definition: imp2gbs.cpp:61
bool greekFilter
Definition: imp2gbs.cpp:58
SWBuf inFile
Definition: imp2gbs.cpp:56
SWBuf outPath
Definition: imp2gbs.cpp:55
bool toUpper
Definition: imp2gbs.cpp:57
void usage ( const char *  app)

Definition at line 65 of file imp2gbs.cpp.

65  {
66  fprintf(stderr, "imp2gbs 1.0 General Book module creation tool for the SWORD Project\n\n");
67  fprintf(stderr, "usage: %s <inFile> [OPTIONS]\n", app);
68  fprintf(stderr, "\t-o <outPath>\n\t\tSpecify an output Path other than inFile location.\n");
69  fprintf(stderr, "\t-a\n\t\tAugment Module [default: create new]\n");
70  fprintf(stderr, "\t-O\n\t\tOverwrite entries of same key [default: append to]\n");
71  fprintf(stderr, "\t-U\n\t\tKey filter: Convert toUpper\n");
72  fprintf(stderr, "\t-g\n\t\tKey filter: Strip Greek diacritics\n");
73  fprintf(stderr, "\t-l <levels>\n\t\tKey filter: Pseudo-Lexicon n-level generation using first character\n");
74  fprintf(stderr, "\t\te.g. -l 2 \"Abbey\" -> \"A/AB/Abbey\"\n");
75  fprintf(stderr, "\n");
76  exit (-1);
77 }
void writeEntry ( SWModule *  book,
SWBuf  keyBuffer,
SWBuf  entBuffer 
)

Definition at line 131 of file imp2gbs.cpp.

131  {
132 
133 
134  if (greekFilter) {
135  greekAccentsFilter.processText(keyBuffer);
136  }
137 
138  if (toUpper) {
139  unsigned size = (keyBuffer.size()+5)*3;
140  keyBuffer.setFillByte(0);
141  keyBuffer.resize(size);
142  StringMgr::getSystemStringMgr()->upperUTF8(keyBuffer.getRawData(), size-2);
143  }
144 
145 // Added for Hesychius, but this stuff should be pushed back into new StringMgr
146 // functionality
147 #ifdef _ICU_
148 // if (lexLevels) {
149  if (lexLevels && !keyBuffer.startsWith("/Intro")) {
150  unsigned size = (keyBuffer.size()+(lexLevels*2));
151  keyBuffer.setFillByte(0);
152  keyBuffer.resize(size);
153 
154  UErrorCode err = U_ZERO_ERROR;
155 
156  int max = (size+5)*3;
157  UChar *ubuffer = new UChar[max+10];
158  int32_t len;
159 
160  u_strFromUTF8(ubuffer, max+9, &len, keyBuffer.c_str(), -1, &err);
161  if (err == U_ZERO_ERROR) {
162  UChar *upper = new UChar[(lexLevels+1)*3];
163  memcpy(upper, ubuffer, lexLevels*sizeof(UChar));
164  upper[lexLevels] = 0;
165  len = u_strToUpper(upper, (lexLevels+1)*3, upper, -1, 0, &err);
166  memmove(ubuffer+len+1, ubuffer, (max-len)*sizeof(UChar));
167  memcpy(ubuffer, upper, len*sizeof(UChar));
168  ubuffer[len] = '/';
169  delete [] upper;
170 
171  int totalShift = 0;
172  for (int i = lexLevels-1; i; i--) {
173  int shift = (i < len)? i : len;
174  memmove(ubuffer+(shift+1), ubuffer, (max-shift)*sizeof(UChar));
175  ubuffer[shift] = '/';
176  totalShift += (shift+1);
177  }
178  u_strToUTF8(keyBuffer.getRawData(), max, 0, ubuffer, -1, &err);
179  }
180 
181 /*
182  u_strFromUTF8(ubuffer, max+9, &len, keyBuffer.c_str(), -1, &err);
183  if (err == U_ZERO_ERROR) {
184  int totalShift = 0;
185  for (int i = lexLevels; i; i--) {
186  int shift = (i < len)? i : len;
187  memmove(ubuffer+(shift+1), ubuffer, (max-shift)*sizeof(UChar));
188  ubuffer[shift] = '/';
189  totalShift += (shift+1);
190  }
191  UChar *upper = new UChar[(totalShift+1)*3];
192  memcpy(upper, ubuffer, totalShift*sizeof(UChar));
193  upper[totalShift] = 0;
194  len = u_strToUpper(upper, (totalShift+1)*3, upper, -1, 0, &err);
195  memmove(ubuffer+len, ubuffer+totalShift, (max-totalShift)*sizeof(UChar));
196  memcpy(ubuffer, upper, len*sizeof(UChar));
197  delete [] upper;
198  u_strToUTF8(keyBuffer.getRawData(), max, 0, ubuffer, -1, &err);
199  }
200 */
201 
202  delete [] ubuffer;
203  }
204 #endif
205 
206  std::cout << keyBuffer << std::endl;
207 
208  book->setKey(keyBuffer.c_str());
209 
210  // check to see if we already have an entry
211  for (int i = 2; book->getKey()->popError() != KEYERR_OUTOFBOUNDS; i++) {
212  SWBuf key;
213  key.setFormatted("%s {%d}", keyBuffer.c_str(), i);
214  std::cout << "dup key, trying: " << key << std::endl;
215  book->setKey(key.c_str());
216  }
217 
218  book->setEntry(entBuffer);
219 }
static StringMgr * getSystemStringMgr()
Definition: stringmgr.cpp:197
virtual char * upperUTF8(char *text, unsigned int max=0) const
Definition: stringmgr.cpp:223
#define KEYERR_OUTOFBOUNDS
Definition: swkey.h:35
int size
Definition: regex.c:5043
int lexLevels
Definition: imp2gbs.cpp:61
bool greekFilter
Definition: imp2gbs.cpp:58
virtual char processText(SWBuf &text, const SWKey *key=0, const SWModule *module=0)
UTF8GreekAccents greekAccentsFilter
Definition: imp2gbs.cpp:62
bool toUpper
Definition: imp2gbs.cpp:57

Variable Documentation

bool augEnt = true

Definition at line 60 of file imp2gbs.cpp.

bool augMod = false

Definition at line 59 of file imp2gbs.cpp.

UTF8GreekAccents greekAccentsFilter

Definition at line 62 of file imp2gbs.cpp.

bool greekFilter = false

Definition at line 58 of file imp2gbs.cpp.

SWBuf inFile

Definition at line 56 of file imp2gbs.cpp.

int lexLevels = 0

Definition at line 61 of file imp2gbs.cpp.

SWBuf outPath

Definition at line 55 of file imp2gbs.cpp.

bool toUpper = false

Definition at line 57 of file imp2gbs.cpp.