The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
imp2gbs.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * imp2gbs.cpp - Utility to import GenBooks in IMP format
4  *
5  * $Id: imp2gbs.cpp 3403 2016-02-09 16:53:41Z dmsmith $
6  *
7  * Copyright 2002-2013 CrossWire Bible Society (http://www.crosswire.org)
8  * CrossWire Bible Society
9  * P. O. Box 2528
10  * Tempe, AZ 85280-2528
11  *
12  * This program is free software; you can redistribute it and/or modify it
13  * under the terms of the GNU General Public License as published by the
14  * Free Software Foundation version 2.
15  *
16  * This program is distributed in the hope that it will be useful, but
17  * WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * General Public License for more details.
20  *
21  */
22 
23 #ifdef _MSC_VER
24  #pragma warning( disable: 4251 )
25 #endif
26 
27 #include <ctype.h>
28 #include <stdio.h>
29 #include <errno.h>
30 #include <stdlib.h>
31 
32 #include <entriesblk.h>
33 #include <iostream>
34 #include <treekeyidx.h>
35 #include <rawgenbook.h>
36 #include <utilstr.h>
37 #include <filemgr.h>
38 #include <utf8greekaccents.h>
39 #include <stringmgr.h>
40 
41 #ifdef _ICU_
42 #include <unicode/utypes.h>
43 #include <unicode/ucnv.h>
44 #include <unicode/ustring.h>
45 #include <unicode/uchar.h>
46 #include <unicode/unistr.h>
47 #include <unicode/translit.h>
48 #include <unicode/locid.h>
49 #endif
50 
51 #ifndef NO_SWORD_NAMESPACE
52 using namespace sword;
53 #endif
54 
55 SWBuf outPath;
56 SWBuf inFile;
57 bool toUpper = false;
58 bool greekFilter = false;
59 bool augMod = false;
60 bool augEnt = true;
61 int lexLevels = 0;
63 
64 
/**
 * Print the command-line help text to stderr and terminate the process.
 *
 * @param app program name shown in the "usage:" line (callers pass argv[0])
 *
 * This function does not return; it always ends with exit(-1).
 */
void usage(const char *app) {
    // Help text that follows the "usage:" line, emitted verbatim in order.
    static const char *const optionHelp[] = {
        "\t-o <outPath>\n\t\tSpecify an output Path other than inFile location.\n",
        "\t-a\n\t\tAugment Module [default: create new]\n",
        "\t-O\n\t\tOverwrite entries of same key [default: append to]\n",
        "\t-U\n\t\tKey filter: Convert toUpper\n",
        "\t-g\n\t\tKey filter: Strip Greek diacritics\n",
        "\t-l <levels>\n\t\tKey filter: Pseudo-Lexicon n-level generation using first character\n",
        "\t\te.g. -l 2 \"Abbey\" -> \"A/AB/Abbey\"\n",
        "\n"
    };

    fputs("imp2gbs 1.0 General Book module creation tool for the SWORD Project\n\n", stderr);
    fprintf(stderr, "usage: %s <inFile> [OPTIONS]\n", app);

    for (unsigned int k = 0; k < sizeof(optionHelp) / sizeof(optionHelp[0]); ++k) {
        fputs(optionHelp[k], stderr);
    }

    exit(-1);
}
78 
79 
80 void parseParams(int argc, char **argv) {
81 
82  if (argc < 2) {
83  usage(*argv);
84  }
85 
86  inFile = argv[1];
87 
88  for (int i = 2; i < argc; i++) {
89  if (!strcmp(argv[i], "-o")) {
90  if ((i+1 < argc) && (argv[i+1][0] != '-')) {
91  outPath = argv[i+1];
92  i++;
93  }
94  else usage(*argv);
95  }
96  else if (!strcmp(argv[i], "-U")) {
98  toUpper = true;
99  }
100  else {
101  fprintf(stderr, "Error: %s. Cannot reliably toUpper without UTF8 support\n\t(recompile with ICU enabled)\n\n", *argv);
102  usage(*argv);
103  }
104  }
105  else if (!strcmp(argv[i], "-g")) {
106  greekFilter = true;
107  }
108  else if (!strcmp(argv[i], "-O")) {
109  augEnt = false;
110  }
111  else if (!strcmp(argv[i], "-a")) {
112  augMod = true;
113  }
114  else if (!strcmp(argv[i], "-l")) {
115  if (i+1 < argc) {
116  lexLevels = atoi(argv[i+1]);
117  i++;
118  }
119  if (!lexLevels) usage(*argv);
120  }
121  }
122  if (!outPath.size()) {
123  outPath = inFile;
124  unsigned int i;
125  for (i = 0; (i < outPath.size() && outPath[i] != '.'); i++);
126  outPath.size(i);
127  }
128 }
129 
130 
131 void writeEntry(SWModule *book, SWBuf keyBuffer, SWBuf entBuffer) {
132 
133 
134  if (greekFilter) {
135  greekAccentsFilter.processText(keyBuffer);
136  }
137 
138  if (toUpper) {
139  unsigned size = (keyBuffer.size()+5)*3;
140  keyBuffer.setFillByte(0);
141  keyBuffer.resize(size);
142  StringMgr::getSystemStringMgr()->upperUTF8(keyBuffer.getRawData(), size-2);
143  }
144 
145 // Added for Hesychius, but this stuff should be pushed back into new StringMgr
146 // functionality
147 #ifdef _ICU_
148 // if (lexLevels) {
149  if (lexLevels && !keyBuffer.startsWith("/Intro")) {
150  unsigned size = (keyBuffer.size()+(lexLevels*2));
151  keyBuffer.setFillByte(0);
152  keyBuffer.resize(size);
153 
154  UErrorCode err = U_ZERO_ERROR;
155 
156  int max = (size+5)*3;
157  UChar *ubuffer = new UChar[max+10];
158  int32_t len;
159 
160  u_strFromUTF8(ubuffer, max+9, &len, keyBuffer.c_str(), -1, &err);
161  if (err == U_ZERO_ERROR) {
162  UChar *upper = new UChar[(lexLevels+1)*3];
163  memcpy(upper, ubuffer, lexLevels*sizeof(UChar));
164  upper[lexLevels] = 0;
165  len = u_strToUpper(upper, (lexLevels+1)*3, upper, -1, 0, &err);
166  memmove(ubuffer+len+1, ubuffer, (max-len)*sizeof(UChar));
167  memcpy(ubuffer, upper, len*sizeof(UChar));
168  ubuffer[len] = '/';
169  delete [] upper;
170 
171  int totalShift = 0;
172  for (int i = lexLevels-1; i; i--) {
173  int shift = (i < len)? i : len;
174  memmove(ubuffer+(shift+1), ubuffer, (max-shift)*sizeof(UChar));
175  ubuffer[shift] = '/';
176  totalShift += (shift+1);
177  }
178  u_strToUTF8(keyBuffer.getRawData(), max, 0, ubuffer, -1, &err);
179  }
180 
181 /*
182  u_strFromUTF8(ubuffer, max+9, &len, keyBuffer.c_str(), -1, &err);
183  if (err == U_ZERO_ERROR) {
184  int totalShift = 0;
185  for (int i = lexLevels; i; i--) {
186  int shift = (i < len)? i : len;
187  memmove(ubuffer+(shift+1), ubuffer, (max-shift)*sizeof(UChar));
188  ubuffer[shift] = '/';
189  totalShift += (shift+1);
190  }
191  UChar *upper = new UChar[(totalShift+1)*3];
192  memcpy(upper, ubuffer, totalShift*sizeof(UChar));
193  upper[totalShift] = 0;
194  len = u_strToUpper(upper, (totalShift+1)*3, upper, -1, 0, &err);
195  memmove(ubuffer+len, ubuffer+totalShift, (max-totalShift)*sizeof(UChar));
196  memcpy(ubuffer, upper, len*sizeof(UChar));
197  delete [] upper;
198  u_strToUTF8(keyBuffer.getRawData(), max, 0, ubuffer, -1, &err);
199  }
200 */
201 
202  delete [] ubuffer;
203  }
204 #endif
205 
206  std::cout << keyBuffer << std::endl;
207 
208  book->setKey(keyBuffer.c_str());
209 
210  // check to see if we already have an entry
211  for (int i = 2; book->getKey()->popError() != KEYERR_OUTOFBOUNDS; i++) {
212  SWBuf key;
213  key.setFormatted("%s {%d}", keyBuffer.c_str(), i);
214  std::cout << "dup key, trying: " << key << std::endl;
215  book->setKey(key.c_str());
216  }
217 
218  book->setEntry(entBuffer);
219 }
220 
221 
222 int main(int argc, char **argv) {
223  greekAccentsFilter.setOptionValue("Off"); // off = accents off
224  parseParams(argc, argv);
225 
226  // Let's see if we can open our input file
228  if (fd->getFd() < 0) {
229  fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], inFile.c_str());
230  exit(-2);
231  }
232 
233  RawGenBook *book;
234 
235  // Do some initialization stuff
236  if (!augMod) {
238  }
239  book = new RawGenBook(outPath);
240 
241  SWBuf lineBuffer;
242  SWBuf keyBuffer;
243  SWBuf entBuffer;
244 
245  bool more = true;
246  do {
247  more = FileMgr::getLine(fd, lineBuffer)!=0;
248  if (lineBuffer.startsWith("$$$")) {
249  if ((keyBuffer.size()) && (entBuffer.size())) {
250  writeEntry(book, keyBuffer, entBuffer);
251  }
252  keyBuffer = lineBuffer;
253  keyBuffer << 3;
254  keyBuffer.trim();
255  entBuffer.size(0);
256  }
257  else {
258  if (keyBuffer.size()) {
259  entBuffer += lineBuffer;
260  entBuffer += "\n";
261  }
262  }
263  } while (more);
264  if ((keyBuffer.size()) && (entBuffer.size())) {
265  writeEntry(book, keyBuffer, entBuffer);
266  }
267 
268  delete book;
269 
270  FileMgr::getSystemFileMgr()->close(fd);
271 
272  return 0;
273 }
274 
275 
276 
static unsigned int RDONLY
Definition: filemgr.h:75
static char createModule(const char *ipath)
Definition: rawgenbook.cpp:191
static bool hasUTF8Support()
Definition: stringmgr.h:58
int main(int argc, char **argv)
Definition: addcomment.cpp:32
int getFd()
Definition: filemgr.h:231
static StringMgr * getSystemStringMgr()
Definition: stringmgr.cpp:197
virtual char * upperUTF8(char *text, unsigned int max=0) const
Definition: stringmgr.cpp:223
bool augEnt
Definition: imp2gbs.cpp:60
bool augMod
Definition: imp2gbs.cpp:59
void writeEntry(SWModule *book, SWBuf keyBuffer, SWBuf entBuffer)
Definition: imp2gbs.cpp:131
#define KEYERR_OUTOFBOUNDS
Definition: swkey.h:35
void usage(const char *app)
Definition: imp2gbs.cpp:65
int size
Definition: regex.c:5043
int lexLevels
Definition: imp2gbs.cpp:61
bool greekFilter
Definition: imp2gbs.cpp:58
virtual char processText(SWBuf &text, const SWKey *key=0, const SWModule *module=0)
virtual void setOptionValue(const char *ival)
Definition: swoptfilter.cpp:52
void parseParams(int argc, char **argv)
Definition: imp2gbs.cpp:80
static char getLine(FileDesc *fDesc, SWBuf &line)
Definition: filemgr.cpp:527
SWBuf inFile
Definition: imp2gbs.cpp:56
SWBuf outPath
Definition: imp2gbs.cpp:55
UTF8GreekAccents greekAccentsFilter
Definition: imp2gbs.cpp:62
bool toUpper
Definition: imp2gbs.cpp:57
static FileMgr * getSystemFileMgr()
Definition: filemgr.cpp:101