The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
rawstr.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * rawstr.cpp - code for class 'RawStr' - a module that reads raw text
4  * files using the index and data files <path>.idx and <path>.dat
5  * and provides lookup and parsing functions based on
6  * class StrKey
7  *
8  * $Id: rawstr.cpp 3822 2020-11-03 18:54:47Z scribe $
9  *
10  * Copyright 1998-2013 CrossWire Bible Society (http://www.crosswire.org)
11  * CrossWire Bible Society
12  * P. O. Box 2528
13  * Tempe, AZ 85280-2528
14  *
15  * This program is free software; you can redistribute it and/or modify it
16  * under the terms of the GNU General Public License as published by the
17  * Free Software Foundation version 2.
18  *
19  * This program is distributed in the hope that it will be useful, but
20  * WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  * General Public License for more details.
23  *
24  */
25 
26 #include <stdio.h>
27 #include <fcntl.h>
28 #include <errno.h>
29 
30 #include <stdlib.h>
31 #include <utilstr.h>
32 #include <rawstr.h>
33 #include <sysdata.h>
34 #include <swlog.h>
35 #include <filemgr.h>
36 #include <swbuf.h>
37 #include <stringmgr.h>
38 
40 
41 /******************************************************************************
42  * RawStr Statics
43  */
44 
45 int RawStr::instance = 0;
46 const char RawStr::nl = '\n';
47 const int RawStr::IDXENTRYSIZE = 6;
48 
49 
50 
51 /******************************************************************************
52  * RawStr Constructor - Initializes data for instance of RawStr
53  *
54  * ENT: ipath - path of the directory where data and index files are located.
55  * be sure to include the trailing separator (e.g. '/' or '\')
56  * (e.g. 'modules/texts/rawtext/webster/')
57  */
58 
59 RawStr::RawStr(const char *ipath, int fileMode, bool caseSensitive) : caseSensitive(caseSensitive)
60 {
61  SWBuf buf;
62 
63  lastoff = -1;
64  path = 0;
65  stdstr(&path, ipath);
66 
67  if (fileMode == -1) { // try read/write if possible
68  fileMode = FileMgr::RDWR;
69  }
70 
71  buf.setFormatted("%s.idx", path);
72  idxfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
73 
74  buf.setFormatted("%s.dat", path);
75  datfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
76 
77  if (!datfd || datfd->getFd() < 0) {
78 // couldn't find datafile but this might be fine if we're
79 // merely instantiating a remote InstallMgr SWMgr
80 SWLOGD("Couldn't open file: %s. errno: %d", buf.c_str(), errno);
81  }
82 
83  instance++;
84 }
85 
86 
87 /******************************************************************************
88  * RawStr Destructor - Cleans up instance of RawStr
89  */
90 
92 {
93  if (path)
94  delete [] path;
95 
96  --instance;
97 
100 }
101 
102 
103 /******************************************************************************
104  * RawStr::getidxbufdat - Gets the index string at the given idx offset
105  * NOTE: buf is allocated and must be freed by
106  * calling function
107  *
108  * ENT: ioffset - offset in dat file to lookup
109  * buf - address of pointer to allocate for storage of string
110  */
111 
112 void RawStr::getIDXBufDat(long ioffset, char **buf) const
113 {
114  int size;
115  char ch;
116  if (datfd && datfd->getFd() >= 0) {
117  datfd->seek(ioffset, SEEK_SET);
118  for (size = 0; datfd->read(&ch, 1) == 1; size++) {
119  if ((ch == '\\') || (ch == 10) || (ch == 13))
120  break;
121  }
122  *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1);
123  if (size) {
124  datfd->seek(ioffset, SEEK_SET);
125  datfd->read(*buf, size);
126  }
127  (*buf)[size] = 0;
128  if (!caseSensitive) toupperstr_utf8(*buf, size*2);
129  }
130  else {
131  *buf = (*buf) ? (char *)realloc(*buf, 1) : (char *)malloc(1);
132  **buf = 0;
133  }
134 }
135 
136 
137 /******************************************************************************
138  * RawStr::getidxbuf - Gets the index string at the given idx offset
139  * NOTE: buf is allocated and must be freed by
140  * calling function
141  *
142  * ENT: ioffset - offset in idx file to lookup
143  * buf - address of pointer to allocate for storage of string
144  */
145 
146 void RawStr::getIDXBuf(long ioffset, char **buf) const
147 {
148  SW_u32 offset;
149 
150  if (idxfd && idxfd->getFd() >= 0) {
151  idxfd->seek(ioffset, SEEK_SET);
152  idxfd->read(&offset, 4);
153 
154  offset = swordtoarch32(offset);
155 
156  getIDXBufDat(offset, buf);
157  }
158 }
159 
160 
161 /******************************************************************************
162  * RawStr::findoffset - Finds the offset of the key string from the indexes
163  *
164  * ENT: key - key string to lookup
165  * start - address to store the starting offset
166  * size - address to store the size of the entry
167  * away - number of entries before of after to jump
168  * (default = 0)
169  *
170  * RET: error status -1 general error; -2 new file
171  */
172 
173 signed char RawStr::findOffset(const char *ikey, SW_u32 *start, SW_u16 *size, long away, SW_u32 *idxoff) const
174 {
175  char *trybuf, *maxbuf, *key = 0, quitflag = 0;
176  signed char retval = -1;
177  long headoff, tailoff, tryoff = 0, maxoff = 0;
178  int diff = 0;
179  bool awayFromSubstrCheck = false;
180 
181  if (idxfd->getFd() >=0) {
182  tailoff = maxoff = idxfd->seek(0, SEEK_END) - 6;
183  retval = (tailoff >= 0) ? 0 : -2; // if NOT new file
184  if (*ikey && retval != -2) {
185  headoff = 0;
186 
187  stdstr(&key, ikey, 3);
188  if (!caseSensitive) toupperstr_utf8(key, (unsigned int)(strlen(key)*3));
189 
190  int keylen = (int)strlen(key);
191  bool substr = false;
192 
193  trybuf = maxbuf = 0;
194  getIDXBuf(maxoff, &maxbuf);
195 
196  while (headoff < tailoff) {
197  tryoff = (lastoff == -1) ? headoff + ((((tailoff / 6) - (headoff / 6))) / 2) * 6 : lastoff;
198  lastoff = -1;
199  getIDXBuf(tryoff, &trybuf);
200 
201  if (!*trybuf && tryoff) { // In case of extra entry at end of idx (not first entry)
202  tryoff += (tryoff > (maxoff / 2))?-6:6;
203  retval = -1;
204  break;
205  }
206 
207  diff = strcmp(key, trybuf);
208 
209  if (!diff)
210  break;
211 
212  if (!strncmp(trybuf, key, keylen)) substr = true;
213 
214  if (diff < 0)
215  tailoff = (tryoff == headoff) ? headoff : tryoff;
216  else headoff = tryoff;
217 
218  if (tailoff == headoff + 6) {
219  if (quitflag++)
220  headoff = tailoff;
221  }
222  }
223 
224  // didn't find exact match
225  if (headoff >= tailoff) {
226  tryoff = headoff;
227  if (!substr && ((tryoff != maxoff)||(strncmp(key, maxbuf, keylen)<0))) {
228  awayFromSubstrCheck = true;
229  away--; // if our entry doesn't startwith our key, prefer the previous entry over the next
230  }
231  }
232  if (trybuf)
233  free(trybuf);
234  delete [] key;
235  if (maxbuf)
236  free(maxbuf);
237  }
238  else tryoff = 0;
239 
240  idxfd->seek(tryoff, SEEK_SET);
241 
242  SW_u32 tmpStart;
243  SW_u16 tmpSize;
244  *start = *size = tmpStart = tmpSize = 0;
245  idxfd->read(&tmpStart, 4);
246  idxfd->read(&tmpSize, 2);
247  if (idxoff)
248  *idxoff = (SW_u32)tryoff;
249 
250  *start = swordtoarch32(tmpStart);
251  *size = swordtoarch16(tmpSize);
252 
253  while (away) {
254  unsigned long laststart = *start;
255  unsigned short lastsize = *size;
256  long lasttry = tryoff;
257  tryoff += (away > 0) ? 6 : -6;
258 
259  bool bad = false;
260  if (((tryoff + (away*6)) < -6) || (tryoff + (away*6) > (maxoff+6)))
261  bad = true;
262  else if (idxfd->seek(tryoff, SEEK_SET) < 0)
263  bad = true;
264  if (bad) {
265  if(!awayFromSubstrCheck)
266  retval = -1;
267  *start = (SW_u32)laststart;
268  *size = lastsize;
269  tryoff = lasttry;
270  if (idxoff)
271  *idxoff = (SW_u32)tryoff;
272  break;
273  }
274  idxfd->read(&tmpStart, 4);
275  idxfd->read(&tmpSize, 2);
276  if (idxoff)
277  *idxoff = (SW_u32)tryoff;
278 
279  *start = swordtoarch32(tmpStart);
280  *size = swordtoarch16(tmpSize);
281 
282  if (((laststart != *start) || (lastsize != *size)) && (*size))
283  away += (away < 0) ? 1 : -1;
284  }
285 
286  lastoff = tryoff;
287  }
288  else {
289  *start = 0;
290  *size = 0;
291  if (idxoff)
292  *idxoff = 0;
293  retval = -1;
294  }
295  return retval;
296 }
297 
298 
299 /******************************************************************************
300  * RawStr::readtext - gets text at a given offset
301  *
302  * ENT:
303  * start - starting offset where the text is located in the file
304  * size - size of text entry
305  * buf - buffer to store text
306  *
307  */
308 
309 void RawStr::readText(SW_u32 istart, SW_u16 *isize, char **idxbuf, SWBuf &buf) const
310 {
311  unsigned int ch;
312  char *idxbuflocal = 0;
313  getIDXBufDat(istart, &idxbuflocal);
314  SW_u32 start = istart;
315 
316  do {
317  if (*idxbuf)
318  delete [] *idxbuf;
319 
320  buf = "";
321  buf.setFillByte(0);
322  buf.setSize(++(*isize));
323 
324  *idxbuf = new char [ (*isize) ];
325 
326  datfd->seek(start, SEEK_SET);
327  datfd->read(buf.getRawData(), (int)((*isize) - 1));
328 
329  for (ch = 0; buf[ch]; ch++) { // skip over index string
330  if (buf[ch] == 10) {
331  ch++;
332  break;
333  }
334  }
335  buf = SWBuf(buf.c_str()+ch);
336  // resolve link
337  if (!strncmp(buf.c_str(), "@LINK", 5)) {
338  for (ch = 0; buf[ch]; ch++) { // null before nl
339  if (buf[ch] == 10) {
340  buf[ch] = 0;
341  break;
342  }
343  }
344  findOffset(buf.c_str() + 6, &start, isize);
345  }
346  else break;
347  }
348  while (true); // while we're resolving links
349 
350  if (idxbuflocal) {
351  int localsize = (int)strlen(idxbuflocal);
352  localsize = (localsize < (*isize - 1)) ? localsize : (*isize - 1);
353  strncpy(*idxbuf, idxbuflocal, localsize);
354  (*idxbuf)[localsize] = 0;
355  free(idxbuflocal);
356  }
357 }
358 
359 
360 /******************************************************************************
361  * RawLD::settext - Sets text for current offset
362  *
363  * ENT: key - key for this entry
364  * buf - buffer to store
365  * len - length of buffer (0 - null terminated)
366  */
367 
368 void RawStr::doSetText(const char *ikey, const char *buf, long len)
369 {
370 
371  SW_u32 start, outstart;
372  SW_u32 idxoff;
373  SW_u32 endoff;
374  SW_s32 shiftSize;
375  SW_u16 size;
376  SW_u16 outsize;
377  char *tmpbuf = 0;
378  char *key = 0;
379  char *dbKey = 0;
380  char *idxBytes = 0;
381  char *outbuf = 0;
382  char *ch = 0;
383 
384  char errorStatus = findOffset(ikey, &start, &size, 0, &idxoff);
385  stdstr(&key, ikey, 2);
386  if (!caseSensitive) toupperstr_utf8(key, (unsigned int)(strlen(key)*2));
387 
388  len = (len < 0) ? strlen(buf) : len;
389 
390  getIDXBufDat(start, &dbKey);
391 
392  if (strcmp(key, dbKey) < 0) {
393  }
394  else if (strcmp(key, dbKey) > 0) {
395  if (errorStatus != (char)-2) // not a new file
396  idxoff += 6;
397  else idxoff = 0;
398  }
399  else if ((!strcmp(key, dbKey)) && (len>0 /*we're not deleting*/)) { // got absolute entry
400  do {
401  tmpbuf = new char [ size + 2 ];
402  memset(tmpbuf, 0, size + 2);
403  datfd->seek(start, SEEK_SET);
404  datfd->read(tmpbuf, (int)(size - 1));
405 
406  for (ch = tmpbuf; *ch; ch++) { // skip over index string
407  if (*ch == 10) {
408  ch++;
409  break;
410  }
411  }
412  memmove(tmpbuf, ch, size - (unsigned short)(ch-tmpbuf));
413 
414  // resolve link
415  if (!strncmp(tmpbuf, "@LINK", 5) && (len)) {
416  for (ch = tmpbuf; *ch; ch++) { // null before nl
417  if (*ch == 10) {
418  *ch = 0;
419  break;
420  }
421  }
422  findOffset(tmpbuf + 6, &start, &size, 0, &idxoff);
423  }
424  else break;
425  }
426  while (true); // while we're resolving links
427  }
428 
429  endoff = (SW_u32)idxfd->seek(0, SEEK_END);
430 
431  shiftSize = endoff - idxoff;
432 
433  if (shiftSize > 0) {
434  idxBytes = new char [ shiftSize ];
435  idxfd->seek(idxoff, SEEK_SET);
436  idxfd->read(idxBytes, shiftSize);
437  }
438 
439  outbuf = new char [ len + strlen(key) + 5 ];
440  sprintf(outbuf, "%s%c%c", key, 13, 10);
441  size = strlen(outbuf);
442  memcpy(outbuf + size, buf, len);
443  size = outsize = size + (len);
444 
445  start = outstart = (SW_u32)datfd->seek(0, SEEK_END);
446 
447  outstart = archtosword32(start);
448  outsize = archtosword16(size);
449 
450  idxfd->seek(idxoff, SEEK_SET);
451  if (len > 0) {
452  datfd->seek(start, SEEK_SET);
453  datfd->write(outbuf, (int)size);
454 
455  // add a new line to make data file easier to read in an editor
456  datfd->write(&nl, 1);
457 
458  idxfd->write(&outstart, 4);
459  idxfd->write(&outsize, 2);
460  if (idxBytes) {
461  idxfd->write(idxBytes, shiftSize);
462  delete [] idxBytes;
463  }
464  }
465  else { // delete entry
466  if (idxBytes) {
467  idxfd->write(idxBytes+6, shiftSize-6);
468  idxfd->seek(-1, SEEK_CUR); // last valid byte
469  FileMgr::getSystemFileMgr()->trunc(idxfd); // truncate index
470  delete [] idxBytes;
471  }
472  }
473 
474  delete [] key;
475  delete [] outbuf;
476  free(dbKey);
477 }
478 
479 
480 /******************************************************************************
481  * RawLD::linkentry - links one entry to another
482  *
483  * ENT: testmt - testament to find (0 - Bible/module introduction)
484  * destidxoff - dest offset into .vss
485  * srcidxoff - source offset into .vss
486  */
487 
488 void RawStr::doLinkEntry(const char *destkey, const char *srckey) {
489  char *text = new char [ strlen(destkey) + 7 ];
490  sprintf(text, "@LINK %s", destkey);
491  doSetText(srckey, text);
492  delete [] text;
493 }
494 
495 /******************************************************************************
496  * RawLD::CreateModule - Creates new module files
497  *
498  * ENT: path - directory to store module files
499  * RET: error status
500  */
501 
502 signed char RawStr::createModule(const char *ipath)
503 {
504  char *path = 0;
505  char *buf = new char [ strlen (ipath) + 20 ];
506  FileDesc *fd, *fd2;
507 
508  stdstr(&path, ipath);
509 
510  if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\'))
511  path[strlen(path)-1] = 0;
512 
513  sprintf(buf, "%s.dat", path);
514  FileMgr::removeFile(buf);
516  fd->getFd();
518 
519  sprintf(buf, "%s.idx", path);
520  FileMgr::removeFile(buf);
522  fd2->getFd();
524 
525  delete [] path;
526 
527  return 0;
528 }
529 
void setFillByte(char ch)
Definition: swbuf.h:146
#define SWORD_NAMESPACE_START
Definition: defs.h:39
long seek(long offset, int whence)
Definition: filemgr.cpp:143
long lastoff
Definition: rawstr.h:41
static int instance
Definition: rawstr.h:38
FileDesc * open(const char *path, int mode, bool tryDowngrade)
Definition: filemgr.cpp:175
Definition: swbuf.h:47
#define SEEK_CUR
Definition: zconf.h:245
#define archtosword32(x)
Definition: sysdata.h:97
char * path
Definition: rawstr.h:39
#define SEEK_END
Definition: zconf.h:246
static unsigned int RDWR
Definition: filemgr.h:76
signed int SW_s32
Definition: sysdata.h:40
void doLinkEntry(const char *destkey, const char *srckey)
Definition: rawstr.cpp:488
static const char nl
Definition: rawstr.h:52
int getFd()
Definition: filemgr.h:231
long write(const void *buf, long count)
Definition: filemgr.cpp:153
signed char trunc(FileDesc *file)
Definition: filemgr.cpp:256
char * toupperstr_utf8(char *t, unsigned int max=0)
Definition: stringmgr.h:118
void doSetText(const char *key, const char *buf, long len=-1)
Definition: rawstr.cpp:368
SWORD_NAMESPACE_START char * stdstr(char **ipstr, const char *istr, unsigned int memPadFactor=1)
Definition: utilstr.h:44
void close(FileDesc *file)
Definition: filemgr.cpp:196
char * malloc()
char * getRawData()
Definition: swbuf.h:379
free(preg->fastmap)
const char * c_str() const
Definition: swbuf.h:158
static int removeFile(const char *fName)
Definition: filemgr.cpp:517
char * realloc()
RawStr(const char *ipath, int fileMode=-1, bool caseSensitive=false)
Definition: rawstr.cpp:59
FileDesc * idxfd
Definition: rawstr.h:45
#define swordtoarch32(x)
Definition: sysdata.h:94
unsigned short SW_u16
Definition: sysdata.h:38
virtual ~RawStr()
Definition: rawstr.cpp:91
static const int IDXENTRYSIZE
Definition: rawstr.h:49
#define SEEK_SET
Definition: zconf.h:244
int size
Definition: regex.c:5043
static unsigned int CREAT
Definition: filemgr.h:72
FileDesc * datfd
Definition: rawstr.h:46
#define swordtoarch16(x)
Definition: sysdata.h:93
unsigned int SW_u32
Definition: sysdata.h:41
void readText(SW_u32 start, SW_u16 *size, char **idxbuf, SWBuf &buf) const
Definition: rawstr.cpp:309
static unsigned int IWRITE
Definition: filemgr.h:79
static unsigned int WRONLY
Definition: filemgr.h:77
long read(void *buf, long count)
Definition: filemgr.cpp:148
static unsigned int IREAD
Definition: filemgr.h:78
#define SWORD_NAMESPACE_END
Definition: defs.h:40
void getIDXBufDat(long ioffset, char **buf) const
Definition: rawstr.cpp:112
#define SWLOGD(...)
Definition: defs.h:187
signed char findOffset(const char *key, SW_u32 *start, SW_u16 *size, long away=0, SW_u32 *idxoff=0) const
Definition: rawstr.cpp:173
void getIDXBuf(long ioffset, char **buf) const
Definition: rawstr.cpp:146
SWBuf & setFormatted(const char *format,...)
Definition: swbuf.cpp:50
#define archtosword16(x)
Definition: sysdata.h:96
static signed char createModule(const char *path)
Definition: rawstr.cpp:502
void setSize(unsigned long len)
Definition: swbuf.h:255
bool caseSensitive
Definition: rawstr.h:40
static FileMgr * getSystemFileMgr()
Definition: filemgr.cpp:101