The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
rawstr4.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * rawstr4.cpp - code for class 'RawStr'- a module that reads raw text
4  * files: ot and nt using indexs ??.bks ??.cps ??.vss
5  * and provides lookup and parsing functions based on
6  * class StrKey
7  *
8  * $Id: rawstr4.cpp 3822 2020-11-03 18:54:47Z scribe $
9  *
10  * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org)
11  * CrossWire Bible Society
12  * P. O. Box 2528
13  * Tempe, AZ 85280-2528
14  *
15  * This program is free software; you can redistribute it and/or modify it
16  * under the terms of the GNU General Public License as published by the
17  * Free Software Foundation version 2.
18  *
19  * This program is distributed in the hope that it will be useful, but
20  * WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  * General Public License for more details.
23  *
24  */
25 
26 #include <stdio.h>
27 #include <fcntl.h>
28 #include <errno.h>
29 #include <stdlib.h>
30 #include <sys/types.h>
31 
32 #include <utilstr.h>
33 #include <rawstr4.h>
34 #include <sysdata.h>
35 #include <swlog.h>
36 #include <filemgr.h>
37 #include <swbuf.h>
38 #include <stringmgr.h>
39 
41 
42 /******************************************************************************
43  * RawStr Statics
44  */
45 
46 int RawStr4::instance = 0;
47 const char RawStr4::nl = '\n';
48 const int RawStr4::IDXENTRYSIZE = 8;
49 
50 
51 /******************************************************************************
52  * RawStr Constructor - Initializes data for instance of RawStr
53  *
54  * ENT: ipath - path of the directory where data and index files are located.
55  * be sure to include the trailing separator (e.g. '/' or '\')
56  * (e.g. 'modules/texts/rawtext/webster/')
57  */
58 
59 RawStr4::RawStr4(const char *ipath, int fileMode, bool caseSensitive) : caseSensitive(caseSensitive)
60 {
61  SWBuf buf;
62 
63  lastoff = -1;
64  path = 0;
65  stdstr(&path, ipath);
66 
67  if (fileMode == -1) { // try read/write if possible
68  fileMode = FileMgr::RDWR;
69  }
70 
71  buf.setFormatted("%s.idx", path);
72  idxfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
73 
74  buf.setFormatted("%s.dat", path);
75  datfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
76 
77  if (!datfd || datfd->getFd() < 0) {
78 // couldn't find datafile but this might be fine if we're
79 // merely instantiating a remote InstallMgr SWMgr
80 SWLOGD("Couldn't open file: %s. errno: %d", buf.c_str(), errno);
81  }
82 
83  instance++;
84 }
85 
86 
87 /******************************************************************************
88  * RawStr Destructor - Cleans up instance of RawStr
89  */
90 
92 {
93  if (path)
94  delete [] path;
95 
96  --instance;
97 
100 }
101 
102 
103 /******************************************************************************
104  * RawStr4::getidxbufdat - Gets the index string at the given idx offset
105  * NOTE: buf is allocated and must be freed by
106  * calling function
107  *
108  * ENT: ioffset - offset in dat file to lookup
109  * buf - address of pointer to allocate for storage of string
110  */
111 
112 void RawStr4::getIDXBufDat(long ioffset, char **buf) const
113 {
114  int size;
115  char ch;
116  if ((unsigned long)datfd > 0) {
117  datfd->seek(ioffset, SEEK_SET);
118  for (size = 0; datfd->read(&ch, 1) == 1; size++) {
119  if ((ch == '\\') || (ch == 10) || (ch == 13))
120  break;
121  }
122  *buf = (*buf) ? (char *)realloc(*buf, size*2 + 1) : (char *)malloc(size*2 + 1);
123  if (size) {
124  datfd->seek(ioffset, SEEK_SET);
125  datfd->read(*buf, size);
126  }
127  (*buf)[size] = 0;
128  if (!caseSensitive) toupperstr_utf8(*buf, size*2);
129  }
130  else {
131  *buf = (*buf) ? (char *)realloc(*buf, 1) : (char *)malloc(1);
132  **buf = 0;
133  }
134 }
135 
136 
137 /******************************************************************************
138  * RawStr4::getidxbuf - Gets the index string at the given idx offset
139  * NOTE: buf is allocated and must be freed by
140  * calling function
141  *
142  * ENT: ioffset - offset in idx file to lookup
143  * buf - address of pointer to allocate for storage of string
144  */
145 
146 void RawStr4::getIDXBuf(long ioffset, char **buf) const
147 {
148  SW_u32 offset;
149 
150  if ((unsigned long)idxfd > 0) {
151  idxfd->seek(ioffset, SEEK_SET);
152 
153  idxfd->read(&offset, 4);
154  offset = swordtoarch32(offset);
155 
156  getIDXBufDat(offset, buf);
157 
158 /* What the heck is this supposed to do??????
159  for (trybuf = targetbuf = *buf; *trybuf; trybuf++, targetbuf++) {
160  *targetbuf = *trybuf;
161  }
162  *targetbuf = 0;
163  trybuf = 0;
164  if (!caseSensitive) toupperstr_utf8(targetbuf);
165 */
166  }
167 }
168 
169 
170 /******************************************************************************
171  * RawStr4::findoffset - Finds the offset of the key string from the indexes
172  *
173  * ENT: key - key string to lookup
174  * start - address to store the starting offset
175  * size - address to store the size of the entry
176  * away - number of entries before or after to jump
177  * (default = 0)
178  *
179  * RET: error status -1 general error; -2 new file
180  */
181 
// Looks up ikey in the index file and reports where its entry lives.
//   *start, *size - receive the two 32-bit fields of the chosen index record
//                   (converted with swordtoarch32)
//   away          - after the search, step this many distinct, non-empty
//                   records forward (positive) or backward (negative)
//   idxoff        - optional; receives the index-file offset of the chosen record
// Returns 0 when the index holds at least one record, -2 when it does not
// ("new file"), and -1 on other errors.  An inexact match still returns 0 and
// yields a neighbouring record.  The final offset is cached in lastoff.
182 signed char RawStr4::findOffset(const char *ikey, SW_u32 *start, SW_u32 *size, long away, SW_u32 *idxoff) const
183 {
184  char *trybuf, *maxbuf, *key = 0, quitflag = 0;
185  signed char retval = -1;
186  long headoff, tailoff, tryoff = 0, maxoff = 0;
187  int diff = 0;
188  bool awayFromSubstrCheck = false;
189 
190  if (idxfd->getFd() >=0) {
// maxoff is the offset of the last 8-byte record (file length minus 8).
191  tailoff = maxoff = idxfd->seek(0, SEEK_END) - 8;
192 
193  retval = (tailoff >= 0) ? 0 : -2; // if NOT new file
194  if (*ikey && retval != -2) {
195  headoff = 0;
196 
// Copy the key with memPadFactor 3 (third argument of stdstr) and upper-case it
// when the object is not case sensitive.
// NOTE(review): presumably the padding leaves room for upper-casing to grow the
// string - confirm in utilstr/stringmgr.
197  stdstr(&key, ikey, 3);
198  if (!caseSensitive) toupperstr_utf8(key, (unsigned int)(strlen(key)*3));
199 
200  int keylen = (int)strlen(key);
201  bool substr = false;
202 
// maxbuf holds the key of the last record; it is consulted after the search.
203  trybuf = maxbuf = 0;
204  getIDXBuf(maxoff, &maxbuf);
205 
// Binary search over the 8-byte records between headoff and tailoff.
206  while (headoff < tailoff) {
// Probe the offset cached by the previous call first, if any; otherwise probe
// the record midway between head and tail.  The cache is consumed either way.
207  tryoff = (lastoff == -1) ? headoff + ((((tailoff / 8) - (headoff / 8))) / 2) * 8 : lastoff;
208  lastoff = -1;
209  getIDXBuf(tryoff, &trybuf);
210 
211  if (!*trybuf && tryoff) { // In case of extra entry at end of idx (not first entry)
212  tryoff += (tryoff > (maxoff / 2))?-8:8;
213  retval = -1;
214  break;
215  }
216 
217  diff = strcmp(key, trybuf);
218 
// Exact match: tryoff is the record we want.
219  if (!diff)
220  break;
221 
// Remember whether any probed key started with the search key.
222  if (!strncmp(trybuf, key, keylen)) substr = true;
223 
224  if (diff < 0)
225  tailoff = (tryoff == headoff) ? headoff : tryoff;
226  else headoff = tryoff;
227 
// Head and tail are adjacent: allow one more pass, then force the loop to end.
228  if (tailoff == headoff + 8) {
229  if (quitflag++)
230  headoff = tailoff;
231  }
232  }
233 
234  // didn't find exact match
235  if (headoff >= tailoff) {
236  tryoff = headoff;
237  if (!substr && ((tryoff != maxoff)||(strncmp(key, maxbuf, keylen)<0))) {
238  awayFromSubstrCheck = true;
239  away--; // if our entry doesn't start with our key, prefer the previous entry over the next
240  }
241  }
// trybuf and maxbuf come from getIDXBuf (malloc/realloc), key from stdstr.
242  if (trybuf)
243  free(trybuf);
244  delete [] key;
245  if (maxbuf)
246  free(maxbuf);
247  }
// Empty key (or empty index): use the first record.
248  else tryoff = 0;
249 
// Read the record chosen by the search.
250  idxfd->seek(tryoff, SEEK_SET);
251 
252  SW_u32 tmpStart, tmpSize;
253  *start = *size = tmpStart = tmpSize = 0;
254  idxfd->read(&tmpStart, 4);
255  idxfd->read(&tmpSize, 4);
256  if (idxoff)
257  *idxoff = (SW_u32)tryoff;
258 
259  *start = swordtoarch32(tmpStart);
260  *size = swordtoarch32(tmpSize);
261 
// Step through neighbouring records until 'away' reaches zero.
262  while (away) {
263  unsigned long laststart = *start;
264  unsigned long lastsize = *size;
265  long lasttry = tryoff;
266  tryoff += (away > 0) ? 8 : -8;
267 
// Stop when the remaining steps would leave the index or the seek fails.
268  bool bad = false;
269  if (((tryoff + (away*8)) < -8) || (tryoff + (away*8) > (maxoff+8)))
270  bad = true;
271  else if (idxfd->seek(tryoff, SEEK_SET) < 0)
272  bad = true;
273  if (bad) {
// Restore the previous record.  It is only reported as an error when the step
// was requested by the caller, not added by the prefix check above.
274  if(!awayFromSubstrCheck)
275  retval = -1;
276  *start = (SW_u32)laststart;
277  *size = (SW_u32)lastsize;
278  tryoff = lasttry;
279  if (idxoff)
280  *idxoff = (SW_u32)tryoff;
281  break;
282  }
283  idxfd->read(&tmpStart, 4);
284  idxfd->read(&tmpSize, 4);
285  if (idxoff)
286  *idxoff = (SW_u32)tryoff;
287 
288  *start = swordtoarch32(tmpStart);
289  *size = swordtoarch32(tmpSize);
290 
// A step only counts when the record differs from the previous one and is not empty.
291  if (((laststart != *start) || (lastsize != *size)) && (*size))
292  away += (away < 0) ? 1 : -1;
293  }
294 
// Cache the final offset; the next call probes it first.
295  lastoff = tryoff;
296  }
297  else {
// No usable index file descriptor: zero every output and report an error.
298  *start = 0;
299  *size = 0;
300  if (idxoff)
301  *idxoff = 0;
302  retval = -1;
303  }
304  return retval;
305 }
306 
307 
308 /******************************************************************************
309  * RawStr4::readtext - gets text at a given offset
310  *
311  * ENT:
312  * start - starting offset where the text is located in the file
313  * size - size of text entry
314  * buf - buffer to store text
315  *
316  */
317 
318 void RawStr4::readText(SW_u32 istart, SW_u32 *isize, char **idxbuf, SWBuf &buf) const
319 {
320  unsigned int ch;
321  char *idxbuflocal = 0;
322  getIDXBufDat(istart, &idxbuflocal);
323  SW_u32 start = istart;
324 
325  do {
326  if (*idxbuf)
327  delete [] *idxbuf;
328 
329  buf = "";
330  buf.setFillByte(0);
331  buf.setSize(++(*isize));
332 
333  *idxbuf = new char [ (*isize) ];
334 
335  datfd->seek(start, SEEK_SET);
336  datfd->read(buf.getRawData(), (int)((*isize) - 1));
337 
338  for (ch = 0; buf[ch]; ch++) { // skip over index string
339  if (buf[ch] == 10) {
340  ch++;
341  break;
342  }
343  }
344  buf = SWBuf(buf.c_str()+ch);
345  // resolve link
346  if (!strncmp(buf.c_str(), "@LINK", 5)) {
347  for (ch = 0; buf[ch]; ch++) { // null before nl
348  if (buf[ch] == 10) {
349  buf[ch] = 0;
350  break;
351  }
352  }
353  findOffset(buf.c_str() + 6, &start, isize);
354  }
355  else break;
356  }
357  while (true); // while we're resolving links
358 
359  if (idxbuflocal) {
360  unsigned int localsize = (unsigned int)strlen(idxbuflocal);
361  localsize = (localsize < (*isize - 1)) ? localsize : (*isize - 1);
362  strncpy(*idxbuf, idxbuflocal, localsize);
363  (*idxbuf)[localsize] = 0;
364  free(idxbuflocal);
365  }
366 }
367 
368 
369 /******************************************************************************
370  * RawStr4::doSetText - Sets (or deletes) the text stored for a key
371  *
372  * ENT: key - key for this entry
373  * buf - buffer to store
374  * len - length of buffer (negative - null terminated; 0 - delete the entry)
375  */
376 
377 void RawStr4::doSetText(const char *ikey, const char *buf, long len) {
378 
379  SW_u32 start, outstart;
380  SW_u32 idxoff;
381  SW_u32 endoff;
382  SW_s32 shiftSize;
383  SW_u32 size;
384  SW_u32 outsize;
385  char *tmpbuf = 0;
386  char *key = 0;
387  char *dbKey = 0;
388  char *idxBytes = 0;
389  char *outbuf = 0;
390  char *ch = 0;
391 
392  char errorStatus = findOffset(ikey, &start, &size, 0, &idxoff);
393  stdstr(&key, ikey, 3);
394  if (!caseSensitive) toupperstr_utf8(key, (unsigned int)(strlen(key)*3));
395 
396  len = (len < 0) ? strlen(buf) : len;
397  getIDXBufDat(start, &dbKey);
398 
399  if (strcmp(key, dbKey) < 0) {
400  }
401  else if (strcmp(key, dbKey) > 0) {
402  if (errorStatus != (char)-2) // not a new file
403  idxoff += 8;
404  else idxoff = 0;
405  }
406  else if ((!strcmp(key, dbKey)) && (len>0/*we're not deleting*/)) { // got absolute entry
407  do {
408  tmpbuf = new char [ size + 2 ];
409  memset(tmpbuf, 0, size + 2);
410  datfd->seek(start, SEEK_SET);
411  datfd->read(tmpbuf, (int)(size - 1));
412 
413  for (ch = tmpbuf; *ch; ch++) { // skip over index string
414  if (*ch == 10) {
415  ch++;
416  break;
417  }
418  }
419  memmove(tmpbuf, ch, size - (unsigned long)(ch-tmpbuf));
420 
421  // resolve link
422  if (!strncmp(tmpbuf, "@LINK", 5) && (len > 0)) {
423  for (ch = tmpbuf; *ch; ch++) { // null before nl
424  if (*ch == 10) {
425  *ch = 0;
426  break;
427  }
428  }
429  findOffset(tmpbuf + 8, &start, &size, 0, &idxoff);
430  ++size;
431  }
432  else break;
433  }
434  while (true); // while we're resolving links
435  }
436 
437  endoff = (SW_u32)idxfd->seek(0, SEEK_END);
438 
439  shiftSize = endoff - idxoff;
440 
441  if (shiftSize > 0) {
442  idxBytes = new char [ shiftSize ];
443  idxfd->seek(idxoff, SEEK_SET);
444  idxfd->read(idxBytes, shiftSize);
445  }
446 
447  outbuf = new char [ len + strlen(key) + 5 ];
448  sprintf(outbuf, "%s%c%c", key, 13, 10);
449  size = strlen(outbuf);
450  memcpy(outbuf + size, buf, len);
451  size = outsize = size + (SW_u32)len;
452 
453  start = outstart = (SW_u32)datfd->seek(0, SEEK_END);
454 
455  outstart = archtosword32(start);
456  outsize = archtosword32(size);
457 
458  idxfd->seek(idxoff, SEEK_SET);
459  if (len>0) {
460  datfd->seek(start, SEEK_SET);
461  datfd->write(outbuf, (long)size);
462 
463  // add a new line to make data file easier to read in an editor
464  datfd->write(&nl, 1);
465 
466  idxfd->write(&outstart, 4);
467  idxfd->write(&outsize, 4);
468  if (idxBytes) {
469  idxfd->write(idxBytes, shiftSize);
470  delete [] idxBytes;
471  }
472  }
473  else { // delete entry
474  if (idxBytes) {
475  idxfd->write(idxBytes+8, shiftSize-8);
476  idxfd->seek(-1, SEEK_CUR); // last valid byte
477  FileMgr::getSystemFileMgr()->trunc(idxfd); // truncate index
478  delete [] idxBytes;
479  }
480  }
481 
482  delete [] key;
483  delete [] outbuf;
484  free(dbKey);
485 }
486 
487 
488 /******************************************************************************
489  * RawStr4::doLinkEntry - links one entry to another
490  *
491  * ENT: destkey - key of the entry being linked to
492  * srckey - key of the entry that becomes a link to destkey
493  *
494  */
495 
496 void RawStr4::doLinkEntry(const char *destkey, const char *srckey) {
497  char *text = new char [ strlen(destkey) + 7 ];
498  sprintf(text, "@LINK %s", destkey);
499  doSetText(srckey, text);
500  delete [] text;
501 }
502 
503 
504 /******************************************************************************
505  * RawStr4::createModule - Creates new module files
506  *
507  * ENT: path - directory to store module files
508  * RET: error status
509  */
510 
511 signed char RawStr4::createModule(const char *ipath)
512 {
513  char *path = 0;
514  char *buf = new char [ strlen (ipath) + 20 ];
515  FileDesc *fd, *fd2;
516 
517  stdstr(&path, ipath);
518 
519  if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\'))
520  path[strlen(path)-1] = 0;
521 
522  sprintf(buf, "%s.dat", path);
523  FileMgr::removeFile(buf);
525  fd->getFd();
527 
528  sprintf(buf, "%s.idx", path);
529  FileMgr::removeFile(buf);
531  fd2->getFd();
533 
534  delete [] path;
535 
536  return 0;
537 }
538 
void setFillByte(char ch)
Definition: swbuf.h:146
#define SWORD_NAMESPACE_START
Definition: defs.h:39
long seek(long offset, int whence)
Definition: filemgr.cpp:143
static const int IDXENTRYSIZE
Definition: rawstr4.h:44
FileDesc * open(const char *path, int mode, bool tryDowngrade)
Definition: filemgr.cpp:175
Definition: swbuf.h:47
#define SEEK_CUR
Definition: zconf.h:245
#define archtosword32(x)
Definition: sysdata.h:97
#define SEEK_END
Definition: zconf.h:246
static unsigned int RDWR
Definition: filemgr.h:76
static const char nl
Definition: rawstr4.h:52
signed int SW_s32
Definition: sysdata.h:40
signed char findOffset(const char *key, SW_u32 *start, SW_u32 *size, long away=0, SW_u32 *idxoff=0) const
Definition: rawstr4.cpp:182
bool caseSensitive
Definition: rawstr4.h:40
RawStr4(const char *ipath, int fileMode=-1, bool caseSensitive=false)
Definition: rawstr4.cpp:59
int getFd()
Definition: filemgr.h:231
char * path
Definition: rawstr4.h:39
long write(const void *buf, long count)
Definition: filemgr.cpp:153
signed char trunc(FileDesc *file)
Definition: filemgr.cpp:256
long lastoff
Definition: rawstr4.h:41
char * toupperstr_utf8(char *t, unsigned int max=0)
Definition: stringmgr.h:118
SWORD_NAMESPACE_START char * stdstr(char **ipstr, const char *istr, unsigned int memPadFactor=1)
Definition: utilstr.h:44
void close(FileDesc *file)
Definition: filemgr.cpp:196
char * malloc()
char * getRawData()
Definition: swbuf.h:379
free(preg->fastmap)
const char * c_str() const
Definition: swbuf.h:158
static int removeFile(const char *fName)
Definition: filemgr.cpp:517
char * realloc()
FileDesc * datfd
Definition: rawstr4.h:47
#define swordtoarch32(x)
Definition: sysdata.h:94
virtual ~RawStr4()
Definition: rawstr4.cpp:91
FileDesc * idxfd
Definition: rawstr4.h:46
void readText(SW_u32 start, SW_u32 *size, char **idxbuf, SWBuf &buf) const
Definition: rawstr4.cpp:318
#define SEEK_SET
Definition: zconf.h:244
void doSetText(const char *key, const char *buf, long len=-1)
Definition: rawstr4.cpp:377
int size
Definition: regex.c:5043
void getIDXBufDat(long ioffset, char **buf) const
Definition: rawstr4.cpp:112
void doLinkEntry(const char *destkey, const char *srckey)
Definition: rawstr4.cpp:496
static unsigned int CREAT
Definition: filemgr.h:72
static signed char createModule(const char *path)
Definition: rawstr4.cpp:511
unsigned int SW_u32
Definition: sysdata.h:41
static unsigned int IWRITE
Definition: filemgr.h:79
static unsigned int WRONLY
Definition: filemgr.h:77
long read(void *buf, long count)
Definition: filemgr.cpp:148
static unsigned int IREAD
Definition: filemgr.h:78
#define SWORD_NAMESPACE_END
Definition: defs.h:40
static int instance
Definition: rawstr4.h:38
#define SWLOGD(...)
Definition: defs.h:187
SWBuf & setFormatted(const char *format,...)
Definition: swbuf.cpp:50
void setSize(unsigned long len)
Definition: swbuf.h:255
void getIDXBuf(long ioffset, char **buf) const
Definition: rawstr4.cpp:146
static FileMgr * getSystemFileMgr()
Definition: filemgr.cpp:101