[sword-cvs] sword/src/modules/common zverse2.cpp,NONE,1.1

sword@www.crosswire.org sword@www.crosswire.org
Mon, 12 Apr 2004 06:53:06 -0700


Update of /cvs/core/sword/src/modules/common
In directory www:/tmp/cvs-serv31934/common

Added Files:
	zverse2.cpp 
Log Message:

dglassey: add new zverse and ztext that use versekey2 for indexes and have initial support for separated markup


--- NEW FILE: zverse2.cpp ---
/******************************************************************************
 *  zverse2.h   - code for class 'zVerse2'- a module that reads raw text
 *				files:  ot and nt using indexs ??.bks ??.cps ??.vss
 *				and provides lookup and parsing functions based on
 *				class VerseKey2 for compressed modules
 */


#include <ctype.h>
#include <stdio.h>
#include <fcntl.h>
#include <errno.h>
#include <stdlib.h>

#ifndef __GNUC__
#include <io.h>
#else
#include <unistd.h>
#endif

#include <utilfuns.h>
#include <versekey2.h>
#include <zverse2.h>
#include <sysdata.h>
#include <swbuf.h>


#ifndef O_BINARY
#define O_BINARY 0
#endif

SWORD_NAMESPACE_START

/******************************************************************************
 * zVerse2 Statics
 */

int zVerse2::instance = 0;

const char zVerse2::uniqueIndexID[] = {'X', 'r', 'v', 'c', 'b', 'i'};

/******************************************************************************
 * zVerse2 Constructor - Initializes data for instance of zVerse2
 *
 * ENT:	ipath - path of the directory where data and index files are located.
 *		be sure to include the trailing separator (e.g. '/' or '\')
 *		(e.g. 'modules/texts/rawtext/webster/')
 *		fileMode - open mode for the files (O_RDONLY, etc.)
 *		blockType - verse, chapter, book, etc.
 */

zVerse2::zVerse2(const char *ipath, int fileMode, int blockType, 
	SWCompress *icomp, int indexesPerBlock)
{
	SWBuf buf;

	nl = '\n';
	path = 0;
	cacheBufIdx = -1;
	cacheBuf = 0;
	m_idxPerBlock = indexesPerBlock;
	dirtyCache = false;
	stdstr(&path, ipath);

	if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\'))
		path[strlen(path)-1] = 0;

	compressor = (icomp) ? icomp : new SWCompress();

	if (fileMode == -1) { // try read/write if possible
		fileMode = O_RDWR;
	}
		
	buf.setFormatted("%s/ot.%czs", path, uniqueIndexID[blockType]);
	idxfp = FileMgr::getSystemFileMgr()->open(buf, fileMode|O_BINARY, true);

	buf.setFormatted("%s/ot.%czz", path, uniqueIndexID[blockType]);
	textfp = FileMgr::getSystemFileMgr()->open(buf, fileMode|O_BINARY, true);

	buf.setFormatted("%s/ot.%czv", path, uniqueIndexID[blockType]);
	compfp = FileMgr::getSystemFileMgr()->open(buf, fileMode|O_BINARY, true);

	buf.setFormatted("%s/ot.%czm", path, uniqueIndexID[blockType]);
	markupfp = FileMgr::getSystemFileMgr()->open(buf, fileMode|O_BINARY, true);

	buf.setFormatted("%s/ot.%czr", path, uniqueIndexID[blockType]);
	midxfp = FileMgr::getSystemFileMgr()->open(buf, fileMode|O_BINARY, true);

	
	instance++;
}


/******************************************************************************
 * zVerse2 Destructor - Cleans up instance of zVerse2
 */

zVerse2::~zVerse2()
{
	int loop1;

	if (cacheBuf) {
		flushCache();
		free(cacheBuf);
	}

	if (path)
		delete [] path;

	if (compressor)
		delete compressor;

	--instance;

	FileMgr::getSystemFileMgr()->close(idxfp);
	FileMgr::getSystemFileMgr()->close(textfp);
	FileMgr::getSystemFileMgr()->close(compfp);
	FileMgr::getSystemFileMgr()->close(markupfp);
	FileMgr::getSystemFileMgr()->close(midxfp);
}


/******************************************************************************
 * zVerse2::findoffset	- Finds the offset of the key verse from the indexes
 *
 *
 *
 * ENT: testmt	- testament to find (0 - Bible/module introduction)
 *	book	- book      to find (0 - testament    introduction)
 *	chapter	- chapter   to find (0 - book         introduction)
 *	verse	- verse     to find (0 - chapter      introduction)
 *	start	- address to store the starting offset
 *	size	- address to store the size of the entry
 */

void zVerse2::findOffsetText(long idxoff, long *start, unsigned short *size)
{
	// set start to offset in
	// set size to
	// set
	unsigned long ulBuffNum=0;	          // buffer number
	unsigned long ulVerseStart=0;	       // verse offset within buffer
	unsigned short usVerseSize=0;	       // verse size
	unsigned long ulCompOffset=0;	       // compressed buffer start
	unsigned long ulCompSize=0;	             // buffer size compressed
	unsigned long ulUnCompSize=0;	          // buffer size uncompressed
	char *pcCompText=NULL;					 // compressed text

	*start = *size = 0;
	//printf ("Finding offset %ld\n", idxoff);
	idxoff *= 10;
	
	// assert we have and valid file descriptor
	if (compfp->getFd() < 1)
		return;
		
	long newOffset = lseek(compfp->getFd(), idxoff, SEEK_SET);
	if (newOffset == idxoff) {
		if (read(compfp->getFd(), &ulBuffNum, 4) != 4) {
			printf ("Error reading ulBuffNum\n");
			return;
		}
	}
	else return;

	ulBuffNum = swordtoarch32(ulBuffNum);

	if (read(compfp->getFd(), &ulVerseStart, 4) != 4)
	{
		printf ("Error reading ulVerseStart\n");
		return;
	}
	if (read(compfp->getFd(), &usVerseSize, 2) != 2)
	{
		printf ("Error reading usVerseSize\n");
		return;
	}

	*start = swordtoarch32(ulVerseStart);
	*size = swordtoarch16(usVerseSize);

	if (*size) {
		if (((long) ulBuffNum == cacheBufIdx) && (cacheBuf)) {
			// have the text buffered
			return;
		}

		//printf ("Got buffer number{%ld} versestart{%ld} versesize{%d}\n", ulBuffNum, ulVerseStart, usVerseSize);


		if (lseek(idxfp->getFd(), ulBuffNum*12, SEEK_SET)!=(long) ulBuffNum*12)
		{
			printf ("Error seeking compressed file index\n");
			return;
		}
		if (read(idxfp->getFd(), &ulCompOffset, 4)<4)
		{
			printf ("Error reading ulCompOffset\n");
			return;
		}
		if (read(idxfp->getFd(), &ulCompSize, 4)<4)
		{
			printf ("Error reading ulCompSize\n");
			return;
		}
		if (read(idxfp->getFd(), &ulUnCompSize, 4)<4)
		{
			printf ("Error reading ulUnCompSize\n");
			return;
		}

		ulCompOffset  = swordtoarch32(ulCompOffset);
		ulCompSize  = swordtoarch32(ulCompSize);
		ulUnCompSize  = swordtoarch32(ulUnCompSize);

		if (lseek(textfp->getFd(), ulCompOffset, SEEK_SET)!=(long)ulCompOffset)
		{
			printf ("Error: could not seek to right place in compressed text\n");
			return;
		}
		SWBuf pcCompText;
		pcCompText.setSize(ulCompSize+5);

		if (read(textfp->getFd(), pcCompText.getRawData(), ulCompSize)<(long)ulCompSize) {
			printf ("Error reading compressed text\n");
			return;
		}
		pcCompText.setSize(ulCompSize);
		#if 0 // it's a nullop - dunno what it was supposed to be?
		rawZFilter(pcCompText, 0); // 0 = decipher
		#endif
		
		compressor->zBuf(&ulCompSize, pcCompText.getRawData());

		if (cacheBuf) {
			flushCache();
			free(cacheBuf);
		}
		
		unsigned long len = 0;
		compressor->Buf(0, &len);
		cacheBuf = (char *)calloc(len + 1, 1);
		memcpy(cacheBuf, compressor->Buf(), len);

		cacheBufIdx = ulBuffNum;
	}
}


void zVerse2::findOffsetMarkup(long idxoff, long *start, unsigned short *size)
{
	if (midxfp->getFd() < 1)
		return;
	idxoff *= 6;
		
	if (midxfp->getFd() >= 0) {
		lseek(midxfp->getFd(), idxoff, SEEK_SET);
		read(midxfp->getFd(), start, 4);							// read start
		unsigned short len = read(idxfp->getFd(), size, 2); 		// read size

		*start = swordtoarch32(*start);
		*size  = swordtoarch16(*size);

		if (len < 2) {
			*size = (unsigned short)((*start) ? (lseek(markupfp->getFd(), 0, SEEK_END) - (long)*start) : 0);	
				// if for some reason we get an error reading size, make size to end of file
		}
	}
	else {
		*start = 0;
		*size = 0;
	}
}

/******************************************************************************
 * zVerse2::zreadtext	- gets text at a given offset
 *
 * ENT:	testmt	- testament file to search in (0 - Old; 1 - New)
 *	start	- starting offset where the text is located in the file
 *	size	- size of text entry + 1 (null)
 *	buf	- buffer to store text
 *
 */

void zVerse2::zReadText(long start, unsigned short size, SWBuf &inBuf) {
	inBuf = "";
	inBuf.setFillByte(0);
	inBuf.setSize(size+1);
	if (size > 0) {
		if (cacheBuf)
			strncpy(inBuf.getRawData(), &(cacheBuf[start]), size);
	}
	inBuf.setSize(strlen(inBuf.c_str()));
}


/******************************************************************************
 * zVerse2::settext	- Sets text for current offset
 *
 * ENT: testmt	- testament to find (0 - Bible/module introduction)
 *	idxoff	- offset into .vss
 *	buf	- buffer to store
 *      len     - length of buffer (0 - null terminated)
 */

void zVerse2::doSetText(long idxoff, const char *buf, long len) {

	len = (len < 0) ? strlen(buf) : len;
	if ((!dirtyCache) || (cacheBufIdx < 0)) {
		cacheBufIdx = lseek(idxfp->getFd(), 0, SEEK_END) / 12;
		if (cacheBuf)
			free(cacheBuf);
		cacheBuf = (char *)calloc(len + 1, 1);
	}
	else cacheBuf = (char *)((cacheBuf)?realloc(cacheBuf, strlen(cacheBuf)+(len + 1)):calloc((len + 1), 1));

	dirtyCache = true;

	unsigned long start, outstart;
	unsigned long outBufIdx = cacheBufIdx;
	unsigned short size;
	unsigned short outsize;

	idxoff *= 10;
	size = outsize = len;

	start = strlen(cacheBuf);

	if (!size)
		start = outBufIdx = 0;

	outBufIdx = archtosword32(outBufIdx);
	outstart  = archtosword32(start);
	outsize   = archtosword16(size);

	lseek(compfp->getFd(), idxoff, SEEK_SET);
	write(compfp->getFd(), &outBufIdx, 4);
	write(compfp->getFd(), &outstart, 4);
	write(compfp->getFd(), &outsize, 2);
	strcat(cacheBuf, buf);
}


void zVerse2::flushCache() {
	if (dirtyCache) {
		unsigned long idxoff;
		unsigned long start, outstart;
		unsigned long size, outsize;
		unsigned long zsize, outzsize;

		idxoff = cacheBufIdx * 12;
		if (cacheBuf) {
			size = outsize = zsize = outzsize = strlen(cacheBuf);
			if (size) {
	//			if (compressor) {
	//				delete compressor;
	//				compressor = new LZSSCompress();
	//			}
				compressor->Buf(cacheBuf);
				compressor->zBuf(&zsize);
				outzsize = zsize;

				SWBuf buf;
				buf.setSize(zsize + 5);
				memcpy(buf.getRawData(), compressor->zBuf(&zsize), zsize);
				buf.setSize(zsize);
				#if 0 // it's a nullop - dunno what it was supposed to be?
				rawZFilter(buf, 1); // 1 = encipher
				#endif

				start = outstart = lseek(textfp->getFd(), 0, SEEK_END);

				outstart  = archtosword32(start);
				outsize   = archtosword32(size);
				outzsize  = archtosword32(zsize);

				write(textfp->getFd(), buf, zsize);

				lseek(idxfp->getFd(), idxoff, SEEK_SET);
				write(idxfp->getFd(), &outstart, 4);
				write(idxfp->getFd(), &outzsize, 4);
				write(idxfp->getFd(), &outsize, 4);
			}
			free(cacheBuf);
			cacheBuf = 0;
		}
		dirtyCache = false;
	}
}

/******************************************************************************
 * RawVerse::linkentry	- links one entry to another
 *
 * ENT: testmt	- testament to find (0 - Bible/module introduction)
 *	destidxoff	- dest offset into .vss
 *	srcidxoff		- source offset into .vss
 */

void zVerse2::doLinkEntry(long destidxoff, long srcidxoff) {
	long bufidx;
	long start;
	unsigned short size;

	destidxoff *= 10;
	srcidxoff  *= 10;

	// get source
	lseek(compfp->getFd(), srcidxoff, SEEK_SET);
	read(compfp->getFd(), &bufidx, 4);
	read(compfp->getFd(), &start, 4);
	read(compfp->getFd(), &size, 2);

	// write dest
	lseek(compfp->getFd(), destidxoff, SEEK_SET);
	write(compfp->getFd(), &bufidx, 4);
	write(compfp->getFd(), &start, 4);
	write(compfp->getFd(), &size, 2);
	
	// !!WDG do we want to link the markup as well?
}


/******************************************************************************
 * RawVerse::CreateModule	- Creates new module files
 *
 * ENT: path	- directory to store module files
 * RET: error status
 */

char zVerse2::createModule(const char *ipath, int blockBound, int indxPerBlock)
{
	char *path = 0;
	char *buf = new char [ strlen (ipath) + 20 ];
	FileDesc *fd, *fd2;

	stdstr(&path, ipath);

	if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\'))
		path[strlen(path)-1] = 0;

	sprintf(buf, "%s/ot.%czs", path, uniqueIndexID[blockBound]);
	FileMgr::removeFile(buf);
	fd = FileMgr::getSystemFileMgr()->open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE);
	fd->getFd();
	FileMgr::getSystemFileMgr()->close(fd);

	sprintf(buf, "%s/ot.%czz", path, uniqueIndexID[blockBound]);
	FileMgr::removeFile(buf);
	fd = FileMgr::getSystemFileMgr()->open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE);
	fd->getFd();
	FileMgr::getSystemFileMgr()->close(fd);

	sprintf(buf, "%s/ot.%czm", path, uniqueIndexID[blockBound]);
	FileMgr::removeFile(buf);
	fd = FileMgr::getSystemFileMgr()->open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE);
	fd->getFd();
	FileMgr::getSystemFileMgr()->close(fd);

	sprintf(buf, "%s/ot.%czr", path, uniqueIndexID[blockBound]);
	FileMgr::removeFile(buf);
	fd = FileMgr::getSystemFileMgr()->open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE);
	fd->getFd();
	FileMgr::getSystemFileMgr()->close(fd);

	
	sprintf(buf, "%s/ot.%czv", path, uniqueIndexID[blockBound]);
	FileMgr::removeFile(buf);
	fd2 = FileMgr::getSystemFileMgr()->open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE);
	fd2->getFd();

	VerseKey2 vk;
	vk.Headings(1);
	long offset = 0;
	short size = 0;
	for (vk = TOP; !vk.Error(); vk++) {
		write(fd->getFd(), &offset, 4);	//compBufIdxOffset
		write(fd->getFd(), &offset, 4);
		write(fd->getFd(), &size, 2);
		write(fd2->getFd(), &offset, 4);	//compBufIdxOffset
		write(fd2->getFd(), &offset, 4);
		write(fd2->getFd(), &size, 2);
	}

	FileMgr::getSystemFileMgr()->close(fd);
	FileMgr::getSystemFileMgr()->close(fd2);

	delete [] path;
	delete [] buf;
/*
	RawVerse rv(path);
	VerseKey2 mykey("Rev 22:21");
*/
	
	return 0;
}


/******************************************************************************
 * zVerse2::preptext	- Prepares the text before returning it to external
 *				objects
 *
 * ENT:	buf	- buffer where text is stored and where to store the prep'd
 *			text.
 */
#if 0 // should we really need this?
void zVerse2::prepText(SWBuf &buf) {
	unsigned int to, from; 
	char space = 0, cr = 0, realdata = 0, nlcnt = 0;
	char *rawBuf = buf.getRawData();
	for (to = from = 0; rawBuf[from]; from++) {
		switch (rawBuf[from]) {
		case 10:
			if (!realdata)
				continue;
			space = (cr) ? 0 : 1;
			cr = 0;
			nlcnt++;
			if (nlcnt > 1) {
//				*to++ = nl;
				rawBuf[to++] = 10;
//				*to++ = nl[1];
//				nlcnt = 0;
			}
			continue;
		case 13:
			if (!realdata)
				continue;
//			*to++ = nl[0];
			rawBuf[to++] = 10;
			space = 0;
			cr = 1;
			continue;
		}
		realdata = 1;
		nlcnt = 0;
		if (space) {
			space = 0;
			if (rawBuf[from] != ' ') {
				rawBuf[to++] = ' ';
				from--;
				continue;
			}
		}
		rawBuf[to++] = rawBuf[from];
	}
	buf.setSize(to);

	while (to > 1) {			// remove trailing excess
		to--;
		if ((rawBuf[to] == 10) || (rawBuf[to] == ' '))
			buf.setSize(to);
		else break;
	}
}
#endif

SWORD_NAMESPACE_END