[sword-cvs] sword/utilities vpl2mod.cpp,1.20,1.21 vs2osisref.cpp,1.3,1.4

sword@www.crosswire.org sword@www.crosswire.org
Mon, 26 May 2003 01:36:33 -0700


Update of /usr/local/cvsroot/sword/utilities
In directory www:/tmp/cvs-serv17071/utilities

Modified Files:
	vpl2mod.cpp vs2osisref.cpp 
Log Message:
	Improved verse reference parsing to include proper
		parsing of osisRef attributes and other
		misc. anomalies.
	Returned canon.h back to previous state to
		avoid locale key mismatches



Index: vpl2mod.cpp
===================================================================
RCS file: /usr/local/cvsroot/sword/utilities/vpl2mod.cpp,v
retrieving revision 1.20
retrieving revision 1.21
diff -C2 -d -r1.20 -r1.21
*** vpl2mod.cpp	26 May 2003 04:32:45 -0000	1.20
--- vpl2mod.cpp	26 May 2003 08:36:31 -0000	1.21
***************
*** 1,7 ****
! // Compression on variable granularity
  #include <fcntl.h>
! #include <iostream>
! #include <fstream>
! #include <string>
  
  #ifndef __GNUC__
--- 1,7 ----
! #include <ctype.h>
! #include <stdio.h>
  #include <fcntl.h>
! #include <errno.h>
! #include <stdlib.h>
  
  #ifndef __GNUC__
***************
*** 11,107 ****
  #endif
  
- #include <ztext.h>
- #include <zld.h>
- #include <zcom.h>
  #include <swmgr.h>
! #include <lzsscomprs.h>
! #include <zipcomprs.h>
! #include <versekey.h>
! #include <thmlosis.h>
! #include <stdio.h>
! #include <markupfiltmgr.h>
! #include <algorithm>
  
  #ifndef NO_SWORD_NAMESPACE
! using namespace sword;
  #endif
- using std::endl;
- using std::cerr;
- using std::cout;
  
  
! const char *convertToOSIS(const char *inRef, const SWKey *key) {
! 	static std::string outRef;
  
- 	outRef = "";
  
! 	VerseKey defLanguage;
! 	ListKey verses = defLanguage.ParseVerseList(inRef, (*key), true);
! 	const char *startFrag = inRef;
! 	for (int i = 0; i < verses.Count(); i++) {
! 		VerseKey *element = SWDYNAMIC_CAST(VerseKey, verses.GetElement(i));
! 		char buf[5120];
! 		char frag[800];
! 		char preJunk[800];
! 		char postJunk[800];
! 		*preJunk = 0;
! 		*postJunk = 0;
! 		while ((*startFrag) && (strchr(" ;,()[].", *startFrag))) {
! 			outRef += *startFrag;
! 			startFrag++;
! 		}
! 		if (element) {
! 			memmove(frag, startFrag, ((const char *)element->userData - startFrag) + 1);
! 			frag[((const char *)element->userData - startFrag) + 1] = 0;
! 			int j;
! 			for (j = strlen(frag)-1; j && (strchr(" ;,()[].", frag[j])); j--);
! 			if (frag[j+1])
! 				strcpy(postJunk, frag+j);
! 			frag[j+1]=0;
! 			startFrag += (j+1);
! 			sprintf(buf, "<reference osisRef=\"%s-%s\">%s</reference>***%s", element->LowerBound().getOSISRef(), element->UpperBound().getOSISRef(), frag, postJunk);
  		}
! 		else {
! 			memmove(frag, startFrag, ((const char *)verses.GetElement(i)->userData - startFrag) + 1);
! 			frag[((const char *)verses.GetElement(i)->userData - startFrag) + 1] = 0;
! 			int j;
! 			for (j = strlen(frag)-1; j && (strchr(" ;,()[].", frag[j])); j--);
! 			if (frag[j+1])
! 				strcpy(postJunk, frag+j+1);
! 			frag[j+1]=0;
! 			startFrag += ((const char *)verses.GetElement(i)->userData - startFrag) + 1;
! 			sprintf(buf, "<reference osisRef=\"%s\">%s</reference>%s", VerseKey(*verses.GetElement(i)).getOSISRef(), frag, postJunk);
  		}
! 		outRef+=buf;
  	}
! 	outRef+=startFrag;
! 	return outRef.c_str();
  }
  
- int main(int argc, char **argv)
- {
-         if (argc < 2) {
-         	cerr << argv[0] << " - a tool to convert verse references from English to OSIS\n";
- 	        cerr << "usage: "<< argv[0] << " <verse ref> [verse context]\n";
-         	cerr << "\n\n";
- 	        exit(-1);
-         }
-         VerseKey verseKey;
-         int i = strlen(argv[1]) + 1;
-         char * verseString = new char[i];
-         *verseString = 0;
-         strcpy (verseString, argv[1]);
-         verseString[i + 1] = 0;
-         
-         if (argc > 2) {
-                 verseKey = argv[2];
-         }
-         else {
-                 verseKey = "Gen 1:1";
-         }
  
!         std::cout << convertToOSIS(verseString, &verseKey) << "\n";
  
! 	return 0;
  }
  
--- 11,263 ----
  #endif
  
  #include <swmgr.h>
! #include <rawtext.h>
! #include <iostream>
! #include <string>
! 
! #ifndef O_BINARY
! #define O_BINARY 0
! #endif
  
  #ifndef NO_SWORD_NAMESPACE
! using sword::SWMgr;
! using sword::RawText;
! using sword::VerseKey;
! using sword::SW_POSITION;
  #endif
  
+ using std::string;
  
! char readline(int fd, char **buf) {
! 	char ch;
! 	if (*buf)
! 		delete [] *buf;
! 	*buf = 0;
! 	int len;
  
  
! 	long index = lseek(fd, 0, SEEK_CUR);
! 	// clean up any preceding white space
! 	while ((len = read(fd, &ch, 1)) == 1) {
! 		if ((ch != 13) && (ch != ' ') && (ch != '\t'))
! 			break;
! 		else index++;
! 	}
! 
! 
! 	while (ch != 10) {
!         if ((len = read(fd, &ch, 1)) != 1)
! 			break;
! 	}
! 	
! 	int size = (lseek(fd, 0, SEEK_CUR) - index) - 1;
! 
! 	*buf = new char [ size + 1 ];
! 
! 	if (size > 0) {
! 		lseek(fd, index, SEEK_SET);
! 		read(fd, *buf, size);
! 		read(fd, &ch, 1);   //pop terminating char
! 		(*buf)[size] = 0;
! 
! 		// clean up any trailing junk on buf
! 		for (char *it = *buf+(strlen(*buf)-1); it > *buf; it--) {
! 			if ((*it != 10) && (*it != 13) && (*it != ' ') && (*it != '\t'))
! 				break;
! 			else *it = 0;
  		}
! 	}
! 	else **buf = 0;
! 	return !len;
! }
! 
! 
! char *parseVReg(char *buf) {
! 	char stage = 0;
! 
! 	while (*buf) {
! 		switch (stage) {
! 		case 0:
! 			if (isalpha(*buf))
! 				stage++;
! 			break;
! 		case 1:
! 			if (isdigit(*buf))
! 				stage++;
! 			break;
! 		case 2:
! 			if (*buf == ':')
! 				stage++;
! 			break;
! 		case 3:
! 			if (isdigit(*buf))
! 				stage++;
! 			break;
! 	   case 4:
! 			if (*buf == ' ') {
! 				*buf = 0;
! 				return ++buf;
! 			}
! 			break;
  		}
! 		buf++;
  	}
! 	return (stage == 4) ? buf : 0;  // if we got to stage 4 return after key buf, else return 0;
  }
  
  
! bool isKJVRef(const char *buf) {
! 	VerseKey vk, test;
! 	vk.AutoNormalize(0);
! 	vk.Headings(1);	// turn on mod/testmnt/book/chap headings
! 	vk.Persist(1);
! 	// lets do some tests on the verse --------------
! 	vk = buf;
! 	test = buf;
  
! 	if (vk.Testament() && vk.Book() && vk.Chapter() && vk.Verse()) { // if we're not a heading
! //		std::cerr << (const char*)vk << " == "  << (const char*)test << std::endl;
! 		return (vk == test);
! 	}
! 	else return true;	// no check if we're a heading... Probably bad.
! }
! 
! 
! void fixText(char *text) {
! 	char *to = text;
! 	while(*text) {
! 		*to++ = *text++;
! 		*to++ = *text++;
! 		if (!*text)
! 			break;
! 		if (*text != ' ')
! 			std::cerr << "problem\n";
! 		else	text++;
! 	}
! 	*to = 0;
  }
  
+ int main(int argc, char **argv) {
+ 
+ 	// Let's test our command line arguments
+ 	if (argc < 2) {
+ //		fprintf(stderr, "usage: %s <vpl_file> </path/to/mod> [0|1 - file includes prepended verse references]\n", argv[0]);
+ 		fprintf(stderr, "usage: %s <source_vpl_file> </path/to/output/mod/> [0|1 - prepended verse refs] [0|1 - NT only]\n\n", argv[0]);
+ 		fprintf(stderr, "\tWith no verse refs, source file must contain exactly 31102 lines.\n");
+ 		fprintf(stderr, "\tThis is KJV verse count plus headings for MODULE,\n");
+ 		fprintf(stderr, "\tTESTAMENT, BOOK, CHAPTER. An example snippet follows:\n\n");
+ 		fprintf(stderr, "\t\tMODULE HEADER\n");
+ 		fprintf(stderr, "\t\tOLD TESTAMENT HEADER\n");
+ 		fprintf(stderr, "\t\tGENESIS HEADER\n");
+ 		fprintf(stderr, "\t\tCHAPTER 1 HEADER\n");
+ 		fprintf(stderr, "\t\tIn the beginning...\n\n");
+ 		fprintf(stderr, "\t... implying there must also be a CHAPTER2 HEADER,\n");
+         fprintf(stderr, "\tEXODUS HEADER, NEW TESTAMENT HEADER, etc.  If there is no text for\n");
+ 		fprintf(stderr, "\tthe header, a blank line must, at least, hold place.\n\n");
+ 		fprintf(stderr, "\tWith verse refs, source file must simply contain any number of lines,\n");
+ 		fprintf(stderr, "\tthat begin with the verse reference for which it is an entry.  e.g.:\n\n");
+ 		fprintf(stderr, "\t\tgen 1:0 CHAPTER 1 HEADER\n");
+ 		fprintf(stderr, "\t\tgen 1:1 In the beginning...\n\n");
+ 		exit(-1);
+ 	}
+ 
+ 	// Let's see if we can open our input file
+ 	int fd = open(argv[1], O_RDONLY|O_BINARY);
+ 	if (fd < 0) {
+ 		fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[1]);
+ 		exit(-2);
+ 	}
+ 
+ 	// Try to initialize a default set of datafiles and indices at our
+ 	// datapath location passed to us from the user.
+ 	if (RawText::createModule(argv[2])) {
+ 		fprintf(stderr, "error: %s: couldn't create module at path: %s \n", argv[0], argv[2]);
+ 		exit(-3);
+ 	}
+ 
+ 	// vref selects the input format: when set, each line of the vpl file is
+ 	// prepended with its verse reference, eg. "Gen 1:1 In the beginning..."
+ 	bool vref = false;
+ 	if (argc > 3)
+ 		vref = (argv[3][0] == '0') ? false : true;
+ 
+ 	// if a 4th arg is given and does not start with '0', our vpl file only has the NT
+ 	bool ntonly = false;
+ 	if (argc > 4)
+                 ntonly = (argv[4][0] == '0') ? false : true;
+ 	
+ 	// Do some initialization stuff
+ 	char *buffer = 0;
+ 	RawText mod(argv[2]);	// open our datapath with our RawText driver.
+ 	VerseKey vk;
+ 	vk.AutoNormalize(0);
+ 	vk.Headings(1);	// turn on mod/testmnt/book/chap headings
+ 	vk.Persist(1);
+ 
+ 	mod.setKey(vk);
+ 
+ 	// Loop through module from TOP to BOTTOM and set next line from
+ 	// input file as text for this entry in the module
+ 	mod = TOP;
+ 	if (ntonly) vk = "Matthew 1:1";
+ 	  
+ 	int successive = 0;  //part of hack below
+ 	while ((!mod.Error()) && (!readline(fd, &buffer))) {
+ 		if (*buffer == '|')	// comments, ignore line
+ 			continue;
+ 		if (vref) {
+ 			const char *verseText = parseVReg(buffer);
+ 			if (!verseText) {	// if we didn't find a valid verse ref
+ 				std::cerr << "No valid verse ref found on line: " << buffer << "\n";
+ 				exit(-4);
+ 			}
+ 
+ 			vk = buffer;
+ 			if (vk.Error()) {
+ 				std::cerr << "Error parsing key: " << buffer << "\n";
+ 				exit(-5);
+ 			}
+ 			string orig = mod.getRawEntry();
+ 
+ 			if (!isKJVRef(buffer)) {
+ 				VerseKey origVK = vk;
+ 				/* This block is functioning improperly -- problem with AutoNormalize???
+ 				do {
+ 					vk--;
+ 				}
+ 				while (!vk.Error() && !isKJVRef(vk)); */
+ 				//hack to replace above:
+ 				successive++;
+ 				vk -= successive;
+ 				orig = mod.getRawEntry();
+ 
+ 				std::cerr << "Not a valid KJV ref: " << origVK << "\n";
+ 				std::cerr << "appending to ref: " << vk << "\n";
+ 				orig += " [ (";
+ 				orig += origVK;
+ 				orig += ") ";
+ 				orig += verseText;
+ 				orig += " ] ";
+ 				verseText = orig.c_str();
+ 			}
+ 			else {
+ 			  successive = 0;
+ 			}
+ 
+ 			if (orig.length() > 1)
+ 				   std::cerr << "Warning, overwriting verse: " << vk << std::endl;
+ 			  
+ 			// ------------- End verse tests -----------------
+ 			mod << verseText;	// save text to module at current position
+ 		}
+ 		else {
+ 			fixText(buffer);
+ 			mod << buffer;	// save text to module at current position
+ 			mod++;	// increment module position
+ 		}
+ 	}
+ 
+ 	// clear up our buffer that readline might have allocated
+ 	if (buffer)
+ 		delete [] buffer;
+ }

Index: vs2osisref.cpp
===================================================================
RCS file: /usr/local/cvsroot/sword/utilities/vs2osisref.cpp,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** vs2osisref.cpp	9 May 2003 18:28:00 -0000	1.3
--- vs2osisref.cpp	26 May 2003 08:36:31 -0000	1.4
***************
*** 43,60 ****
  		char buf[5120];
  		char frag[800];
  		if (element) {
  			memmove(frag, startFrag, ((const char *)element->userData - startFrag) + 1);
  			frag[((const char *)element->userData - startFrag) + 1] = 0;
! 			startFrag = (const char *)element->userData + 1;
! 			sprintf(buf, "<reference osisRef=\"%s-%s\">%s</reference>", element->LowerBound().getOSISRef(), element->UpperBound().getOSISRef(), frag);
  		}
  		else {
  			memmove(frag, startFrag, ((const char *)verses.GetElement(i)->userData - startFrag) + 1);
  			frag[((const char *)verses.GetElement(i)->userData - startFrag) + 1] = 0;
! 			startFrag = (const char *)verses.GetElement(i)->userData + 1;
! 			sprintf(buf, "<reference osisRef=\"%s\">%s</reference>", VerseKey(*verses.GetElement(i)).getOSISRef(), frag);
  		}
  		outRef+=buf;
  	}
  	return outRef.c_str();
  }
--- 43,79 ----
  		char buf[5120];
  		char frag[800];
+ 		char preJunk[800];
+ 		char postJunk[800];
+ 		*preJunk = 0;
+ 		*postJunk = 0;
+ 		while ((*startFrag) && (strchr(" ;,()[].", *startFrag))) {
+ 			outRef += *startFrag;
+ 			startFrag++;
+ 		}
  		if (element) {
  			memmove(frag, startFrag, ((const char *)element->userData - startFrag) + 1);
  			frag[((const char *)element->userData - startFrag) + 1] = 0;
! 			int j;
! 			for (j = strlen(frag)-1; j && (strchr(" ;,()[].", frag[j])); j--);
! 			if (frag[j+1])
! 				strcpy(postJunk, frag+j+1);
! 			frag[j+1]=0;
! 			startFrag += ((const char *)element->userData - startFrag) + 1;
! 			sprintf(buf, "<reference osisRef=\"%s-%s\">%s</reference>%s", element->LowerBound().getOSISRef(), element->UpperBound().getOSISRef(), frag, postJunk);
  		}
  		else {
  			memmove(frag, startFrag, ((const char *)verses.GetElement(i)->userData - startFrag) + 1);
  			frag[((const char *)verses.GetElement(i)->userData - startFrag) + 1] = 0;
! 			int j;
! 			for (j = strlen(frag)-1; j && (strchr(" ;,()[].", frag[j])); j--);
! 			if (frag[j+1])
! 				strcpy(postJunk, frag+j+1);
! 			frag[j+1]=0;
! 			startFrag += ((const char *)verses.GetElement(i)->userData - startFrag) + 1;
! 			sprintf(buf, "<reference osisRef=\"%s\">%s</reference>%s", VerseKey(*verses.GetElement(i)).getOSISRef(), frag, postJunk);
  		}
  		outRef+=buf;
  	}
+ 	outRef+=startFrag;
  	return outRef.c_str();
  }
***************
*** 82,86 ****
          }
  
!         std::cout << convertToOSIS(verseString, &verseKey);
  
  	return 0;
--- 101,105 ----
          }
  
!         std::cout << convertToOSIS(verseString, &verseKey) << "\n";
  
  	return 0;