[sword-svn] r1831 - trunk/utilities

dglassey at crosswire.org dglassey at crosswire.org
Fri Jun 10 09:43:24 MST 2005


Author: dglassey
Date: 2005-06-10 09:43:23 -0700 (Fri, 10 Jun 2005)
New Revision: 1831

Modified:
   trunk/utilities/xml2gbs.cpp
Log:
And finally the conversion of xml2gbs to not using fixed length buffers



Modified: trunk/utilities/xml2gbs.cpp
===================================================================
--- trunk/utilities/xml2gbs.cpp	2005-06-10 16:39:09 UTC (rev 1830)
+++ trunk/utilities/xml2gbs.cpp	2005-06-10 16:43:23 UTC (rev 1831)
@@ -4,16 +4,10 @@
 #include <errno.h>
 #include <stdlib.h>
 
-#ifndef __GNUC__
-#include <io.h>
-#else
-#include <unistd.h>
-#endif
-
 #include <entriesblk.h>
-//#include <iostream>
+#include <iostream>
 #include <string>
-#include <stdio.h>
+#include <fstream>
 #include <treekeyidx.h>
 #include <rawgenbook.h>
 
@@ -24,29 +18,11 @@
 using sword::SWKey;
 #endif
 
-#define DEBUG
+//#define DEBUG
 
-/*
-void printTree(TreeKeyIdx treeKey, TreeKeyIdx *target = 0, int level = 1) {
-	if (!target)
-		target = &treeKey;
-
-	unsigned long currentOffset = target->getOffset();
-       	std::cout << ((currentOffset == treeKey.getOffset()) ? "==>" : "");
-	for (int i = 0; i < level; i++) std::cout << "\t";
-	std::cout << treeKey.getLocalName() << "/\n";
-	if (treeKey.firstChild()) {
-		printTree(treeKey, target, level+1);
-		treeKey.parent();
-	}
-	if (treeKey.nextSibling())
-		printTree(treeKey, target, level);
-}
-*/
-
-
-void setkey (TreeKeyIdx * treeKey, char* keybuffer) {
-    char* tok = strtok(keybuffer, "/");
+void setkey (TreeKeyIdx * treeKey, const char* keybuffer) {
+    char *keybuf = strdup(keybuffer);
+    char* tok = strtok(keybuf, "/");
     while (tok) {
       bool foundkey = false;
       if (treeKey->hasChildren()) {
@@ -81,79 +57,58 @@
       tok = strtok(NULL, "/");
       
     }
+    free(keybuf);
 }
 
-int readline(FILE* infile, char* linebuffer) {
-  signed char c;
-  char* lbPtr = linebuffer;
-  while ((c = fgetc(infile)) != EOF) {
-    *lbPtr++ = c;
-    if (c == 10) {
-      *lbPtr = 0;
-      return (lbPtr-linebuffer);
-    }
-  }
-  return 0;
-}
-
 enum XML_FORMATS { F_AUTODETECT, F_OSIS, F_THML };
 
 #define HELPTEXT "xml2gbs 1.0 OSIS/ThML General Book module creation tool for the SWORD Project\n  usage:\n   xml2gbs [-l] [-i] [-fT|-fO] <filename> [modname]\n  -l uses long div names in ThML files\n  -i exports to IMP format instead of creating a module\n  -fO and -fT will set the importer to expect OSIS or ThML format respectively\n    (otherwise it attempts to autodetect)\n"
 
-unsigned char detectFormat(char* filename, char* entbuffer) {
+unsigned char detectFormat(char* filename) {
 
   unsigned char format = F_AUTODETECT;
 
-  FILE *infile;
-  infile = fopen(filename, "r");
-  if (!infile) {
-        fprintf(stderr, HELPTEXT);
-        fprintf(stderr, "\n\nCould not open file \"%s\"\n", filename);
+  std::ifstream infile(filename);
+  std::string entbuffer;
+  
+  if (!infile.is_open()) {
+        std::cerr << HELPTEXT;
+        std::cerr << std::endl << std::endl << "Could not open file \"" << filename << "\"" << std::endl;
   }
   else {
-        while (readline(infile, entbuffer) && format == F_AUTODETECT) {
-                if (strstr(entbuffer, "<osis")) {
+	while (std::getline(infile, entbuffer) && format == F_AUTODETECT) {
+                if (strstr(entbuffer.c_str(), "<osis")) {
                         format = F_OSIS;
                 }
-                else if (strstr(entbuffer, "<ThML")) {
+                else if (strstr(entbuffer.c_str(), "<ThML")) {
                         format = F_THML;
                 }
         }
-        fclose(infile);
+        infile.close();
   }
 
   return format;
 }
 
-int getTag(FILE* file, char* keybuffer) {
-        char c;
-        char* kbPtr = keybuffer;
-        while ((c = fgetc(file)) != '>')
-                *kbPtr++ = c;
-        *kbPtr++ = c;
-        *kbPtr = 0;
-        return (kbPtr-keybuffer);
-}
-
-int processXML(char* filename, char* modname, bool longnames, bool exportfile, unsigned char format, char* entbuffer) {
+int processXML(const char* filename, char* modname, bool longnames, bool exportfile, unsigned char format) {
   signed long i = 0;
   char* strtmp;
+  std::string entbuffer;
 
 #ifdef DEBUG
   printf ("%s :%s :%d :%d :%d\n\n", filename, modname, longnames, exportfile, format);
 #endif
 
-  FILE *infile;
-  infile = fopen(filename, "r");
-  if (!infile) {
-        fprintf(stderr, HELPTEXT);
-        fprintf(stderr, "\n\nCould not open file \"%s\"\n", filename);
+  std::ifstream infile(filename);
+  if (!infile.is_open()) {
+        std::cerr << HELPTEXT;
+        std::cerr << std::endl << std::endl << "Could not open file \"" << filename << "\"" << std::endl;
         return -1;
   }
-  FILE *outfile;
+  std::ofstream outfile;
   if (exportfile) {
     strcat (modname, ".imp");
-    outfile = fopen(modname, "w");
+    outfile.open(modname);
   }
 
   TreeKeyIdx * treeKey;
@@ -162,11 +117,11 @@
   std::string divs[32];
 
   int level = 0;
-  char* keybuffer = new char[2048];
-  char* keybuffer2 = new char[2048];
-  char* n = new char[256];
-  char* type = new char[256];
-  char* title= new char[512];
+  std::string keybuffer = "";
+  std::string keybuffer2;
+  std::string n;
+  std::string type;
+  std::string title;
   unsigned long entrysize = 0;
   unsigned long keysize = 0;
   bool closer = false;
@@ -186,198 +141,173 @@
 #endif
 
   int c;
-  while ((c = fgetc(infile)) != EOF) {
+  while ((c = infile.get()) != EOF) {
     if (c == '<') {
-      if (getTag(infile, keybuffer)) {
-	if ((format == F_OSIS) && ((!strcmp(keybuffer, "/div>")) || (!strcmp(keybuffer, "/verse>")) || (!strcmp(keybuffer, "/chapter>"))) ||
-           ((format == F_THML) && ((!strncmp(keybuffer, "/div", 4)) && (keybuffer[4] > '0' && keybuffer[4] < '7')))) {
+	    {
+		    keybuffer = "";
+		    while ((c = infile.get()) != '>')
+			    keybuffer += c;
+		    keybuffer += c;
+	    }
+
+      if (keybuffer.length()) {
+	if ((format == F_OSIS) && ((!strncmp(keybuffer.c_str(), "/div>", 5)) || (!strncmp(keybuffer.c_str(), "/verse>", 7)) || (!strncmp(keybuffer.c_str(), "/chapter>", 9))) ||
+           ((format == F_THML) && ((!strncmp(keybuffer.c_str(), "/div", 4)) && (keybuffer[4] > '0' && keybuffer[4] < '7')))) {
 	  if (!closer) {
        	    keysize = 0;
-            keybuffer2[0] = 0;
+            keybuffer2 = "";
        	    for (i = 0; i < level; i++) {
-              keybuffer2[keysize] = '/';
+              keybuffer2 += '/';
        	      keysize++;
-              keybuffer2[keysize] = 0;
-       	      strcat (keybuffer2, divs[i].c_str());
+       	      keybuffer2 += divs[i];
               keysize += divs[i].length();
+	      std::cout << keybuffer2 << std::endl;
        	    }
 
 	    if (level) {
-	      printf ("%s\n", keybuffer2);
+	      std::cout << keybuffer2 << std::endl;
 	      if (exportfile) {
-		fprintf (outfile, "$$$%s\n%s\n", keybuffer2, entbuffer);
+		outfile << "$$$" << keybuffer2 << std::endl << entbuffer << std::endl;
 	      }
 	      else {
 		treeKey->root();
-		setkey(treeKey, keybuffer2);
-		book->setEntry(entbuffer, entrysize); // save text to module at current position
+		setkey(treeKey, keybuffer2.c_str());
+		book->setEntry(entbuffer.c_str(), entrysize); // save text to module at current position
 	      }
 	    }
 	  }
 	  level--;
-	  entbuffer[0] = 0;
+	  entbuffer = "";
 	  entrysize = 0;
 
 	  closer = true;
 	}
-	else if ((format == F_OSIS) && !((!strcmp(keybuffer, "div>") || !strncmp(keybuffer, "div ", 4)) || (!strcmp(keybuffer, "verse>") || !strncmp(keybuffer, "verse ", 6)) || (!strcmp(keybuffer, "chapter>") || !strncmp(keybuffer, "chapter ", 8))) ||
-                ((format == F_THML) && !((!strncmp(keybuffer, "div", 3)) && (keybuffer[3] > '0' && keybuffer[3] < '7')))) {
-	  entbuffer[entrysize++] = '<';
-	  for (i = 0; i <= strlen(keybuffer); i++) {
-              entbuffer[entrysize++] = keybuffer[i];
-	  }
-          entrysize--;
+	else if ((format == F_OSIS) && !((!strncmp(keybuffer.c_str(), "div>", 4) || !strncmp(keybuffer.c_str(), "div ", 4)) || (!strncmp(keybuffer.c_str(), "verse>", 6) || !strncmp(keybuffer.c_str(), "verse ", 6)) || (!strncmp(keybuffer.c_str(), "chapter>", 8) || !strncmp(keybuffer.c_str(), "chapter ", 8))) ||
+                ((format == F_THML) && !((!strncmp(keybuffer.c_str(), "div", 3)) && (keybuffer[3] > '0' && keybuffer[3] < '7')))) {
+	  entbuffer += '<';
+	  entrysize++;
+	  entrysize += keybuffer.length();
+	  entbuffer += keybuffer;
 	}
 	else {
 	  //we have a divN...
        	  if (!closer) {
             keysize = 0;
-       	    keybuffer2[0] = 0;
+       	    keybuffer2= "";
        	    for (i = 0; i < level; i++) {
-              keybuffer2[keysize] = '/';
+              keybuffer2 += '/';
        	      keysize++;
-              keybuffer2[keysize] = 0;
-       	      strcat (keybuffer2, divs[i].c_str());
+       	      keybuffer2 += divs[i];
               keysize += divs[i].length();
+	      std::cout << keybuffer2 << std::endl;
        	    }
 
 	    if (level) {
-	      printf ("%s\n", keybuffer2);
+	      std::cout << keybuffer2 << std::endl;
 	      if (exportfile) {
-		fprintf (outfile, "$$$%s\n%s\n", keybuffer2, entbuffer);
+		outfile << "$$$" << keybuffer2 << std::endl << entbuffer << std::endl;
 	      }
 	      else {
 		treeKey->root();
-		setkey(treeKey, keybuffer2);
-		book->setEntry(entbuffer, entrysize); // save text to module at current position
+		setkey(treeKey, keybuffer2.c_str());
+		book->setEntry(entbuffer.c_str(), entrysize); // save text to module at current position
 	      }
 	    }
 	  }
 
-	  entbuffer[0] = 0;
+	  entbuffer= "";
 	  entrysize = 0;
 
 	  level++;
-          keysize = strlen(keybuffer)-1;
-/*	  keysize = 0;
-	  while ((c = fgetc(infile)) != EOF) {
-	    if (c != '>') {
-	      keybuffer[keysize] = c;
-	      keysize++;
-	    }
-	    else {
-	      break;
-	    }
-	  }
-	  keybuffer[keysize] = 0;*/
+          keysize = keybuffer.length()-1;
 
-          type[0] = 0;
-      	  n[0] = 0;
-       	  title[0] = 0;
+          type = "";
+      	  n = "";
+       	  title = "";
 
           if (format == F_OSIS && longnames == false) {
-               	  strtmp = strstr(keybuffer, "osisID=\"");
+               	  strtmp = strstr(keybuffer.c_str(), "osisID=\"");
                	  if (strtmp) {
                	    strtmp += 8;
-               	    i = 0;
                	    for (;*strtmp != '\"'; strtmp++) {
                	      if (*strtmp == 10) {
-               		title[i] = ' ';
-               		i++;
+               		title += ' ';
                	      }
                	      else if (*strtmp == '.') {
-                        i = 0;
+                        title = "";
                	      }
                	      else if (*strtmp != 13) {
-               		title[i] = *strtmp;
-               		i++;
+               		title += *strtmp;
                	      }
                	    }
-               	    title[i] = 0;
                	  }
-                  strcpy (keybuffer, title);
+                  keybuffer = title;
           }
           else {
-               	  strtmp = strstr(keybuffer, "type=\"");
+               	  strtmp = strstr(keybuffer.c_str(), "type=\"");
                	  if (strtmp) {
                	    strtmp += 6;
-               	    i = 0;
                	    for (;*strtmp != '\"'; strtmp++) {
                	      if (*strtmp == 10) {
-               		type[i] = ' ';
-               		i++;
+               		type+= ' ';
                	      }
                	      else if (*strtmp != 13) {
-               		type[i] = *strtmp;
-               		i++;
+               		type+= *strtmp;
                	      }
                	    }
-               	    type[i] = 0;
                	  }
 
-               	  strtmp = strstr(keybuffer, "n=\"");
+               	  strtmp = strstr(keybuffer.c_str(), "n=\"");
                	  if (strtmp) {
                	    strtmp += 3;
-               	    i = 0;
                	    for (;*strtmp != '\"'; strtmp++) {
                	      if (*strtmp == 10) {
-               		n[i] = ' ';
-               		i++;
+               		n += ' ';
                	      }
                	      else if (*strtmp != 13) {
-               		n[i] = *strtmp;
-               		i++;
+               		n += *strtmp;
                	      }
                	    }
-               	    n[i] = 0;
                	  }
 
                   if (format == F_OSIS) {
-                       	  strtmp = strstr(keybuffer, "title=\"");
+                       	  strtmp = strstr(keybuffer.c_str(), "title=\"");
                 	  if (strtmp) {
                 	    strtmp += 7;
-                	    i = 0;
                 	    for (;*strtmp != '\"'; strtmp++) {
                 	      if (*strtmp == 10) {
-                		title[i] = ' ';
-                		i++;
+                		title += ' ';
                 	      }
                 	      else if (*strtmp != 13) {
-                		title[i] = *strtmp;
-                		i++;
+                		title += *strtmp;
                 	      }
                 	    }
-                	    title[i] = 0;
                 	  }
                   }
                   else if (format == F_THML) {
-                	  strtmp = strstr(keybuffer, "title=\"");
+                	  strtmp = strstr(keybuffer.c_str(), "title=\"");
                 	  if (strtmp) {
                 	    strtmp += 7;
-                	    i = 0;
                 	    for (;*strtmp != '\"'; strtmp++) {
                 	      if (*strtmp == 10) {
-                		title[i] = ' ';
-                		i++;
+                		title += ' ';
                 	      }
                 	      else if (*strtmp != 13) {
-                		title[i] = *strtmp;
-                		i++;
+                		title += *strtmp;
                 	      }
                 	    }
-                	    title[i] = 0;
                 	  }
                   }
 
-        	  strcpy (keybuffer, type);
-        	  if (strlen(keybuffer) && strlen(n))
-        	    strcat (keybuffer, " ");
-        	  strcat (keybuffer, n);
+        	  keybuffer = type;
+        	  if (keybuffer.length() && n.length())
+        	    keybuffer += " ";
+        	  keybuffer += n;
 
-        	  if (longnames && strlen(keybuffer))
-        	    strcat (keybuffer, ": ");
-        	  if (longnames || !strlen(keybuffer))
-        	    strcat (keybuffer, title);
+        	  if (longnames && keybuffer.length())
+        	    keybuffer += ": ";
+        	  if (longnames || !keybuffer.length())
+        	    keybuffer += title;
           }
           divs[level-1] = keybuffer;
 
@@ -386,9 +316,8 @@
       }
     }
     else if (c != 13) {
-      entbuffer[entrysize] = c;
+      entbuffer += c;
       entrysize++;
-      entbuffer[entrysize] = 0;
     }
   }
 
@@ -397,10 +326,6 @@
 #endif
 
 //  delete book;  //causes nasty-bad errors upon execution
-  delete n;
-  delete type;
-  delete title;
-  delete keybuffer;
 }
 
 int main(int argc, char **argv) {
@@ -439,19 +364,19 @@
                         }
                 }
                 else if (*filename == 0) {
-                        strcpy (filename, argv[i]);
+                        strncpy (filename, argv[i], 200);
                 }
                 else if (*modname == 0) {
-                        strcpy (modname, argv[i]);
+                        strncpy (modname, argv[i], 200);
                 }
         }
   }
   else if (argc > 1) {
-    strcpy (filename, argv[1]);
+    strncpy (filename, argv[1], 200);
   }
 
   if (!*filename) {
-    fprintf(stderr, HELPTEXT);
+    std::cerr << HELPTEXT << std::endl;
     return -1;
   }
   else {
@@ -462,16 +387,14 @@
                 modname[i] = 0;
         }
 
-        char* entbuffer = new char[1048576];
-        format = (format == F_AUTODETECT) ? detectFormat(filename, entbuffer) : format;
+        format = (format == F_AUTODETECT) ? detectFormat(filename) : format;
         if (format == F_AUTODETECT) {
                 fprintf(stderr, HELPTEXT);
                 fprintf(stderr, "\n\nCould not detect file format for file \"%s\", please specify.\n", filename);
                 return -1;
         }
 
-        int retCode =  processXML (filename, modname, longnames, exportfile, format, entbuffer);
-        delete entbuffer;
+        int retCode =  processXML (filename, modname, longnames, exportfile, format);
 
         return retCode;
   }



More information about the sword-cvs mailing list