utilities/imp2ld.cpp File Reference

#include <string>
#include <vector>
#include <fstream>
#include <iostream>
#include <rawld.h>
#include <rawld4.h>
#include <zld.h>
#include <zipcomprs.h>
#include <lzsscomprs.h>
#include <stdio.h>
Include dependency graph for imp2ld.cpp:

Go to the source code of this file.

Functions

int main (int argc, char **argv)
void usage (const char *progName, const char *error=0)

Function Documentation

int main ( int  argc,
char **  argv 
)

Definition at line 61 of file imp2ld.cpp.

00061                                 {
00062 
00063     std::vector<string> linkbuffer;
00064     signed long i = 0;
00065     string keybuffer;
00066     string entbuffer;
00067     string linebuffer;
00068     char links = 0;
00069     string modname;
00070     SWBuf outPath          = "";
00071     bool append            = false;
00072     long blockCount = 30;
00073     bool caseSensitive = false;
00074     SWCompress *compressor = 0;
00075     SWBuf compType         = "";
00076     bool fourByteSize      = false;
00077 
00078     if (argc < 2) usage(*argv);
00079 
00080     const char *progName   = argv[0];
00081     const char *inFileName = argv[1];
00082 
00083     for (int i = 2; i < argc; i++) {
00084         if (!strcmp(argv[i], "-a")) {
00085             append = true;
00086         }
00087         else if (!strcmp(argv[i], "-z")) {
00088             if (compType.size()) usage(*argv, "Cannot specify both -z and -Z");
00089             if (fourByteSize) usage(*argv, "Cannot specify both -z and -4");
00090             compType = "ZIP";
00091         }
00092         else if (!strcmp(argv[i], "-Z")) {
00093             if (compType.size()) usage(*argv, "Cannot specify both -z and -Z");
00094             if (fourByteSize) usage(*argv, "Cannot specify both -Z and -4");
00095             compType = "LZSS";
00096         }
00097         else if (!strcmp(argv[i], "-4")) {
00098             fourByteSize = true;
00099         }
00100         else if (!strcmp(argv[i], "-b")) {
00101             if (i+1 < argc) {
00102                 blockCount = atoi(argv[++i]);
00103                 if (blockCount > 0) continue;
00104             }
00105             usage(*argv, "-b requires in entry count integer > 0");
00106         }
00107         else if (!strcmp(argv[i], "-o")) {
00108             if (i+1 < argc) outPath = argv[++i];
00109             else usage(progName, "-o requires <output_path>");
00110         }
00111         else if (!strcmp(argv[i], "-s")) {
00112             caseSensitive = true;
00113         }
00114         else usage(progName, (((SWBuf)"Unknown argument: ")+ argv[i]).c_str());
00115     }
00116 
00117 
00118 
00119     if (outPath.size() < 1) {
00120         for (i = 0; (i < 16) && (inFileName[i]) && (inFileName[i] != '.'); i++) {
00121             outPath += inFileName[i];
00122         }
00123     }
00124 
00125     std::ifstream infile(inFileName);
00126 
00127 
00128     SWModule *mod = 0;
00129     SWKey *key, *linkKey;
00130 
00131     if (compType == "ZIP") {
00132 #ifndef EXCLUDEZLIB
00133         compressor = new ZipCompress();
00134 #else
00135         usage(*argv, "ERROR: SWORD library not compiled with ZIP compression support.\n\tBe sure libzip is available when compiling SWORD library");
00136 #endif
00137     }
00138     else if (compType == "LZSS") {
00139         compressor = new LZSSCompress();
00140     }
00141 
00142     // setup module
00143     if (!append) {
00144         if (compressor) {
00145             if (zLD::createModule(outPath)) {
00146                 fprintf(stderr, "ERROR: %s: couldn't create module at path: %s \n", *argv, outPath.c_str());
00147                 exit(-1);
00148             }
00149         }
00150         else {
00151             if (!fourByteSize)
00152                 RawLD::createModule(outPath);
00153             else    RawLD4::createModule(outPath);
00154         }
00155     }
00156 
00157     if (compressor) {
00158         // Create a compressed text module allowing very large entries
00159         // Taking defaults except for first, fourth, fifth and last argument
00160         mod = new zLD(outPath, 0, 0, blockCount, compressor, 0, ENC_UNKNOWN, DIRECTION_LTR, FMT_UNKNOWN, 0, caseSensitive);
00161     }
00162     else {
00163         mod = (!fourByteSize)
00164             ? (SWModule *)new RawLD (outPath, 0, 0, 0, ENC_UNKNOWN, DIRECTION_LTR, FMT_UNKNOWN, 0, caseSensitive)
00165             : (SWModule *)new RawLD4(outPath, 0, 0, 0, ENC_UNKNOWN, DIRECTION_LTR, FMT_UNKNOWN, 0, caseSensitive);
00166     }
00167 
00168 
00169 
00170 
00171     key = mod->createKey();
00172     linkKey = mod->createKey();
00173     key->setPersist(true);
00174     mod->setKey(key);
00175 
00176     while (!infile.eof()) {
00177         std::getline(infile, linebuffer);
00178         if (linebuffer.size() > 3 && linebuffer.substr(0,3) == "$$$") {
00179             if (keybuffer.size() && entbuffer.size()) {
00180                 std::cout << keybuffer << std::endl;
00181                 *key = keybuffer.c_str();
00182 
00183                 mod->setEntry(entbuffer.c_str(), entbuffer.size());
00184                 for (i = 0; i < links; i++) {
00185                     std::cout << "Linking: " << linkbuffer[i] << std::endl;
00186                     *linkKey = linkbuffer[i].c_str();
00187                     mod->linkEntry(linkKey);
00188                 }
00189             }
00190             if (linebuffer.size() > 3)
00191                 keybuffer = linebuffer.substr(3,linebuffer.size());
00192 
00193             entbuffer.resize(0);
00194             linkbuffer.clear();
00195             links = 0;
00196         }
00197         else if (linebuffer.size() > 3 && linebuffer.substr(0,3) == "%%%") {
00198             linkbuffer.push_back(linebuffer.substr(3,linebuffer.size()));
00199             links++;
00200         }
00201         else {
00202             entbuffer += linebuffer;
00203         }
00204     }
00205 
00206     //handle final entry
00207     if (keybuffer.size() && entbuffer.size()) {
00208         std::cout << keybuffer << std::endl;
00209         *key = keybuffer.c_str();
00210 
00211         mod->setEntry(entbuffer.c_str(), entbuffer.size());
00212         for (i = 0; i < links; i++) {
00213             std::cout << "Linking: " << linkbuffer[i] << std::endl;
00214             *linkKey = linkbuffer[i].c_str();
00215             mod->linkEntry(linkKey);
00216         }
00217     }
00218 
00219     infile.close();
00220 
00221     delete linkKey;
00222     delete key;
00223     delete mod;
00224 
00225     return 0;
00226 }

void usage ( const char *  progName,
const char *  error = 0 
)

Definition at line 39 of file imp2ld.cpp.

/**
 * Print the imp2ld help text to stderr and terminate the process.
 *
 * @param progName program name shown in the error and usage lines
 * @param error    optional error message printed before the help text;
 *                 nothing extra is printed when it is null
 *
 * Never returns: always ends with exit(-1).
 */
void usage(const char *progName, const char *error = 0) {
    // One line of help per command-line option, in display order.
    static const char *const optionHelp[] = {
        "  -a\t\t\t augment module if exists (default is to create new)\n",
        "  -z\t\t\t use ZIP compression (default no compression)\n",
        "  -Z\t\t\t use LZSS compression (default no compression)\n",
        "  -o <output_path>\t where to write data files.\n",
        "  -4\t\t\t use 4 byte size entries (default is 2).\n",
        "  -b <entry_count>\t\t compression block size (default 30 entries)\n",
        "  -s\t\t\t case sensitive keys (default is not case sensitive)\n"
    };
    static const unsigned int optionHelpCount = sizeof(optionHelp) / sizeof(optionHelp[0]);

    // Short description of the input format with a two-entry example.
    static const char *const formatHelp =
        "'imp' format is a simple standard for importing data into SWORD modules.\n"
        "Required is a plain text file containing $$$key lines followed by content.\n\n"
        "$$$Abraham\n"
        "Abraham was the father of Isaac...\n"
        "He was called by God to leave his country and journey to the land of Canaan...\n"
        "$$$Isaac\n"
        "Isaac was the son of Abraham and Sarah...\n\n";

    if (error) {
        fprintf(stderr, "\n%s: %s\n", progName, error);
    }

    fputs("\n=== imp2ld (Revision $Rev: 2234 $) SWORD lexicon importer.\n", stderr);
    fprintf(stderr, "\nusage: %s <imp_file> [options]\n", progName);

    for (unsigned int n = 0; n < optionHelpCount; ++n) {
        fputs(optionHelp[n], stderr);
    }

    fputs("\n", stderr);
    fputs(formatHelp, stderr);

    exit(-1);
}


Generated on 18 Mar 2013 for The SWORD Project by  doxygen 1.6.1