utilities/imp2gbs.cpp File Reference

#include <ctype.h>
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <entriesblk.h>
#include <iostream>
#include <treekeyidx.h>
#include <rawgenbook.h>
#include <utilstr.h>
#include <filemgr.h>
#include <utf8greekaccents.h>
#include <stringmgr.h>
Include dependency graph for imp2gbs.cpp:

Go to the source code of this file.

Functions

int main (int argc, char **argv)
void parseParams (int argc, char **argv)
void usage (const char *app)
void writeEntry (SWModule *book, SWBuf keyBuffer, SWBuf entBuffer)

Variables

bool augEnt = true
bool augMod = false
UTF8GreekAccents greekAccentsFilter
bool greekFilter = false
SWBuf inFile
int lexLevels = 0
SWBuf outPath
bool toUpper = false

Function Documentation

int main ( int  argc,
char **  argv 
)

Definition at line 217 of file imp2gbs.cpp.

00217                                 {   // Program entry: copies "$$$"-keyed entries from inFile into a RawGenBook at outPath
00218     greekAccentsFilter.setOptionValue("Off");       // off = accents off
00219     parseParams(argc, argv);
00220     // parseParams() has now set inFile/outPath and the option flags, or exited via usage()
00221     // Let's see if we can open our input file
00222     FileDesc *fd = FileMgr::getSystemFileMgr()->open(inFile, FileMgr::RDONLY);
00223     if (fd->getFd() < 0) {   // NOTE(review): fd itself is not null-checked; presumably open() never returns null -- confirm
00224         fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], inFile.c_str());
00225         exit(-2);
00226     }
00227   
00228     RawGenBook *book;
00229   
00230     // Do some initialization stuff
00231     if (!augMod) {   // without -a (augMod), create the module at outPath before opening it
00232         RawGenBook::createModule(outPath);
00233     }
00234     book = new RawGenBook(outPath);
00235   
00236     SWBuf lineBuffer;   // most recently read input line
00237     SWBuf keyBuffer;    // key of the entry being collected (empty until the first "$$$" line)
00238     SWBuf entBuffer;    // text collected so far for keyBuffer
00239 
00240     bool more = true;
00241     do {
00242         more = FileMgr::getLine(fd, lineBuffer)!=0;   // the body below still runs for the read that returns 0
00243         if (lineBuffer.startsWith("$$$")) {   // a "$$$" line starts a new entry
00244             if ((keyBuffer.size()) && (entBuffer.size())) {   // flush the previous entry if it has both key and text
00245                 writeEntry(book, keyBuffer, entBuffer);
00246             }
00247             keyBuffer = lineBuffer;
00248             keyBuffer << 3;   // presumably shifts the 3-character "$$$" marker off the front -- confirm against SWBuf
00249             keyBuffer.trim();
00250             entBuffer.size(0);   // reset the collected text (buffer size set to 0)
00251         }
00252         else {
00253             if (keyBuffer.size()) {   // lines seen before any key are dropped
00254                 entBuffer += lineBuffer;
00255                 entBuffer += "\n";
00256             }
00257         }
00258     } while (more);
00259     if ((keyBuffer.size()) && (entBuffer.size())) {   // write the last pending entry
00260         writeEntry(book, keyBuffer, entBuffer);
00261     }
00262 
00263     delete book;
00264 
00265     FileMgr::getSystemFileMgr()->close(fd);
00266 
00267     return 0;   // reached only when the input file opened; the open-failure path exits with -2
00268 }

void parseParams ( int  argc,
char **  argv 
)

Definition at line 75 of file imp2gbs.cpp.

00075                                         {   // Parses argv into the option globals; bad input ends in usage(), which exits
00076     // argv[1] is the input file; options are scanned from argv[2] onward
00077     if (argc < 2) {
00078         usage(*argv);   // usage() calls exit(-1), so control does not continue past here
00079     }
00080 
00081     inFile = argv[1];
00082 
00083     for (int i = 2; i < argc; i++) {
00084         if (!strcmp(argv[i], "-o")) {
00085             if ((i+1 < argc) && (argv[i+1][0] != '-')) {   // -o needs a following value that does not start with '-'
00086                 outPath = argv[i+1];
00087                 i++;   // skip the value just consumed
00088             }
00089             else usage(*argv);
00090         }
00091         else if (!strcmp(argv[i], "-U")) {
00092             if (StringMgr::hasUTF8Support()) {   // -U is accepted only when StringMgr reports UTF-8 support
00093                 toUpper = true;
00094             }
00095             else {
00096                 fprintf(stderr, "Error: %s.  Cannot reliably toUpper without UTF8 support\n\t(recompile with ICU enabled)\n\n", *argv);
00097                 usage(*argv);
00098             }
00099         }
00100         else if (!strcmp(argv[i], "-g")) {
00101             greekFilter = true;
00102         }
00103         else if (!strcmp(argv[i], "-O")) {   // NOTE(review): augEnt is not read by main() or writeEntry() in this listing
00104             augEnt = false;
00105         }
00106         else if (!strcmp(argv[i], "-a")) {
00107             augMod = true;
00108         }
00109         else if (!strcmp(argv[i], "-l")) {
00110             if (i+1 < argc) {
00111                 lexLevels = atoi(argv[i+1]);   // non-numeric text yields 0, which is rejected below
00112                 i++;
00113             }
00114             if (!lexLevels) usage(*argv);   // also hit when -l is the last argument and lexLevels is still 0; negative values are not rejected
00115         }
00116     }   // arguments matching none of the options above are ignored without a message
00117     if (!outPath.size()) {   // no -o given: derive outPath from inFile
00118         outPath = inFile;
00119         unsigned int i;
00120         for (i = 0; (i < outPath.size() && outPath[i] != '.'); i++);   // find the first '.' (or the end of the string)
00121         outPath.size(i);   // cut there; NOTE(review): a path such as "./x.imp" has its first '.' at index 0, leaving outPath empty
00122     }
00123 }

void usage ( const char *  app  ) 

Definition at line 60 of file imp2gbs.cpp.

00060                             {   // Prints the help text to stderr, then terminates the process with exit(-1); never returns
00061     fprintf(stderr, "imp2gbs 1.0 General Book module creation tool for the SWORD Project\n\n");
00062     fprintf(stderr, "usage: %s <inFile> [OPTIONS]\n", app);   // app is printed as the program name
00063     fprintf(stderr, "\t-o <outPath>\n\t\tSpecify an output Path other than inFile location.\n");
00064     fprintf(stderr, "\t-a\n\t\tAugment Module [default: create new]\n");
00065     fprintf(stderr, "\t-O\n\t\tOverwrite entries of same key [default: append to]\n");
00066     fprintf(stderr, "\t-U\n\t\tKey filter: Convert toUpper\n");
00067     fprintf(stderr, "\t-g\n\t\tKey filter: Strip Greek diacritics\n");
00068     fprintf(stderr, "\t-l <levels>\n\t\tKey filter: Pseudo-Lexicon n-level generation using first character\n");
00069     fprintf(stderr, "\t\te.g. -l 2 \"Abbey\" -> \"A/AB/Abbey\"\n");
00070     fprintf(stderr, "\n");
00071     exit (-1);   // parseParams() depends on this: it continues to read argv[1] after calling usage() only because usage() does not return
00072 }

void writeEntry ( SWModule *  book,
SWBuf  keyBuffer,
SWBuf  entBuffer 
)

Definition at line 126 of file imp2gbs.cpp.

00126                                                                   {   // Filters the key as configured, then stores entBuffer in book under a key not already in use
00127     // keyBuffer and entBuffer arrive by value, so the edits below affect only this call's copies
00128 
00129     if (greekFilter) {   // set by -g
00130         greekAccentsFilter.processText(keyBuffer);
00131     }
00132 
00133     if (toUpper) {   // set by -U
00134         unsigned size = (keyBuffer.size()+5)*3;   // presumably head-room in case upper-casing lengthens the UTF-8 text
00135         keyBuffer.setFillByte(0);
00136         keyBuffer.resize(size);   // NOTE(review): the buffer is now padded to size bytes -- confirm later size() uses expect that
00137         StringMgr::getSystemStringMgr()->upperUTF8(keyBuffer.getRawData(), size-2);
00138     }
00139 
00140 // Added for Hesychius, but this stuff should be pushed back into new StringMgr
00141 // functionality
00142 #ifdef _ICU_
00143 //  if (lexLevels) {
00144     if (lexLevels && !keyBuffer.startsWith("/Intro")) {   // keys beginning with "/Intro" are left without the level prefix
00145         unsigned size = (keyBuffer.size()+(lexLevels*2));
00146         keyBuffer.setFillByte(0);
00147         keyBuffer.resize(size);
00148         // build the "A/AB/Abbey" style key (see the -l example in usage()) in a UTF-16 scratch buffer
00149         UErrorCode err = U_ZERO_ERROR;
00150         
00151         int max = (size+5)*3;
00152         UChar *ubuffer = new UChar[max+10];
00153         int32_t len;
00154         
00155         u_strFromUTF8(ubuffer, max+9, &len, keyBuffer.c_str(), -1, &err);
00156         if (err == U_ZERO_ERROR) {
00157             UChar *upper = new UChar[(lexLevels+1)*3];
00158             memcpy(upper, ubuffer, lexLevels*sizeof(UChar));   // take the first lexLevels UChars of the key
00159             upper[lexLevels] = 0;
00160             len = u_strToUpper(upper, (lexLevels+1)*3, upper, -1, 0, &err);   // len becomes the upper-cased prefix length
00161             memmove(ubuffer+len+1, ubuffer, (max-len)*sizeof(UChar));   // shift the key right to make room for prefix + '/'
00162             memcpy(ubuffer, upper, len*sizeof(UChar));
00163             ubuffer[len] = '/';
00164             delete [] upper;
00165             // for each shorter level, shift right again; the leading 'shift' UChars stay in place as that level's prefix
00166             int totalShift = 0;   // accumulated below but never read in the active code
00167             for (int i = lexLevels-1; i; i--) {
00168                 int shift = (i < len)? i : len;
00169                 memmove(ubuffer+(shift+1), ubuffer, (max-shift)*sizeof(UChar));
00170                 ubuffer[shift] = '/';
00171                 totalShift += (shift+1);
00172             }
00173             u_strToUTF8(keyBuffer.getRawData(), max, 0, ubuffer, -1, &err);   // NOTE(review): capacity is given as max, but keyBuffer was resized to size bytes above -- verify this cannot overrun
00174         }
00175         // (a disabled earlier variant of the block above follows)
00176 /*
00177         u_strFromUTF8(ubuffer, max+9, &len, keyBuffer.c_str(), -1, &err);
00178         if (err == U_ZERO_ERROR) {
00179             int totalShift = 0;
00180             for (int i = lexLevels; i; i--) {
00181                 int shift = (i < len)? i : len;
00182                 memmove(ubuffer+(shift+1), ubuffer, (max-shift)*sizeof(UChar));
00183                 ubuffer[shift] = '/';
00184                 totalShift += (shift+1);
00185             }
00186             UChar *upper = new UChar[(totalShift+1)*3];
00187             memcpy(upper, ubuffer, totalShift*sizeof(UChar));
00188             upper[totalShift] = 0;
00189             len = u_strToUpper(upper, (totalShift+1)*3, upper, -1, 0, &err);
00190             memmove(ubuffer+len, ubuffer+totalShift, (max-totalShift)*sizeof(UChar));
00191             memcpy(ubuffer, upper, len*sizeof(UChar));
00192             delete [] upper;
00193             u_strToUTF8(keyBuffer.getRawData(), max, 0, ubuffer, -1, &err);
00194         }
00195 */
00196         
00197         delete [] ubuffer;
00198     }
00199 #endif
00200 
00201     std::cout << keyBuffer << std::endl;   // echo the final key to stdout
00202 
00203     book->setKey(keyBuffer.c_str());
00204     // any popError() other than KEYERR_OUTOFBOUNDS is treated as "key already present" (presumably OUTOFBOUNDS means not found -- confirm)
00205     // check to see if we already have an entry
00206     for (int i = 2; book->getKey()->popError() != KEYERR_OUTOFBOUNDS; i++) {   // try "<key> {2}", "<key> {3}", ... in turn
00207         SWBuf key;
00208         key.setFormatted("%s {%d}", keyBuffer.c_str(), i);
00209         std::cout << "dup key, trying: " << key << std::endl;
00210         book->setKey(key.c_str());
00211     }
00212 
00213     book->setEntry(entBuffer);   // store the text at the key selected above
00214 }


Variable Documentation

bool augEnt = true

Definition at line 55 of file imp2gbs.cpp.

bool augMod = false

Definition at line 54 of file imp2gbs.cpp.

UTF8GreekAccents greekAccentsFilter

Definition at line 57 of file imp2gbs.cpp.

bool greekFilter = false

Definition at line 53 of file imp2gbs.cpp.

SWBuf inFile

Definition at line 51 of file imp2gbs.cpp.

int lexLevels = 0

Definition at line 56 of file imp2gbs.cpp.

SWBuf outPath

Definition at line 50 of file imp2gbs.cpp.

bool toUpper = false

Definition at line 52 of file imp2gbs.cpp.


Generated on 18 Mar 2013 for The SWORD Project by  doxygen 1.6.1