// -----------------------------------------------------------------------------
// Overview (reviewer notes; the code below is unchanged)
//
// Command-line converter: reads a marked-up Bible text file (argv[1]) and an
// optional notes file (argv[2]) and writes the converted text to stdout. The
// header written by outHeader() names "NEW AMERICAN STANDARD BIBLE" /
// "Bible.en.NASB.1995"; identifiers such as osisOTBooks, osisID and
// VerseKey::convertToOSIS suggest the output is OSIS XML -- TODO confirm.
//
// NOTE(review): this copy of the file looks damaged by a lossy extraction:
//   * the header names after each #include are missing;
//   * many string literals are empty ("") where markup was presumably present;
//   * several statements are cut off in the middle, e.g. `strstr(outstr, " -1)`;
//   * `string ¬eLine` is presumably `string &noteLine`;
//   * the file is collapsed onto a few physical lines, so each `//` comment
//     lexically swallows the code that follows it on the same line.
// Restore from a clean copy before relying on any behavior described here.
//
// Tables
//   osisOTBooks / osisNTBooks / osisBooks: book abbreviations indexed by
//     testament (0 or 1) and book number - 1.
//   nasbMax = {39, 27}: main() treats book numbers above nasbMax[0] as
//     testament 1 and subtracts nasbMax[0] before indexing.
//
// readline(fd, buf)
//   Frees *buf, skips leading bytes 10, 13, ' ' and '\t', scans forward to the
//   next byte 10, then seeks back and re-reads the line into a new buffer of
//   (size+2)*2 chars. Trailing 10/13/space/tab bytes are zeroed. From the second
//   byte on, '`' after an isalpha() byte becomes 0xCC 0x80 and '~' becomes
//   0xCC 0x83 (the UTF-8 encodings of U+0300 and U+0303), each growing the text
//   by one byte. Returns !len, i.e. nonzero only when the last scanning read()
//   returned 0.
//   NOTE(review): `ch` is tested without having been set if the very first
//   read() does not return 1.
//
// main(argc, argv)
//   Exits -1 on missing arguments, -2 when an input file cannot be opened and
//   -3 when a {{book::chap}} marker does not match currentBook. Keeps one line
//   of lookahead (already passed through unicodeTicks() and prepLine()), then
//   per line handles book names, chapters, headings, {{book::chap}}verse
//   markers, note / cross-reference markers (via getNoteBody() with "N" / "R"),
//   `{`...`}` spans, and finally the "MG"/"MH" tokens in the "strongs numbers"
//   pass. Each processed line is written to std::cout; after the loop the open
//   verse/chapter/book state is closed and outTrailer() is called.
//   NOTE(review): no assignment to currentBook or currentTestament other than
//   their initialisation is visible in this copy.
//
// outHeader() / outTrailer()
//   Write fixed text to std::cout before and after the converted body.
//
// unicodeTicks(outstring)
//   Repeatedly replaces "``", "`", "'" and "\"" with three-byte sequences
//   E2 80 9C, E2 80 98, E2 80 99 and E2 80 9D respectively (the first case uses
//   a string literal instead of the uchar buffer) until none remain.
//
// replaceFirst(haystack, needle, replacement)
//   Replaces the first strstr() match of needle and returns its offset, or -1.
//   NOTE(review): an empty needle matches at offset 0, so a caller that does
//   `if (replaceFirst(...) > -1) continue;` with an empty needle never leaves
//   its loop; many calls in this copy have empty needles.
//
// prepLine(outstring, currentTestament, note)
//   Loops applying literal replacements (marker removal, "L<\>{ORD}" style
//   divine-name spellings, macron letters, the Daniel inscription words) and,
//   for each remaining "<\>" marker, a scan that lower-cases following
//   upper-case text using SWORD's StringMgr / getUniCharFromUTF8. When `note`
//   is true, '{' and '}' are replaced as well.
//
// getNoteBody(fdn, noteLine, osisID, currentBookNo, currentChapter,
//             currentVerse, nStr, nx)
//   Reads lines from fdn with readline() until one containing "{{" is found,
//   checks its {{book::chap}}verse against the current position (the verse may
//   also equal currentVerse+1) and exits -1 on mismatch or on a line without
//   such a marker. Looks up the tag "<" + nx + nStr + ">" in the note line,
//   runs prepLine(retVal, 0, true) on the result and returns it. Returns ""
//   when readline() reports end of input.
//   NOTE(review): nbuffer is not freed on that early return.
// -----------------------------------------------------------------------------
#include #include #include #include #include #include #include #include #ifndef __GNUC__ #include #else #include #endif #include #include #ifndef O_BINARY #define O_BINARY 0 #endif #ifdef HAVESWORD #include #include #include using namespace sword; #endif using std::string; using std::cout; using std::endl; static const char *osisOTBooks[] = { "Gen", "Exod", "Lev", "Num", "Deut", "Josh", "Judg", "Ruth", "1Sam", "2Sam", "1Kgs", "2Kgs", "1Chr", "2Chr", "Ezra", "Neh", "Esth", "Job", "Ps", "Prov", "Eccl", "Song", "Isa", "Jer", "Lam", "Ezek", "Dan", "Hos", "Joel", "Amos", "Obad", "Jonah", "Mic", "Nah", "Hab", "Zeph", "Hag", "Zech", "Mal", // extra-Biblical "Bar", "PrAzar", "Bel", "Sus", "1Esd", "2Esd", "AddEsth", "EpJer", "Jdt", "1Macc", "2Macc", "3Macc", "4Macc", "PrMan", "Ps151", "Sir", "Tob", "Wis"}; static const char *osisNTBooks[] = { "Matt", "Mark", "Luke", "John", "Acts", "Rom", "1Cor", "2Cor", "Gal", "Eph", "Phil", "Col", "1Thess", "2Thess", "1Tim", "2Tim", "Titus", "Phlm", "Heb", "Jas", "1Pet", "2Pet", "1John", "2John", "3John", "Jude", "Rev"}; static const char **osisBooks[] = { osisOTBooks, osisNTBooks }; const char nasbMax[2] = {39, 27}; char readline(int fd, char **buf) { char ch; if (*buf) delete [] *buf; *buf = 0; int len; long index = lseek(fd, 0, SEEK_CUR); // clean up any preceding white space while ((len = read(fd, &ch, 1)) == 1) { if ((ch != 10) && (ch != 13) && (ch != ' ') && (ch != '\t')) break; else index++; } while (ch != 10) { if ((len = read(fd, &ch, 1)) != 1) break; } int size = (lseek(fd, 0, SEEK_CUR) - index) - 1; *buf = new char [ (size+2) * 2 ]; if (size > 0) { lseek(fd, index, SEEK_SET); read(fd, *buf, size); read(fd, &ch, 1); //pop terminating char (*buf)[size] = 0; // clean up any trailing junk on buf int buflen = strlen(*buf); for (char *it = *buf+(buflen-1); it > *buf; it--) { if ((*it != 10) && (*it != 13) && (*it != ' ') && (*it != '\t')) break; else *it = 0; } // convert all spanish characters to combined for (unsigned char *it = 
(unsigned char *)(*buf)+1; *it; it++) { switch (*it) { /* case 0xE2 : // ‘ if (isalpha(it[-1]) && it[1] == 0x80 && it[2] == 0x98) { memmove(it, it+1, buflen - (it-(unsigned char *)*buf)); buflen--; it[0] = 0xcc; it[1] = 0x80; // yeah, I know it's already 0x80, but that's just a coincidence } else { fprintf(stderr, "oddity: %s\n", *buf); exit(-4); } break; */ case 0x60 : // ` if (isalpha(it[-1])) { memmove(it+1, it, buflen - (it-(unsigned char *)*buf) + 1); buflen++; it[0] = 0xcc; it[1] = 0x80; } else { // fprintf(stderr, "oddity: %s\n", *buf); // exit(-4); } break; case 0x7E : // ~ memmove(it+1, it, buflen - (it-(unsigned char *)*buf) + 1); buflen++; it[0] = 0xcc; it[1] = 0x83; break; } } } else **buf = 0; return !len; } void outHeader(); void outTrailer(); void unicodeTicks(string &outstring); void prepLine(string &outstring, int currentTestament, bool note); string getNoteBody(int nfd, string ¬eLine, string osisID, int currentBookNo, int currentChapter, int currentVerse, string nStr, const char *nx); int replaceFirst(string &haystack, string needle, string replacement); int main(int argc, char **argv) { #ifdef HAVESWORD LocaleMgr::getSystemLocaleMgr()->setDefaultLocaleName("es"); #endif std::setlocale(LC_CTYPE, ""); // Let's test our command line arguments if (argc < 2) { // fprintf(stderr, "usage: %s [0|1 - file includes prepended verse references]\n", argv[0]); fprintf(stderr, "usage: %s [notesfile]\n\n", argv[0]); exit(-1); } // Let's see if we can open our input file int fd = open(argv[1], O_RDONLY|O_BINARY); if (fd < 0) { fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[1]); exit(-2); } int fdn = -1; if (argc > 2) { fdn = open(argv[2], O_RDONLY|O_BINARY); if (fdn < 0) { fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[2]); exit(-2); } } outHeader(); string header; char *buffer = 0; char *nbuffer = 0; int result = 0; string currentBook = ""; int currentBookNo = 0; int currentTestament = 0; int 
currentChapter = 0; int currentVerse = 0; bool inBook = false; bool inChapter = false; bool inVerse = false; string noteLine = ""; string preChapNote = ""; string outstring; result = readline(fd, &buffer); string lookahead = buffer; unicodeTicks(lookahead); prepLine(lookahead, currentTestament, false); do { result = readline(fd, &buffer); if (lookahead.length()) { string savebuf = buffer; if (buffer) delete [] buffer; buffer = new char [ lookahead.length() + 1]; strcpy(buffer, lookahead.c_str()); lookahead = savebuf; unicodeTicks(lookahead); prepLine(lookahead, currentTestament, false); result = 0; } else if (!result) { string savebuf = buffer; result = readline(fd, &buffer); lookahead = buffer; unicodeTicks(lookahead); prepLine(lookahead, currentTestament, false); if (buffer) delete [] buffer; buffer = new char [ savebuf.length() + 1]; strcpy(buffer, savebuf.c_str()); result = 0; } outstring = buffer; // BOOK NAMES if (!strncmp(outstring.c_str(), "", 4)) { string book = outstring.c_str()+4; book = book.substr(0, book.find_first_of("<")); outstring = ""; if (inVerse) { outstring += ""; inVerse = false; } if (inChapter) { outstring += ""; inChapter = false; } if (inBook) { outstring += ""; inBook = false; } outstring += (string)"
" + book + ""; inBook = true; } // CHAPTERS //PSALM if ((!strncmp(outstring.c_str(), "", 4)) || (!strncmp(outstring.c_str(), "", 4))) { string chapterTitle = outstring.c_str()+4; chapterTitle = chapterTitle.substr(0, chapterTitle.find_first_of("<")); string chapter = chapterTitle.substr(chapterTitle.find_first_of(" ")+1); outstring = ""; if (inVerse) { outstring += ""; inVerse = false; } if (inChapter) { outstring += ""; inChapter = false; } outstring += (string)""; outstring += (string)"" + chapterTitle + ""; currentChapter = atoi(chapter.c_str()); inChapter = true; currentVerse = 1; } // if (!strncmp(outstring.c_str(), "", 4)) { string heading = outstring.c_str()+4; heading = heading.substr(0, heading.find("")); outstring = ""; if (!strncmp(lookahead.c_str(), "", 4)) { lookahead.erase(0, 4); outstring += ""; } if (inVerse) { outstring += "\n"; inVerse = false; } outstring += (string)"" + heading + (string)""; } // if ((!strncmp(outstring.c_str(), "", 4)) || (!strncmp(outstring.c_str(), "", 5))) { bool shi = outstring.c_str()[3] == 'I'; if (shi) { fprintf(stderr, "found shi.\n"); } string heading = outstring.c_str()+(shi ? 5 : 4); heading = heading.substr(0, heading.find(shi ? "" : "")); outstring = ""; if (!strncmp(lookahead.c_str(), "", 4)) { lookahead.erase(0, 4); outstring += ""; } if (inVerse) { outstring += "\n"; inVerse = false; } outstring += (string)"" + heading + (string)""; } if (!strncmp(outstring.c_str(), "", 4)) { string heading = (outstring.c_str()+4); heading = heading.substr(0, heading.find("")); outstring = (string)"" + heading + (string)""; } if (!strncmp(outstring.c_str(), "", 4)) { string heading = (outstring.c_str()+4); heading = heading.substr(0, heading.find("")); outstring = (string)"" + heading + (string)""; } // {{x::y}} // DUH, find_first_of looks for the first occurance of ANY single character of the supplied string // int start = outstring.find_first_of("{{"); // this is whacked and fails on ">[{.." Try it! 
const char *outstr = outstring.c_str(); const char *found = strstr(outstr, "{{"); int start = (found) ? (found - outstr) : -1; // ---- end of whacked replacement if (start > -1) { found = strstr(outstr, "}}"); int end = (found) ? (found - outstr) : -1; end++; int testmt = 0, book = 0, chap = 0; string bkch = outstring.substr(start+2, end-start-2); sscanf(bkch.c_str(), "%d::%d", &book, &chap); currentChapter = chap; int vNumEnd = outstring.find_first_of(" ", end); currentVerse = atoi(outstring.substr(end+1, vNumEnd-end-1).c_str()); currentBookNo = book; if (book > nasbMax[0]) { testmt = 1; book -= nasbMax[0]; } if (currentBook != osisBooks[testmt][book-1]) { fprintf(stderr, "error: %s: Found a book/chapter identifier out of place: {{%d::%d}} in book %s, chapter %d; bkch = %s; outstring = %s\n", argv[0], book, chap, currentBook.c_str(), currentChapter, bkch.c_str(), outstring.c_str()); exit(-3); } char chapString[20], verseString[20]; sprintf(chapString, "%d", currentChapter); sprintf(verseString, "%d", currentVerse); string newstring = ""; if (inVerse) { newstring += ""; inVerse = false; } newstring += ""; outstring.replace(start, vNumEnd-start+1, newstring); inVerse = true; noteLine = preChapNote; preChapNote = ""; } // multiple occurances on a line stuff while (1) { // NOTE outstr = outstring.c_str(); found = strstr(outstr, " -1) { int end = outstring.find_first_of(">", start+1); string nStr = outstring.substr(start+2, end-start-2); // NOTE if (isdigit(nStr.c_str()[0]) || strchr("ABCDEFG", nStr.c_str()[0])) { bool preChap = strchr("A", nStr.c_str()[0]); if (preChap) noteLine = ""; char chapString[20], verseString[20]; sprintf(chapString, "%d", currentChapter); sprintf(verseString, "%d", currentVerse); string osisID = (currentBook + (string)"." + (string)chapString + (string)"." 
+ (string) verseString).c_str(); string noteBody = getNoteBody(fdn, noteLine, osisID, currentBookNo, currentChapter, currentVerse, nStr, "N"); if (preChap) preChapNote = noteLine; outstring.replace(start, end-start+1, (string)"" + noteBody + ""); // outstring.replace(start, end-start+1, (string)"--note--"); continue; } } // if (replaceFirst(outstring, "", "") > -1) continue; // if (replaceFirst(outstring, "", "") > -1) continue; // if (replaceFirst(outstring, "", "") > -1) continue; if (replaceFirst(outstring, "", "") > -1) continue; if (replaceFirst(outstring, "</SHI>", "") > -1) continue; // if (replaceFirst(outstring, "", "¿") > -1) continue; // if (replaceFirst(outstring, "", "¡") > -1) continue; outstr = outstring.c_str(); found = strstr(outstr, " -1) { int end = outstring.find_first_of(">", start+1); string nStr = outstring.substr(start+2, end-start-2); char chapString[20], verseString[20]; sprintf(chapString, "%d", currentChapter); sprintf(verseString, "%d", currentVerse); string osisID = (currentBook + (string)"." + (string)chapString + (string)"." + (string) verseString).c_str(); string noteBody = getNoteBody(fdn, noteLine, osisID, currentBookNo, currentChapter, currentVerse, nStr, "R"); outstring.replace(start, end-start+1, (string)"" + noteBody + ""); continue; } // transChange added {} outstr = outstring.c_str(); found = strstr(outstr, "{"); start = (found) ? (found - outstr) : -1; if (start > -1) { outstring.replace(start, 1, (string)""); size_t end = outstring.find_first_of("}", start+1); if (end != string::npos) { outstring.erase(end, 1); } else end = outstring.size()-1; while ((!isalpha(outstring[end]))&&(outstring[end]!='>')) end--; outstring.insert(end+1, ""); continue; } /* // transChange tenseChange * outstr = outstring.c_str(); found = strstr(outstr, "*"); start = (found) ? 
(found - outstr) : -1; if (start > -1) { outstring.replace(start, 1, (string)""); for (end = start + 34; (end < outstring.length()); end++) { if ((!isalpha(outstring[end])) && (outstring[end] != '\'')) break; } outstring.replace(end, 1, ""); continue; } */ // <,> if (replaceFirst(outstring, "<,>", "") > -1) continue; // if (replaceFirst(outstring, "", "") > -1) continue; // if (replaceFirst(outstring, "", "") > -1) continue; // if (replaceFirst(outstring, "", "") > -1) continue; // paragraph break if (replaceFirst(outstring, "", "") > -1) continue; // poetry break if (replaceFirst(outstring, "", "") > -1) continue; // poetry break if (replaceFirst(outstring, "", "") > -1) continue; // poetry break if (replaceFirst(outstring, "", "") > -1) continue; // letter indent if (replaceFirst(outstring, "", "") > -1) continue; // letter indent if (replaceFirst(outstring, "", "") > -1) continue; break; } int strongsStart = 0; int transChangeStart = 0; bool strongsStartFound = false; bool intoken = false; bool intag = false; bool inNote = false; int tokenStart = 0; string lastToken = ""; string previousToken = ""; int tenseChange = -1; // strongs numbers for (unsigned int i = 0; i < outstring.length(); ++i) { if ((!strongsStartFound) && (!inNote) && (!intoken)) { if (!intag) { if ((outstring[i] != ' ') && ((isalpha(outstring[i])) || (isdigit(outstring[i])))) { strongsStart = i; strongsStartFound = true; } } else if (!strncmp(lastToken.c_str(), "hi", 2) && strncmp(lastToken.c_str(), "hi type=\"inscription\"", 21)) { if ((outstring[i] != ' ') && ((isalpha(outstring[i])) || (isdigit(outstring[i])))) { strongsStart = tokenStart - 1; strongsStartFound = true; } } } if (outstring[i] =='*') tenseChange = i; if (outstring[i] == '<') { tokenStart = i+1; intoken = true; } if (outstring[i] == '>') { intoken = false; previousToken = lastToken; lastToken = outstring.substr(tokenStart, i-tokenStart); // Not completely safe, but works for current NASB data if (strchr(lastToken.c_str(), '/')) 
intag = false; else intag = true; if ((intag)&&(!strncmp(lastToken.c_str(), "transChange", 11))) { transChangeStart = i+1; } /* if (!strncmp(lastToken.c_str(), "seg", 3)) { strongsStartFound = false; strongsStart = i+1; } if (!strncmp(lastToken.c_str(), "divineName", 10)) { strongsStartFound = false; strongsStart = i+1; } */ if (!strncmp(lastToken.c_str(), "/divineName", 10)) { strongsStartFound = false; strongsStart = i+1; } if (!strncmp(lastToken.c_str(), "note", 4)) { strongsStartFound = false; strongsStart = i+1; inNote = true; } if (!strncmp(lastToken.c_str(), "/note", 5)) { strongsStartFound = false; strongsStart = i+1; inNote = false; } if (!strncmp(lastToken.c_str(), "q who=\"Jesus\"", 13)) { strongsStartFound = false; strongsStart = i+1; } if (!strncmp(lastToken.c_str(), "/q", 2)) { strongsStartFound = false; strongsStart = i+1; } if (!strncmp(lastToken.c_str(), "seg type=\"otPassage\"", 20)) { strongsStartFound = false; strongsStart = i+1; } if (!strncmp(lastToken.c_str(), "hi type=\"inscription\"", 21)) { strongsStartFound = false; strongsStart = i+1; } if (!strncmp(lastToken.c_str(), "/transChange", 12)) { strongsStartFound = false; strongsStart = i+1; } if (!strncmp(lastToken.c_str(), "milestone", 9)) { strongsStartFound = false; strongsStart = i+1; } if (!strncmp(lastToken.c_str(), "/seg", 4)) { strongsStartFound = false; strongsStart = i+1; } if (!strncmp(lastToken.c_str(), "verse", 5)) { strongsStartFound = false; strongsStart = i+1; } if ((!strncmp(lastToken.c_str(), "verse", 5))) { intag = false; } if ( (!strncmp(lastToken.c_str(), "MG", 2)) || (!strncmp(lastToken.c_str(), "MH", 2))) { // insert // fix tenseChange to be inside so we can include a subset of the content. outstring.replace(tokenStart-1, lastToken.length()+2, ((tenseChange > -1) ? "":"")); i = (tokenStart-1) + ((tenseChange > -1) ? 
18:4); // build tag char lang = lastToken[1]; // H or G lastToken.replace(0, 1, " -1) { lastToken.replace(start, 2, (string)" strong:" + lang); } lastToken += "\">"; intag = false; if (tenseChange > -1) { lastToken.insert(0, ""); } if (!strncmp(previousToken.c_str(), "transChange type=\"added", 23)) { outstring.insert(transChangeStart, lastToken); intag = true; i += lastToken.length() - 1; // (-1 because we're about to i++) } // insert our token else { outstring.insert(strongsStart, lastToken); i += lastToken.length() - 1; // (-1 because we're about to i++) } strongsStart = i+1; strongsStartFound = false; if (tenseChange > -1) { // relocate because position may have changed from all the token inserts const char *buf = outstring.c_str(); tenseChange = (strchr(buf, '*') - buf); outstring.erase(tenseChange, 1); tenseChange = -1; } } } } // clean up stuff that didn't work quite right while (1) { // divineName strongs tags misorderings string target = ""; size_t s = outstring.find(target); if (s != string::npos) { outstring.replace(s, target.length(), ""); continue; } target = ","; s = outstring.find(target); if (s != string::npos) { outstring.replace(s, target.length(), ","); continue; } break; } std::cout << outstring; if (!result) std::cout << "\n"; } while (!result); outstring = ""; if (inVerse) { outstring += ""; inVerse = false; } if (inChapter) { outstring += ""; inChapter = false; } if (inBook) { outstring += "
"; inBook = false; } std::cout << outstring; outTrailer(); // clean up our buffers that readline might have allocated if (buffer) delete [] buffer; if (nbuffer) delete [] nbuffer; close(fd); if (fdn > -1) close(fdn); } void outHeader() { std::cout << "" << "\n"; std::cout << "" << "\n"; std::cout << " " << "\n"; std::cout << "
" << "\n"; std::cout << " " << "\n"; std::cout << " NEW AMERICAN STANDARD BIBLE" << "\n"; std::cout << " Bible.en.NASB.1995" << "\n"; std::cout << " Copyright (C) 1960,1962,1963,1968,1971,1972,1973,1975,1977,1995 by THE LOCKMAN FOUNDATION" << "\n"; std::cout << " Bible" << "\n"; std::cout << " " << "\n"; std::cout << " " << "\n"; std::cout << " " << "\n"; std::cout << "
" << "\n"; } void outTrailer() { std::cout << "
\n"; std::cout << "
\n"; } void unicodeTicks(string &outstring) { while (1) { const char *outstr; const char *found; int start; outstr = outstring.c_str(); found = strstr(outstr, "``"); char uchar[4]; uchar[0]=0xe2; uchar[1]=0x80; uchar[2]=0x9c; uchar[3]=0; start = (found) ? (found - outstr) : -1; if (start > -1) { outstring.replace(start, 2, "“"); continue; } outstr = outstring.c_str(); found = strstr(outstr, "`"); uchar[0]=0xe2; uchar[1]=0x80; uchar[2]=0x98; uchar[3]=0; start = (found) ? (found - outstr) : -1; if (start > -1) { outstring.replace(start, 1, uchar); continue; } outstr = outstring.c_str(); found = strstr(outstr, "'"); uchar[0]=0xe2; uchar[1]=0x80; uchar[2]=0x99; uchar[3]=0; start = (found) ? (found - outstr) : -1; if (start > -1) { outstring.replace(start, 1, uchar); continue; } outstr = outstring.c_str(); found = strstr(outstr, "\""); uchar[0]=0xe2; uchar[1]=0x80; uchar[2]=0x9d; uchar[3]=0; start = (found) ? (found - outstr) : -1; if (start > -1) { outstring.replace(start, 1, uchar); continue; } break; } } // return offset of occurence replace; otherwise -1 int replaceFirst(string &haystack, string needle, string replacement) { const char *outstr = haystack.c_str(); const char *found = strstr(outstr, needle.c_str()); int start = (found) ? (found - outstr) : -1; if (start > -1) { haystack.replace(start, needle.size(), replacement); } return start; } void prepLine(string &outstring, int currentTestament, bool note) { int end = 0; while (1) { // ------------------------------------------ // redundant or unneeded or unknown markers size_t s; // <1EVA> if (replaceFirst(outstring, "<1EVA>", "") > -1) continue; // <1EVB> if (replaceFirst(outstring, "<1EVB>", "") > -1) continue; // if (replaceFirst(outstring, "", "") > -1) continue; // if (replaceFirst(outstring, "", "") > -1) continue; // if (replaceFirst(outstring, "", "") > -1) continue; // if (replaceFirst(outstring, "", "") > -1) continue; //

if (replaceFirst(outstring, "

", "") > -1) continue; // if (replaceFirst(outstring, "", "") > -1) continue; // if (replaceFirst(outstring, "", "") > -1) continue; // if (replaceFirst(outstring, "", "") > -1) continue; // <$F...>> s = outstring.find("<$F"); if (s != string::npos) { size_t e = outstring.find(">>", s); outstring.erase(s, e-s+2); continue; } // s = outstring.find(""); if (s != string::npos) { size_t e = outstring.find("", s); outstring.erase(s, e-s+6); continue; } // ---------------------------------------------- // if (replaceFirst(outstring, "", "") > -1) continue; // ~“ char uchar[6]; uchar[0]=0xcc; uchar[1]=0x83; uchar[2]=0xe2; uchar[3]=0x80; uchar[4]=0x9c; uchar[5]=0; // string target = "~“"; string target = uchar; s = outstring.find(target); if (s != string::npos) { outstring.replace(s, target.length(), ""); continue; } // +« target = "+«"; s = outstring.find(target); if (s != string::npos) { outstring.replace(s, target.length(), ""); continue; } // +» target = "+»"; s = outstring.find(target); if (s != string::npos) { outstring.replace(s, target.length(), ""); continue; } // +“ target = "+“"; s = outstring.find(target); if (s != string::npos) { outstring.replace(s, target.length(), ""); continue; } // +” target = "+”"; s = outstring.find(target); if (s != string::npos) { outstring.replace(s, target.length(), ""); continue; } // +‘ target = "+‘"; s = outstring.find(target); if (s != string::npos) { outstring.replace(s, target.length(), ""); continue; } // +’ target = "+’"; s = outstring.find(target); if (s != string::npos) { outstring.replace(s, target.length(), ""); continue; } // -« target = "-«"; s = outstring.find(target); if (s != string::npos) { outstring.replace(s, target.length(), ""); continue; } // -» target = "-»"; s = outstring.find(target); if (s != string::npos) { outstring.replace(s, target.length(), ""); continue; } // -“ target = "-“"; s = outstring.find(target); if (s != string::npos) { outstring.replace(s, target.length(), ""); continue; } // -” target = 
"-”"; s = outstring.find(target); if (s != string::npos) { outstring.replace(s, target.length(), ""); continue; } // -‘ target = "-‘"; s = outstring.find(target); if (s != string::npos) { outstring.replace(s, target.length(), ""); continue; } // -’ target = "-’"; s = outstring.find(target); if (s != string::npos) { outstring.replace(s, target.length(), ""); continue; } // ~‘ uchar[0]=0xcc; uchar[1]=0x83; uchar[2]=0xe2; uchar[3]=0x80; uchar[4]=0x98; uchar[5]=0; // target = "~‘"; target = uchar; s = outstring.find(target); if (s != string::npos) { outstring.replace(s, target.length(), ""); continue; } if (replaceFirst(outstring, "", "") > -1) continue; if (replaceFirst(outstring, "", "") > -1) continue; if (replaceFirst(outstring, "L<\\>{ORD}'<\\>{S}", "Lord's") > -1) continue; if (replaceFirst(outstring, "L<\\>{ORD’S}", "Lord’s") > -1) continue; if (replaceFirst(outstring, "L<\\>{ORD}’<\\>{S}", "Lord’s") > -1) continue; if (replaceFirst(outstring, "L<\\>{ORD}’<\\>{S} ", "Lord’s ") > -1) continue; if (replaceFirst(outstring, "L<\\>ORD’<\\>S", "Lord’s") > -1) continue; if (replaceFirst(outstring, "L<\\>{ORD,}", "Lord,") > -1) continue; if (replaceFirst(outstring, "L<\\>{ORD}", "Lord") > -1) continue; if (replaceFirst(outstring, "L<\\>{ORD} ", "Lord ") > -1) continue; if (replaceFirst(outstring, "L}<\\>{ORD}{", "Lord") > -1) continue; if (replaceFirst(outstring, "L}<\\>{ORD}", "Lord}") > -1) continue; if (replaceFirst(outstring, "S<\\>{EN~OR}", "Sen~or") > -1) continue; if (replaceFirst(outstring, "S<\\>{EÑOR}", "Señor") > -1) continue; if (replaceFirst(outstring, "Y<\\>{AH,}", "Yah,") > -1) continue; if (replaceFirst(outstring, "Y<\\>{AH,} ", "Yah, ") > -1) continue; if (replaceFirst(outstring, "Y<\\>{AH}", "Yah") > -1) continue; // Do these first before Daniel Inscriptions // LB = add macron, only with 'a': ā if (replaceFirst(outstring, "a", "ā") > -1) continue; if (replaceFirst(outstring, "E", "Ē") > -1) continue; if (replaceFirst(outstring, "e", "ē") > -1) 
continue; if (replaceFirst(outstring, "M<\\>ENĒ", "Menē") > -1) continue; if (replaceFirst(outstring, "MENĒ", "Menē") > -1) continue; // these are probably legacy if (replaceFirst(outstring, "M<\\>ENĒ:", "Menē:") > -1) continue; if (replaceFirst(outstring, "M<\\>ENE", "Mene") > -1) continue; if (replaceFirst(outstring, "M<\\>ENE:", "Mene:") > -1) continue; if (replaceFirst(outstring, "M<\\>ENĒ", "Menē") > -1) continue; if (replaceFirst(outstring, "MENĒ", "Menē") > -1) continue; if (replaceFirst(outstring, "MENE", "Mene") > -1) continue; // end of probably legacy if (replaceFirst(outstring, "T<\\>EKĒL", "Tekēl") > -1) continue; if (replaceFirst(outstring, "TEKĒL", "Tekēl") > -1) continue; // these are probably legacy if (replaceFirst(outstring, "TEKEL", "Tekel") > -1) continue; if (replaceFirst(outstring, "T<\\>EKEL", "Tekel") > -1) continue; if (replaceFirst(outstring, "T<\\>EKEL:", "Tekel:") > -1) continue; if (replaceFirst(outstring, "T<\\>EKĒL", "Tekēl") > -1) continue; if (replaceFirst(outstring, "T<\\>EKĒL:", "Tekēl:") > -1) continue; if (replaceFirst(outstring, "TEKĒL", "Tekēl") > -1) continue; // end of probably legacy if (replaceFirst(outstring, "U<\\>PHARSIN", "Upharsin") > -1) continue; if (replaceFirst(outstring, "UPHARSIN", "Upharsin") > -1) continue; // these are probably legacy if (replaceFirst(outstring, "UPHARSIN", "Upharsin") > -1) continue; if (replaceFirst(outstring, "UFARSIN", "Ufarsin") > -1) continue; if (replaceFirst(outstring, "U<\\>FARSIN", "Ufarsin") > -1) continue; if (replaceFirst(outstring, "UPHARSIN", "Upharsin") > -1) continue; if (replaceFirst(outstring, "UFARSIN", "Ufarsin") > -1) continue; // end of probably legacy if (replaceFirst(outstring, "P<\\>ERĒS", "Perēs") > -1) continue; if (replaceFirst(outstring, "PERĒS", "Perēs") > -1) continue; // these are probably legacy if (replaceFirst(outstring, "PERES", "Peres") > -1) continue; if (replaceFirst(outstring, "P<\\>ERES", "Peres") > -1) continue; if (replaceFirst(outstring, 
"P<\\>ERES:", "Peres:") > -1) continue; if (replaceFirst(outstring, "PERĒS", "Perēs") > -1) continue; if (replaceFirst(outstring, "P<\\>ERĒS", "Perēs") > -1) continue; if (replaceFirst(outstring, "P<\\>ERĒS:", "Perēs:") > -1) continue; // end of probably legacy if (replaceFirst(outstring, "H<\\>OLY TO THE L<\\>ORD", "Holy to the L<\\>ORD") > -1) continue; const char *outstr = outstring.c_str(); const char *found = strstr(outstr+end, "<\\>"); int start = (found) ? (found - outstr) : -1; if (start > -1) { for (--start;start;start--) { if ((!std::isupper(outstring[start])) && (!strchr("\\/ ~", outstring[start]))) { break; } } for (start++; outstring[start] == ' '; start++); if (currentTestament) { outstring.insert(start, ""); start += 22; } else { outstring.insert(start, ""); start += 17; int s = replaceFirst(outstring, "L<\\>{ORD}", "Lord"); if (s > -1) end = s+4; } // do small cap logic bool lower = false; string token = ""; for (int charLen = 1; start < (int)outstring.length(); start += charLen) { const unsigned char *startChar = (const unsigned char *)outstring.c_str()+start; const unsigned char *endChar = startChar; SW_u32 testChar = getUniCharFromUTF8(&endChar, true); charLen = endChar - startChar; // set the size of the UTF-8 sequence if (!token.size()) { if (testChar == '<') { token = "<"; continue; } // what is this? 
It screws MENE MENE up in Daniel // if (testChar == ':') // break; if (StringMgr::getSystemStringMgr()->isAlpha(testChar)) { if (StringMgr::getSystemStringMgr()->isLower(testChar)) break; if (lower) outstring.replace(start, charLen, SWBuf((const char *)startChar, charLen).toLower()); continue; } } else { token += testChar; if (testChar == '>') { if (token == "<\\>") { lower = true; outstring.erase(start-2, 3); start -= 3; } if (token == "") { lower = false; outstring.erase(start-2, 3); end = start - 2; start -= 3; unsigned int nextStrongs = outstring.find(""); if (nextStrongs != string::npos && nextUp != string::npos && nextStrongs < nextUp) { break; } } // end divineName if we hit a PO in the middle if (token == "") { break; } unsigned int s = token.find(" 0) s = token.find(" 2) { if (StringMgr::getSystemStringMgr()->isDigit(token[2])) { break; } } token = ""; } } } if (currentTestament) { outstring.insert(end, ""); end+=6; } else { outstring.insert(end, ""); end+=19; } continue; } // these are places where we unnecessarily stop and then start otPassage // we could make the otPassage logic work better, but these exception clean // thing up for now. if (replaceFirst(outstring, "’s", "’s") > -1) continue; if (replaceFirst(outstring, "-", "-") > -1) continue; if (replaceFirst(outstring, ",", ",") > -1) continue; if (replaceFirst(outstring, ", ", ", ") > -1) continue; if (replaceFirst(outstring, "! ", "! ") > -1) continue; if (replaceFirst(outstring, "; ", "; ") > -1) continue; if (replaceFirst(outstring, " ", " ") > -1) continue; if (replaceFirst(outstring, ", ‘", ", ‘") > -1) continue; if (replaceFirst(outstring, ",’ ", ",’ ") > -1) continue; if (note) { outstr = outstring.c_str(); found = strstr(outstr, "{"); start = (found) ? (found - outstr) : -1; if (start > -1) { outstring.replace(start, 1, ""); continue; } outstr = outstring.c_str(); found = strstr(outstr, "}"); start = (found) ? 
(found - outstr) : -1; if (start > -1) { outstring.replace(start, 1, ""); continue; } s = outstring.find(""); if (s != string::npos) { const size_t s2 = outstring.find(""); continue; } } } // if (replaceFirst(outstring, ")", ")") > -1) continue; break; } } string getNoteBody(int fdn, string ¬eLine, string osisID, int currentBookNo, int currentChapter, int currentVerse, string nStr, const char *nx) { char *nbuffer = 0; int start = -1; const char *found = (const char *)-1; const char *outstr = (const char *)-1; while (start == -1) { if (!noteLine.length() && fdn > -1) { if (readline(fdn, &nbuffer)) return ""; // eof noteLine = nbuffer; } outstr = noteLine.c_str(); found = strstr(outstr, "{{"); start = (found) ? (found - outstr) : -1; // be sure we have at least one of these. We've found note lines without any actual notes if (found) found = strstr(outstr, " -1) { found = strstr(outstr, "}}"); int end = (found) ? (found - outstr) : -1; end++; int book, chap; string bkch = noteLine.substr(start+2, end-start-2); sscanf(bkch.c_str(), "%d::%d", &book, &chap); int vNumEnd = noteLine.find_first_of(" ", end); int verse = atoi(noteLine.substr(end+1, vNumEnd-end-1).c_str()); if ((book != currentBookNo) || (chap != currentChapter) || ((verse != currentVerse) && /*kindof allow case in header before verse marker */ (verse != currentVerse+1))) { fprintf(stderr, "Not correct note line(%s - %s - %d:%d, %d:%d, %d:%d): %s\n\n", osisID.c_str(), nStr.c_str(), currentBookNo, book, currentChapter, chap, currentVerse, verse, noteLine.c_str()); exit(-1); } } else { fprintf(stderr, "Not a note line: %s\n\n", noteLine.c_str()); exit(-1); } outstr = noteLine.c_str(); string tag = (string)"<"+(string)nx+nStr+(string)">"; found = strstr(outstr, tag.c_str()); start = (found) ? 
(found - outstr) : -1; string retVal = ""; if (start > -1) { start += tag.length(); const char *nFound = strstr(outstr+start, " -1) { retVal.replace(start, 3, " "); } outstr = retVal.c_str(); found = strstr(outstr, ";}"); start = (found) ? (found - outstr) : -1; if (start > -1) { retVal.replace(start, 2, "};"); } VerseKey key = osisID.c_str(); //std::cerr << osisID.c_str() << ": Convert to OSISRef: " << retVal.c_str(); retVal = VerseKey::convertToOSIS(retVal.c_str(), &key); //std::cerr << ": " << retVal.c_str(); } #endif prepLine(retVal, 0, true); if (nbuffer) delete [] nbuffer; //std::cerr << ": " << retVal.c_str() << "\n"; return retVal; }