// SWORD engine and standard library headers used by this utility
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <vector>

#include <swmgr.h>
#include <swmodule.h>
#include <swbuf.h>
#include <swconfig.h>
#include <versekey.h>
#include <utilxml.h>
#include <utilstr.h>
#include <filemgr.h>
#include <utf8greekaccents.h>

using namespace sword;
using namespace std;

#include "matchers/matcher.h"

// select your matcher here
#include "matchers/gntmatcher.h"
//#include "matchers/defaultmatcher.h"
Matcher *matcher = new GNTMatcher();

// hard code your from and to modules here or pass them on the command line
// with the options below
SWBuf strongsSourceModuleNameOT = "";
SWBuf strongsSourceModuleNameNT = "WHNU";
SWBuf targetModuleName = "NA28FromImp";
SWBuf targetTEIFile = "";

const char *ignoreSeries = "⸆¹⸆²⸆⸇᾿˸¹˸²˸³˸·¹²⟦–ʹ°¹°²⸋¹⸋²⸋⸌¹⸌°*[];⸀¹⸀²⸀³⸁⸀◆⟧ ⸂¹⸂²⸄⸂⸅⸃⸉¹⸈⸉⸊ ";
//const char *ignoreSeries = "";

typedef vector<int> BibMap;

void insert(SWBuf addText, SWBuf &out, int bibPos, BibMap &bibMap, BibMap &wTags, bool after = false);
SWBuf findCanonicalBibleText(SWBuf orig, BibMap &bibMap, BibMap &wTags);
SWBuf buildWordMaps(const SWBuf &markupBuf, const BibMap &bibMap, vector<SWBuf> &targetWords, vector<int> &targetWordStarts, vector<int> &targetWordEnds);
void pullFromModData(SWModule &fromMod, vector<XMLTag> &wordTags, vector<SWBuf> &fromWords, vector<int> &fromWordTags);
void insertWordTags(VerseKey *vk, SWBuf &markupBuf, BibMap &bibMap, BibMap &wTags, vector<int> &targetWordTags, const vector<XMLTag> &wordTags, const vector<int> &targetWordStarts, const vector<int> &targetWordEnds, vector<SWBuf> &fromWords, SWConfig *lex = 0);

// app options
bool optionFilterAccents = false;
bool optionFilterAppCrit = false;
bool optionDebug = false;
bool optionIncludeLex = false;
vector<SWBuf> optionExceptionFile;
SWConfig *exceptionFile = 0;

void usage(const char *progName, const char *error = 0) {
    if (error) fprintf(stderr, "\n%s: %s\n", progName, error);
    fprintf(stderr, "\n=== migratetags (Revision $Rev$) Migrate word morphology from one module to another.\n");
    fprintf(stderr, "\nusage: %s [options]\n", progName);
    fprintf(stderr, " -ss <moduleName>\t provide the Strong's source module name for both OT and NT\n");
    fprintf(stderr, " -ssot <moduleName>\t provide a different Strong's source module name for the OT\n");
    fprintf(stderr, " -l \t\t include lexical and source information\n");
    fprintf(stderr, " -t <moduleName>\t provide the target module name\n");
    fprintf(stderr, " -tei <teiFile>\t provide the target tei filename\n");
    fprintf(stderr, " -e <confFile>\t provide an ini-style .conf file with overriding tag exceptions.\n");
    fprintf(stderr, " -fa\t\t\t filter accents: remove Greek accents from final text\n");
    fprintf(stderr, " -fc\t\t\t filter critical apparatus markers from final text\n");
    fprintf(stderr, " -v\t\t\t verbose: print lots of information while processing\n");
    fprintf(stderr, " --help\t\t this usage message\n");
    fprintf(stderr, "\n\n");
    exit(-1);
}
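
// Typical invocations (module and output file names here are only examples;
// the defaults hard coded above are WHNU -> NA28FromImp):
//
//   migratetags -ss WHNU -t NA28FromImp > na28-tagged.txt
//   migratetags -ss WHNU -tei na28.xml -e exceptions.conf -fc -fa > na28-tagged.xml
//
// In module mode each verse is written to stdout preceded by a "$$$ <ref>"
// line; unmatched words are reported on stderr so they can later be resolved
// with an -e exception file.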

SWModule *targetMod = 0;
bool getNextVerse(VerseKey *targetModKey, SWBuf *targetModText) {
    static int z = 0;
    static bool finished = false;
    if (++z == 1) {
        ((VerseKey *)targetMod->getKey())->setIntros(true);
        targetMod->getKey()->setText("mat0.0");
    }

    // assert our source is in good condition to give us more data
    if (finished) return false;

    // grab our raw, fully marked up TargetMod text for this verse
    (*targetModText) = targetMod->getRawEntryBuf();
    (*targetModKey) = (*(targetMod->getKey()));

    // clear any error from retrieving text
    targetMod->popError();

    (*targetMod)++;
    finished = targetMod->popError();

    return true;
}


FileDesc *targetInput = 0;
bool getNextVerseTEI(VerseKey *targetModKeyNT, VerseKey *targetModKeyOT, VerseKey *&targetModKey, SWBuf *targetModText) {
    targetModKey = targetModKeyNT;
    static bool finished = false;
    static bool fileEnd = false;
    static SWBuf line = "";
    XMLTag lastAB("</ab>");

    (*targetModText) = "";
    while (!fileEnd || line.size()) {
        if (!line.size()) {
            fileEnd = !FileMgr::getLine(targetInput, line, false);
            if (!fileEnd) line.append("\n");
        }
        int offset = line.indexOf("<ab ");
        int endOffset = line.indexOf("</ab>");
        if (offset < 0) offset = endOffset;
        else if (endOffset > -1 && endOffset < offset) offset = endOffset;    // </ab> was found and was before <ab>
        else {
            // this is for when we have interverse data we've output before we hit an <ab>
            if (targetModText->length() || offset > 0) {
                targetModText->append(line, offset);
                line << offset;
                break;
            }
        }
        if (offset > -1) {
            targetModText->append(line, offset);
            line << offset;
            int end = line.indexOf(">");
            if (end > -1) {
                SWBuf abText = "";
                abText.append(line, end+1);
                XMLTag ab(abText);
                targetModText->append(abText);
                line << (end+1);
                if (ab.isEndTag()) {
                    break;
                }
                lastAB = ab;
            }
        }
        else {
            targetModText->append(line);
            line = "";
        }
    }

    // assert our source is in good condition to give us more data
    if (fileEnd && !line.size() && !targetModText->size()) return false;

    // grab our raw, fully marked up TargetMod text for this verse
    if (lastAB.isEndTag()) {
        // we are just returning interverse material so targetModKey is out of bounds
        // just set to any error
        targetModKey->setError(-99);
    }
    else {
        // pull the reference out of the <ab> xml:id (segments separated by '-';
        // the verse segment looks like B01K1V1)
        SWBuf id = lastAB.getAttribute("xml:id");
        SWBuf bkv = "";
        SWBuf bookName = "";
        SWBuf bookNum = "";
        SWBuf chapter = "";
        SWBuf verse = "";
        SWBuf segment = id.stripPrefix('-');
        if (!segment.size()) bkv = id;
        if (!bkv.size() && !segment.startsWith("B")) {
            segment = id.stripPrefix('-');
        }
        else if (!bkv.size()) bkv = segment;
        if (!bkv.size() && !segment.startsWith("B")) {
            segment = id.stripPrefix('-');
        }
        else if (!bkv.size()) bkv = segment;

        // if we have more segments, find the last segment
        // because this is likely the bookName
        if (bkv.size() && id.size() && id != bkv) {
            id.stripPrefix('-');
            id.stripPrefix('-');
            id.stripPrefix('-');
            id.stripPrefix('-');
            bookName = id;
        }
        if (bkv.size()) {
            bkv << 1;
            bookNum = bkv.stripPrefix('K');
            chapter = bkv.stripPrefix('V');
            verse = bkv;
            SWBuf osisID = (bookName.size() ? bookName : bookNum);
            osisID.appendFormatted(".%s.%s", chapter.c_str(), verse.c_str());
            (*targetModKey) = osisID;
            if (targetModKey->getError() || targetModKey->getTestament() == 1) {
                targetModKey = targetModKeyOT;
                (*targetModKey) = osisID;
            }
        }
    }
    return true;
}
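
// A sketch of how getNextVerseTEI() turns an <ab> xml:id into a verse key,
// assuming a hyphen-separated id whose verse segment has the B..K..V.. form
// parsed above (the id value itself is only illustrative):
//
//   xml:id="NT-B01K1V1-Matt"  ->  bkv "B01K1V1"  ->  book "01" (or "Matt" when
//   a trailing book-name segment is present), chapter "1", verse "1"
//   ->  osisID "Matt.1.1"
//
// If the osisID errors out on the NT key or resolves to the OT, the OT key is
// used for the verse instead.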

int main(int argc, char **argv) {

    const char *progName = argv[0];

    for (int i = 1; i < argc; ++i) {
        if (!strcmp(argv[i], "-v")) {
            optionDebug = true;
        }
        else if (!strcmp(argv[i], "-fa")) {
            optionFilterAccents = true;
        }
        else if (!strcmp(argv[i], "-l")) {
            optionIncludeLex = true;
        }
        else if (!strcmp(argv[i], "-fc")) {
            optionFilterAppCrit = true;
        }
        else if (!strcmp(argv[i], "-ss")) {
            if ((i + 1) < argc) {
                strongsSourceModuleNameNT = argv[++i];
                if (!strongsSourceModuleNameOT.length()) strongsSourceModuleNameOT = argv[i];
            }
            else usage(progName, "-ss argument requires a module name.");
        }
        else if (!strcmp(argv[i], "-ssot")) {
            if ((i + 1) < argc) {
                strongsSourceModuleNameOT = argv[++i];
            }
            else usage(progName, "-ssot argument requires a module name.");
        }
        else if (!strcmp(argv[i], "-t")) {
            if ((i + 1) < argc) {
                targetModuleName = argv[++i];
            }
            else usage(progName, "-t argument requires a module name.");
        }
        else if (!strcmp(argv[i], "-tei")) {
            if ((i + 1) < argc) {
                targetTEIFile = argv[++i];
            }
            else usage(progName, "-tei argument requires a tei filename.");
        }
        else if (!strcmp(argv[i], "-e")) {
            if (i+1 < argc) {
                optionExceptionFile.push_back(argv[++i]);
            }
            else usage(progName, "-e argument requires a file name.");
        }
        else usage(progName, (((SWBuf)"Unknown argument: ")+ argv[i]).c_str());
    }

    SWMgr lib;
    lib.setGlobalOption("Textual Variants", "Secondary Reading");
    SWModule *m = 0;
    if (targetTEIFile.size()) {
        targetInput = FileMgr::getSystemFileMgr()->open(targetTEIFile, FileMgr::RDONLY);
        if (!targetInput || targetInput->getFd() < 1) {
            cerr << "\nERROR: couldn't open tei file: " << targetTEIFile << ".\n";
            usage(progName, "Use -tei to supply tei filename");
            exit(1);
        }
    }
    else {
        m = lib.getModule(targetModuleName);
        if (!m) {
            cerr << "\nERROR: couldn't find target module: " << targetModuleName << ".\n";
            if (argc < 2) usage(progName, "Use -t to supply target module name");
            exit(1);
        }
        targetMod = m;
    }

    m = lib.getModule(strongsSourceModuleNameNT.c_str());
    if (!m) {
        cerr << "\nERROR: couldn't find Strong's source module: " << strongsSourceModuleNameNT.c_str() << ".\n";
        if (argc < 2) usage(progName, "Use -ss to supply Strong's source module name");
        exit(1);
    }
    SWModule &fromModNT = *m;

    m = lib.getModule(strongsSourceModuleNameOT.c_str());
    if (!m) {
        cerr << "\nERROR: couldn't find Strong's source module: " << strongsSourceModuleNameOT.c_str() << ".\n";
        if (argc < 2) usage(progName, "Use -ssot to supply OT Strong's source module name");
        exit(1);
    }
    SWModule &fromModOT = *m;

    for (int i = 0; i < optionExceptionFile.size(); ++i) {
        SWBuf fileName = optionExceptionFile[i];
        if (!i) exceptionFile = new SWConfig(fileName);
        else (*exceptionFile) += SWConfig(fileName);
    }

    VerseKey *targetModKeyNT = (VerseKey *)(targetInput ? fromModNT.createKey() : targetMod->createKey());
    VerseKey *targetModKeyOT = (VerseKey *)(targetInput ? fromModOT.createKey() : targetMod->createKey());
    targetModKeyOT->setIntros(true);
    targetModKeyNT->setIntros(true);
    SWBuf targetModText;
    SWConfig *lex = 0;
    if (optionIncludeLex) {
        lex = new SWConfig("../flashtools/greek.conf");
    }

    VerseKey *targetModKey = targetModKeyNT;
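
    // The -e exception files merged above are plain ini-style SWConfig files.
    // insertWordTags() looks each word up by "<osisRef>.<wordIndex>" in an
    // [exceptions] section and, when found, uses the supplied <w ...> start tag
    // verbatim instead of the matcher's mapping (an empty value leaves the word
    // untagged).  A minimal sketch, with an illustrative reference and tag:
    //
    //   [exceptions]
    //   Matt.1.1.0=<w lemma="strong:G976" morph="robinson:N-NSF">
    //
    // The "ref.index=<w ...>" entries printed in the unmatched-word report
    // below use this same form, so they can be pasted into an exception file
    // and adjusted.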

    while ((targetInput ? getNextVerseTEI(targetModKeyNT, targetModKeyOT, targetModKey, &targetModText) : getNextVerse(targetModKey, &targetModText))) {

        SWModule &fromMod = (targetModKey == targetModKeyNT ? fromModNT : fromModOT);

        if (targetModKey->getError()) {
            cout << targetModText;
            continue;
        }

        // we'll do the whole Bible eventually, but let's just get one verse
        // working well.

        // XML word tags which should be placed in this verse (start tag)
        // eg., <w lemma="..." morph="...">
        // pulled from FromMod
        vector<XMLTag> wordTags;

        // Just the raw canonical Bible text of this verse with no tags
        // eg., "In the beginning God created the heavens and the earth."
        SWBuf justTargetModBibleText = "";

        // a mapping for each character in justTargetModBibleText to the real location
        // in our out buffer.  This allows us to insert our <w> and </w>
        // tags in the correct place amongst the fully marked up
        // TargetMod out buffer.  This work is all done in the insert() method
        // above
        BibMap bibMap;
        BibMap wTags;

        // justTargetModBibleText (above) broken down into separate words
        // ie. all words in the TargetMod from this verse
        // eg. [0] = "In"; [1] = "the"; [2] = "beginning"; ...
        vector<SWBuf> targetWords;

        // where each corresponding targetWords[x] starts in justTargetModBibleText
        // eg. for "In the beginning..."
        // [0] = 0; [1] = 3; [2] = 7; ...
        // Needed to pass to insert method so we know where
        // to insert the start tag
        vector<int> targetWordStarts;

        // same as targetWordStarts, but the end of each word
        // eg. [0] = 1; [1] = 5; [2] = 15
        // Needed to pass to insert method so we know where
        // to insert the end tag
        vector<int> targetWordEnds;

        // This is the doozy.  This maps each TargetMod word to the correct
        // wordTags entry.
        vector<int> targetWordTags;

        // Equivalent to targetWords above, but for the FromMod.
        // Useful for helping determine matches to TargetMod words
        vector<SWBuf> fromWords;

        // Equivalent to targetWordTags which we need to produce,
        // but this one is produced for us from the FromMod data
        // If we can match a fromWords[x] entry, then we can assign
        // targetWordTags[ourMatch] = fromWordTags[x]
        vector<int> fromWordTags;

        bibMap.clear();
        wTags.clear();

        fromMod.setKey(targetModKey);

        if (!targetTEIFile.size()) {
            cout << "$$$ " << targetModKey->getText() << endl;
        }

        if (optionDebug) {
            cerr << "\nProcessing Verse: " << targetModKey->getText() << endl;
            cerr << "---------------------" << endl;
            cerr << "\nOur strongsSourceModule Markup" << endl;
            cerr << "---------------------" << endl;
            cerr << fromMod.getRawEntry() << endl;
            cerr << "---------------------" << endl;
        }

        // grab our raw, fully marked up TargetMod text for this verse
        SWBuf orig = targetModText;

        if (optionDebug) {
            cerr << "\nOur Original targetModule Markup" << endl;
            cerr << "---------------------" << endl;
            cerr << orig << endl;
            cerr << "---------------------" << endl;
        }

        if (optionFilterAppCrit) {
            SWBuf o = orig;
            const unsigned char* from = (unsigned char*)o.c_str();
            orig = "";
            while (*from) {
                SW_u32 ch = getUniCharFromUTF8(&from, true);
                // if ch is bad, then convert to replacement char
                if (!ch) ch = 0xFFFD;
                SWBuf checkChar;
                getUTF8FromUniChar(ch, &checkChar);
                if (checkChar != " " && strstr(ignoreSeries, checkChar.c_str())) continue;
                orig.append(checkChar);
            }
            if (optionDebug) {
                cerr << "\nOur Original targetModule Markup after FilterAppCrit" << endl;
                cerr << "---------------------" << endl;
                cerr << orig << endl;
                cerr << "---------------------" << endl;
            }
        }

        // let's find where just the canonical text is amongst
        // all our markup
        // newTargetModMarkup will eventually hold our updated markup with
        // the new <w> tags, but we'll start here by setting it to
        // the processed original markup.
        // on return, bibMap will be populated with each character
        // and the corresponding location into newTargetModMarkup where
        // the character resides.
        SWBuf newTargetModMarkup = findCanonicalBibleText(orig, bibMap, wTags);

        if (optionDebug) {
            cerr << "\nOur Original targetModule Markup After XMLTag-ifying" << endl;
            cerr << "---------------------" << endl;
            cerr << newTargetModMarkup << endl;
            cerr << "---------------------" << endl;
            cerr << "\nOur bibMap" << endl;
            cerr << "---------------------" << endl;
            for (BibMap::iterator it = bibMap.begin(); it != bibMap.end(); ++it) {
                cerr << *it << " ";
            }
            cerr << "\n---------------------" << endl;
        }

        // let's populate our TargetMod word data and fill in our
        // justTargetModBibleText buffer
        justTargetModBibleText = buildWordMaps(newTargetModMarkup, bibMap, targetWords, targetWordStarts, targetWordEnds);

        if (optionDebug) {
            cerr << "\nJust targetModule Bible Text" << endl;
            cerr << "---------------------" << endl;
            cerr << justTargetModBibleText << endl;
            cerr << "---------------------" << endl;
        }

        // ok, now lets grab out the groovy data from the FromMod module
        pullFromModData(fromMod, wordTags, fromWords, fromWordTags);

        //
        // ok, here's the real work.
        //
        // This method needs to guess which TargetMod words match which FromMod
        // words and then point them to their same original language
        // word tag by populating targetWordTags
        //
        matcher->matchWords(targetWordTags, targetWords, fromWords, fromWordTags);

        // ok, now that we have our targetWordTags magically populated
        // let's do the grunt work of inserting the <w> and </w> tags
        insertWordTags((VerseKey *)targetModKey, newTargetModMarkup, bibMap, wTags, targetWordTags, wordTags, targetWordStarts, targetWordEnds, fromWords, lex);

        if (optionDebug) {
            cerr << "\nHere's how we mapped things..." << endl;
            cerr << "---------------------" << endl;
            cerr << "Total wordTags: " << wordTags.size() << endl;
            cerr << "\nTargetMod Words: " << endl;
        }

        bool warned = false;
        for (int i = 0; i < targetWords.size(); ++i) {
            if (targetWordTags[i] == -1 && !strstr(ignoreSeries, targetWords[i])) {
                if (!warned) {
                    cerr << "*** Error: didn't match all words: " << targetModKey->getText() << endl;
                    cerr << (targetModKey->getTestament() == 2 ? strongsSourceModuleNameNT.c_str() : strongsSourceModuleNameOT.c_str()) << ":";
                    for (int j = 0; j < fromWords.size(); ++j) {
                        cerr << " " << fromWords[j];
                    }
                    cerr << endl;
                    cerr << (targetTEIFile.size() ? targetTEIFile : targetModuleName) << ":";
                    for (int j = 0; j < targetWords.size(); ++j) {
                        cerr << " " << targetWords[j];
                    }
                    cerr << endl;
                    cerr << endl;
                    cerr << "Unmatched Words:" << endl;
                    warned = true;
                }
                cerr << " " << i << ": " << targetWords[i] << " (" << matcher->sanitizeWord(targetWords[i]) << ")" << endl;
            }
            if (optionDebug) {
                cerr << targetWords[i] << " : " << targetWordTags[i] << " => " << (targetWordTags[i] > -1 ? wordTags[targetWordTags[i]] : "") << endl;
            }
        }

        if (warned) {
            cerr << "\n" << (targetTEIFile.size() ? targetTEIFile : targetModuleName) << " Tags:\n";
            VerseKey *vk = (VerseKey *)targetModKey;
            for (int j = 0; j < targetWords.size(); ++j) {
                if (!strstr(ignoreSeries, targetWords[j])) {
                    cerr << targetWords[j] << "\t\t " << vk->getOSISRef() << "." << j << "=" << (targetWordTags[j] > -1 ? (const char *)wordTags[targetWordTags[j]] : (targetWordTags[j] == -2 ? "{Using Exception}" : "")) << endl;
                }
            }
            cerr << "---------------------" << endl;
        }
"{Using Exception}" : "")) << endl; } } cerr << "---------------------" << endl; } if (optionFilterAccents) { UTF8GreekAccents filter; filter.setOptionValue("off"); filter.processText(newTargetModMarkup); } if (optionDebug) { cerr << "---------------------" << endl; cerr << "\nAND... Here's our final output" << endl; cerr << "---------------------" << endl; } cout << newTargetModMarkup; if (optionDebug) { cerr << newTargetModMarkup << endl; } if (!targetTEIFile.size()) { cout << endl; } if (optionDebug) { cerr << endl; } } delete exceptionFile; delete lex; return 0; } // builds up bibMap to contain only characters of Biblical text // and each character's corresponding real location in our output // buffer (returned value) SWBuf findCanonicalBibleText(SWBuf orig, BibMap &bibMap, BibMap &wTags) { bool XML_TAGGIFY = false; SWBuf out = ""; SWBuf tag = ""; int tagLevel = 0; int wTag = -1; int inTag = 0; bool wTagsPresent = orig.indexOf(" -1; SWBuf lastWElementText = ""; bool lastLBBreak = false; for (int i = 0; i < orig.length(); ++i) { if (orig[i] == '<') { inTag = true; } else if (inTag && orig[i] == '>') { inTag = false; XMLTag t = tag.c_str(); bool skipTag = false; if (!t.isEmpty()) { if (t.isEndTag()) { // clear out empty w tags if (t.getName() && !strcmp("w", t.getName())) { /* if (!lastWElementText.size()) { out.setSize(wTag); if (out.endsWith(' ')) { out.setSize(out.size() - 1); bibMap.pop_back(); wTags.pop_back(); } skipTag = true; } */ } tagLevel--; if (t.getName() && !strcmp("w", t.getName())) wTag = -1; } else { tagLevel++; if (t.getName() && !strcmp("w", t.getName())) { wTag = out.size(); lastWElementText = ""; } } } else { if (SWBuf(t.getName()) == "lb") { lastLBBreak = !(SWBuf(t.getAttribute("break")) == "no"); } } if (!skipTag) out += (XML_TAGGIFY ? t : SWBuf("<") + tag + ">"); tag = ""; } else if (inTag) { tag += orig[i]; } else { if ( // for texts without tags (!wTagsPresent && (!tagLevel || wTag != -1)) // for texts with tags || ( wTagsPresent && (wTag != -1 || orig[i] == ' ')) ) { bibMap.push_back(out.size()); wTags.push_back(wTag); } out += orig[i]; lastWElementText += orig[i]; } } return out; } // Inserts addText into out buffer and adjusts Bible character pointers accordingly // void insert(SWBuf addText, SWBuf &out, int bibPos, BibMap &bibMap, BibMap &wTags, bool after) { int to = 0; if (!after && wTags[bibPos] != -1) { to = wTags[bibPos] + 2; addText--; // discard the '>' addText << 2; // discard the ' &targetWords, vector &targetWordStarts, vector &targetWordEnds) { SWBuf bibWord = ""; SWBuf fromWord = ""; SWBuf bibText = ""; for (BibMap::const_iterator it = bibMap.begin(); it != bibMap.end(); it++) { /* char *b1 = markupBuf.getRawData()+*it; char *b2 = b1; __u32 uc = getUniCharFromUTF8(&b2); bool wordBreak = false; if (uc) { SWBuf u8c; u8c.append(b1, b2-b1); if (strstr(ignoreSeries, u8c.getRawData())) } */ char c = markupBuf[*it]; if (c != ' ' && c != '.' 
&& c != ';' && c != ',') { if (!bibWord.length()) targetWordStarts.push_back(bibText.length()); bibWord += c; } else { if (bibWord.length()) { targetWordEnds.push_back(bibText.length()-1); targetWords.push_back(bibWord); bibWord = ""; } } bibText += c; } if (bibWord.length()) { targetWordEnds.push_back(bibText.length()-1); targetWords.push_back(bibWord); } return bibText; } void pullFromModData(SWModule &fromMod, vector&wordTags, vector &fromWords, vector &fromWordTags) { fromMod.renderText(); // be sure FromMod has processed entry attributes AttributeList &words = fromMod.getEntryAttributes()["Word"]; SWBuf fromWord = ""; SWBuf bibWord = ""; for (AttributeList::iterator it = words.begin(); it != words.end(); it++) { // this is our new XMLTag. // attributes will be added below XMLTag w("w"); // this only gives us word count, not if we have multiple entries per word // don't use as loop int parts = atoi(it->second["PartCount"]); SWBuf lemma = ""; SWBuf morph = ""; bool found = true; for (int i = 1; found; ++i) { found = false; SWBuf key = ""; key = SWBuf().setFormatted("Lemma.%d", i); AttributeValue::iterator li = it->second.find(key); if (i == 1 && li == it->second.end()) li = it->second.find("Lemma"); if (li != it->second.end()) { found = true; if (i > 1) lemma += " "; key = SWBuf().setFormatted("LemmaClass.%d", i); AttributeValue::iterator lci = it->second.find(key); if (i == 1 && lci == it->second.end()) lci = it->second.find("LemmaClass"); if (lci != it->second.end()) { lemma += lci->second + ":"; } lemma += li->second; } key = SWBuf().setFormatted("Morph.%d", i); li = it->second.find(key); if (i == 1 && li == it->second.end()) li = it->second.find("Morph"); if (li != it->second.end()) { found = true; if (i > 1) morph += " "; key = SWBuf().setFormatted("MorphClass.%d", i); AttributeValue::iterator lci = it->second.find(key); if (i == 1 && lci == it->second.end()) lci = it->second.find("MorphClass"); if (lci != it->second.end()) { morph += lci->second + ":"; } morph += li->second; } // TODO: add src tags and maybe other attributes } if (lemma.length()) w.setAttribute("lemma", lemma); if (morph.length()) w.setAttribute("morph", morph); fromWord = it->second["Text"]; bibWord = ""; for (int j = 0; j < fromWord.length(); ++j) { char c = fromWord[j]; if (c != ' ' && c != '.' 
&& c != ';' && c != ',') { bibWord += c; } else { if (bibWord.length()) { fromWords.push_back(bibWord); fromWordTags.push_back(wordTags.size()); bibWord = ""; } } } if (bibWord.length()) { fromWords.push_back(bibWord); fromWordTags.push_back(wordTags.size()); } wordTags.push_back(w); } } void insertWordTags(VerseKey *vk, SWBuf &markupBuf, BibMap &bibMap, BibMap &wTags, vector &targetWordTags, const vector &wordTags, const vector &targetWordStarts, const vector &targetWordEnds, vector &fromWords, SWConfig *lex) { // TODO: this method needs some work, // like putting multiple consecutive words // together in one tag ConfigEntMap exceptions; if (exceptionFile) { exceptions = exceptionFile->getSection("exceptions"); } for (int i = 0; i < targetWordTags.size(); ++i) { SWBuf wordTag = ""; SWBuf norm = ""; if (targetWordTags[i] > -1) { wordTag = wordTags[targetWordTags[i]]; if (lex) { norm = fromWords[targetWordTags[i]]; } } if (exceptionFile) { SWBuf key; key.setFormatted("%s.%d", vk->getOSISRef(), i); ConfigEntMap::const_iterator it = exceptions.find(key); if (it != exceptions.end()) { targetWordTags[i] = -2; // note that we are using an exception, not a mapping, not unset (-1) wordTag = it->second; } } if (wordTag.length()) { // if we have been asked to include extra lexical data if (lex) { XMLTag w(wordTag); int attCount = w.getAttributePartCount("lemma", ' '); for (int i = 0; i < attCount; ++i) { SWBuf a = w.getAttribute("lemma", i, ' '); SWBuf c = a.stripPrefix(':'); if (c == "strong") { if (a.startsWith("G") || a.startsWith("H")) a << 1; SWBuf dict = (*lex)[a]["UTF8"]; SWBuf gloss = (*lex)[a]["Meaning"]; //w.setAttribute("corresp", dict); if (norm.length()) w.setAttribute("norm", norm); //w.setAttribute("gloss", gloss); wordTag = w.toString(); } } } insert((const char *)wordTag, markupBuf, targetWordStarts[i], bibMap, wTags); insert("", markupBuf, targetWordEnds[i], bibMap, wTags, true); } } }