[sword-svn] r96 - trunk/flashtools

scribe at www.crosswire.org scribe at www.crosswire.org
Sun Sep 2 13:00:06 MST 2007


Author: scribe
Date: 2007-09-02 13:00:05 -0700 (Sun, 02 Sep 2007)
New Revision: 96

Modified:
   trunk/flashtools/flash.cpp
Log:
reworked 'with' vector to go with phrase, instead of count
added more comments


Modified: trunk/flashtools/flash.cpp
===================================================================
--- trunk/flashtools/flash.cpp	2007-09-02 18:27:12 UTC (rev 95)
+++ trunk/flashtools/flash.cpp	2007-09-02 20:00:05 UTC (rev 96)
@@ -14,15 +14,43 @@
 using namespace sword;
 using namespace std;
 
-class PhraseCount {
+// used to hold a KJV translation phrase for a greek/hebrew word
+// and any greek/hebrew words combined to make this KJV phrase
+// e.g. hO QEOS = QEOS: [+ hO ]: God
+class Phrase {
 public:
-	PhraseCount()
-		: count(0)
+	Phrase()
+		: phrase("")
 	{}
-	int count;
+	SWBuf phrase;
 	vector<SWBuf> with;
+	inline bool operator ==(const Phrase &other) const { return !compare(other); }
+	inline bool operator !=(const Phrase &other) const { return compare(other); }
+	inline bool operator > (const Phrase &other) const { return compare(other) > 0; }
+	inline bool operator < (const Phrase &other) const { return compare(other) < 0; }
+	inline bool operator <=(const Phrase &other) const { return compare(other) <= 0; }
+	inline bool operator >=(const Phrase &other) const { return compare(other) >= 0; }
+
+	int compare(const Phrase &right) const {
+		int c = phrase.compare(right.phrase);
+		if (c) return c;
+		vector<SWBuf>::const_iterator lit = with.begin();
+		vector<SWBuf>::const_iterator rit = right.with.begin();
+		while (lit != with.end() && rit != right.with.end()) {
+			c = lit->compare(*rit);
+			if (c) return c;
+			lit++; rit++;
+		}
+		if (lit !=       with.end()) return  1;
+		if (rit != right.with.end()) return -1;
+		return 0;
+	}
 };
 
+// KJV phrases and their occurrence frequency
+typedef map<Phrase, int> KJVPhrases;
+
+// primary result class
 class Word {
 public:
 	Word()
@@ -31,37 +59,50 @@
 		, freq(0)
 		, def("")
 	{}
+
+	// lexical form of this word in utf8 greek/hebrew
 	SWBuf utf8;
+
+	// strongs number for this word (e.g. G3588)
 	SWBuf strong;
+
+	// frequency of occurrence in the iterated text
 	int freq;
-	// from stongs lex
+
+	// definition pulled from short strongs def
 	SWBuf def;
-	// computed ourselves
-	map<SWBuf, PhraseCount> kjvFreq;
+
+	// kjv translation phrases and their frequencies
+	KJVPhrases kjvFreq;
 };
 
+
 string itoa(int v) { stringstream str; str << v; return str.str(); }
 
+
 bool compareFreq(const Word &w1, const Word &w2) {
 	return w1.freq > w2.freq;
 }
 
-bool compareKJVFreq(const map<SWBuf, PhraseCount>::const_iterator &i1, const map<SWBuf, PhraseCount>::const_iterator &i2) {
-	return i1->second.count > i2->second.count;
+
+bool compareKJVFreq(const KJVPhrases::const_iterator &i1, const KJVPhrases::const_iterator &i2) {
+	return i1->second > i2->second;
 }
 
-SWBuf prettyKJVFreq(map<SWBuf, PhraseCount> in) {
+
+// sort and pretty up all the KJV phrases for a word into a nice output buffer
+SWBuf prettyKJVFreq(KJVPhrases in) {
 	SWBuf retVal;
-	vector<map<SWBuf, PhraseCount>::const_iterator> sorted;
-	for (map<SWBuf, PhraseCount>::const_iterator it = in.begin(); it != in.end(); it++) {
+	vector<KJVPhrases::const_iterator> sorted;
+	for (KJVPhrases::const_iterator it = in.begin(); it != in.end(); it++) {
 		// combine cap words with lowercase, if exists
-		SWBuf k = it->first;
-		if (k.size() && toupper(k[0]) == k[0] && k != "God" && k != "Lord") {
-			k[0] = tolower(k[0]);
+		Phrase k = it->first;
+		if (k.phrase.size() && toupper(k.phrase[0]) == k.phrase[0] && k.phrase != "God" && k.phrase != "Lord") {
+			k.phrase[0] = tolower(k.phrase[0]);
 			if (k != it->first) {
-				map<SWBuf, PhraseCount>::iterator i = in.find(k);
+				KJVPhrases::iterator i = in.find(k);
 				if (i != in.end()) {
-					i->second.count += it->second.count;
+					i->second += it->second;
 					// don't include us in the list cuz we added our freq to another
 					continue;
 				}
@@ -70,21 +111,24 @@
 		sorted.push_back(it);
 	}
 	sort(sorted.begin(), sorted.end(), compareKJVFreq);
-	for (vector<map<SWBuf, PhraseCount>::const_iterator>::const_iterator it = sorted.begin(); it != sorted.end(); it++) {
+	for (vector<KJVPhrases::const_iterator>::const_iterator it = sorted.begin(); it != sorted.end(); it++) {
 		if (retVal.size()) retVal += "; ";
 		// prepend 'with other strongs' if present
-		if ((*it)->second.with.size()) {
+		if ((*it)->first.with.size()) {
 			retVal += "[+";
-			for (int i = 0; i < (*it)->second.with.size(); i++) {
-				retVal.appendFormatted(" %s", (*it)->second.with[i].c_str());
+			for (int i = 0; i < (*it)->first.with.size(); i++) {
+				retVal.appendFormatted(" %s", (*it)->first.with[i].c_str());
 			}
 			retVal += " ] ";
 		}
-		retVal.appendFormatted("%s (%d)", (*it)->first.c_str(), (*it)->second.count);
+		retVal.appendFormatted("%s (%d)", (*it)->first.phrase.c_str(), (*it)->second);
 	}
 	return retVal;
 }
 
+
+// take utf8 text and spit out equiv. text substituting escaped codes for multibyte chars
+// java .properties files want this format (flashcard .flash lessons use this format)
 SWBuf escapedUTF8(SWBuf inText) {
 	static UTF8UTF16 convert;
 	convert.processText(inText);
@@ -105,6 +149,7 @@
 }
 
 
+// output a simple CSV ('|' separated really) format for importing into OOo or excel
 void outputCSV(vector<Word> &wordList) {
 	for (vector<Word>::iterator it = wordList.begin(); it != wordList.end(); it++) {
 		Word &w = (*it);
@@ -184,12 +229,14 @@
 	} 
 }
 
+
 /**
  * do the work
  *
  * range - the range of verses to process (e.g. "gen-mal")
  * addAll - if we should add all words in our lexicon for the testaments
  *		included in the range even if they don't exist in the text
+ *		(useful for generating complete OT or NT strongs word lists)
  *
  */
 vector<Word> processWords(const char *range, bool addAll = true) {
@@ -238,12 +285,15 @@
 					while (text.size() && (strchr(".;,?-!\"()[]{}':/\t\r\n ", text[text.size()-1]))) text.setSize(text.size()-1);
 					if (!text.size()) text = "[Untranslated]";
 				}
-				wordList[strong].kjvFreq[text].count++;
+				Phrase p;
+				p.phrase = text;
 				if (parts > 1) {
+					// let's build our 'with' list excluding ourselves
 					list<SWBuf> withoutMe = lemmas;
 					withoutMe.remove(strong);
-					wordList[strong].kjvFreq[text].with = vector<SWBuf>(withoutMe.begin(), withoutMe.end());
+					p.with = vector<SWBuf>(withoutMe.begin(), withoutMe.end());
 				}
+				wordList[strong].kjvFreq[p]++;
 				wordList[strong].freq++;
 			}
 		}




More information about the sword-cvs mailing list