[sword-svn] r148 - trunk/textsstats

scribe at www.crosswire.org scribe at www.crosswire.org
Fri Dec 12 00:58:29 MST 2008


Author: scribe
Date: 2008-12-12 00:58:28 -0700 (Fri, 12 Dec 2008)
New Revision: 148

Modified:
   trunk/textsstats/stats.cpp
Log:


Modified: trunk/textsstats/stats.cpp
===================================================================
--- trunk/textsstats/stats.cpp	2008-11-13 05:30:37 UTC (rev 147)
+++ trunk/textsstats/stats.cpp	2008-12-12 07:58:28 UTC (rev 148)
@@ -112,6 +112,12 @@
 bool compareFreq(const Word &w1, const Word &w2) {
 	return w1.freq > w2.freq;
 }
+bool compareSeqLenFreq(const Word &w1, const Word &w2) {	// ordering predicate passed to sort(): longer utf16 sequences first, ties broken by higher freq
+	if (w1.utf16.size() != w2.utf16.size()) {
+		return (w1.utf16.size() > w2.utf16.size());	// lengths differ: the longer sequence orders first
+	}
+	return w1.freq > w2.freq;	// equal length: the more frequent entry orders first
+}
 
 
 bool compareKJVFreq(const KJVPhrases::const_iterator &i1, const KJVPhrases::const_iterator &i2) {
@@ -196,12 +202,31 @@
 	for (vector<Word>::const_iterator it = seqList.begin(); it != seqList.end(); it++) {
 		const Word &w = (*it);
 //		cout << w->freq << "|" << escapedUTF8(w->utf8).c_str() << "|" << w->strong << "|" << prettyKJVFreq(w->kjvFreq).c_str() << "\n";
-		cout << w.freq << "|" << toUTF8(w.utf16).c_str() << "|" << w.utf16.size() << "\n";
+		cout << w.freq << "," << toUTF8(w.utf16).c_str() << "," << w.utf16.size() << "\n";
 	}
 	std::cout << std::endl;
 }
 
+void outputHTML(const vector<Word> &seqList) {	// writes one <tr> per Word to stdout: freq cell, then UTF-8 text cell; no enclosing <table> is emitted here
+	for (vector<Word>::const_iterator it = seqList.begin(); it != seqList.end(); it++) {
+		const Word &w = (*it);
+//		cout << w->freq << "|" << escapedUTF8(w->utf8).c_str() << "|" << w->strong << "|" << prettyKJVFreq(w->kjvFreq).c_str() << "\n";
+		cout << "<tr><td>" << w.freq << "</td><td>" << toUTF8(w.utf16).c_str() << "</td></tr>\n";	// NOTE(review): text is inserted without HTML escaping -- confirm it can never contain '<' or '&'
+	}
+	std::cout << std::endl;	// final newline after the rows; endl also flushes stdout
+}
 
+void outputXML(const vector<Word> &seqList) {	// writes one <Row> per Word to stdout (presumably SpreadsheetML rows -- TODO confirm); no surrounding document elements are emitted here
+	for (vector<Word>::const_iterator it = seqList.begin(); it != seqList.end(); it++) {
+		const Word &w = (*it);
+//		cout << w->freq << "|" << escapedUTF8(w->utf8).c_str() << "|" << w->strong << "|" << prettyKJVFreq(w->kjvFreq).c_str() << "\n";
+		cout << "<Row><Cell><Data ss:Type=\"Number\">" << w.freq << "</Data></Cell>";	// column 1: frequency
+		cout << "<Cell><Data ss:Type=\"String\">" << toUTF8(w.utf16).c_str() << "</Data></Cell>";	// column 2: UTF-8 text; NOTE(review): not XML-escaped -- confirm it can never contain '<' or '&'
+		cout << "<Cell><Data ss:Type=\"Number\">" << w.utf16.size() << "</Data></Cell></Row>\n";	// column 3: length of w.utf16
+	}
+	std::cout << std::endl;	// final newline after the rows; endl also flushes stdout
+}
+
 /**
  * output our flashcard .flash file format
  *
@@ -339,10 +364,14 @@
 	int minLength = 1;
 	int maxLength = 3;
 	char *range = "mat-rev";
+	int order = 1;
+	int format = 1;
 
 	if (argc > 1) minLength = atoi(argv[1]);
 	if (argc > 2) maxLength = atoi(argv[2]);
 	if (argc > 3) range = argv[3];
+	if (argc > 4) order = atoi(argv[4]);
+	if (argc > 5) format = atoi(argv[5]);
 
 	vector<Word> results;
 	for (int i = minLength; i <= maxLength; i++) {
@@ -350,8 +379,21 @@
 		results.insert(results.end(), pass.begin(), pass.end());
 	}
 	
-	sort(results.begin(), results.end(), compareFreq);
-	outputCSV(results);
+	if (order == 1) {
+		sort(results.begin(), results.end(), compareFreq);
+	}
+	else {
+		sort(results.begin(), results.end(), compareSeqLenFreq);
+	}
+	if (format == 1) {
+		outputCSV(results);
+	}
+	else if (format == 2) {
+		outputHTML(results);
+	}
+	else {
+		outputXML(results);
+	}
 
 	return 0;
 }




More information about the sword-cvs mailing list