[sword-svn] r1981 - in trunk: include src/modules/filters

Wed Oct 4 02:28:31 MST 2006

Author: scribe
Date: 2006-10-04 02:28:28 -0700 (Wed, 04 Oct 2006)
New Revision: 1981

Modified:
   trunk/include/swbasicfilter.h
   trunk/src/modules/filters/osishtmlhref.cpp
   trunk/src/modules/filters/osisosis.cpp
   trunk/src/modules/filters/swbasicfilter.cpp
   trunk/src/modules/filters/thmlgbf.cpp
   trunk/src/modules/filters/thmlhtml.cpp
   trunk/src/modules/filters/thmlhtmlhref.cpp
   trunk/src/modules/filters/thmlosis.cpp
   trunk/src/modules/filters/thmlplain.cpp
   trunk/src/modules/filters/thmlrtf.cpp
Log:
Applied patch from DM Smith which adds better handling for escape sequences.


Modified: trunk/include/swbasicfilter.h
===================================================================

--- trunk/include/swbasicfilter.h	2006-09-25 19:07:19 UTC (rev 1980)
+++ trunk/include/swbasicfilter.h	2006-10-04 09:28:28 UTC (rev 1981)
@@ -70,6 +70,7 @@
 	bool tokenCaseSensitive;
 	bool passThruUnknownToken;
 	bool passThruUnknownEsc;
+	bool passThruNumericEsc;
 	char processStages;
 
 
@@ -116,14 +117,28 @@
 	 */
 	void setPassThruUnknownEscapeString(bool val);
 
+	/** Sets whether to pass thru a numeric escape sequence unchanged
+	 *	or allow it to be handled otherwise.
+	 * Default is false.*/
+	void setPassThruNumericEscapeString(bool val);
+
 	/** Are escapeStrings case sensitive or not? Call this
 	 *	function before addEscapeStingSubstitute()
 	 */
 	void setEscapeStringCaseSensitive(bool val);
 
+	/** Registers an esc control sequence that can pass unchanged
+	 */
+	void addAllowedEscapeString(const char *findString);
+
+	/** Unregisters an esc control sequence that can pass unchanged
+	 */
+	void removeAllowedEscapeString(const char *findString);
+
 	/** Registers an esc control sequence
 	 */
 	void addEscapeStringSubstitute(const char *findString, const char *replaceString);
+
 	/** Unregisters an esc control sequence
 	 */
 	void removeEscapeStringSubstitute(const char *findString);
@@ -131,6 +146,12 @@
 	/** This function performs the substitution of escapeStrings */
 	bool substituteEscapeString(SWBuf &buf, const char *escString);
 
+	/** This passes allowed escapeStrings */
+	bool passAllowedEscapeString(SWBuf &buf, const char *escString);
+
+	/** This appends escString to buf as an entity */
+	void appendEscapeString(SWBuf &buf, const char *escString);
+
 	/** Are tokens case sensitive (like in GBF) or not? Call this
 	 *	function before addTokenSubstitute()
 	 */
@@ -168,6 +189,14 @@
 	 */
 	virtual bool handleEscapeString(SWBuf &buf, const char *escString, BasicFilterUserData *userData);
 
+	/** Called for all numeric escape sequences; the default passes them thru unchanged if passThruNumericEsc is set.
+	 * @param buf the output buffer 
+	 * @param escString the escape sequence (e.g. <code>"#235"</code> for &amp;#235;)
+	 * @return subclasses should return true if they handled the esc seq, or false if they did not.
+         */
+	virtual bool handleNumericEscapeString(SWBuf &buf, const char *escString);
+
+
 };
 
 SWORD_NAMESPACE_END

Modified: trunk/src/modules/filters/osishtmlhref.cpp
===================================================================
--- trunk/src/modules/filters/osishtmlhref.cpp	2006-09-25 19:07:19 UTC (rev 1980)
+++ trunk/src/modules/filters/osishtmlhref.cpp	2006-10-04 09:28:28 UTC (rev 1981)
@@ -63,15 +63,13 @@
 	setEscapeEnd(";");
 
 	setEscapeStringCaseSensitive(true);
+	setPassThruNumericEscapeString(true);
 
-//   commenting these out.  If someone is sure we shouldn't
-//   convert these since we are outputing to a markup that
-//   recognizes them, then please delete these lines
-//   addEscapeStringSubstitute("amp",  "&");
-//   addEscapeStringSubstitute("apos", "'");
-//   addEscapeStringSubstitute("lt",   "<");
-//   addEscapeStringSubstitute("gt",   ">");
-//   addEscapeStringSubstitute("quot", "\"");
+	addAllowedEscapeString("quot");
+	addAllowedEscapeString("apos");
+	addAllowedEscapeString("amp");
+	addAllowedEscapeString("lt");
+	addAllowedEscapeString("gt");
 
 	setTokenCaseSensitive(true);
 	

Modified: trunk/src/modules/filters/osisosis.cpp
===================================================================
--- trunk/src/modules/filters/osisosis.cpp	2006-09-25 19:07:19 UTC (rev 1980)
+++ trunk/src/modules/filters/osisosis.cpp	2006-10-04 09:28:28 UTC (rev 1981)
@@ -35,6 +35,14 @@
 	setEscapeEnd(";");
 
 	setEscapeStringCaseSensitive(true);
+	setPassThruNumericEscapeString(true);
+
+	addAllowedEscapeString("quot");
+	addAllowedEscapeString("apos");
+	addAllowedEscapeString("amp");
+	addAllowedEscapeString("lt");
+	addAllowedEscapeString("gt");
+
 	setTokenCaseSensitive(true);
 }
 

Modified: trunk/src/modules/filters/swbasicfilter.cpp
===================================================================
--- trunk/src/modules/filters/swbasicfilter.cpp	2006-09-25 19:07:19 UTC (rev 1980)
+++ trunk/src/modules/filters/swbasicfilter.cpp	2006-10-04 09:28:28 UTC (rev 1981)
@@ -29,16 +29,19 @@
 #include <utilstr.h>
 #include <stringmgr.h>
 #include <map>
+#include <set>
 
 SWORD_NAMESPACE_START
 
 typedef std::map<SWBuf, SWBuf> DualStringMap;
+typedef std::set<SWBuf> StringSet;
 
 // I hate bridge patterns but this isolates std::map from a ton of filters
 class SWBasicFilter::Private {
 public:
 	DualStringMap tokenSubMap;
 	DualStringMap escSubMap;
+	StringSet escPassSet;
 };
 
 const char SWBasicFilter::INITIALIZE = 1;
@@ -65,6 +68,7 @@
 	tokenCaseSensitive     = false;
 	passThruUnknownToken   = false;
 	passThruUnknownEsc     = false;
+	passThruNumericEsc     = false;
 }
 
 
@@ -94,7 +98,11 @@
 	passThruUnknownEsc = val;
 }
 
+void SWBasicFilter::setPassThruNumericEscapeString(bool val) {
+	passThruNumericEsc = val;	// numeric flag read by handleNumericEscapeString(); must not touch passThruUnknownEsc
+}
 
+
 void SWBasicFilter::setTokenCaseSensitive(bool val) {
 	tokenCaseSensitive = val;
 }
@@ -124,6 +132,24 @@
 	}
 }
 
+void SWBasicFilter::addAllowedEscapeString(const char *findString) {
+	char *buf = 0;
+
+	if (!escStringCaseSensitive) {
+		stdstr(&buf, findString);
+		toupperstr(buf);
+		p->escPassSet.insert(StringSet::value_type(buf));
+		delete [] buf;
+	}
+	else p->escPassSet.insert(StringSet::value_type(findString));
+}
+
+void SWBasicFilter::removeAllowedEscapeString(const char *findString) {
+	if (p->escPassSet.find(findString) != p->escPassSet.end()) {
+		p->escPassSet.erase( p->escPassSet.find(findString) );
+	}
+}
+
 void SWBasicFilter::addEscapeStringSubstitute(const char *findString, const char *replaceString) {
 	char *buf = 0;
 
@@ -161,9 +187,51 @@
 	return false;
 }
 
+void SWBasicFilter::appendEscapeString(SWBuf &buf, const char *escString) {
+	buf += escStart;
+	buf += escString;
+	buf += escEnd;
+}
+
+bool SWBasicFilter::passAllowedEscapeString(SWBuf &buf, const char *escString) {
+	StringSet::iterator it;
+
+	if (!escStringCaseSensitive) {
+	        char *tmp = 0;
+		stdstr(&tmp, escString);
+		toupperstr(tmp);
+		it = p->escPassSet.find(tmp);
+		delete [] tmp;
+	} else 
+		it = p->escPassSet.find(escString);
+
+	if (it != p->escPassSet.end()) {
+		appendEscapeString(buf, escString);
+		return true;
+	}
+
+	return false;
+}
+
+bool SWBasicFilter::handleNumericEscapeString(SWBuf &buf, const char *escString) {
+	if (passThruNumericEsc) {
+		appendEscapeString(buf, escString);
+		return true;
+	}
+	return false;
+}
+
 bool SWBasicFilter::substituteEscapeString(SWBuf &buf, const char *escString) {
 	DualStringMap::iterator it;
 
+	if (*escString == '#') {
+		return handleNumericEscapeString(buf, escString);
+	}
+
+	if (passAllowedEscapeString(buf, escString)) {
+		return true;
+	}
+
 	if (!escStringCaseSensitive) {
 	        char *tmp = 0;
 		stdstr(&tmp, escString);
@@ -278,9 +346,7 @@
 					
 					if (!userData->suspendTextPassThru)  { //if text through is disabled no tokens should pass, too
 						if ((!handleEscapeString(text, token, userData)) && (passThruUnknownEsc)) {
-							text += escStart;
-							text += token;
-							text += escEnd;
+							appendEscapeString(text, token);
 						}
 					}
 					escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0;

Modified: trunk/src/modules/filters/thmlgbf.cpp
===================================================================
--- trunk/src/modules/filters/thmlgbf.cpp	2006-09-25 19:07:19 UTC (rev 1980)
+++ trunk/src/modules/filters/thmlgbf.cpp	2006-10-04 09:28:28 UTC (rev 1981)
@@ -64,7 +64,7 @@
 			else if (!strncmp("amp", token, 3)) text += '&';
 			else if (!strncmp("lt", token, 2)) text += '<';
 			else if (!strncmp("gt", token, 2)) text += '>';
-			else if (!strncmp("brvbar", token, 6)) text += '|';
+			else if (!strncmp("brvbar", token, 6)) text += '¦';
 			else if (!strncmp("sect", token, 4)) text += '§';
 			else if (!strncmp("copy", token, 4)) text += '©';
 			else if (!strncmp("laquo", token, 5)) text += '«';
@@ -155,6 +155,13 @@
 			else if (!strncmp("uml", token, 3)) text += '¨';
 			else if (!strncmp("shy", token, 3)) text += '';
 			else if (!strncmp("macr", token, 4)) text += '¯';
+			else if (!strncmp("micro", token, 5)) text += "µ";
+			else if (!strncmp("middot", token, 6)) text +="·";
+			else if (!strncmp("cedil", token, 5)) text += "¸";
+			else if (!strncmp("ordm", token, 4)) text +=  "º";
+			else if (!strncmp("times", token, 5)) text += "×";
+			else if (!strncmp("divide", token, 6)) text +="÷";
+			else if (!strncmp("oslash", token, 6)) text +="ø";
 			continue;
 		
 		}

Modified: trunk/src/modules/filters/thmlhtml.cpp
===================================================================
--- trunk/src/modules/filters/thmlhtml.cpp	2006-09-25 19:07:19 UTC (rev 1980)
+++ trunk/src/modules/filters/thmlhtml.cpp	2006-10-04 09:28:28 UTC (rev 1981)
@@ -24,109 +24,119 @@
 ThMLHTML::ThMLHTML() {
 	setTokenStart("<");
 	setTokenEnd(">");
-/*
+
 	setEscapeStart("&");
 	setEscapeEnd(";");
 
 	setEscapeStringCaseSensitive(true);
+	setPassThruNumericEscapeString(true);
 
-	addEscapeStringSubstitute("nbsp", " ");
-	addEscapeStringSubstitute("quot", "\"");
-	addEscapeStringSubstitute("amp", "&");
-	addEscapeStringSubstitute("lt", "<");
-	addEscapeStringSubstitute("gt", ">");
-	addEscapeStringSubstitute("brvbar", "|");
-	addEscapeStringSubstitute("sect", "§");
-	addEscapeStringSubstitute("copy", "©");
-	addEscapeStringSubstitute("laquo", "«");
-	addEscapeStringSubstitute("reg", "®");
-	addEscapeStringSubstitute("acute", "´");
-	addEscapeStringSubstitute("para", "¶");
-	addEscapeStringSubstitute("raquo", "»");
+	addAllowedEscapeString("quot");
+	addAllowedEscapeString("amp");
+	addAllowedEscapeString("lt");
+	addAllowedEscapeString("gt");
 
-	addEscapeStringSubstitute("Aacute", "Á");
-	addEscapeStringSubstitute("Agrave", "À");
-	addEscapeStringSubstitute("Acirc", "Â");
-	addEscapeStringSubstitute("Auml", "Ä");
-	addEscapeStringSubstitute("Atilde", "Ã");
-	addEscapeStringSubstitute("Aring", "Å");
-	addEscapeStringSubstitute("aacute", "á");
-	addEscapeStringSubstitute("agrave", "à");
-	addEscapeStringSubstitute("acirc", "â");
-	addEscapeStringSubstitute("auml", "ä");
-	addEscapeStringSubstitute("atilde", "ã");
-	addEscapeStringSubstitute("aring", "å");
-	addEscapeStringSubstitute("Eacute", "É");
-	addEscapeStringSubstitute("Egrave", "È");
-	addEscapeStringSubstitute("Ecirc", "Ê");
-	addEscapeStringSubstitute("Euml", "Ë");
-	addEscapeStringSubstitute("eacute", "é");
-	addEscapeStringSubstitute("egrave", "è");
-	addEscapeStringSubstitute("ecirc", "ê");
-	addEscapeStringSubstitute("euml", "ë");
-	addEscapeStringSubstitute("Iacute", "Í");
-	addEscapeStringSubstitute("Igrave", "Ì");
-	addEscapeStringSubstitute("Icirc", "Î");
-	addEscapeStringSubstitute("Iuml", "Ï");
-	addEscapeStringSubstitute("iacute", "í");
-	addEscapeStringSubstitute("igrave", "ì");
-	addEscapeStringSubstitute("icirc", "î");
-	addEscapeStringSubstitute("iuml", "ï");
-	addEscapeStringSubstitute("Oacute", "Ó");
-	addEscapeStringSubstitute("Ograve", "Ò");
-	addEscapeStringSubstitute("Ocirc", "Ô");
-	addEscapeStringSubstitute("Ouml", "Ö");
-	addEscapeStringSubstitute("Otilde", "Õ");
-	addEscapeStringSubstitute("oacute", "ó");
-	addEscapeStringSubstitute("ograve", "ò");
-	addEscapeStringSubstitute("ocirc", "ô");
-	addEscapeStringSubstitute("ouml", "ö");
-	addEscapeStringSubstitute("otilde", "õ");
-	addEscapeStringSubstitute("Uacute", "Ú");
-	addEscapeStringSubstitute("Ugrave", "Ù");
-	addEscapeStringSubstitute("Ucirc", "Û");
-	addEscapeStringSubstitute("Uuml", "Ü");
-	addEscapeStringSubstitute("uacute", "ú");
-	addEscapeStringSubstitute("ugrave", "ù");
-	addEscapeStringSubstitute("ucirc", "û");
-	addEscapeStringSubstitute("uuml", "ü");
-	addEscapeStringSubstitute("Yacute", "Ý");
-	addEscapeStringSubstitute("yacute", "ý");
-	addEscapeStringSubstitute("yuml", "ÿ");
+	addAllowedEscapeString("nbsp");
+	addAllowedEscapeString("brvbar"); // "¦"
+	addAllowedEscapeString("sect");   // "§"
+	addAllowedEscapeString("copy");   // "©"
+	addAllowedEscapeString("laquo");  // "«"
+	addAllowedEscapeString("reg");    // "®"
+	addAllowedEscapeString("acute");  // "´"
+	addAllowedEscapeString("para");   // "¶"
+	addAllowedEscapeString("raquo");  // "»"
 
-	addEscapeStringSubstitute("deg", "°");
-	addEscapeStringSubstitute("plusmn", "±");
-	addEscapeStringSubstitute("sup2", "²");
-	addEscapeStringSubstitute("sup3", "³");
-	addEscapeStringSubstitute("sup1", "¹");
-	addEscapeStringSubstitute("nbsp", "º");
-	addEscapeStringSubstitute("pound", "£");
-	addEscapeStringSubstitute("cent", "¢");
-	addEscapeStringSubstitute("frac14", "¼");
-	addEscapeStringSubstitute("frac12", "½");
-	addEscapeStringSubstitute("frac34", "¾");
-	addEscapeStringSubstitute("iquest", "¿");
-	addEscapeStringSubstitute("iexcl", "¡");
-	addEscapeStringSubstitute("ETH", "Ð");
-	addEscapeStringSubstitute("eth", "ð");
-	addEscapeStringSubstitute("THORN", "Þ");
-	addEscapeStringSubstitute("thorn", "þ");
-	addEscapeStringSubstitute("AElig", "Æ");
-	addEscapeStringSubstitute("aelig", "æ");
-	addEscapeStringSubstitute("Oslash", "Ø");
-	addEscapeStringSubstitute("curren", "¤");
-	addEscapeStringSubstitute("Ccedil", "Ç");
-	addEscapeStringSubstitute("ccedil", "ç");
-	addEscapeStringSubstitute("szlig", "ß");
-	addEscapeStringSubstitute("Ntilde", "Ñ");
-	addEscapeStringSubstitute("ntilde", "ñ");
-	addEscapeStringSubstitute("yen", "¥");
-	addEscapeStringSubstitute("not", "¬");
-	addEscapeStringSubstitute("ordf", "ª");
-	addEscapeStringSubstitute("uml", "¨");
-	addEscapeStringSubstitute("shy", "");
-	addEscapeStringSubstitute("macr", "¯");
-*/
+	addAllowedEscapeString("Aacute"); // "Á"
+	addAllowedEscapeString("Agrave"); // "À"
+	addAllowedEscapeString("Acirc");  // "Â"
+	addAllowedEscapeString("Auml");   // "Ä"
+	addAllowedEscapeString("Atilde"); // "Ã"
+	addAllowedEscapeString("Aring");  // "Å"
+	addAllowedEscapeString("aacute"); // "á"
+	addAllowedEscapeString("agrave"); // "à"
+	addAllowedEscapeString("acirc");  // "â"
+	addAllowedEscapeString("auml");   // "ä"
+	addAllowedEscapeString("atilde"); // "ã"
+	addAllowedEscapeString("aring");  // "å"
+	addAllowedEscapeString("Eacute"); // "É"
+	addAllowedEscapeString("Egrave"); // "È"
+	addAllowedEscapeString("Ecirc");  // "Ê"
+	addAllowedEscapeString("Euml");   // "Ë"
+	addAllowedEscapeString("eacute"); // "é"
+	addAllowedEscapeString("egrave"); // "è"
+	addAllowedEscapeString("ecirc");  // "ê"
+	addAllowedEscapeString("euml");   // "ë"
+	addAllowedEscapeString("Iacute"); // "Í"
+	addAllowedEscapeString("Igrave"); // "Ì"
+	addAllowedEscapeString("Icirc");  // "Î"
+	addAllowedEscapeString("Iuml");   // "Ï"
+	addAllowedEscapeString("iacute"); // "í"
+	addAllowedEscapeString("igrave"); // "ì"
+	addAllowedEscapeString("icirc");  // "î"
+	addAllowedEscapeString("iuml");   // "ï"
+	addAllowedEscapeString("Oacute"); // "Ó"
+	addAllowedEscapeString("Ograve"); // "Ò"
+	addAllowedEscapeString("Ocirc");  // "Ô"
+	addAllowedEscapeString("Ouml");   // "Ö"
+	addAllowedEscapeString("Otilde"); // "Õ"
+	addAllowedEscapeString("oacute"); // "ó"
+	addAllowedEscapeString("ograve"); // "ò"
+	addAllowedEscapeString("ocirc");  // "ô"
+	addAllowedEscapeString("ouml");   // "ö"
+	addAllowedEscapeString("otilde"); // "õ"
+	addAllowedEscapeString("Uacute"); // "Ú"
+	addAllowedEscapeString("Ugrave"); // "Ù"
+	addAllowedEscapeString("Ucirc");  // "Û"
+	addAllowedEscapeString("Uuml");   // "Ü"
+	addAllowedEscapeString("uacute"); // "ú"
+	addAllowedEscapeString("ugrave"); // "ù"
+	addAllowedEscapeString("ucirc");  // "û"
+	addAllowedEscapeString("uuml");   // "ü"
+	addAllowedEscapeString("Yacute"); // "Ý"
+	addAllowedEscapeString("yacute"); // "ý"
+	addAllowedEscapeString("yuml");   // "ÿ"
+
+	addAllowedEscapeString("deg");    // "°"
+	addAllowedEscapeString("plusmn"); // "±"
+	addAllowedEscapeString("sup2");   // "²"
+	addAllowedEscapeString("sup3");   // "³"
+	addAllowedEscapeString("sup1");   // "¹"
+	addAllowedEscapeString("nbsp");   // "º"
+	addAllowedEscapeString("pound");  // "£"
+	addAllowedEscapeString("cent");   // "¢"
+	addAllowedEscapeString("frac14"); // "¼"
+	addAllowedEscapeString("frac12"); // "½"
+	addAllowedEscapeString("frac34"); // "¾"
+	addAllowedEscapeString("iquest"); // "¿"
+	addAllowedEscapeString("iexcl");  // "¡"
+	addAllowedEscapeString("ETH");    // "Ð"
+	addAllowedEscapeString("eth");    // "ð"
+	addAllowedEscapeString("THORN");  // "Þ"
+	addAllowedEscapeString("thorn");  // "þ"
+	addAllowedEscapeString("AElig");  // "Æ"
+	addAllowedEscapeString("aelig");  // "æ"
+	addAllowedEscapeString("Oslash"); // "Ø"
+	addAllowedEscapeString("curren"); // "¤"
+	addAllowedEscapeString("Ccedil"); // "Ç"
+	addAllowedEscapeString("ccedil"); // "ç"
+	addAllowedEscapeString("szlig");  // "ß"
+	addAllowedEscapeString("Ntilde"); // "Ñ"
+	addAllowedEscapeString("ntilde"); // "ñ"
+	addAllowedEscapeString("yen");    // "¥"
+	addAllowedEscapeString("not");    // "¬"
+	addAllowedEscapeString("ordf");   // "ª"
+	addAllowedEscapeString("uml");    // "¨"
+	addAllowedEscapeString("shy");    // ""
+	addAllowedEscapeString("macr");   // "¯"
+
+	addAllowedEscapeString("micro");  // "µ"
+	addAllowedEscapeString("middot"); // "·"
+	addAllowedEscapeString("cedil");  // "¸"
+	addAllowedEscapeString("ordm");   // "º"
+	addAllowedEscapeString("times");  // "×"
+	addAllowedEscapeString("divide"); // "÷"
+	addAllowedEscapeString("oslash"); // "ø"
+
 	setTokenCaseSensitive(true);
 
 	addTokenSubstitute("note", " <font color=\"#800000\"><small>(");

Modified: trunk/src/modules/filters/thmlhtmlhref.cpp
===================================================================
--- trunk/src/modules/filters/thmlhtmlhref.cpp	2006-09-25 19:07:19 UTC (rev 1980)
+++ trunk/src/modules/filters/thmlhtmlhref.cpp	2006-10-04 09:28:28 UTC (rev 1981)
@@ -38,6 +38,118 @@
 	setTokenStart("<");
 	setTokenEnd(">");
 
+	setEscapeStart("&");
+	setEscapeEnd(";");
+
+	setEscapeStringCaseSensitive(true);
+	setPassThruNumericEscapeString(true);
+
+	addAllowedEscapeString("quot");
+	addAllowedEscapeString("amp");
+	addAllowedEscapeString("lt");
+	addAllowedEscapeString("gt");
+
+	addAllowedEscapeString("nbsp");
+	addAllowedEscapeString("brvbar"); // "¦"
+	addAllowedEscapeString("sect");   // "§"
+	addAllowedEscapeString("copy");   // "©"
+	addAllowedEscapeString("laquo");  // "«"
+	addAllowedEscapeString("reg");    // "®"
+	addAllowedEscapeString("acute");  // "´"
+	addAllowedEscapeString("para");   // "¶"
+	addAllowedEscapeString("raquo");  // "»"
+
+	addAllowedEscapeString("Aacute"); // "Á"
+	addAllowedEscapeString("Agrave"); // "À"
+	addAllowedEscapeString("Acirc");  // "Â"
+	addAllowedEscapeString("Auml");   // "Ä"
+	addAllowedEscapeString("Atilde"); // "Ã"
+	addAllowedEscapeString("Aring");  // "Å"
+	addAllowedEscapeString("aacute"); // "á"
+	addAllowedEscapeString("agrave"); // "à"
+	addAllowedEscapeString("acirc");  // "â"
+	addAllowedEscapeString("auml");   // "ä"
+	addAllowedEscapeString("atilde"); // "ã"
+	addAllowedEscapeString("aring");  // "å"
+	addAllowedEscapeString("Eacute"); // "É"
+	addAllowedEscapeString("Egrave"); // "È"
+	addAllowedEscapeString("Ecirc");  // "Ê"
+	addAllowedEscapeString("Euml");   // "Ë"
+	addAllowedEscapeString("eacute"); // "é"
+	addAllowedEscapeString("egrave"); // "è"
+	addAllowedEscapeString("ecirc");  // "ê"
+	addAllowedEscapeString("euml");   // "ë"
+	addAllowedEscapeString("Iacute"); // "Í"
+	addAllowedEscapeString("Igrave"); // "Ì"
+	addAllowedEscapeString("Icirc");  // "Î"
+	addAllowedEscapeString("Iuml");   // "Ï"
+	addAllowedEscapeString("iacute"); // "í"
+	addAllowedEscapeString("igrave"); // "ì"
+	addAllowedEscapeString("icirc");  // "î"
+	addAllowedEscapeString("iuml");   // "ï"
+	addAllowedEscapeString("Oacute"); // "Ó"
+	addAllowedEscapeString("Ograve"); // "Ò"
+	addAllowedEscapeString("Ocirc");  // "Ô"
+	addAllowedEscapeString("Ouml");   // "Ö"
+	addAllowedEscapeString("Otilde"); // "Õ"
+	addAllowedEscapeString("oacute"); // "ó"
+	addAllowedEscapeString("ograve"); // "ò"
+	addAllowedEscapeString("ocirc");  // "ô"
+	addAllowedEscapeString("ouml");   // "ö"
+	addAllowedEscapeString("otilde"); // "õ"
+	addAllowedEscapeString("Uacute"); // "Ú"
+	addAllowedEscapeString("Ugrave"); // "Ù"
+	addAllowedEscapeString("Ucirc");  // "Û"
+	addAllowedEscapeString("Uuml");   // "Ü"
+	addAllowedEscapeString("uacute"); // "ú"
+	addAllowedEscapeString("ugrave"); // "ù"
+	addAllowedEscapeString("ucirc");  // "û"
+	addAllowedEscapeString("uuml");   // "ü"
+	addAllowedEscapeString("Yacute"); // "Ý"
+	addAllowedEscapeString("yacute"); // "ý"
+	addAllowedEscapeString("yuml");   // "ÿ"
+
+	addAllowedEscapeString("deg");    // "°"
+	addAllowedEscapeString("plusmn"); // "±"
+	addAllowedEscapeString("sup2");   // "²"
+	addAllowedEscapeString("sup3");   // "³"
+	addAllowedEscapeString("sup1");   // "¹"
+	addAllowedEscapeString("nbsp");   // "º"
+	addAllowedEscapeString("pound");  // "£"
+	addAllowedEscapeString("cent");   // "¢"
+	addAllowedEscapeString("frac14"); // "¼"
+	addAllowedEscapeString("frac12"); // "½"
+	addAllowedEscapeString("frac34"); // "¾"
+	addAllowedEscapeString("iquest"); // "¿"
+	addAllowedEscapeString("iexcl");  // "¡"
+	addAllowedEscapeString("ETH");    // "Ð"
+	addAllowedEscapeString("eth");    // "ð"
+	addAllowedEscapeString("THORN");  // "Þ"
+	addAllowedEscapeString("thorn");  // "þ"
+	addAllowedEscapeString("AElig");  // "Æ"
+	addAllowedEscapeString("aelig");  // "æ"
+	addAllowedEscapeString("Oslash"); // "Ø"
+	addAllowedEscapeString("curren"); // "¤"
+	addAllowedEscapeString("Ccedil"); // "Ç"
+	addAllowedEscapeString("ccedil"); // "ç"
+	addAllowedEscapeString("szlig");  // "ß"
+	addAllowedEscapeString("Ntilde"); // "Ñ"
+	addAllowedEscapeString("ntilde"); // "ñ"
+	addAllowedEscapeString("yen");    // "¥"
+	addAllowedEscapeString("not");    // "¬"
+	addAllowedEscapeString("ordf");   // "ª"
+	addAllowedEscapeString("uml");    // "¨"
+	addAllowedEscapeString("shy");    // ""
+	addAllowedEscapeString("macr");   // "¯"
+
+	addAllowedEscapeString("micro");  // "µ"
+	addAllowedEscapeString("middot"); // "·"
+	addAllowedEscapeString("cedil");  // "¸"
+	addAllowedEscapeString("ordm");   // "º"
+	addAllowedEscapeString("times");  // "×"
+	addAllowedEscapeString("divide"); // "÷"
+	addAllowedEscapeString("oslash"); // "ø"
+
 	setTokenCaseSensitive(true);
 	addTokenSubstitute("scripture", "<i> ");
 	addTokenSubstitute("/scripture", "</i> ");

Modified: trunk/src/modules/filters/thmlosis.cpp
===================================================================
--- trunk/src/modules/filters/thmlosis.cpp	2006-09-25 19:07:19 UTC (rev 1980)
+++ trunk/src/modules/filters/thmlosis.cpp	2006-10-04 09:28:28 UTC (rev 1981)
@@ -32,6 +32,7 @@
 	int tokpos = 0;
 	bool intoken = false;
 	bool keepToken = false;
+	bool ampersand = false;
 
 //	static QuoteStack quoteStack;
 	
@@ -74,13 +75,138 @@
 			token[0] = 0;
 			token[1] = 0;
 			token[2] = 0;
+			ampersand = false;
 			textEnd = from-1;
 			wordEnd = text.c_str() + text.length();//not good, instead of wordEnd = to!
 
 // 			wordEnd = to;
 			continue;
 		}
-		
+
+		if (*from == '&') {
+			intoken = true;
+			tokpos = 0;
+			token[0] = 0;
+			token[1] = 0;
+			token[2] = 0;
+			ampersand = true;
+			continue;
+		}
+
+		if (*from == ';' && ampersand) {
+			intoken = false;
+			ampersand = false;
+
+			if (*token == '#') {
+				text += '&';
+				text += token;
+				text += ';';
+			}
+			else if (!strncmp("nbsp", token, 4)) text += ' ';
+			else if (!strncmp("quot", token, 4)) text += '"';
+			else if (!strncmp("amp", token, 3)) text += '&';
+			else if (!strncmp("lt", token, 2)) text += '<';
+			else if (!strncmp("gt", token, 2)) text += '>';
+			else if (!strncmp("brvbar", token, 6)) text += '¦';
+			else if (!strncmp("sect", token, 4)) text += '§';
+			else if (!strncmp("copy", token, 4)) text += '©';
+			else if (!strncmp("laquo", token, 5)) text += '«';
+			else if (!strncmp("reg", token, 3)) text += '®';
+			else if (!strncmp("acute", token, 5)) text += '´';
+			else if (!strncmp("para", token, 4)) text += '¶';
+			else if (!strncmp("raquo", token, 5)) text += '»';
+			else if (!strncmp("Aacute", token, 6)) text += 'Á';
+			else if (!strncmp("Agrave", token, 6)) text += 'À';
+			else if (!strncmp("Acirc", token, 5)) text += 'Â';
+			else if (!strncmp("Auml", token, 4)) text += 'Ä';
+			else if (!strncmp("Atilde", token, 6)) text += 'Ã';
+			else if (!strncmp("Aring", token, 5)) text += 'Å';
+			else if (!strncmp("aacute", token, 6)) text += 'á';
+			else if (!strncmp("agrave", token, 6)) text += 'à';
+			else if (!strncmp("acirc", token, 5)) text += 'â';
+			else if (!strncmp("auml", token, 4)) text += 'ä';
+			else if (!strncmp("atilde", token, 6)) text += 'ã';
+			else if (!strncmp("aring", token, 5)) text += 'å';
+			else if (!strncmp("Eacute", token, 6)) text += 'É';
+			else if (!strncmp("Egrave", token, 6)) text += 'È';
+			else if (!strncmp("Ecirc", token, 5)) text += 'Ê';
+			else if (!strncmp("Euml", token, 4)) text += 'Ë';
+			else if (!strncmp("eacute", token, 6)) text += 'é';
+			else if (!strncmp("egrave", token, 6)) text += 'è';
+			else if (!strncmp("ecirc", token, 5)) text += 'ê';
+			else if (!strncmp("euml", token, 4)) text += 'ë';
+			else if (!strncmp("Iacute", token, 6)) text += 'Í';
+			else if (!strncmp("Igrave", token, 6)) text += 'Ì';
+			else if (!strncmp("Icirc", token, 5)) text += 'Î';
+			else if (!strncmp("Iuml", token, 4)) text += 'Ï';
+			else if (!strncmp("iacute", token, 6)) text += 'í';
+			else if (!strncmp("igrave", token, 6)) text += 'ì';
+			else if (!strncmp("icirc", token, 5)) text += 'î';
+			else if (!strncmp("iuml", token, 4)) text += 'ï';
+			else if (!strncmp("Oacute", token, 6)) text += 'Ó';
+			else if (!strncmp("Ograve", token, 6)) text += 'Ò';
+			else if (!strncmp("Ocirc", token, 5)) text += 'Ô';
+			else if (!strncmp("Ouml", token, 4)) text += 'Ö';
+			else if (!strncmp("Otilde", token, 6)) text += 'Õ';
+			else if (!strncmp("oacute", token, 6)) text += 'ó';
+			else if (!strncmp("ograve", token, 6)) text += 'ò';
+			else if (!strncmp("ocirc", token, 5)) text += 'ô';
+			else if (!strncmp("ouml", token, 4)) text += 'ö';
+			else if (!strncmp("otilde", token, 6)) text += 'õ';
+			else if (!strncmp("Uacute", token, 6)) text += 'Ú';
+			else if (!strncmp("Ugrave", token, 6)) text += 'Ù';
+			else if (!strncmp("Ucirc", token, 5)) text += 'Û';
+			else if (!strncmp("Uuml", token, 4)) text += 'Ü';
+			else if (!strncmp("uacute", token, 6)) text += 'ú';
+			else if (!strncmp("ugrave", token, 6)) text += 'ù';
+			else if (!strncmp("ucirc", token, 5)) text += 'û';
+			else if (!strncmp("uuml", token, 4)) text += 'ü';
+			else if (!strncmp("Yacute", token, 6)) text += 'Ý';
+			else if (!strncmp("yacute", token, 6)) text += 'ý';
+			else if (!strncmp("yuml", token, 4)) text += 'ÿ';
+
+			else if (!strncmp("deg", token, 3)) text += '°';
+			else if (!strncmp("plusmn", token, 6)) text += '±';
+			else if (!strncmp("sup2", token, 4)) text += '²';
+			else if (!strncmp("sup3", token, 4)) text += '³';
+			else if (!strncmp("sup1", token, 4)) text += '¹';
+			else if (!strncmp("nbsp", token, 4)) text += 'º';
+			else if (!strncmp("pound", token, 5)) text += '£';
+			else if (!strncmp("cent", token, 4)) text += '¢';
+			else if (!strncmp("frac14", token, 6)) text += '¼';
+			else if (!strncmp("frac12", token, 6)) text += '½';
+			else if (!strncmp("frac34", token, 6)) text += '¾';
+			else if (!strncmp("iquest", token, 6)) text += '¿';
+			else if (!strncmp("iexcl", token, 5)) text += '¡';
+			else if (!strncmp("ETH", token, 3)) text += 'Ð';
+			else if (!strncmp("eth", token, 3)) text += 'ð';
+			else if (!strncmp("THORN", token, 5)) text += 'Þ';
+			else if (!strncmp("thorn", token, 5)) text += 'þ';
+			else if (!strncmp("AElig", token, 5)) text += 'Æ';
+			else if (!strncmp("aelig", token, 5)) text += 'æ';
+			else if (!strncmp("Oslash", token, 6)) text += 'Ø';
+			else if (!strncmp("curren", token, 6)) text += '¤';
+			else if (!strncmp("Ccedil", token, 6)) text += 'Ç';
+			else if (!strncmp("ccedil", token, 6)) text += 'ç';
+			else if (!strncmp("szlig", token, 5)) text += 'ß';
+			else if (!strncmp("Ntilde", token, 6)) text += 'Ñ';
+			else if (!strncmp("ntilde", token, 6)) text += 'ñ';
+			else if (!strncmp("yen", token, 3)) text += '¥';
+			else if (!strncmp("not", token, 3)) text += '¬';
+			else if (!strncmp("ordf", token, 4)) text += 'ª';
+			else if (!strncmp("uml", token, 3)) text += '¨';
+			else if (!strncmp("shy", token, 3)) text += '';
+			else if (!strncmp("macr", token, 4)) text += '¯';
+			else if (!strncmp("micro", token, 5)) text += "µ";
+			else if (!strncmp("middot", token, 6)) text +="·";
+			else if (!strncmp("cedil", token, 5)) text += "¸";
+			else if (!strncmp("ordm", token, 4)) text +=  "º";
+			else if (!strncmp("times", token, 5)) text += "×";
+			else if (!strncmp("divide", token, 6)) text +="÷";
+			else if (!strncmp("oslash", token, 6)) text +="ø";
+			continue;
+		}
+
 		// handle silly <variant word> items in greek whnu, remove when module is fixed
 		if ((*from == '>') && (*(from-1) < 0)) {
 			text += "&gt;";

Modified: trunk/src/modules/filters/thmlplain.cpp
===================================================================
--- trunk/src/modules/filters/thmlplain.cpp	2006-09-25 19:07:19 UTC (rev 1980)
+++ trunk/src/modules/filters/thmlplain.cpp	2006-10-04 09:28:28 UTC (rev 1981)
@@ -48,13 +48,14 @@
 		}
 		if (*from == ';' && ampersand) {
 			intoken = false;
+			ampersand = false;
 
 			if (!strncmp("nbsp", token, 4)) text += ' ';
 			else if (!strncmp("quot", token, 4)) text += '"';
 			else if (!strncmp("amp", token, 3)) text += '&';
 			else if (!strncmp("lt", token, 2)) text += '<';
 			else if (!strncmp("gt", token, 2)) text += '>';
-			else if (!strncmp("brvbar", token, 6)) text += '|';
+			else if (!strncmp("brvbar", token, 6)) text += '¦';
 			else if (!strncmp("sect", token, 4)) text += '§';
 			else if (!strncmp("copy", token, 4)) text += '©';
 			else if (!strncmp("laquo", token, 5)) text += '«';
@@ -145,6 +146,13 @@
 			else if (!strncmp("uml", token, 3)) text += '¨';
 			else if (!strncmp("shy", token, 3)) text += '';
 			else if (!strncmp("macr", token, 4)) text += '¯';
+			else if (!strncmp("micro", token, 5)) text += "µ";
+			else if (!strncmp("middot", token, 6)) text +="·";
+			else if (!strncmp("cedil", token, 5)) text += "¸";
+			else if (!strncmp("ordm", token, 4)) text +=  "º";
+			else if (!strncmp("times", token, 5)) text += "×";
+			else if (!strncmp("divide", token, 6)) text +="÷";
+			else if (!strncmp("oslash", token, 6)) text +="ø";
 			continue;
 
 		}

Modified: trunk/src/modules/filters/thmlrtf.cpp
===================================================================
--- trunk/src/modules/filters/thmlrtf.cpp	2006-09-25 19:07:19 UTC (rev 1980)
+++ trunk/src/modules/filters/thmlrtf.cpp	2006-10-04 09:28:28 UTC (rev 1981)
@@ -38,7 +38,7 @@
 	addEscapeStringSubstitute("amp", "&");
 	addEscapeStringSubstitute("lt", "<");
 	addEscapeStringSubstitute("gt", ">");
-	addEscapeStringSubstitute("brvbar", "|");
+	addEscapeStringSubstitute("brvbar", "¦");
 	addEscapeStringSubstitute("sect", "§");
 	addEscapeStringSubstitute("copy", "©");
 	addEscapeStringSubstitute("laquo", "«");
@@ -130,6 +130,14 @@
 	addEscapeStringSubstitute("shy", "");
 	addEscapeStringSubstitute("macr", "¯");
 
+	addEscapeStringSubstitute("micro",  "µ");
+	addEscapeStringSubstitute("middot", "·");
+	addEscapeStringSubstitute("cedil",  "¸");
+	addEscapeStringSubstitute("ordm",   "º");
+	addEscapeStringSubstitute("times",  "×");
+	addEscapeStringSubstitute("divide", "÷");
+	addEscapeStringSubstitute("oslash", "ø");
+
 	setTokenCaseSensitive(true);