[sword-devel] HTML filter patches

Terry Biggs sword-devel@crosswire.org
03 Mar 2001 17:53:49 -0500


--=-rOIeWTjeJh3GisOXbeju
Content-Type: multipart/alternative; boundary="=-RiCAUDf4g+mRx8+x3HHn"


--=-RiCAUDf4g+mRx8+x3HHn
Content-Type: text/plain

Thanks again Troy,

Here is the gbfhtml.cpp patch.
I hope to have the rwphtml patch ready soon.

Terry


On 03 Mar 2001 14:20:11 -0700, Troy A. Griffitts wrote:
> Terry,
>       We could.  I'm planning on removing the FILTERPAD mechanism as it is
> fairly error-prone.  I'll be replacing the functionality by changing the
> signature of the SWFilter::ProcessText method as follows:
> 
> -virtual char ProcessText(char *text, int maxlen = -1, const SWKey *key
> = 0)
> +virtual char ProcessText(char **text, const SWKey *key = 0)
> 
> And let the filter reallocate the memory for the new buffer.
> 
> But for now, I've changed FILTERPAD to 5.
> 
>       -Troy.
> 


--=-RiCAUDf4g+mRx8+x3HHn
Content-Type: text/html; charset=utf-8

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 TRANSITIONAL//EN">
<HTML>
<HEAD>
  <META HTTP-EQUIV="Content-Type" CONTENT="text/html; CHARSET=UTF-8">
  <META NAME="GENERATOR" CONTENT="GtkHTML/0.8.2">
</HEAD>
<BODY>Thanks again Troy,<br>
<br>
Here is the gbfhtml.cpp patch.<br>
I hope to have the rwphtml patch ready soon.<br>
<br>
Terry<br>
<pre>On 03 Mar 2001 14:20:11 -0700, Troy A. Griffitts wrote:<br>
&gt; Terry,<br>
&gt;       We could.  I'm planning on removing the FILTERPAD mechanism as it is<br>
&gt; fairly error-prone.  I'll be replacing the functionality by changing the<br>
&gt; signature of the SWFilter::ProcessText method as follows:<br>
&gt; <br>
&gt; -virtual char ProcessText(char *text, int maxlen = -1, const SWKey *key<br>
&gt; = 0)<br>
&gt; +virtual char ProcessText(char **text, const SWKey *key = 0)<br>
&gt; <br>
&gt; And let the filter reallocate the memory for the new buffer.<br>
&gt; <br>
&gt; But for now, I've changed FILTERPAD to 5.<br>
&gt; <br>
&gt;       -Troy.<br>
&gt; <br>
</pre></BODY>
</HTML>

--=-RiCAUDf4g+mRx8+x3HHn--

--=-rOIeWTjeJh3GisOXbeju
Content-Type: text/plain
Content-Disposition: attachment; filename=gbfhtml.cpp.dif
Content-Transfer-Encoding: 7bit

--- /tmp/gedit-983659855-23046-1	Sat Mar  3 17:50:55 2001
+++ /tmp/gedit-983659855-23046-2	Sat Mar  3 17:50:55 2001
@@ -19,7 +19,6 @@
 #include <string.h>
 #include <gbfhtml.h>
 
-
 GBFHTML::GBFHTML()
 {
 }
@@ -34,21 +33,23 @@
 	bool isRightJustified = false;
 	bool isCentered = false;
 	int len;
-	unsigned int i;
-
-	len = strlen(text) + 1;		// shift string to right of buffer
+		
+	len = strlen(text) + 1;
 	if (len < maxlen) {
 		memmove(&text[maxlen - len], text, len);
 		from = &text[maxlen - len];
-	}
-	else
-		from = text;			// -------------------------------
-	
-	for (to = text; *from; from++)
-	{
+	} else
+		from = text;
+	for (to = text; *from; from++) {
+		/*
+		if (newParagraph) {
+			*to++ = 182;
+			newParagraph = false;
+		}
+		*/
 		if (*from == '\n') {
 			*from = ' ';
-		}			
+		}
 		if (*from == '<') {
 			intoken = true;
 			tokpos = 0;
@@ -56,420 +57,206 @@
 			continue;
 		}
 		if (*from == '>') {
-			unsigned int i;
 			intoken = false;
 			// process desired tokens
 			switch (*token) {
-				case 'W':	// Strongs
-					switch(token[1])
-					{
-						case 'G':               // Greek
-						case 'H':               // Hebrew
-							*to++ = ' ';
-							*to++ = '<';
-							*to++ = 'S';
-							*to++ = 'M';
-							*to++ = 'A';
-							*to++ = 'L';
-							*to++ = 'L';
-							*to++ = '>';
-							*to++ = '<';
-							*to++ = 'E';
-							*to++ = 'M';
-							*to++ = '>';
-							for (i = 2; i < strlen(token); i++)
-								*to++ = token[i];
-							*to++ = '<';
-							*to++ = '/';
-							*to++ = 'E';
-							*to++ = 'M';
-							*to++ = '>';
-							*to++ = '<';
-							*to++ = '/';
-							*to++ = 'S';
-							*to++ = 'M';
-							*to++ = 'A';
-							*to++ = 'L';
-							*to++ = 'L';
-							*to++ = '>';
-							*to++ = ' ';
-							continue;
+			case 'W':	// Strongs
+				switch (token[1]) {
+				case 'G':	// Greek
+				case 'H':	// Hebrew
+					strcpy(to," <A HREF=\"#");
+					to += strlen(to);					
+					for (unsigned int i = 2;
+					     i < strlen(token); i++)
+						*to++ = token[i];
+					strcpy(to," \"><FONT SIZE=\"-1\">");
+					to += strlen(to);
+					for (unsigned int i = 2;
+					     i < strlen(token); i++)
+						*to++ = token[i];
+					strcpy(to,"</FONT></A> ");
+					to += strlen(to);
+					continue;
+				case 'T':	// Tense
+					strcpy(to," <A HREF=\"#");
+					to += strlen(to);
+					for (unsigned int i = 3;
+					     i < strlen(token); i++)
+						*to++ = token[i];					
+					strcpy(to,"\"> <FONT SIZE=\"-1\"><I>");
+					to += strlen(to);
+					for (unsigned int i = 3;
+					     i < strlen(token); i++)
+						*to++ = token[i];					
+					strcpy(to,"</I></FONT></A> ");
+					to += strlen(to);
+					continue;
+				}
+				break;
+			case 'R':
+				switch (token[1]) {
+				case 'B':	//word(s) explained in footnote
+					strcpy(to,"<I>");
+					to += strlen(to);
+					hasFootnotePreTag = true;	//we have the RB tag
+					continue;
+				case 'F':	// footnote begin
+					if (hasFootnotePreTag) {
+						strcpy(to,"</I>");
+						to += strlen(to);
+					}
+					strcpy(to,"<FONT COLOR=\"800000\"><SMALL>(");
+					to += strlen(to);
+					continue;
+				case 'f':	// footnote end
+					strcpy(to,")</SMALL></FONT>");
+					to += strlen(to);
+					hasFootnotePreTag = false;
+					continue;
+				}
+				break;
+			case 'F':	// font tags
+				switch (token[1]) {
+				case 'I':	// italic start
+					strcpy(to,"<I>");
+					to += strlen(to);
+					continue;
+				case 'i':	// italic end
+					strcpy(to,"</I>");
+					to += strlen(to);
+					continue;
+				case 'B':	// bold start
+					strcpy(to,"<B>");
+					to += strlen(to);
+					continue;
+				case 'b':	// bold end
+					strcpy(to,"</B>");
+					to += strlen(to);
+					continue;
+				case 'R':	// words of Jesus begin
+					strcpy(to,"<FONT COLOR=\"FF0000\">");
+					to += strlen(to);
+					continue;
+				case 'r':	// words of Jesus end
+					strcpy(to,"</FONT>");
+					to += strlen(to);
+					continue;
+				case 'U':	// Underline start
+					strcpy(to,"<U>");
+					to += strlen(to);
+					continue;
+				case 'u':	// Underline end
+					strcpy(to,"</U>");
+					to += strlen(to);
+					continue;
+				case 'O':	// Old Testament quote begin
+					strcpy(to,"<CITE>");
+					to += strlen(to);
+					continue;
+				case 'o':	// Old Testament quote end
+					strcpy(to,"</CITE>");
+					to += strlen(to);
+					continue;
+				case 'S':	// Superscript begin
+					strcpy(to,"<SUP>");
+					to += strlen(to);
+					continue;
+				case 's':	// Superscript end
+					strcpy(to,"</SUP>");
+					to += strlen(to);
+					continue;
+				case 'V':	// Subscript begin
+					strcpy(to,"<SUB>");
+					to += strlen(to);				
+					continue;
+				case 'v':	// Subscript end
+					strcpy(to,"</SUB>");
+					to += strlen(to);
+					continue;
+				}
+				break;
+			case 'C':	// special character tags
+				switch (token[1]) {
+				case 'A':	// ASCII value
+					*to++ = (char) atoi(&token[2]);
+					continue;
+				case 'G':
+					//*to++ = ' ';
+					continue;
+				case 'L':	// line break
+					strcpy(to,"<BR>");
+					to += strlen(to);
+					continue;
+				case 'M':	// new paragraph
+					strcpy(to,"<P>");
+					to += strlen(to);
+					continue;
+				case 'T':
+					//*to++ = ' ';
+					continue;
+				}
+				break;
+			case 'J':	//Justification
+				switch (token[1]) {
+				case 'R':	//right
+					strcpy(to,"<DIV ALIGN=\"RIGHT\">");
+					to += strlen(to);
+					isRightJustified = true;
+					continue;
+				case 'C':	//center
+					strcpy(to,"<DIV ALIGN=\"CENTER\">");
+					to += strlen(to);
+					isCentered = true;
+					continue;
 
-						case 'T':               // Tense
-							*to++ = ' ';
-							*to++ = '<';
-							*to++ = 'S';
-							*to++ = 'M';
-							*to++ = 'A';
-							*to++ = 'L';
-							*to++ = 'L';
-							*to++ = '>';
-							*to++ = '<';
-							*to++ = 'I';
-							*to++ = '>';
-							for (i = 3; i < strlen(token); i++)
-								*to++ = token[i];
-							*to++ = '<';
-							*to++ = '/';
-							*to++ = 'I';
-							*to++ = '>';
-							*to++ = '<';
-							*to++ = '/';
-							*to++ = 'S';
-							*to++ = 'M';
-							*to++ = 'A';
-							*to++ = 'L';
-							*to++ = 'L';
-							*to++ = '>';
-							*to++ = ' ';
-							continue;
-					}
-					break;
-				case 'R':
-					switch(token[1])
-					{
-					  case 'B':								//word(s) explained in footnote
-							*to++ = '<';
-							*to++ = 'I';					
-							*to++ = '>';						
-							hasFootnotePreTag = true; //we have the RB tag
-							continue;
-						case 'F':               // footnote begin
-							if (hasFootnotePreTag) {
-								*to++ = '<';
-								*to++ = '/';
-								*to++ = 'I';
-								*to++ = '>';						
-								*to++ = ' ';
-							}
-	 						*to++ = '<';
-							*to++ = 'F';
-							*to++ = 'O';
-							*to++ = 'N';
-							*to++ = 'T';
-							*to++ = ' ';
-							*to++ = 'C';
-							*to++ = 'O';
-							*to++ = 'L';
-							*to++ = 'O';
-							*to++ = 'R';
-							*to++ = '=';
-							*to++ = '\"';
-							*to++ = '#';
-							*to++ = '8';
-							*to++ = '0';
-							*to++ = '0';
-							*to++ = '0';
-							*to++ = '0';
-							*to++ = '0';
-							*to++ = '\"';
-							*to++ = '>';
-							
-							*to++ = ' ';
-							*to++ = '<';
-							*to++ = 'S';
-							*to++ = 'M';
-							*to++ = 'A';
-							*to++ = 'L';
-							*to++ = 'L';
-							*to++ = '>';
-							*to++ = '(';
-													
-							continue;
-						case 'f':               // footnote end
-							*to++ = ')';
-							*to++ = '<';
-							*to++ = '/';
-							*to++ = 'S';
-							*to++ = 'M';
-							*to++ = 'A';
-							*to++ = 'L';
-							*to++ = 'L';
-							*to++ = '>';
-							*to++ = ' ';
-							*to++ = '<';
-							*to++ = '/';
-							*to++ = 'F';
-							*to++ = 'O';
-							*to++ = 'N';
-							*to++ = 'T';
-							*to++ = '>';
-							hasFootnotePreTag = false;
-							continue;
-					}
-					break;
-				
-				case 'F':			// font tags
-					switch(token[1])
-					{
-						case 'I':		// italic start
-							*to++ = '<';
-							*to++ = 'I';
-							*to++ = '>';
-							continue;
-						case 'i':		// italic end
-							*to++ = '<';
-							*to++ = '/';
-							*to++ = 'I';
-							*to++ = '>';
-							continue;
-						case 'B':		// bold start
-							*to++ = '<';
-							*to++ = 'B';
-							*to++ = '>';
-							continue;
-						case 'b':		// bold end
-							*to++ = '<';
-							*to++ = '/';
-							*to++ = 'B';
-							*to++ = '>';
-							continue;
-						case 'R':		// words of Jesus begin
-							*to++ = '<';
-							*to++ = 'F';
-							*to++ = 'O';
-							*to++ = 'N';
-							*to++ = 'T';
-							*to++ = ' ';
-							*to++ = 'C';
-							*to++ = 'O';
-							*to++ = 'L';
-							*to++ = 'O';
-							*to++ = 'R';
-							*to++ = '=';
-							*to++ = '#';
-							*to++ = 'F';
-							*to++ = 'F';
-							*to++ = '0';
-							*to++ = '0';
-							*to++ = '0';
-							*to++ = '0';
-							*to++ = '>';
-							continue;
-						case 'r':		// words of Jesus end
-							*to++ = '<';
-							*to++ = '/';
-							*to++ = 'F';
-							*to++ = 'O';
-							*to++ = 'N';
-							*to++ = 'T';
-							*to++ = '>';
-							continue;
-						case 'U':		// Underline start
-							*to++ = '<';
-							*to++ = 'U';
-							*to++ = '>';
-							continue;
-							case 'u':		// Underline end
-							*to++ = '<';
-							*to++ = '/';
-							*to++ = 'U';
-							*to++ = '>';
-							continue;
-						case 'O':		// Old Testament quote begin
-							*to++ = '<';
-							*to++ = 'C';
-							*to++ = 'I';
-							*to++ = 'T';
-							*to++ = 'E';
-							*to++ = '>';
-							continue;
-						case 'o':		// Old Testament quote end
-							*to++ = '<';
-							*to++ = '/';
-							*to++ = 'C';
-							*to++ = 'I';
-							*to++ = 'T';
-							*to++ = 'E';
-							*to++ = '>';
-							continue;
-						case 'S':		// Superscript begin
-							*to++ = '<';
-							*to++ = 'S';
-							*to++ = 'U';
-							*to++ = 'P';
-							*to++ = '>';
-							continue;
-						case 's':		// Superscript end
-							*to++ = '<';
-							*to++ = '/';
-							*to++ = 'S';
-							*to++ = 'U';
-							*to++ = 'P';
-							*to++ = '>';
-							continue;
-						case 'V':		// Subscript begin
-							*to++ = '<';
-							*to++ = 'S';
-							*to++ = 'U';
-	  						*to++ = 'B';
-							*to++ = '>';
-							continue;
-						case 'v':		// Subscript end
-							*to++ = '<';
-							*to++ = '/';
-							*to++ = 'S';
-							*to++ = 'U';
-							*to++ = 'B';
-							*to++ = '>';
-							continue;
-					}
-					break;
-				case 'C':			// special character tags
-					switch(token[1])
-					{
-						case 'A':               // ASCII value
-							*to++ = (char)atoi(&token[2]);
-							continue;
-						case 'G':
-							//*to++ = ' ';
-							continue;
-						case 'L':               // line break
-							*to++ = '<';
-							*to++ = 'B';
-							*to++ = 'R';
-							*to++ = '>';
-							*to++ = ' ';
-							continue;
-						case 'M':               // new paragraph
-							*to++ = '<';
-							*to++ = 'B';
-							*to++ = 'R';
-							*to++ = '>';
-							continue;
-						case 'T':
-							//*to++ = ' ';
-							continue;
-					}
-					break;
-				case 'J':	//Justification
-					switch(token[1]) 
-					{
-						case 'R':	//right
-							*to++ = '<';
-							*to++ = 'D';
-							*to++ = 'I';
-							*to++ = 'V';
-							*to++ = ' ';
-							*to++ = 'A';
-							*to++ = 'L';
-							*to++ = 'I';
-							*to++ = 'G';
-							*to++ = 'N';
-							*to++ = '=';
-							*to++ = '\"';
-							*to++ = 'R';
-							*to++ = 'I';
-							*to++ = 'G';
-							*to++ = 'H';
-							*to++ = 'T';
-							*to++ = '\"';
-							*to++ = '>';
-							isRightJustified = true;
-							continue;
-	
-						case 'C':	//center
-							*to++ = '<';
-							*to++ = 'D';
-							*to++ = 'I';
-							*to++ = 'V';
-							*to++ = ' ';
-							*to++ = 'A';
-							*to++ = 'L';
-							*to++ = 'I';
-							*to++ = 'G';
-							*to++ = 'N';
-							*to++ = '=';
-							*to++ = '\"';
-							*to++ = 'C';
-							*to++ = 'E';
-							*to++ = 'N';
-							*to++ = 'T';
-							*to++ = 'E';
-							*to++ = 'R';
-							*to++ = '\"';
-							*to++ = '>';
-							isCentered = true;
-							continue;
-	
-						case 'L': //left, reset right and center
-							if (isCentered) {
-								*to++ = '<';
-								*to++ = '/';
-								*to++ = 'C';
-								*to++ = 'E';
-								*to++ = 'N';
-								*to++ = 'T';
-								*to++ = 'E';
-								*to++ = 'R';
-								*to++ = '>';
-								isCentered = false;
-							}
-							if (isRightJustified) {
-								*to++ = '<';
-								*to++ = '/';
-								*to++ = 'D';
-								*to++ = 'I';
-								*to++ = 'V';
-								*to++ = '>';
-								isRightJustified = false;
-							}
-							continue;
-					}
-					break;
-				case 'T':			// title formatting
-					switch(token[1])
-					{
-						case 'T':               // Book title begin
-							*to++ = '<';
-							*to++ = 'B';
-							*to++ = 'I';
-							*to++ = 'G';
-							*to++ = '>';
-							continue;
-						case 't':
-							*to++ = '<';
-							*to++ = '/';
-							*to++ = 'B';
-							*to++ = 'I';
-							*to++ = 'G';
-							*to++ = '>';
-							continue;
-					}
-					break;
-	
-				case 'P': // special formatting
-					switch(token[1])
-					{
-						case 'P': // Poetry begin
-							*to++ = '<';
-							*to++ = 'C';
-							*to++ = 'I';
-							*to++ = 'T';
-							*to++ = 'E';
-							*to++ = '>';
-							continue;
-						case 'p':
-							*to++ = '<';
-							*to++ = '/';
-							*to++ = 'C';
-							*to++ = 'I';
-							*to++ = 'T';
-							*to++ = 'E';
-							*to++ = '>';
-							continue;
-					}
-					break;
+				case 'L':	//left, reset right and center
+					if (isCentered) {
+						strcpy(to,"</CENTER>");
+						to += strlen(to);
+						isCentered = false;
+					}
+					if (isRightJustified) {
+						strcpy(to,"</DIV>");
+						to += strlen(to);
+						isRightJustified = false;
+					}
+					continue;
+				}
+				break;
+			case 'T':	// title formatting
+				switch (token[1]) {
+				case 'T':	// Book title begin
+					strcpy(to,"<BIG>");
+					to += strlen(to);
+					continue;
+				case 't':
+					strcpy(to,"</BIG>");
+					to += strlen(to);
+					continue;
+				}
+				break;
+
+			case 'P':	// special formatting
+				switch (token[1]) {
+				case 'P':	// Poetry begin
+					strcpy(to,"<CITE>");
+					to += strlen(to);
+					continue;
+				case 'p':
+					strcpy(to,"</CITE>");
+					to += strlen(to);
+					continue;
+				}
+				break;
 			}
 			continue;
 		}
 		if (intoken) {
-		 	if (tokpos < 2047) {
-		 		token[tokpos] = *from;
-		 		tokpos++;
-		 	}
-		 }
-		else
+			if (tokpos < 2047) {
+				token[tokpos] = *from;
+				tokpos++;
+			}
+		} else
 			*to++ = *from;
 	}
 	*to = 0;

--=-rOIeWTjeJh3GisOXbeju--