[jsword-svn] common/java/core/org/crosswire/common/xml s

jswordcvs at crosswire.org jswordcvs at crosswire.org
Tue May 24 18:12:33 MST 2005


Update of /cvs/jsword/common/java/core/org/crosswire/common/xml
In directory www.crosswire.org:/tmp/cvs-serv11090/java/core/org/crosswire/common/xml

Modified Files:
	XMLUtil.java 
Log Message:
Fixed a problem where character entity references (of the kind &#ddd;) caused a parsing error.

Index: XMLUtil.java
===================================================================
RCS file: /cvs/jsword/common/java/core/org/crosswire/common/xml/XMLUtil.java,v
retrieving revision 1.13
retrieving revision 1.14
diff -C2 -d -r1.13 -r1.14
*** XMLUtil.java	17 May 2005 00:47:01 -0000	1.13
--- XMLUtil.java	25 May 2005 01:12:31 -0000	1.14
***************
*** 24,27 ****
--- 24,28 ----
  import java.io.IOException;
  import java.io.InputStream;
+ import java.util.regex.Pattern;
  
  import org.crosswire.common.util.FileUtil;
***************
*** 148,152 ****
          int cleanfrom = 0;
  
-     allEntities:
          while (true)
          {
--- 149,152 ----
***************
*** 156,168 ****
              if (amp == -1)
              {
!                 break allEntities;
              }
  
              // Check for chars that should not be in an entity name
              int i = amp + 1;
-         singleEntity:
              while (true)
              {
!                 // if we are at the end of the string the disgard from the & on
                  if (i >= working.length())
                  {
--- 156,174 ----
              if (amp == -1)
              {
!                 break;
!             }
!             
!             // Skip references of the kind &#ddd;
!             if (validCharacterEntityPattern.matcher(working.substring(amp)).find())
!             {
!                 cleanfrom = working.indexOf(';', amp) + 1;
!                 continue;
              }
  
              // Check for chars that should not be in an entity name
              int i = amp + 1;
              while (true)
              {
!                 // if we are at the end of the string the discard from the & on
                  if (i >= working.length())
                  {
***************
*** 172,181 ****
  
                      working = working.substring(0, amp) + replace;
!                     break singleEntity;
                  }
  
                  // if we have come to an ; then we just have an entity that isn't
                  // properly declared, (or maybe it is but something else is
!                 // broken) so disgard it
                  char c = working.charAt(i);
                  if (c == ';')
--- 178,187 ----
  
                      working = working.substring(0, amp) + replace;
!                     break;
                  }
  
                  // if we have come to an ; then we just have an entity that isn't
                  // properly declared, (or maybe it is but something else is
!                 // broken) so discard it
                  char c = working.charAt(i);
                  if (c == ';')
***************
*** 186,190 ****
  
                      working = working.substring(0, amp) + replace + working.substring(i + 1);
!                     break singleEntity;
                  }
  
--- 192,196 ----
  
                      working = working.substring(0, amp) + replace + working.substring(i + 1);
!                     break;
                  }
  
***************
*** 198,202 ****
  
                      working = working.substring(0, amp) + replace + working.substring(i);
!                     break singleEntity;
                  }
  
--- 204,208 ----
  
                      working = working.substring(0, amp) + replace + working.substring(i);
!                     break;
                  }
  
***************
*** 369,371 ****
--- 375,379 ----
       */
      protected static final Logger log = Logger.getLogger(XMLUtil.class);
+ 
+     private static Pattern validCharacterEntityPattern = Pattern.compile("^&#x?\\d{2,4};"); //$NON-NLS-1$
  }



More information about the jsword-svn mailing list