[jsword-svn] r1336 - trunk/common/src/main/java/org/crosswire/common/diff

dmsmith at www.crosswire.org dmsmith at www.crosswire.org
Mon May 21 13:43:40 MST 2007


Author: dmsmith
Date: 2007-05-21 13:43:38 -0700 (Mon, 21 May 2007)
New Revision: 1336

Added:
   trunk/common/src/main/java/org/crosswire/common/diff/PatchEntry.java
Modified:
   trunk/common/src/main/java/org/crosswire/common/diff/Diff.java
   trunk/common/src/main/java/org/crosswire/common/diff/Match.java
   trunk/common/src/main/java/org/crosswire/common/diff/Patch.java
Log:


Modified: trunk/common/src/main/java/org/crosswire/common/diff/Diff.java
===================================================================
--- trunk/common/src/main/java/org/crosswire/common/diff/Diff.java	2007-05-21 11:37:00 UTC (rev 1335)
+++ trunk/common/src/main/java/org/crosswire/common/diff/Diff.java	2007-05-21 20:43:38 UTC (rev 1336)
@@ -22,8 +22,12 @@
 package org.crosswire.common.diff;
 
 import java.util.List;
+import java.util.ListIterator;
 import java.util.Map;
+import java.util.Stack;
 
+import org.crosswire.common.util.IntStack;
+
 /**
  * Computes the difference between two texts to create a patch.
  * Applies the patch onto another text, allowing for errors.
@@ -43,20 +47,20 @@
     ////If checklines is present and false, then don't run a line-level diff first to identify the changed areas.
     ////Check for equality (speedup)
 //if (text1 == text2)
-//return [[DIFF_EQUAL, text1]];
+//return [new Difference(EditType.EQUAL, text1]];
 //
 //if (typeof checklines == 'undefined')
 //checklines = true;
 //
 //var a;
     ////Trim off common prefix (speedup)
-//a = diff_prefix(text1, text2);
+//a = Diff.prefix(text1, text2);
 //text1 = a[0];
 //text2 = a[1];
 //var commonprefix = a[2];
 //
     ////Trim off common suffix (speedup)
-//a = diff_suffix(text1, text2);
+//a = Diff.suffix(text1, text2);
 //text1 = a[0];
 //text2 = a[1];
 //var commonsuffix = a[2];
@@ -66,19 +70,19 @@
 //var shorttext = text1.length > text2.length ? text2 : text1;
 //
 //if (!text1) {  // Just add some text (speedup)
-//diff = [[DIFF_INSERT, text2]];
+//diff = [new Difference(EditType.INSERT, text2]];
 //} else if (!text2) { // Just delete some text (speedup)
-//diff = [[DIFF_DELETE, text1]];
+//diff = [new Difference(EditType.DELETE, text1]];
 //} else if ((i = longtext.indexOf(shorttext)) != -1) {
 //// Shorter text is inside the longer text (speedup)
-//diff = [[DIFF_INSERT, longtext.substring(0, i)], [DIFF_EQUAL, shorttext], [DIFF_INSERT, longtext.substring(i+shorttext.length)]];
+//diff = [new Difference(EditType.INSERT, longtext.substring(0, i)], new Difference(EditType.EQUAL, shorttext], new Difference(EditType.INSERT, longtext.substring(i+shorttext.length)]];
 //// Swap insertions for deletions if diff is reversed.
 //if (text1.length > text2.length)
-//  diff[0][0] = diff[2][0] = DIFF_DELETE;
+//  diff[0][0] = diff[2][0] = EditType.DELETE;
 //} else {
 //longtext = shorttext = null; // Garbage collect
 //// Check to see if the problem can be split in two.
-//var hm = diff_halfmatch(text1, text2);
+//var hm = Diff.halfmatch(text1, text2);
 //if (hm) {
 //  // A half-match was found, sort out the return data.
 //  var text1_a = hm[0];
@@ -87,46 +91,46 @@
 //  var text2_b = hm[3];
 //  var mid_common = hm[4];
 //  // Send both pairs off for separate processing.
-//  var diff_a = diff_main(text1_a, text2_a, checklines);
-//  var diff_b = diff_main(text1_b, text2_b, checklines);
+//  var diff_a = Diff.main(text1_a, text2_a, checklines);
+//  var diff_b = Diff.main(text1_b, text2_b, checklines);
 //  // Merge the results.
-//  diff = diff_a.concat([[DIFF_EQUAL, mid_common]], diff_b);
+//  diff = diff_a.concat([new Difference(EditType.EQUAL, mid_common]], diff_b);
 //} else {
 //  // Perform a real diff.
 //  if (checklines && text1.length + text2.length < 250)
 //    checklines = false; // Too trivial for the overhead.
 //  if (checklines) {
 //    // Scan the text on a line-by-line basis first.
-//    a = diff_lines2chars(text1, text2);
+//    a = Diff.lines2chars(text1, text2);
 //    text1 = a[0];
 //    text2 = a[1];
 //    var linearray = a[2];
 //  }
-//  diff = diff_map(text1, text2);
+//  diff = Diff.map(text1, text2);
 //  if (!diff) // No acceptable result.
-//    diff = [[DIFF_DELETE, text1], [DIFF_INSERT, text2]];
+//    diff = [new Difference(EditType.DELETE, text1], new Difference(EditType.INSERT, text2]];
 //  if (checklines) {
-//    diff_chars2lines(diff, linearray); // Convert the diff back to original text.
-//    diff_cleanup_semantic(diff); // Eliminate freak matches (e.g. blank lines)
+//    Diff.chars2lines(diff, linearray); // Convert the diff back to original text.
+//    Diff.cleanup_semantic(diff); // Eliminate freak matches (e.g. blank lines)
 //
 //    // Rediff any replacement blocks, this time on character-by-character basis.
-//    diff.push([DIFF_EQUAL, '']);  // Add a dummy entry at the end.
+//    diff.push(new Difference(EditType.EQUAL, ""]);  // Add a dummy entry at the end.
 //    var pointer = 0;
 //    var count_delete = 0;
 //    var count_insert = 0;
-//    var text_delete = '';
-//    var text_insert = '';
+//    var text_delete = "";
+//    var text_insert = "";
 //    while(pointer < diff.length) {
-//      if (diff[pointer][0] == DIFF_INSERT) {
+//      if (diff[pointer][0] == EditType.INSERT) {
 //        count_insert++;
 //        text_insert += diff[pointer][1];
-//      } else if (diff[pointer][0] == DIFF_DELETE) {
+//      } else if (diff[pointer][0] == EditType.DELETE) {
 //        count_delete++;
 //        text_delete += diff[pointer][1];
 //      } else {  // Upon reaching an equality, check for prior redundancies.
 //        if (count_delete >= 1 && count_insert >= 1) {
 //          // Delete the offending records and add the merged ones.
-//          a = diff_main(text_delete, text_insert, false);
+//          a = Diff.main(text_delete, text_insert, false);
 //          diff.splice(pointer - count_delete - count_insert, count_delete + count_insert);
 //          pointer = pointer - count_delete - count_insert;
 //          for (i=a.length-1; i>=0; i--)
@@ -135,8 +139,8 @@
 //        }
 //        count_insert = 0;
 //        count_delete = 0;
-//        text_delete = '';
-//        text_insert = '';
+//        text_delete = "";
+//        text_insert = "";
 //      }
 //      pointer++;
 //    }
@@ -147,10 +151,10 @@
 //}
 //
 //if (commonprefix)
-//diff.unshift([DIFF_EQUAL, commonprefix]);
+//diff.unshift(new Difference(EditType.EQUAL, commonprefix]);
 //if (commonsuffix)
-//diff.push([DIFF_EQUAL, commonsuffix]);
-//diff_cleanup_merge(diff);
+//diff.push(new Difference(EditType.EQUAL, commonsuffix]);
+//Diff.cleanup_merge(diff);
 //return diff;
 }
 
@@ -165,12 +169,12 @@
 //
 ////"\x00" is a valid JavaScript character, but the Venkman debugger doesn't like it (bug 335098)
 ////So we'll insert a junk entry to avoid generating a null character.
-//linearray.push('');
+//linearray.push("");
 //
-//function diff_lines2chars_munge(text) {
+//function lines2chars_munge(text) {
 //// My first ever closure!
 //var i, line;
-//var chars = '';
+//var chars = "";
 //while (text) {
 //  i = text.indexOf('\n');
 //  if (i == -1)
@@ -188,25 +192,31 @@
 //return chars;
 //}
 //
-//var chars1 = diff_lines2chars_munge(text1);
-//var chars2 = diff_lines2chars_munge(text2);
+//var chars1 = Diff.lines2chars_munge(text1);
+//var chars2 = Diff.lines2chars_munge(text2);
 //return [chars1, chars2, linearray];
 }
 
 
   //Rehydrate the text in a diff from a string of line hashes to real lines of text.
-  public static void chars2lines(List diff, List linearray) {
-//var chars, text;
-//for (var x=0; x<diff.length; x++) {
-//chars = diff[x][1];
-//text = '';
-//for (var y=0; y<chars.length; y++)
-//  text += linearray[chars.charCodeAt(y)];
-//diff[x][1] = text;
-//}
-}
+  public static void chars2lines(List diffs, List linearray) {
+      String chars;
+      StringBuffer text = new StringBuffer();
+      for (int x = 0; x < diffs.size(); x++)
+      {
+          Difference diff = (Difference) diffs.get(x);
+          chars = diff.getText();
 
+          for (int y = 0; y < chars.length(); y++)
+          {
+              text.append(linearray.get(chars.charAt(y)));
+          }
 
+          diff.setText(text.toString());
+      }
+  }
+
+
   //Explore the intersection points between the two texts.
   public static String map(String text1, String text2)
   {
@@ -258,8 +268,8 @@
 //  if (done) {
 //    // Front path ran over reverse path.
 //    v_map2 = v_map2.slice(0, footsteps[footstep]+1);
-//    var a = diff_path1(v_map1, text1.substring(0, x), text2.substring(0, y));
-//    return a.concat(diff_path2(v_map2, text1.substring(x), text2.substring(y)));
+//    var a = Diff.path1(v_map1, text1.substring(0, x), text2.substring(0, y));
+//    return a.concat(Diff.path2(v_map2, text1.substring(x), text2.substring(y)));
 //  }
 //}
 //
@@ -289,8 +299,8 @@
 //  if (done) {
 //    // Reverse path ran over front path.
 //    v_map1 = v_map1.slice(0, footsteps[footstep]+1);
-//    var a = diff_path1(v_map1, text1.substring(0, text1.length-x), text2.substring(0, text2.length-y));
-//    return a.concat(diff_path2(v_map2, text1.substring(text1.length-x), text2.substring(text2.length-y)));
+//    var a = Diff.path1(v_map1, text1.substring(0, text1.length-x), text2.substring(0, text2.length-y));
+//    return a.concat(Diff.path2(v_map2, text1.substring(text1.length-x), text2.substring(text2.length-y)));
 //  }
 //}
 //}
@@ -310,30 +320,30 @@
 //while(1) {
 //  if (v_map[d].hasOwnProperty ? v_map[d].hasOwnProperty((x-1)+","+y) : (v_map[d][(x-1)+","+y] !== undefined)) {
 //    x--;
-//    if (last_op === DIFF_DELETE)
+//    if (last_op === EditType.DELETE)
 //      path[0][1] = text1.charAt(x) + path[0][1];
 //    else
-//      path.unshift([DIFF_DELETE, text1.charAt(x)]);
-//    last_op = DIFF_DELETE;
+//      path.unshift(new Difference(EditType.DELETE, text1.charAt(x)]);
+//    last_op = EditType.DELETE;
 //    break;
 //  } else if (v_map[d].hasOwnProperty ? v_map[d].hasOwnProperty(x+","+(y-1)) : (v_map[d][x+","+(y-1)] !== undefined)) {
 //    y--;
-//    if (last_op === DIFF_INSERT)
+//    if (last_op === EditType.INSERT)
 //      path[0][1] = text2.charAt(y) + path[0][1];
 //    else
-//      path.unshift([DIFF_INSERT, text2.charAt(y)]);
-//    last_op = DIFF_INSERT;
+//      path.unshift(new Difference(EditType.INSERT, text2.charAt(y)]);
+//    last_op = EditType.INSERT;
 //    break;
 //  } else {
 //    x--;
 //    y--;
 //    //if (text1.charAt(x) != text2.charAt(y))
-//    //  return alert("No diagonal.  Can't happen. (diff_path1)");
-//    if (last_op === DIFF_EQUAL)
+//    //  return alert("No diagonal.  Can't happen. (Diff.path1)");
+//    if (last_op === EditType.EQUAL)
 //      path[0][1] = text1.charAt(x) + path[0][1];
 //    else
-//      path.unshift([DIFF_EQUAL, text1.charAt(x)]);
-//    last_op = DIFF_EQUAL;
+//      path.unshift(new Difference(EditType.EQUAL, text1.charAt(x)]);
+//    last_op = EditType.EQUAL;
 //  }
 //}
 //}
@@ -341,7 +351,7 @@
 }
 
 
-  ////Work from the middle back to the end to determine the path.
+  // Work from the middle back to the end to determine the path.
 public static List path2(Map v_map, String text1, String text2) {
     return null;
 //var path = [];
@@ -352,308 +362,466 @@
 //while(1) {
 //  if (v_map[d].hasOwnProperty ? v_map[d].hasOwnProperty((x-1)+","+y) : (v_map[d][(x-1)+","+y] !== undefined)) {
 //    x--;
-//    if (last_op === DIFF_DELETE)
+//    if (last_op === EditType.DELETE)
 //      path[path.length-1][1] += text1.charAt(text1.length-x-1);
 //    else
-//      path.push([DIFF_DELETE, text1.charAt(text1.length-x-1)]);
-//    last_op = DIFF_DELETE;
+//      path.push(new Difference(EditType.DELETE, text1.charAt(text1.length-x-1)]);
+//    last_op = EditType.DELETE;
 //    break;
 //  } else if (v_map[d].hasOwnProperty ? v_map[d].hasOwnProperty(x+","+(y-1)) : (v_map[d][x+","+(y-1)] !== undefined)) {
 //    y--;
-//    if (last_op === DIFF_INSERT)
+//    if (last_op === EditType.INSERT)
 //      path[path.length-1][1] += text2.charAt(text2.length-y-1);
 //    else
-//      path.push([DIFF_INSERT, text2.charAt(text2.length-y-1)]);
-//    last_op = DIFF_INSERT;
+//      path.push(new Difference(EditType.INSERT, text2.charAt(text2.length-y-1)]);
+//    last_op = EditType.INSERT;
 //    break;
 //  } else {
 //    x--;
 //    y--;
 //    //if (text1.charAt(text1.length-x-1) != text2.charAt(text2.length-y-1))
-//    //  return alert("No diagonal.  Can't happen. (diff_path2)");
-//    if (last_op === DIFF_EQUAL)
+//    //  return alert("No diagonal.  Can't happen. (Diff.path2)");
+//    if (last_op === EditType.EQUAL)
 //      path[path.length-1][1] += text1.charAt(text1.length-x-1);
 //    else
-//      path.push([DIFF_EQUAL, text1.charAt(text1.length-x-1)]);
-//    last_op = DIFF_EQUAL;
+//      path.push(new Difference(EditType.EQUAL, text1.charAt(text1.length-x-1)]);
+//    last_op = EditType.EQUAL;
 //  }
 //}
 //}
 //return path;
 }
 
-//Trim off common prefix
-public static void prefix(String text1, String text2) {
-//var pointermin = 0;
-//var pointermax = Math.min(text1.length, text2.length);
-//var pointermid = pointermax;
-//while(pointermin < pointermid) {
-//if (text1.substring(0, pointermid) == text2.substring(0, pointermid))
-//  pointermin = pointermid;
-//else
-//  pointermax = pointermid;
-//pointermid = Math.floor((pointermax - pointermin) / 2 + pointermin);
-//}
-//var commonprefix = text1.substring(0, pointermid);
-//text1 = text1.substring(pointermid);
-//text2 = text2.substring(pointermid);
-//return [text1, text2, commonprefix];
+// Trim off common prefix
+public static CommonEnd prefix(String text1, String text2) {
+    int pointermin = 0;
+    int pointermax = Math.min(text1.length(), text2.length());
+    int pointermid = pointermax;
+    while (pointermin < pointermid)
+    {
+        if (text1.substring(0, pointermid) == text2.substring(0, pointermid))
+        {
+            pointermin = pointermid;
+        }
+        else
+        {
+            pointermax = pointermid;
+        }
+        pointermid = (int) Math.floor((pointermax - pointermin) / 2 + pointermin);
+    }
+    String commonprefix = text1.substring(0, pointermid);
+    String left = text1.substring(pointermid);
+    String right = text2.substring(pointermid);
+    return new CommonPrefix(left, right, commonprefix);
 }
 
 
     // Trim off common suffix
-public static void suffix(String text1, String text2)
+public static CommonEnd suffix(String text1, String text2)
 {
-//var pointermin = 0;
-//var pointermax = Math.min(text1.length, text2.length);
-//var pointermid = pointermax;
-//while(pointermin < pointermid) {
-//if (text1.substring(text1.length-pointermid) == text2.substring(text2.length-pointermid))
-//  pointermin = pointermid;
-//else
-//  pointermax = pointermid;
-//pointermid = Math.floor((pointermax - pointermin) / 2 + pointermin);
-//}
-//var commonsuffix = text1.substring(text1.length-pointermid);
-//text1 = text1.substring(0, text1.length-pointermid);
-//text2 = text2.substring(0, text2.length-pointermid);
-//return [text1, text2, commonsuffix];
+    int pointermin = 0;
+    int pointermax = Math.min(text1.length(), text2.length());
+    int pointermid = pointermax;
+    while (pointermin < pointermid)
+    {
+        if (text1.substring(text1.length() - pointermid) == text2.substring(text2.length() - pointermid))
+        {
+            pointermin = pointermid;
+        }
+        else
+        {
+            pointermax = pointermid;
+        }
+        pointermid = (int) Math.floor((pointermax - pointermin) / 2 + pointermin);
+    }
+    String commonsuffix = text1.substring(text1.length() - pointermid);
+    String left = text1.substring(0, text1.length() - pointermid);
+    String right = text2.substring(0, text2.length() - pointermid);
+    return new CommonSuffix(left, right, commonsuffix);
 }
 
 
-    // Do the two texts share a substring which is at least half the length of the longer text?
-    public static void halfmatch(String text1, String text2) {
-//var longtext = text1.length > text2.length ? text1 : text2;
-//var shorttext = text1.length > text2.length ? text2 : text1;
-//if (longtext.length < 10 || shorttext.length < 1)
-//return null; // Pointless.
-//
-//function diff_halfmatch_i(longtext, shorttext, i) {
-//// Start with a 1/4 length substring at position i as a seed.
-//var seed = longtext.substring(i, i+Math.floor(longtext.length/4));
-//var j = -1;
-//var best_common = '';
-//var best_longtext_a, best_longtext_b, best_shorttext_a, best_shorttext_b;
-//while ((j = shorttext.indexOf(seed, j+1)) != -1) {
-//  var my_prefix = diff_prefix(longtext.substring(i), shorttext.substring(j));
-//  var my_suffix = diff_suffix(longtext.substring(0, i), shorttext.substring(0, j));
-//  if (best_common.length < (my_suffix[2] + my_prefix[2]).length) {
-//    best_common = my_suffix[2] + my_prefix[2];
-//    best_longtext_a = my_suffix[0];
-//    best_longtext_b = my_prefix[0];
-//    best_shorttext_a = my_suffix[1];
-//    best_shorttext_b = my_prefix[1];
-//  }
-//}
-//if (best_common.length >= longtext.length/2)
-//  return [best_longtext_a, best_longtext_b, best_shorttext_a, best_shorttext_b, best_common];
-//else
-//  return null;
-//}
-//
-////First check if the second quarter is the seed for a half-match.
-//var hm1 = diff_halfmatch_i(longtext, shorttext, Math.ceil(longtext.length/4));
-////Check again based on the third quarter.
-//var hm2 = diff_halfmatch_i(longtext, shorttext, Math.ceil(longtext.length/2));
-//var hm;
-//if (!hm1 && !hm2)
-//return null;
-//else if (!hm2)
-//hm = hm1;
-//else if (!hm1)
-//hm = hm2;
-//else // Both matched.  Select the longest.
-//hm = hm1[4].length > hm2[4].length ? hm1 : hm2;
-//
-////A half-match was found, sort out the return data.
-//if (text1.length > text2.length) {
-//var text1_a = hm[0];
-//var text1_b = hm[1];
-//var text2_a = hm[2];
-//var text2_b = hm[3];
-//} else {
-//var text2_a = hm[0];
-//var text2_b = hm[1];
-//var text1_a = hm[2];
-//var text1_b = hm[3];
-//}
-//var mid_common = hm[4];
-//return [text1_a, text1_b, text2_a, text2_b, mid_common];
-}
+// Do the two texts share a substring which is at least half the length of the longer text?
+public static CommonMiddle halfmatch(String text1, String text2)
+{
+    String longtext = text1.length() > text2.length() ? text1 : text2;
+    String shorttext = text1.length() > text2.length() ? text2 : text1;
+    if (longtext.length() < 10 || shorttext.length() < 1)
+    {
+        return null; // Pointless.
+    }
 
+    // First check if the second quarter is the seed for a half-match.
+    CommonMiddle hm1 = halfmatch_i(longtext, shorttext, (int) Math.ceil(longtext.length()/4));
+    // Check again based on the third quarter.
+    CommonMiddle hm2 = halfmatch_i(longtext, shorttext, (int) Math.ceil(longtext.length()/2));
+    CommonMiddle hm = null;
+    if (hm1 == null && hm2 == null)
+    {
+        return null;
+    }
+    else if (hm2 == null)
+    {
+        hm = hm1;
+    }
+    else if (hm1 == null)
+    {
+        hm = hm2;
+    }
+    else // Both matched.  Select the longest.
+    {
+        hm = hm1.getCommonality().length() > hm2.getCommonality().length() ? hm1 : hm2;
+    }
 
-////Reduce the number of edits by eliminating semantically trivial equalities.
-public static void cleanup_semantic(List diff) {
-//var changes = false;
-//var equalities = []; // Stack of indices where equalities are found.
-//var lastequality = null; // Always equal to equalities[equalities.length-1][1]
-//var pointer = 0; // Index of current position.
-//var length_changes1 = 0; // Number of characters that changed prior to the equality.
-//var length_changes2 = 0; // Number of characters that changed after the equality.
-//while (pointer < diff.length) {
-//if (diff[pointer][0] == DIFF_EQUAL) { // equality found
-//  equalities.push(pointer);
-//  length_changes1 = length_changes2;
-//  length_changes2 = 0;
-//  lastequality = diff[pointer][1];
-//} else { // an insertion or deletion
-//  length_changes2 += diff[pointer][1].length;
-//  if (lastequality != null && (lastequality.length <= length_changes1) && (lastequality.length <= length_changes2)) {
-//    //alert("Splitting: '"+lastequality+"'");
-//    diff.splice(equalities[equalities.length-1], 0, [DIFF_DELETE, lastequality]); // Duplicate record
-//    diff[equalities[equalities.length-1]+1][0] = DIFF_INSERT; // Change second copy to insert.
-//    equalities.pop();  // Throw away the equality we just deleted;
-//    equalities.pop();  // Throw away the previous equality;
-//    pointer = equalities.length ? equalities[equalities.length-1] : -1;
-//    length_changes1 = 0; // Reset the counters.
-//    length_changes2 = 0;
-//    lastequality = null;
-//    changes = true;
-//  }
-//}
-//pointer++;
-//}
-//
-//if (changes)
-//diff_cleanup_merge(diff);
+    String text1_a = ""; //$NON-NLS-1$
+    String text1_b = ""; //$NON-NLS-1$
+    String text2_a = ""; //$NON-NLS-1$
+    String text2_b = ""; //$NON-NLS-1$
+
+    // A half-match was found, sort out the return data.
+    if (text1.length() > text2.length())
+    {
+        text1_a = hm.getLeftStart();
+        text1_b = hm.getRightStart();
+        text2_a = hm.getLeftEnd();
+        text2_b = hm.getRightEnd();
+    }
+    else
+    {
+        text2_a = hm.getLeftStart();
+        text2_b = hm.getRightStart();
+        text1_a = hm.getRightEnd();
+        text1_b = hm.getRightEnd();
+    }
+    String mid_common = hm.getCommonality();
+    return new CommonMiddle(text1_a, text1_b, mid_common, text2_a, text2_b);
 }
 
+    private static CommonMiddle halfmatch_i(String longtext, String shorttext, int i)
+    {
+        // Start with a 1/4 length substring at position i as a seed.
+        String seed = longtext.substring(i, i + (int) Math.floor(longtext.length()/4));
+        int j = -1;
+        String best_common = ""; //$NON-NLS-1$
+        String best_longtext_a = ""; //$NON-NLS-1$
+        String best_longtext_b = ""; //$NON-NLS-1$
+        String best_shorttext_a = ""; //$NON-NLS-1$
+        String best_shorttext_b = ""; //$NON-NLS-1$
+        while ((j = shorttext.indexOf(seed, j + 1)) != -1)
+        {
+            CommonEnd my_prefix = Diff.prefix(longtext.substring(i), shorttext.substring(j));
+            CommonEnd my_suffix = Diff.suffix(longtext.substring(0, i), shorttext.substring(0, j));
+            if (best_common.length() < (my_suffix.getCommonality() + my_prefix.getCommonality()).length())
+            {
+                best_common = my_suffix.getCommonality() + my_prefix.getCommonality();
+                best_longtext_a = my_suffix.getLeft();
+                best_longtext_b = my_prefix.getLeft();
+                best_shorttext_a = my_suffix.getRight();
+                best_shorttext_b = my_prefix.getRight();
+            }
+        }
 
+        if (best_common.length() >= longtext.length()/2)
+        {
+            return new CommonMiddle(best_longtext_a, best_longtext_b, best_common, best_shorttext_a, best_shorttext_b);
+        }
+
+        return null;
+    }
+
+
+    //Reduce the number of edits by eliminating semantically trivial equalities.
+    public static void cleanup_semantic(List diffs)
+    {
+        boolean changes = false;
+        Stack equalities = new Stack(); // Stack of indices where equalities are found.
+        String lastequality = ""; //$NON-NLS-1$ // Always equal to diff[equalities[equalities.length-1]].getText()
+        int length_changes1 = 0; // Number of characters that changed prior to the equality.
+        int length_changes2 = 0; // Number of characters that changed after the equality.
+        ListIterator pointer = diffs.listIterator();
+        Difference curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
+        while (curDiff != null)
+        {
+            EditType editType =  curDiff.getEditType();
+            if (EditType.EQUAL.equals(editType))
+            {
+                // equality found
+                equalities.push(curDiff);
+                length_changes1 = length_changes2;
+                length_changes2 = 0;
+                lastequality = curDiff.getText();
+            }
+            else
+            {
+                // an insertion or deletion
+                length_changes2 += curDiff.getText().length();
+                int lastLen = lastequality.length();
+                if (lastequality != null && (lastLen <= length_changes1) && (lastLen <= length_changes2))
+                {
+                    // position pointer to the element after the one at the end of the stack
+                    while (curDiff != equalities.lastElement())
+                    {
+                        curDiff = (Difference) pointer.previous();
+                    }
+                    pointer.next();
+
+                    // Replace equality with a delete.
+                    pointer.set(new Difference(EditType.DELETE, lastequality));
+                    // Insert a coresponding an insert.
+                    pointer.add(new Difference(EditType.INSERT, lastequality));
+                    equalities.pop();  // Throw away the equality we just deleted;
+                    if (!equalities.empty())
+                    {
+                        // Throw away the previous equality (it needs to be reevaluated).
+                        equalities.pop();
+                    }
+                    if (equalities.empty())
+                    {
+                        // There are no previous equalities, walk back to the start.
+                        while (pointer.hasPrevious())
+                        {
+                          pointer.previous();
+                        }
+                    }
+                    else
+                    {
+                        // There is a safe equality we can fall back to.
+                        curDiff = (Difference) equalities.lastElement();
+                        while (curDiff != pointer.previous())
+                        {
+                          // Intentionally empty loop.
+                        }
+                    }
+
+                    length_changes1 = 0; // Reset the counters.
+                    length_changes2 = 0;
+                    lastequality = null;
+                    changes = true;
+                }
+            }
+            curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
+        }
+
+        if (changes)
+        {
+            Diff.cleanup_merge(diffs);
+        }
+    }
+
+
     // Reduce the number of edits by eliminating operationally trivial equalities.
-public static void cleanup_efficiency(List diff)
-{
-//var changes = false;
-//var equalities = []; // Stack of indices where equalities are found.
-//var lastequality = ''; // Always equal to equalities[equalities.length-1][1]
-//var pointer = 0; // Index of current position.
-//var pre_ins = false; // Is there an insertion operation before the last equality.
-//var pre_del = false; // Is there an deletion operation before the last equality.
-//var post_ins = false; // Is there an insertion operation after the last equality.
-//var post_del = false; // Is there an deletion operation after the last equality.
-//while (pointer < diff.length) {
-//if (diff[pointer][0] == DIFF_EQUAL) { // equality found
-//  if (diff[pointer][1].length < DIFF_EDIT_COST && (post_ins || post_del)) {
-//    // Candidate found.
-//    equalities.push(pointer);
-//    pre_ins = post_ins;
-//    pre_del = post_del;
-//    lastequality = diff[pointer][1];
-//  } else {
-//    // Not a candidate, and can never become one.
-//    equalities = [];
-//    lastequality = '';
-//  }
-//  post_ins = post_del = false;
-//} else { // an insertion or deletion
-//  if (diff[pointer][0] == DIFF_DELETE)
-//    post_del = true;
-//  else
-//    post_ins = true;
-//  // Five types to be split:
-//  // <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
-//  // <ins>A</ins>X<ins>C</ins><del>D</del>
-//  // <ins>A</ins><del>B</del>X<ins>C</ins>
-//  // <ins>A</del>X<ins>C</ins><del>D</del>
-//  // <ins>A</ins><del>B</del>X<del>C</del>
-//  if (lastequality && ((pre_ins && pre_del && post_ins && post_del) || ((lastequality.length < DIFF_EDIT_COST/2) && (pre_ins + pre_del + post_ins + post_del) == 3))) {
-//    //alert("Splitting: '"+lastequality+"'");
-//    diff.splice(equalities[equalities.length-1], 0, [DIFF_DELETE, lastequality]); // Duplicate record
-//    diff[equalities[equalities.length-1]+1][0] = DIFF_INSERT; // Change second copy to insert.
-//    equalities.pop();  // Throw away the equality we just deleted;
-//    lastequality = '';
-//    if (pre_ins && pre_del) {
-//      // No changes made which could affect previous entry, keep going.
-//      post_ins = post_del = true;
-//      equalities = [];
-//    } else {
-//      equalities.pop();  // Throw away the previous equality;
-//      pointer = equalities.length ? equalities[equalities.length-1] : -1;
-//      post_ins = post_del = false;
-//    }
-//    changes = true;
-//  }
-//}
-//pointer++;
-//}
-//
-//if (changes)
-//diff_cleanup_merge(diff);
-}
+    public static void cleanup_efficiency(List diff)
+    {
+        boolean changes = false;
+        IntStack equalities = new IntStack(); // Stack of indices where equalities are found.
+        String lastequality = ""; //$NON-NLS-1$ // Always equal to diff[equalities[equalities.length-1]].getText();
+        int pointer = 0; // Index of current position.
+        int pre_ins = 0; // Is there an insertion operation before the last equality.
+        int pre_del = 0; // Is there an deletion operation before the last equality.
+        int post_ins = 0; // Is there an insertion operation after the last equality.
+        int post_del = 0; // Is there an deletion operation after the last equality.
 
+        while (pointer < diff.size())
+        {
+            Difference curDiff = (Difference) diff.get(pointer);
+            EditType diff_type = curDiff.getEditType();
+            if (EditType.EQUAL.equals(diff_type)) // equality found
+            {
+                if (curDiff.getText().length() < DIFF_EDIT_COST && (post_ins == 1 || post_del == 1))
+                {
+                    // Candidate found.
+                    equalities.push(pointer);
+                    pre_ins = post_ins;
+                    pre_del = post_del;
+                    lastequality = curDiff.getText();
+                }
+                else
+                {
+                    // Not a candidate, and can never become one.
+                    equalities.clear();
+                    lastequality = ""; //$NON-NLS-1$
+                }
+                post_ins = 0;
+                post_del = 0;
+            }
+            else
+            {
+                // an insertion or deletion
+                if (EditType.DELETE.equals(diff_type))
+                {
+                    post_del = 1;
+                }
+                else
+                {
+                    post_ins = 1;
+                }
 
+                // Five types to be split:
+                // <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
+                // <ins>A</ins>X<ins>C</ins><del>D</del>
+                // <ins>A</ins><del>B</del>X<ins>C</ins>
+                // <ins>A</del>X<ins>C</ins><del>D</del>
+                // <ins>A</ins><del>B</del>X<del>C</del>
+                if (lastequality.length() > 0 && (((pre_ins + pre_del + post_ins + post_del) > 0) || ((lastequality.length() < DIFF_EDIT_COST/2) && (pre_ins + pre_del + post_ins + post_del) == 3)))
+                {
+                    int newLoc = equalities.peek();
+                    Difference newLocDifference = (Difference) diff.get(newLoc);
+                    diff.add(newLoc, new Difference(EditType.DELETE, lastequality)); // Duplicate record
+                    newLocDifference.setEditType(EditType.INSERT); // Change following to insert.
+                    equalities.pop();  // Throw away the equality we just deleted;
+                    lastequality = ""; //$NON-NLS-1$
+                    if (pre_ins == 1 && pre_del == 1)
+                    {
+                        // No changes made which could affect previous entry, keep going.
+                        post_ins = 1;
+                        post_del = 1;
+                        equalities.clear();
+                    }
+                    else
+                    {
+                        equalities.pop();  // Throw away the previous equality;
+                        pointer = equalities.empty() ? -1 : equalities.peek();
+                        post_ins = 0;
+                        post_del = 0;
+                    }
+                    changes = true;
+                }
+            }
+            pointer++;
+        }
+
+        if (changes)
+        {
+            Diff.cleanup_merge(diff);
+        }
+    }
+
+
     // Reorder and merge like edit sections.  Merge equalities.
     // Any edit section can move as long as it doesn't cross an equality.
-public static void cleanup_merge(List diff)
-{
-//diff.push([DIFF_EQUAL, '']);  // Add a dummy entry at the end.
-//var pointer = 0;
-//var count_delete = 0;
-//var count_insert = 0;
-//var text_delete = '';
-//var text_insert = '';
-//var record_insert, record_delete;
-//var my_xfix;
-//while(pointer < diff.length) {
-//if (diff[pointer][0] == DIFF_INSERT) {
-//  count_insert++;
-//  text_insert += diff[pointer][1];
-//  pointer++;
-//} else if (diff[pointer][0] == DIFF_DELETE) {
-//  count_delete++;
-//  text_delete += diff[pointer][1];
-//  pointer++;
-//} else {  // Upon reaching an equality, check for prior redundancies.
-//  if (count_delete != 0 || count_insert != 0) {
-//    if (count_delete != 0 && count_insert != 0) {
-//      // Factor out any common prefixies.
-//      my_xfix = diff_prefix(text_insert, text_delete);
-//      if (my_xfix[2] != '') {
-//        if ((pointer - count_delete - count_insert) > 0 && diff[pointer - count_delete - count_insert - 1][0] == DIFF_EQUAL) {
-//          diff[pointer - count_delete - count_insert - 1][1] += my_xfix[2];
-//        } else {
-//          diff.splice(0, 0, [DIFF_EQUAL, my_xfix[2]]);
-//          pointer++;
-//        }
-//        text_insert = my_xfix[0];
-//        text_delete = my_xfix[1];
-//      }
-//      // Factor out any common suffixies.
-//      my_xfix = diff_suffix(text_insert, text_delete);
-//      if (my_xfix[2] != '') {
-//        text_insert = my_xfix[0];
-//        text_delete = my_xfix[1];
-//        diff[pointer][1] = my_xfix[2] + diff[pointer][1];
-//      }
-//    }
-//    // Delete the offending records and add the merged ones.
-//    if (count_delete == 0)
-//      diff.splice(pointer - count_delete - count_insert, count_delete + count_insert, [DIFF_INSERT, text_insert]);
-//    else if (count_insert == 0)
-//      diff.splice(pointer - count_delete - count_insert, count_delete + count_insert, [DIFF_DELETE, text_delete]);
-//    else
-//      diff.splice(pointer - count_delete - count_insert, count_delete + count_insert, [DIFF_DELETE, text_delete], [DIFF_INSERT, text_insert]);
-//    pointer = pointer - count_delete - count_insert + (count_delete ? 1 : 0) + (count_insert ? 1 : 0) + 1;
-//  } else if (pointer != 0 && diff[pointer-1][0] == DIFF_EQUAL) {
-//    // Merge this equality with the previous one.
-//    diff[pointer-1][1] += diff[pointer][1];
-//    diff.splice(pointer, 1);
-//  } else {
-//    pointer++;
-//  }
-//  count_insert = 0;
-//  count_delete = 0;
-//  text_delete = '';
-//  text_insert = '';
-//}
-//}
-//if (diff[diff.length-1][1] == '')
-//diff.pop();  // Remove the dummy entry at the end.
-}
+    public static void cleanup_merge(List diff)
+    {
+        // Add a dummy entry at the end.
+        diff.add(new Difference(EditType.EQUAL, ""));  //$NON-NLS-1$
+        int pointer = 0;
+        int count_delete = 0;
+        int count_insert = 0;
+        String text_delete = ""; //$NON-NLS-1$
+        String text_insert = ""; //$NON-NLS-1$
+        CommonEnd my_xfix = null;
+        while (pointer < diff.size())
+        {
+            Difference curDiff = (Difference) diff.get(pointer);
+            if (curDiff.getEditType() == EditType.INSERT)
+            {
+                count_insert++;
+                text_insert += curDiff.getText();
+                pointer++;
+            }
+            else if (curDiff.getEditType() == EditType.DELETE)
+            {
+                count_delete++;
+                text_delete += curDiff.getText();
+                pointer++;
+            }
+            else
+            {
+                // Upon reaching an equality, check for prior redundancies.
+                if (count_delete != 0 || count_insert != 0)
+                {
+                    int modSize = count_delete + count_insert;
+                    int modStart = pointer - modSize;
+                    if (count_delete != 0 && count_insert != 0)
+                    {
+                        // Factor out any common prefixies.
+                        my_xfix = Diff.prefix(text_insert, text_delete);
+                        if (my_xfix.getCommonality().length() > 0)
+                        {
+                            Difference preModDifference = null;
+                            if (modStart > 0)
+                            {
+                                preModDifference = (Difference) diff.get(modStart - 1);
+                            }
+                            if (preModDifference != null && EditType.EQUAL.equals(preModDifference.getEditType()))
+                            {
+                                preModDifference.appendText(my_xfix.getCommonality());
+                            }
+                            else
+                            {
+                                // TODO(DMS): Is this correct? shouldn't it be modStart??? was splice(0,0,...)
+                                diff.add(0, new Difference(EditType.EQUAL, my_xfix.getCommonality()));
+                                pointer++;
+                            }
+                            text_insert = my_xfix.getLeft();
+                            text_delete = my_xfix.getRight();
+                        }
 
+                        // Factor out any common suffixies.
+                        my_xfix = Diff.suffix(text_insert, text_delete);
+                        if (my_xfix.getCommonality().length() > 0)
+                        {
+                            text_insert = my_xfix.getLeft();
+                            text_delete = my_xfix.getRight();
+                            curDiff.setText(my_xfix.getCommonality() + curDiff.getText());
+                        }
+                    }
 
+                    // Delete the offending records and add the merged ones.
+                    diff.subList(modStart, modSize).clear();
+                    if (count_delete == 0)
+                    {
+                        diff.add(modStart, new Difference(EditType.INSERT, text_insert));
+                    }
+                    else if (count_insert == 0)
+                    {
+                        diff.add(modStart, new Difference(EditType.DELETE, text_delete));
+                    }
+                    else
+                    {
+                        diff.add(modStart, new Difference(EditType.DELETE, text_delete));
+                        diff.add(modStart + 1, new Difference(EditType.INSERT, text_insert));
+                    }
 
+                    // Adjust pointer to account for the delete and the addition of one of two Differences
+                    // And to point to the next
+                    pointer = pointer - modSize + (count_delete > 0 ? 1 : 0) + (count_insert > 0 ? 1 : 0) + 1;
+                }
+                else
+                {
+                    Difference prevDiff = null;
+                    if (pointer > 0)
+                    {
+                        prevDiff = (Difference) diff.get(pointer - 1);
+                    }
+
+                    if (prevDiff != null && EditType.EQUAL.equals(prevDiff.getEditType()))
+                    {
+                        // Merge this equality with the previous one.
+                        prevDiff.appendText(curDiff.getText());
+                        diff.remove(pointer);
+                    }
+                    else
+                    {
+                        pointer++;
+                    }
+                }
+
+                count_insert = 0;
+                count_delete = 0;
+                text_delete = ""; //$NON-NLS-1$
+                text_insert = ""; //$NON-NLS-1$
+            }
+        }
+
+        Difference lastDiff = (Difference) diff.get(diff.size() - 1);
+        if (lastDiff.getText().length() == 0)
+        {
+            diff.remove(diff.size() - 1);  // Remove the dummy entry at the end.
+        }
+    }
+
     //Add an index to each tuple, represents where the tuple is located in text2.
-    //e.g. [[DIFF_DELETE, 'h', 0], [DIFF_INSERT, 'c', 0], [DIFF_EQUAL, 'at', 1]]
+    //e.g. [new Difference(EditType.DELETE, 'h', 0], new Difference(EditType.INSERT, 'c', 0], new Difference(EditType.EQUAL, 'at', 1]]
     private static void addindex(List diffs)
     {
         int i = 0;
@@ -755,6 +923,154 @@
     }
 
     /**
+     * Represents a common prefix or a common suffix.
+     */
+    public static class CommonEnd
+    {
+        /**
+         * @param left
+         * @param right
+         * @param commonality
+         */
+        public CommonEnd(String left, String right, String commonality)
+        {
+            this.left = left;
+            this.right = right;
+            this.commonality = commonality;
+        }
+
+        /**
+         * @return the left
+         */
+        public String getLeft()
+        {
+            return left;
+        }
+
+        /**
+         * @return the right
+         */
+        public String getRight()
+        {
+            return right;
+        }
+
+        /**
+         * @return the commonality
+         */
+        public String getCommonality()
+        {
+            return commonality;
+        }
+
+        private String left;
+        private String right;
+        private String commonality;
+    }
+
+    /**
+     * Represents a common prefix
+     */
+    public static class CommonPrefix extends CommonEnd
+    {
+        /**
+         * @param left
+         * @param right
+         * @param commonality
+         */
+        public CommonPrefix(String left, String right, String commonality)
+        {
+            super(left, right, commonality);
+        }
+        
+    }
+
+    /**
+     * Represents a common suffix.
+     */
+    public static class CommonSuffix extends CommonEnd
+    {
+        /**
+         * @param left
+         * @param right
+         * @param commonality
+         */
+        public CommonSuffix(String left, String right, String commonality)
+        {
+            super(left, right, commonality);
+        }
+    }
+
+    /**
+     * Represents a common middle.
+     */
+    public static class CommonMiddle
+    {
+        /**
+         * @param leftStart
+         * @param rightStart
+         * @param commonality
+         * @param leftEnd
+         * @param rightEnd
+         */
+        public CommonMiddle(String leftStart, String rightStart, String commonality, String leftEnd, String rightEnd)
+        {
+            super();
+            this.leftStart = leftStart;
+            this.rightStart = rightStart;
+            this.commonality = commonality;
+            this.leftEnd = leftEnd;
+            this.rightEnd = rightEnd;
+        }
+
+        /**
+         * @return the left start
+         */
+        public String getLeftStart()
+        {
+            return leftStart;
+        }
+
+        /**
+         * @return the right start
+         */
+        public String getRightStart()
+        {
+            return rightStart;
+        }
+
+        /**
+         * @return the commonality
+         */
+        public String getCommonality()
+        {
+            return commonality;
+        }
+
+        /**
+         * @return the left end
+         */
+        public String getLeftEnd()
+        {
+            return leftEnd;
+        }
+
+        /**
+         * @return the right end
+         */
+        public String getRightEnd()
+        {
+            return rightEnd;
+        }
+
+        private String leftStart;
+        private String rightStart;
+        private String commonality;
+        private String leftEnd;
+        private String rightEnd;
+    }
+
+    /**
      * Number of seconds to map a diff before giving up.  (0 for infinity)
      */
     public static final double DIFF_TIMEOUT = 1.0;
@@ -765,33 +1081,3 @@
     public static final int DIFF_EDIT_COST = 4;
 
 }
-//
-//
-////Defaults.
-////Redefine these in your program to override the defaults.
-//
-////Number of seconds to map a diff before giving up.  (0 for infinity)
-//var DIFF_TIMEOUT = 1.0;
-////Cost of an empty edit operation in terms of edit characters.
-//var DIFF_EDIT_COST = 4;
-////Tweak the relative importance (0.0 = accuracy, 1.0 = proximity)
-//var MATCH_BALANCE = 0.5;
-////At what point is no match declared (0.0 = perfection, 1.0 = very loose)
-//var MATCH_THRESHOLD = 0.5;
-////The min and max cutoffs used when computing text lengths.
-//var MATCH_MINLENGTH = 100;
-//var MATCH_MAXLENGTH = 1000;
-////Chunk size for context length.
-//var PATCH_MARGIN = 4;
-//
-//
-////////////////////////////////////////////////////////////////////////
-////  Diff                                                            //
-////////////////////////////////////////////////////////////////////////
-//
-////The data structure representing a diff is an array of tuples:
-////[[DIFF_DELETE, "Hello"], [DIFF_INSERT, "Goodbye"], [DIFF_EQUAL, " world."]]
-////which means: delete "Hello", add "Goodbye" and keep " world."
-//var DIFF_DELETE = -1;
-//var DIFF_INSERT = 1;
-//var DIFF_EQUAL = 0;

Modified: trunk/common/src/main/java/org/crosswire/common/diff/Match.java
===================================================================
--- trunk/common/src/main/java/org/crosswire/common/diff/Match.java	2007-05-21 11:37:00 UTC (rev 1335)
+++ trunk/common/src/main/java/org/crosswire/common/diff/Match.java	2007-05-21 20:43:38 UTC (rev 1336)
@@ -54,14 +54,13 @@
         if (!text.substring(newLoc, newLoc + pattern.length()).equals(pattern))
         {
             // Do a fuzzy compare.
-            return Match.bitap(text, pattern, newLoc);
+            return bitap(text, pattern, newLoc);
         }
 
         // else Perfect match at the perfect spot!  (Includes case of null pattern)
         return newLoc;
     }
 
-
     // Locate the best instance of 'pattern' in 'text' near 'loc' using the Bitap algorithm.
     private static int bitap(String text, String pattern, int loc)
     {
@@ -82,8 +81,7 @@
         {
             score_threshold = Math.min(Match.bitap_score(pattern, score_text_length, loc, 0, best_loc), score_threshold);
         }
-
-        
+  
        // What about in the other direction? (speedup)
         best_loc = text.lastIndexOf(pattern, loc + pattern.length());
         if (best_loc != -1)
@@ -118,7 +116,7 @@
                 {
                     bin_max = bin_mid;
                 }
-                bin_mid = (int) Math.floor((bin_max - bin_min) / 2 + bin_min);
+                bin_mid = (bin_max - bin_min) / 2 + bin_min;
             }
 
             bin_max = bin_mid; // Use the result from this iteration as the maximum for the next.
@@ -136,7 +134,8 @@
 
             for (int j = finish - 1; j >= start; j--)
             {
-                int mask = ((Integer) s.get(new Character(text.charAt(j)))).intValue();
+                Character curChar = new Character(text.charAt(j));
+                int mask = s.containsKey(curChar) ? ((Integer) s.get(curChar)).intValue() : 0;
                 if (d == 0) // First pass: exact match.
                 {
                     rd[j] = ((rd[j + 1] << 1) | 1) & mask;
@@ -171,6 +170,7 @@
 
             if (Match.bitap_score(pattern, score_text_length, loc, d + 1, loc) > score_threshold) // No hope for a (better) match at greater error levels.
             {
+                // No hope for a (better) match at greater error levels.
                 break;
             }
 
@@ -184,7 +184,7 @@
     {
         // Compute and return the score for a match with e errors and x location.
         int d = Math.abs(loc - x);
-        return (e / pattern.length() / Match.BALANCE) + (d / score_text_length / (1.0 - Match.BALANCE));
+        return (e / (float)pattern.length() / Match.BALANCE) + (d / (float) score_text_length / (1.0 - Match.BALANCE));
     }
 
     // Initialize the alphabet for the Bitap algorithm.

Modified: trunk/common/src/main/java/org/crosswire/common/diff/Patch.java
===================================================================
--- trunk/common/src/main/java/org/crosswire/common/diff/Patch.java	2007-05-21 11:37:00 UTC (rev 1335)
+++ trunk/common/src/main/java/org/crosswire/common/diff/Patch.java	2007-05-21 20:43:38 UTC (rev 1336)
@@ -3,225 +3,112 @@
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
+import java.util.ListIterator;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 public class Patch
 {
-    //  Constructor for a patch object.
-    public Patch()
+    /**
+     * Compute a list of patches to turn text1 into text2.
+     * A set of diffs will be computed.
+     * @param text1 Old text
+     * @param text2 New text
+     * @return List of PatchEntry objects.
+     */
+    public static List make(String text1, String text2)
     {
-        this.diffs = new ArrayList();
-        this.start1 = 0;
-        this.start2 = 0;
-        this.length1 = 0;
-        this.length2 = 0;
-    }
-
-    //  Emmulate GNU diff's format.
-    //  Header: @@ -382,8 +481,9 @@
-    //  Indicies are printed as 1-based, not 0-based.
-    public String toString()
-    {
-        StringBuffer txt = new StringBuffer();
-        txt.append("@@ -"); //$NON-NLS-1$
-        txt.append(getCoordinates(start1, length1));
-        txt.append(" +"); //$NON-NLS-1$
-        txt.append(getCoordinates(start2, length2));
-        txt.append(" @@\n"); //$NON-NLS-1$
-
-        Iterator iter = diffs.iterator();
-        while (iter.hasNext())
+        List diffs = Diff.main(text1, text2, true);
+        if (diffs.size() > 2)
         {
-            Difference diff = (Difference) iter.next();
-            txt.append(diff.getEditType().getSymbol());
-            txt.append(diff.getText());
-            txt.append('\n');
+            Diff.cleanup_semantic(diffs);
+            Diff.cleanup_efficiency(diffs);
         }
-        return txt.toString();
+        return make(text1, text2, diffs);
     }
 
-    private String getCoordinates(int start, int length)
-    {
-        StringBuffer buf = new StringBuffer();
-
-        buf.append(start);
-        if (length == 0)
-        {
-            buf.append(start);
-            buf.append(".0"); //$NON-NLS-1$
-        }
-        else if (length == 1)
-        {
-            buf.append(start1 + 1);
-        }
-        else
-        {
-            buf.append(start + 1);
-            buf.append(',');
-            buf.append(length);
-        }
-
-        return buf.toString();
-    }
-
-    //  Compute and return the source text (all equalities and deletions).
-    private String getText1()
-    {
-        StringBuffer txt = new StringBuffer();
-        Iterator iter = diffs.iterator();
-        while (iter.hasNext())
-        {
-            Difference diff = (Difference) iter.next();
-            if (!EditType.INSERT.equals(diff.getEditType()))
-            {
-                txt.append(diff.getText());
-            }
-        }
-        return txt.toString();
-    }
-
-    // Compute and return the destination text (all equalities and insertions).
-    private String getText2()
-    {
-        StringBuffer txt = new StringBuffer();
-        Iterator iter = diffs.iterator();
-        while (iter.hasNext())
-        {
-            Difference diff = (Difference) iter.next();
-            if (!EditType.DELETE.equals(diff.getEditType()))
-            {
-                txt.append(diff.getText());
-            }
-        }
-        return txt.toString();
-    }
-
-
-    private void addContext(String text)
-    {
-        String pattern = text.substring(start2, start2+length1);
-        int padding = 0;
-
-        // Increase the context until we're unique (but don't let the pattern expand beyond Match.MAXBITS).
-        int end = Match.MAXBITS - Patch.MARGIN - Patch.MARGIN;
-        while (text.indexOf(pattern) != text.lastIndexOf(pattern) && pattern.length() < end)
-        {
-            padding += Patch.MARGIN;
-            pattern = text.substring(start2 - padding, start2 + length1 + padding);
-        }
-
-        // Add one chunk for good luck.
-        padding += Patch.MARGIN;
-
-        // Add the prefix.
-        String prefix = text.substring(start2 - padding, start2);
-        int prefixLength = prefix.length();
-        if (prefixLength > 0)
-        {
-            diffs.add(0, new Difference(EditType.EQUAL, prefix));
-        }
-
-        // Add the suffix
-        String suffix = text.substring(start2 + length1, start2 + length1 + padding);
-        int suffixLength = suffix.length();
-        if (suffixLength > 0)
-        {
-            diffs.add(new Difference(EditType.EQUAL, suffix));
-        }
-
-        // Roll back the start points.
-        start1 -= prefixLength;
-        start2 -= prefixLength;
-
-        // Extend the lengths.
-        length1 += prefixLength + suffixLength;
-        length2 += prefixLength + suffixLength;
-    }
-
-    // Compute a list of patches to turn text1 into text2.
+    /**
+     * Compute a list of patches to turn text1 into text2.
+     * Use the diffs provided.
+     * @param text1 Old text
+     * @param text2 New text
+     * @param diffs Optional array of diff tuples for text1 to text2.
+     * @return LinkedList of Patch objects.
+     */
     public static List make(String text1, String text2, List diffList)
     {
         List patches = new ArrayList();
         List diffs = diffList;
 
-        // Use diff if provided, otherwise compute it ourselves.
-        if (diffs == null)
-        {
-            diffs = Diff.main(text1, text2, true);
-            if (diffs.size() > 2)
-            {
-                Diff.cleanup_semantic(diffs);
-                Diff.cleanup_efficiency(diffs);
-            }
-        }
+        assert diffs != null;
 
         if (diffs.size() == 0)
         {
             return patches; // Get rid of the null case.
         }
 
-        Patch patch = new Patch();
+        PatchEntry patch = new PatchEntry();
         int char_count1 = 0; // Number of characters into the text1 string.
         int char_count2 = 0; // Number of characters into the text2 string.
-        String prepatch_text = text1; // Recreate the patches to determine context info.
+        // Recreate the patches to determine context info.
+        String prepatch_text = text1;
         String postpatch_text = text1;
         Iterator iter = diffs.iterator();
         int x = 0;
         while (iter.hasNext())
         {
             Difference diff = (Difference) iter.next();
-            EditType diff_type = diff.getEditType();
-            String diff_text = diff.getText();
-            int len = diff_text.length();
+            EditType editType = diff.getEditType();
+            String diffText = diff.getText();
+            int len = diffText.length();
 
-            if (patch.diffs.size() == 0 && !EditType.EQUAL.equals(diff_type))
+            if (!patch.hasDifferences() && !EditType.EQUAL.equals(editType))
             {
                 // A new patch starts here.
-                patch.start1 = char_count1;
-                patch.start2 = char_count2;
+                patch.setLeftStart(char_count1);
+                patch.setRightStart(char_count2);
             }
 
-            if (EditType.INSERT.equals(diff_type))
+            if (EditType.INSERT.equals(editType))
             {
                 // Insertion
-                patch.diffs.add(diff);
-                patch.length2 += len;
-                postpatch_text = postpatch_text.substring(0, char_count2) + diff_text + postpatch_text.substring(char_count2);
+                patch.addDifference(diff);
+                patch.adjustLength2(len);
+                postpatch_text = postpatch_text.substring(0, char_count2) + diffText + postpatch_text.substring(char_count2);
             }
-            else if (EditType.DELETE.equals(diff_type))
+            else if (EditType.DELETE.equals(editType))
             {
                 // Deletion.
-                patch.length1 += len;
-                patch.diffs.add(diff);
+                patch.adjustLength1(len);
+                patch.addDifference(diff);
                 postpatch_text = postpatch_text.substring(0, char_count2) + postpatch_text.substring(char_count2 + len);
             }
-            else if (EditType.EQUAL.equals(diff_type) && len <= 2 * Patch.MARGIN && patch.diffs.size() != 0 && diffs.size() != x + 1)
+            else if (EditType.EQUAL.equals(editType) && len <= 2 * Patch.MARGIN && patch.hasDifferences() && diffs.size() != x + 1)
             {
                 // Small equality inside a patch.
-                patch.diffs.add(diff);
-                patch.length1 += len;
-                patch.length2 += len;
+                patch.addDifference(diff);
+                patch.adjustLength1(len);
+                patch.adjustLength2(len);
             }
 
-            if (EditType.EQUAL.equals(diff_type) && len >= 2 * Patch.MARGIN) {
+            if (EditType.EQUAL.equals(editType) && len >= 2 * Patch.MARGIN) {
                 // Time for a new patch.
-                if (patch.diffs.size() != 0)
+                if (patch.hasDifferences())
                 {
                     patch.addContext(prepatch_text);
                     patches.add(patch);
-                    patch = new Patch();
+                    patch = new PatchEntry();
                     prepatch_text = postpatch_text;
                 }
             }
 
             // Update the current character count.
-            if (!EditType.INSERT.equals(diff_type))
+            if (!EditType.INSERT.equals(editType))
             {
                 char_count1 += len;
             }
 
-            if (!EditType.DELETE.equals(diff_type))
+            if (!EditType.DELETE.equals(editType))
             {
                 char_count2 += len;
             }
@@ -230,7 +117,8 @@
         }
 
         // Pick up the leftover patch if not empty.
-        if (patch.diffs.size() != 0) {
+        if (patch.hasDifferences())
+        {
             patch.addContext(prepatch_text);
             patches.add(patch);
         }
@@ -239,195 +127,203 @@
     }
 
 
-    //  Merge a set of patches onto the text.
-    //  Return a patched text, as well as a list of true/false values indicating which patches were applied.
+    /**
+     * Merge a set of patches onto the text.  Return a patched text, as well
+     * as an array of true/false values indicating which patches were applied.
+     * @param patches Array of patch objects
+     * @param text Old text
+     * @return the patch result
+     */
     public static PatchResults apply(List patches, String text)
     {
-        patches = Patch.splitmax(patches);
-        List results = new ArrayList();
+        Patch.splitMax(patches);
+        boolean[] results = new boolean[patches.size()];
+        String resultText = text;
         int delta = 0;
         int expected_loc = 0;
         int start_loc = -1;
         String text1 = ""; //$NON-NLS-1$
         String text2 = ""; //$NON-NLS-1$
-        List diff;
-        Difference mod = null;
+        List diffs;
         int index1 = 0;
         int index2 = 0;
-        for (int x = 0; x < patches.size(); x++)
+        int x = 0;
+        Iterator patchIter = patches.iterator();
+        while (patchIter.hasNext())
         {
-            Patch curPatch = (Patch) patches.get(x);
-            expected_loc = curPatch.start2 + delta;
-            text1 = curPatch.getText1();
-            start_loc = Match.main(text, text1, expected_loc);
+            PatchEntry aPatch = (PatchEntry) patchIter.next();
+            expected_loc = aPatch.getRightStart() + delta;
+            text1 = aPatch.getLeftText();
+            start_loc = Match.main(resultText, text1, expected_loc);
             if (start_loc == -1)
             {
                 // No match found.  :(
-                results.add(Boolean.FALSE);
+                results[x] = false;
             }
             else
             {
                 // Found a match.  :)
-                results.add(Boolean.TRUE);
+                results[x] = true;
                 delta = start_loc - expected_loc;
-                text2 = text.substring(start_loc, start_loc + text1.length());
-                if (text1 == text2)
+                text2 = resultText.substring(start_loc, start_loc + text1.length());
+                if (text1.equals(text2))
                 {
                     // Perfect match, just shove the replacement text in.
-                    text = text.substring(0, start_loc) + curPatch.getText2() + text.substring(start_loc + text1.length());
+                    resultText = resultText.substring(0, start_loc) + aPatch.getRightText() + resultText.substring(start_loc + text1.length());
                 }
                 else
                 {
                     // Imperfect match.  Run a diff to get a framework of equivalent indicies.
-                    diff = Diff.main(text1, text2, false);
+                    diffs = Diff.main(text1, text2, false);
                     index1 = 0;
-                    for (int y = 0; y < curPatch.diffs.size(); y++) {
-                        mod = (Difference) curPatch.diffs.get(y);
-                        EditType diff_type = mod.getEditType();
-                        if (!EditType.EQUAL.equals(diff_type))
+                    Iterator diffIter = aPatch.iterator();
+                    while (diffIter.hasNext())
+                    {
+                        Difference aDiff = (Difference) diffIter.next();
+                        EditType editType = aDiff.getEditType();
+                        if (!EditType.EQUAL.equals(editType))
                         {
-                            index2 = Diff.xindex(diff, index1);
+                            index2 = Diff.xindex(diffs, index1);
                         }
 
-                        if (EditType.INSERT.equals(diff_type)) // Insertion
+                        if (EditType.INSERT.equals(editType)) // Insertion
                         {
-                            text = text.substring(0, start_loc + index2) + mod.getText() + text.substring(start_loc + index2);
+                            resultText = resultText.substring(0, start_loc + index2) + aDiff.getText() + resultText.substring(start_loc + index2);
                         }
-                        else if (EditType.DELETE.equals(diff_type)) // Deletion
+                        else if (EditType.DELETE.equals(editType)) // Deletion
                         {
-                            text = text.substring(0, start_loc + index2) + text.substring(start_loc + Diff.xindex(diff, index1 + mod.getText().length()));
+                            resultText = resultText.substring(0, start_loc + index2) + resultText.substring(start_loc + Diff.xindex(diffs, index1 + aDiff.getText().length()));
                         }
 
-                        if (!EditType.DELETE.equals(diff_type))
+                        if (!EditType.DELETE.equals(editType))
                         {
-                            index1 += mod.getText().length();
+                            index1 += aDiff.getText().length();
                         }
                     }
                 }
             }
         }
-        return new PatchResults(text, results);
+        return new PatchResults(resultText, results);
     }
 
-
-    //Look through the patches and break up any which are longer than the maximum limit of the match algorithm.
-    static public List splitmax(List patches)
+    /**
+     * Look through the patches and break up any which are longer than the maximum
+     * limit of the match algorithm.
+     * @param patches List of Patch objects.
+     */
+    static public void splitMax(List patches)
     {
-        Patch bigpatch = null;
-        Patch patch = null;
-        int patch_size = 0;
-        EditType diff_type = null;
-        int start1 = 0;
-        int start2 = 0;
-        String diff_text = ""; //$NON-NLS-1$
-        String precontext = ""; //$NON-NLS-1$
-        String postcontext = ""; //$NON-NLS-1$
-        boolean empty = true;
-        for (int x = 0; x < patches.size(); x++)
+        ListIterator pointer = patches.listIterator();
+        PatchEntry bigpatch = pointer.hasNext() ? (PatchEntry) pointer.next() : null;
+        while (bigpatch != null)
         {
-            Patch curPatch = (Patch) patches.get(x);
-            if (curPatch.length1 > Match.MAXBITS)
+            if (bigpatch.getLeftLength() <= Match.MAXBITS)
             {
-                bigpatch = curPatch;
-                // Remove the big old patch.
-                patches.remove(x);
-                patch_size = Match.MAXBITS;
-                start1 = bigpatch.start1;
-                start2 = bigpatch.start2;
-                precontext = ""; //$NON-NLS-1$
-                while (bigpatch.diffs.size() != 0)
+                bigpatch = pointer.hasNext() ? (PatchEntry) pointer.next() : null;
+            }
+
+            // Remove the big old patch.
+            pointer.remove();
+            int patch_size = Match.MAXBITS;
+            int start1 = bigpatch.getLeftStart();
+            int start2 = bigpatch.getRightStart();
+            String precontext = ""; //$NON-NLS-1$
+            while (bigpatch.hasDifferences())
+            {
+                // Create one of several smaller patches.
+                PatchEntry patch = new PatchEntry();
+                boolean empty = true;
+
+                int len = precontext.length();
+                patch.setLeftStart(start1 - len);
+                patch.setRightStart(start2 - len);
+                if (len > 0)
                 {
-                    // Create one of several smaller patches.
-                    patch = new Patch();
-                    empty = true;
+                    patch.setLeftLength(len);
+                    patch.setRightLength(len);
+                    patch.addDifference(new Difference(EditType.EQUAL, precontext));
+                }
 
-                    int len = precontext.length();
-                    patch.start1 = start1 - len;
-                    patch.start2 = start2 - len;
-                    if (len > 0)
+                while (bigpatch.hasDifferences() && patch.getLeftLength() < patch_size - Patch.MARGIN)
+                {
+                    Difference bigDiff = bigpatch.getFirstDifference();
+                    EditType diff_type = bigDiff.getEditType();
+                    String diff_text = bigDiff.getText();
+                    if (EditType.INSERT.equals(diff_type))
                     {
-                        patch.length1 = len;
-                        patch.length2 = len;
-                        patch.diffs.add(new Difference(EditType.EQUAL, precontext));
+                        // Insertions are harmless.
+                        len = diff_text.length();
+                        patch.adjustLength2(len);
+                        start2 += len;
+                        patch.addDifference(bigpatch.removeFirstDifference());
+                        empty = false;
                     }
-
-                    while (bigpatch.diffs.size() != 0 && patch.length1 < patch_size - Patch.MARGIN)
+                    else
                     {
-                        Difference bigDiff = (Difference) bigpatch.diffs.get(0);
-                        diff_type = bigDiff.getEditType();
-                        diff_text = bigDiff.getText();
-                        if (EditType.INSERT.equals(diff_type))
+                        // Deletion or equality.  Only take as much as we can stomach.
+                        len = diff_text.length();
+                        diff_text = diff_text.substring(0, Math.min(len, patch_size - patch.getLeftLength() - Patch.MARGIN));
+                        patch.adjustLength1(len);
+                        start1 += len;
+                        if (EditType.EQUAL.equals(diff_type))
                         {
-                            // Insertions are harmless.
-                            len = diff_text.length();
-                            patch.length2 += len;
+                            patch.adjustLength2(len);
                             start2 += len;
-                            patch.diffs.add(bigpatch.diffs.remove(0));
-                            empty = false;
                         }
                         else
                         {
-                            // Deletion or equality.  Only take as much as we can stomach.
-                            diff_text = diff_text.substring(0, patch_size - patch.length1 - Patch.MARGIN);
-                            len = diff_text.length();
-                            patch.length1 += len;
-                            start1 += len;
-                            if (EditType.EQUAL.equals(diff_type))
-                            {
-                                patch.length2 += len;
-                                start2 += len;
-                            }
-                            else
-                            {
-                                empty = false;
-                            }
-
-                            patch.diffs.add(new Difference(diff_type, diff_text));
-
-                            if (diff_text.equals(bigDiff.getText()))
-                            {
-                                bigpatch.diffs.remove(0);
-                            }
-                            else
-                            {
-                                bigDiff.setText(bigDiff.getText().substring(len));
-                            }
+                            empty = false;
                         }
-                    }
 
-                    // Compute the head context for the next patch.
-                    precontext = patch.getText2();
-                    precontext = precontext.substring(precontext.length() - Patch.MARGIN);
+                        patch.addDifference(new Difference(diff_type, diff_text));
 
-                    // Append the end context for this patch.
-                    postcontext = bigpatch.getText1().substring(0, Patch.MARGIN);
-                    if (postcontext.length() > 0)
-                    {
-                        patch.length1 += postcontext.length();
-                        patch.length2 += postcontext.length();
-                        if (patch.diffs.size() > 0 && EditType.EQUAL.equals(((Difference) patch.diffs.get(patch.diffs.size() - 1)).getEditType()))
+                        if (diff_text.equals(bigDiff.getText()))
                         {
-                            Difference diff = (Difference) patch.diffs.get(patch.diffs.size() - 1);
-                            diff.appendText(postcontext);
+                            bigpatch.removeFirstDifference();
                         }
                         else
                         {
-                            patch.diffs.add(new Difference(EditType.EQUAL, postcontext));
+                            bigDiff.setText(bigDiff.getText().substring(len));
                         }
                     }
+                }
 
-                    if (!empty)
+                // Compute the head context for the next patch.
+                precontext = patch.getRightText();
+                precontext = precontext.substring(precontext.length() - Patch.MARGIN);
+
+                // Append the end context for this patch.
+                String postcontext = bigpatch.getLeftText().substring(0, Patch.MARGIN);
+                if (postcontext.length() > 0)
+                {
+                    patch.adjustLength1(postcontext.length());
+                    patch.adjustLength2(postcontext.length());
+                    if (patch.getDifferenceCount() > 0 && EditType.EQUAL.equals(patch.getLastDifference().getEditType()))
                     {
-                        patches.add(x++, patch);
+                        Difference diff = patch.getLastDifference();
+                        diff.appendText(postcontext);
                     }
+                    else
+                    {
+                        patch.addDifference(new Difference(EditType.EQUAL, postcontext));
+                    }
                 }
+
+                if (!empty)
+                {
+                    pointer.add(patch);
+                }
             }
-        }
 
-        return patches;
+            bigpatch = pointer.hasNext() ? (PatchEntry) pointer.next() : null;
+        }
     }
 
-    // Take a list of patches and return a textual representation.
+    /**
+     * Take a list of patches and return a textual representation.
+     * @param patches List of Patch objects.
+     * @return Text representation of patches.
+     */
     public static String toText(List patches) {
         StringBuffer text = new StringBuffer();
         Iterator iter = patches.iterator();
@@ -438,12 +334,17 @@
         return text.toString();
     }
 
-    // Take a textual representation of patches and return a list of patch objects.
+    /**
+     * Parse a textual representation of patches and return a List of Patch
+     * objects.
+     * @param textline Text representation of patches
+     * @return List of Patch objects
+     */
     public static List fromText(String input)
     {
         List patches = new ArrayList();
         String[] text = input.split("\n"); //$NON-NLS-1$
-        Patch patch = null;
+        PatchEntry patch = null;
         char sign = '\0';
         String line = ""; //$NON-NLS-1$
 
@@ -451,83 +352,72 @@
         while (lineCount < text.length)
         {
             Matcher matcher = patchPattern.matcher(text[lineCount]);
-            if (!matcher.find())
-            {
-                throw new RuntimeException("Invalid patch string:\n"+text[lineCount]); //$NON-NLS-1$
-            }
+            matcher.matches();
+            assert matcher.groupCount() == 4 : "Invalid patch string:\n"+text[lineCount]; //$NON-NLS-1$
             // m = text[0].match(/^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$/);
 
-
-            patch = new Patch();
+            patch = new PatchEntry();
             patches.add(patch);
-            patch.start1 = Integer.parseInt(matcher.group(1));
+            patch.setLeftStart(Integer.parseInt(matcher.group(1)));
 
             if (matcher.group(2).length() == 0)
             {
-                patch.start1--;
-                patch.length1 = 1;
+                patch.adjustStart1(-1);
+                patch.setLeftLength(1);
             }
             else if (matcher.group(2).charAt(0) == '0')
             {
-                patch.length1 = 0;
+                patch.setLeftLength(0);
             }
             else
             {
-                patch.start1--;
-                patch.length1 = Integer.parseInt(matcher.group(2));
+                patch.adjustStart1(-1);
+                patch.setLeftLength(Integer.parseInt(matcher.group(2)));
             }
 
-            patch.start2 = Integer.parseInt(matcher.group(3));
+            patch.setRightStart(Integer.parseInt(matcher.group(3)));
             if (matcher.group(4).length() == 0)
             {
-                patch.start2--;
-                patch.length2 = 1;
+                patch.adjustStart2(-1);
+                patch.setRightLength(1);
             }
             else if (matcher.group(4).charAt(0) == '0')
             {
-                patch.length2 = 0;
+                patch.setRightLength(0);
             }
             else
             {
-                patch.start2--;
-                patch.length2 = Integer.parseInt(matcher.group(4));
+                patch.adjustStart2(-1);
+                patch.setRightLength(Integer.parseInt(matcher.group(4)));
             }
             lineCount++;
 
-            while (lineCount < text.length)
-            {
-                if (text[lineCount].length() > 0)
+            consume:
+                while (lineCount < text.length)
                 {
-                    sign = text[lineCount].charAt(0);
-                    line = text[lineCount].substring(1);
-                    if (sign == '-')
+                    if (text[lineCount].length() > 0)
                     {
-                        // Deletion.
-                        patch.diffs.add(new Difference(EditType.DELETE, line));
+                        sign = text[lineCount].charAt(0);
+                        line = text[lineCount].substring(1);
+                        switch (sign)
+                        {
+                            case '-': // Deletion.
+                                patch.addDifference(new Difference(EditType.DELETE, line));
+                                break;
+                            case '+': // Insertion.
+                                patch.addDifference(new Difference(EditType.INSERT, line));
+                                break;
+                            case ' ': // Minor equality.
+                                patch.addDifference(new Difference(EditType.EQUAL, line));
+                                break;
+                            case '@': // start of next patch
+                                break consume;
+                            default: // What!!!
+                                assert false : "Invalid patch mode: '"+sign+"'\n"+line; //$NON-NLS-1$ //$NON-NLS-2$
+                        }
                     }
-                    else if (sign == '+')
-                    {
-                        // Insertion.
-                        patch.diffs.add(new Difference(EditType.INSERT, line));
-                    }
-                    else if (sign == ' ')
-                    {
-                        // Minor equality.
-                        patch.diffs.add(new Difference(EditType.EQUAL, line));
-                    }
-                    else if (sign == '@')
-                    {
-                        // Start of next patch.
-                        break;
-                    }
-                    else
-                    {
-                        // What!!!
-                        throw new RuntimeException("Invalid patch mode: '"+sign+"'\n"+line); //$NON-NLS-1$ //$NON-NLS-2$
-                    }
+                    lineCount++;
                 }
-                lineCount++;
-            }
         }
         return patches;
     }
@@ -538,18 +428,18 @@
          * @param text
          * @param results
          */
-        public PatchResults(String text, List results)
+        public PatchResults(String text, boolean[] results)
         {
             this.text = text;
-            this.results = results;
+            this.results = (boolean[]) results.clone();
         }
 
         /**
          * @return the results
          */
-        public List getResults()
+        public boolean[] getResults()
         {
-            return results;
+            return (boolean[]) results.clone();
         }
 
         /**
@@ -561,19 +451,14 @@
         }
 
         private String text;
-        private List results;
+        private boolean[] results;
     }
+
     /**
      * Chunk size for context length.
      */
     public static final int MARGIN = 4;
 
-    private List diffs;
-    private int start1;
-    private int start2;
-    private int length1;
-    private int length2;
-    
     private static String patchRE = "^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$"; //$NON-NLS-1$
     private static Pattern patchPattern = Pattern.compile(patchRE);
 

Added: trunk/common/src/main/java/org/crosswire/common/diff/PatchEntry.java
===================================================================
--- trunk/common/src/main/java/org/crosswire/common/diff/PatchEntry.java	                        (rev 0)
+++ trunk/common/src/main/java/org/crosswire/common/diff/PatchEntry.java	2007-05-21 20:43:38 UTC (rev 1336)
@@ -0,0 +1,292 @@
+package org.crosswire.common.diff;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+public class PatchEntry
+{
+    //  Constructor for a patch object.
+    public PatchEntry()
+    {
+        this.diffs = new ArrayList();
+        this.leftStart = 0;
+        this.rightStart = 0;
+        this.leftLength = 0;
+        this.rightLength = 0;
+    }
+
+    /**
+     * @return the leftStart
+     */
+    public int getLeftStart()
+    {
+        return leftStart;
+    }
+
+    /**
+     * @param leftStart the leftStart to set
+     */
+    public void setLeftStart(int start1)
+    {
+        this.leftStart = start1;
+    }
+
+    /**
+     * @param adjustment the adjustment to leftStart
+     */
+    public void adjustStart1(int adjustment)
+    {
+        this.leftStart += adjustment;
+    }
+
+    /**
+     * @return the rightStart
+     */
+    public int getRightStart()
+    {
+        return rightStart;
+    }
+
+    /**
+     * @param rightStart the rightStart to set
+     */
+    public void setRightStart(int start2)
+    {
+        this.rightStart = start2;
+    }
+
+    /**
+     * @param adjustment the adjustment to rightStart
+     */
+    public void adjustStart2(int adjustment)
+    {
+        this.rightStart += adjustment;
+    }
+
+    /**
+     * @return the leftLength
+     */
+    public int getLeftLength()
+    {
+        return leftLength;
+    }
+
+    /**
+     * @param leftLength the leftLength to set
+     */
+    public void setLeftLength(int length1)
+    {
+        this.leftLength = length1;
+    }
+
+    /**
+     * @param adjustment the adjustment to leftLength
+     */
+    public void adjustLength1(int adjustment)
+    {
+        this.leftLength += adjustment;
+    }
+
+    /**
+     * @return the rightLength
+     */
+    public int getRightLength()
+    {
+        return rightLength;
+    }
+
+    /**
+     * @param rightLength the rightLength to set
+     */
+    public void setRightLength(int length2)
+    {
+        this.rightLength = length2;
+    }
+
+    /**
+     * @param adjustment the adjustment to rightLength
+     */
+    public void adjustLength2(int adjustment)
+    {
+        this.rightLength += adjustment;
+    }
+
+    //  Emmulate GNU diff's format.
+    //  Header: @@ -382,8 +481,9 @@
+    //  Indicies are printed as 1-based, not 0-based.
+    public String toString()
+    {
+        StringBuffer txt = new StringBuffer();
+        txt.append("@@ -"); //$NON-NLS-1$
+        txt.append(getCoordinates(leftStart, leftLength));
+        txt.append(" +"); //$NON-NLS-1$
+        txt.append(getCoordinates(rightStart, rightLength));
+        txt.append(" @@\n"); //$NON-NLS-1$
+
+        Iterator iter = diffs.iterator();
+        while (iter.hasNext())
+        {
+            Difference diff = (Difference) iter.next();
+            txt.append(diff.getEditType().getSymbol());
+            txt.append(diff.getText());
+            txt.append('\n');
+        }
+        return txt.toString();
+    }
+
+    //  Compute and return the source text (all equalities and deletions).
+    public String getLeftText()
+    {
+        StringBuffer txt = new StringBuffer();
+        Iterator iter = diffs.iterator();
+        while (iter.hasNext())
+        {
+            Difference diff = (Difference) iter.next();
+            if (!EditType.INSERT.equals(diff.getEditType()))
+            {
+                txt.append(diff.getText());
+            }
+        }
+        return txt.toString();
+    }
+
+    // Compute and return the destination text (all equalities and insertions).
+    public String getRightText()
+    {
+        StringBuffer txt = new StringBuffer();
+        Iterator iter = diffs.iterator();
+        while (iter.hasNext())
+        {
+            Difference diff = (Difference) iter.next();
+            if (!EditType.DELETE.equals(diff.getEditType()))
+            {
+                txt.append(diff.getText());
+            }
+        }
+        return txt.toString();
+    }
+
+    public void addContext(String text)
+    {
+        String pattern = text.substring(rightStart, rightStart + leftLength);
+        int padding = 0;
+
+        // Increase the context until we're unique (but don't let the pattern expand beyond Match.MAXBITS).
+        int end = Match.MAXBITS - PatchEntry.MARGIN - PatchEntry.MARGIN;
+        while (text.indexOf(pattern) != text.lastIndexOf(pattern) && pattern.length() < end)
+        {
+            padding += PatchEntry.MARGIN;
+            pattern = text.substring(rightStart - padding, rightStart + leftLength + padding);
+        }
+
+        // Add one chunk for good luck.
+        padding += PatchEntry.MARGIN;
+
+        // Add the prefix.
+        String prefix = text.substring(rightStart - padding, rightStart);
+        int prefixLength = prefix.length();
+        if (prefixLength > 0)
+        {
+            diffs.add(0, new Difference(EditType.EQUAL, prefix));
+        }
+
+        // Add the suffix
+        String suffix = text.substring(rightStart + leftLength, rightStart + leftLength + padding);
+        int suffixLength = suffix.length();
+        if (suffixLength > 0)
+        {
+            diffs.add(new Difference(EditType.EQUAL, suffix));
+        }
+
+        // Roll back the start points.
+        leftStart -= prefixLength;
+        rightStart -= prefixLength;
+
+        // Extend the lengths.
+        leftLength += prefixLength + suffixLength;
+        rightLength += prefixLength + suffixLength;
+    }
+
+    public void addDifference(Difference diff)
+    {
+        diffs.add(diff);
+    }
+
+    public int getDifferenceCount()
+    {
+        return diffs.size();
+    }
+
+    public boolean hasDifferences()
+    {
+        return diffs.size() != 0;
+    }
+
+    public Iterator iterator()
+    {
+        return diffs.iterator();
+    }
+
+    public Difference getFirstDifference()
+    {
+        if (diffs.size() == 0)
+        {
+            return null;
+        }
+        return (Difference) diffs.get(0);
+    }
+
+    public Difference removeFirstDifference()
+    {
+        if (diffs.size() == 0)
+        {
+            return null;
+        }
+        return (Difference) diffs.remove(0);
+    }
+
+    public Difference getLastDifference()
+    {
+        if (diffs.size() == 0)
+        {
+            return null;
+        }
+        return (Difference) diffs.get(diffs.size() - 1);
+    }
+
+    private String getCoordinates(int start, int length)
+    {
+        StringBuffer buf = new StringBuffer();
+
+        buf.append(start);
+        if (length == 0)
+        {
+            buf.append(start);
+            buf.append(".0"); //$NON-NLS-1$
+        }
+        else if (length == 1)
+        {
+            buf.append(leftStart + 1);
+        }
+        else
+        {
+            buf.append(start + 1);
+            buf.append(',');
+            buf.append(length);
+        }
+
+        return buf.toString();
+    }
+
+    /**
+     * Chunk size for context length.
+     */
+    private static final int MARGIN = 4;
+
+    private List diffs;
+    private int leftStart;
+    private int rightStart;
+    private int leftLength;
+    private int rightLength;
+}




More information about the jsword-svn mailing list