[jsword-svn] r1337 - trunk/common/src/main/java/org/crosswire/common/diff

dmsmith at www.crosswire.org dmsmith at www.crosswire.org
Mon May 21 18:05:58 MST 2007


Author: dmsmith
Date: 2007-05-21 18:05:57 -0700 (Mon, 21 May 2007)
New Revision: 1337

Modified:
   trunk/common/src/main/java/org/crosswire/common/diff/Diff.java
   trunk/common/src/main/java/org/crosswire/common/diff/Difference.java
   trunk/common/src/main/java/org/crosswire/common/diff/Patch.java
Log:
getting closer on diff

Modified: trunk/common/src/main/java/org/crosswire/common/diff/Diff.java
===================================================================
--- trunk/common/src/main/java/org/crosswire/common/diff/Diff.java	2007-05-21 20:43:38 UTC (rev 1336)
+++ trunk/common/src/main/java/org/crosswire/common/diff/Diff.java	2007-05-22 01:05:57 UTC (rev 1337)
@@ -21,13 +21,17 @@
  */
 package org.crosswire.common.diff;
 
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.ListIterator;
 import java.util.Map;
+import java.util.Set;
 import java.util.Stack;
 
-import org.crosswire.common.util.IntStack;
-
 /**
  * Computes the difference between two texts to create a patch.
  * Applies the patch onto another text, allowing for errors.
@@ -40,467 +44,713 @@
  */
 public class Diff
 {
-    //Find the differences between two texts.  Return an array of changes.
-  public static List main(String text1, String text2, boolean checklines)
-  {
-      return null;
-    ////If checklines is present and false, then don't run a line-level diff first to identify the changed areas.
-    ////Check for equality (speedup)
-//if (text1 == text2)
-//return [new Difference(EditType.EQUAL, text1]];
-//
-//if (typeof checklines == 'undefined')
-//checklines = true;
-//
-//var a;
-    ////Trim off common prefix (speedup)
-//a = Diff.prefix(text1, text2);
-//text1 = a[0];
-//text2 = a[1];
-//var commonprefix = a[2];
-//
-    ////Trim off common suffix (speedup)
-//a = Diff.suffix(text1, text2);
-//text1 = a[0];
-//text2 = a[1];
-//var commonsuffix = a[2];
-//
-//var diff, i;
-//var longtext = text1.length > text2.length ? text1 : text2;
-//var shorttext = text1.length > text2.length ? text2 : text1;
-//
-//if (!text1) {  // Just add some text (speedup)
-//diff = [new Difference(EditType.INSERT, text2]];
-//} else if (!text2) { // Just delete some text (speedup)
-//diff = [new Difference(EditType.DELETE, text1]];
-//} else if ((i = longtext.indexOf(shorttext)) != -1) {
-//// Shorter text is inside the longer text (speedup)
-//diff = [new Difference(EditType.INSERT, longtext.substring(0, i)], new Difference(EditType.EQUAL, shorttext], new Difference(EditType.INSERT, longtext.substring(i+shorttext.length)]];
-//// Swap insertions for deletions if diff is reversed.
-//if (text1.length > text2.length)
-//  diff[0][0] = diff[2][0] = EditType.DELETE;
-//} else {
-//longtext = shorttext = null; // Garbage collect
-//// Check to see if the problem can be split in two.
-//var hm = Diff.halfmatch(text1, text2);
-//if (hm) {
-//  // A half-match was found, sort out the return data.
-//  var text1_a = hm[0];
-//  var text1_b = hm[1];
-//  var text2_a = hm[2];
-//  var text2_b = hm[3];
-//  var mid_common = hm[4];
-//  // Send both pairs off for separate processing.
-//  var diff_a = Diff.main(text1_a, text2_a, checklines);
-//  var diff_b = Diff.main(text1_b, text2_b, checklines);
-//  // Merge the results.
-//  diff = diff_a.concat([new Difference(EditType.EQUAL, mid_common]], diff_b);
-//} else {
-//  // Perform a real diff.
-//  if (checklines && text1.length + text2.length < 250)
-//    checklines = false; // Too trivial for the overhead.
-//  if (checklines) {
-//    // Scan the text on a line-by-line basis first.
-//    a = Diff.lines2chars(text1, text2);
-//    text1 = a[0];
-//    text2 = a[1];
-//    var linearray = a[2];
-//  }
-//  diff = Diff.map(text1, text2);
-//  if (!diff) // No acceptable result.
-//    diff = [new Difference(EditType.DELETE, text1], new Difference(EditType.INSERT, text2]];
-//  if (checklines) {
-//    Diff.chars2lines(diff, linearray); // Convert the diff back to original text.
-//    Diff.cleanup_semantic(diff); // Eliminate freak matches (e.g. blank lines)
-//
-//    // Rediff any replacement blocks, this time on character-by-character basis.
-//    diff.push(new Difference(EditType.EQUAL, ""]);  // Add a dummy entry at the end.
-//    var pointer = 0;
-//    var count_delete = 0;
-//    var count_insert = 0;
-//    var text_delete = "";
-//    var text_insert = "";
-//    while(pointer < diff.length) {
-//      if (diff[pointer][0] == EditType.INSERT) {
-//        count_insert++;
-//        text_insert += diff[pointer][1];
-//      } else if (diff[pointer][0] == EditType.DELETE) {
-//        count_delete++;
-//        text_delete += diff[pointer][1];
-//      } else {  // Upon reaching an equality, check for prior redundancies.
-//        if (count_delete >= 1 && count_insert >= 1) {
-//          // Delete the offending records and add the merged ones.
-//          a = Diff.main(text_delete, text_insert, false);
-//          diff.splice(pointer - count_delete - count_insert, count_delete + count_insert);
-//          pointer = pointer - count_delete - count_insert;
-//          for (i=a.length-1; i>=0; i--)
-//            diff.splice(pointer, 0, a[i]);
-//          pointer = pointer + a.length;
-//        }
-//        count_insert = 0;
-//        count_delete = 0;
-//        text_delete = "";
-//        text_insert = "";
-//      }
-//      pointer++;
-//    }
-//    diff.pop();  // Remove the dummy entry at the end.
-//
-//  }
-//}
-//}
-//
-//if (commonprefix)
-//diff.unshift(new Difference(EditType.EQUAL, commonprefix]);
-//if (commonsuffix)
-//diff.push(new Difference(EditType.EQUAL, commonsuffix]);
-//Diff.cleanup_merge(diff);
-//return diff;
-}
+    /**
+     * Find the differences between two texts.
+     * Run a faster slightly less optimal diff
+     * This method allows the 'checklines' of diff_main() to be optional.
+     * Most of the time checklines is wanted, so default to true.
+     * @param text1 Old string to be diffed
+     * @param text2 New string to be diffed
+     * @return List of Diff objects
+     */
+    public static List main(String text1, String text2)
+    {
+      return main(text1, text2, true);
+    }
 
+    /**
+     * Find the differences between two texts.  Simplifies the problem by
+     * stripping any common prefix or suffix off the texts before diffing.
+     * @param text1 Old string to be diffed
+     * @param text2 New string to be diffed
+     * @param checklines Speedup flag.  If false, then don't run a
+     *     line-level diff first to identify the changed areas.
+     *     If true, then run a faster slightly less optimal diff
+     * @return List of Diff objects
+     */
+    public static List main(String text1, String text2, boolean checklines)
+    {
+      // Check for equality (speedup)
+      List diffs;
+      if (text1.equals(text2)) {
+        diffs = new LinkedList();
+        diffs.add(new Difference(EditType.EQUAL, text1));
+        return diffs;
+      }
 
-  //Split text into an array of strings.
-  //Reduce the texts to a string of hashes where each character represents one line.
-  public static List lines2chars(String text1, String text2)
-  {
-      return null;
-//var linearray = new Array();  // linearray[4] == "Hello\n"
-//var linehash = new Object();  // linehash["Hello\n"] == 4
-//
-////"\x00" is a valid JavaScript character, but the Venkman debugger doesn't like it (bug 335098)
-////So we'll insert a junk entry to avoid generating a null character.
-//linearray.push("");
-//
-//function lines2chars_munge(text) {
-//// My first ever closure!
-//var i, line;
-//var chars = "";
-//while (text) {
-//  i = text.indexOf('\n');
-//  if (i == -1)
-//    i = text.length;
-//  line = text.substring(0, i+1);
-//  text = text.substring(i+1);
-//  if (linehash.hasOwnProperty ? linehash.hasOwnProperty(line) : (linehash[line] !== undefined)) {
-//    chars += String.fromCharCode(linehash[line]);
-//  } else {
-//    linearray.push(line);
-//    linehash[line] = linearray.length - 1;
-//    chars += String.fromCharCode(linearray.length - 1);
-//  }
-//}
-//return chars;
-//}
-//
-//var chars1 = Diff.lines2chars_munge(text1);
-//var chars2 = Diff.lines2chars_munge(text2);
-//return [chars1, chars2, linearray];
-}
+      // Trim off common prefix (speedup)
+      int commonlength = commonPrefix(text1, text2);
+      String commonprefix = text1.substring(0, commonlength);
+      String work1 = text1.substring(commonlength);
+      String work2 = text2.substring(commonlength);
 
+      // Trim off common suffix (speedup)
+      commonlength = commonSuffix(work1, work2);
+      String commonsuffix = work1.substring(work1.length() - commonlength);
+      work1 = work1.substring(0, work1.length() - commonlength);
+      work2 = work2.substring(0, work2.length() - commonlength);
 
-  //Rehydrate the text in a diff from a string of line hashes to real lines of text.
-  public static void chars2lines(List diffs, List linearray) {
-      String chars;
-      StringBuffer text = new StringBuffer();
-      for (int x = 0; x < diffs.size(); x++)
+      // Compute the diff on the middle block
+      diffs = compute(work1, work2, checklines);
+
+      // Restore the prefix and suffix
+      if (!commonprefix.equals("")) //$NON-NLS-1$
       {
-          Difference diff = (Difference) diffs.get(x);
-          chars = diff.getText();
+        diffs.add(0, new Difference(EditType.EQUAL, commonprefix));
+      }
 
-          for (int y = 0; y < chars.length(); y++)
-          {
-              text.append(linearray.get(chars.charAt(y)));
-          }
+      if (!commonsuffix.equals("")) //$NON-NLS-1$
+      {
+        diffs.add(new Difference(EditType.EQUAL, commonsuffix));
+      }
 
-          diff.setText(text.toString());
+      cleanupMerge(diffs);
+
+      return diffs;
+    }
+
+    /**
+     * Find the differences between two texts.
+     * @param text1 Old string to be diffed
+     * @param text2 New string to be diffed
+     * @param checklines Speedup flag.  If false, then don't run a
+     *     line-level diff first to identify the changed areas.
+     *     If true, then run a faster slightly less optimal diff
+     * @return Linked List of Diff objects
+     */
+    public static List compute(String text1, String text2,
+                                      boolean checklines) {
+      List diffs = new ArrayList();
+
+      if (text1.equals("")) //$NON-NLS-1$
+      {
+        // Just add some text (speedup)
+        diffs.add(new Difference(EditType.INSERT, text2));
+        return diffs;
       }
-  }
 
+      if (text2.equals("")) //$NON-NLS-1$
+      {
+        // Just delete some text (speedup)
+        diffs.add(new Difference(EditType.DELETE, text1));
+        return diffs;
+      }
 
-  //Explore the intersection points between the two texts.
-  public static String map(String text1, String text2)
-  {
-      return null;
-//var now = new Date();
-//var ms_end = now.getTime() + DIFF_TIMEOUT * 1000; // Don't run for too long.
-//var max = (text1.length + text2.length) / 2;
-//var v_map1 = new Array();
-//var v_map2 = new Array();
-//var v1 = new Object();
-//var v2 = new Object();
-//v1[1] = 0;
-//v2[1] = 0;
-//var x, y;
-//var footstep; // Used to track overlapping paths.
-//var footsteps = new Object();
-//var done = false;
-//var hasOwnProperty = !!(footsteps.hasOwnProperty);
-////If the total number of characters is odd, then the front path will collide with the reverse path.
-//var front = (text1.length + text2.length) % 2;
-//for (var d=0; d<max; d++) {
-//now = new Date();
-//if (DIFF_TIMEOUT > 0 && now.getTime() > ms_end) // Timeout reached
-//  return null;
-//
-//// Walk the front path one step.
-//v_map1[d] = new Object();
-//for (var k=-d; k<=d; k+=2) {
-//  if (k == -d || k != d && v1[k-1] < v1[k+1])
-//    x = v1[k+1];
-//  else
-//    x = v1[k-1]+1;
-//  y = x - k;
-//  footstep = x+","+y;
-//  if (front && (hasOwnProperty ? footsteps.hasOwnProperty(footstep) : (footsteps[footstep] !== undefined)))
-//    done = true;
-//  if (!front)
-//    footsteps[footstep] = d;
-//  while (!done && x < text1.length && y < text2.length && text1.charAt(x) == text2.charAt(y)) {
-//    x++; y++;
-//    footstep = x+","+y;
-//    if (front && (hasOwnProperty ? footsteps.hasOwnProperty(footstep) : (footsteps[footstep] !== undefined)))
-//      done = true;
-//    if (!front)
-//      footsteps[footstep] = d;
-//  }
-//  v1[k] = x;
-//  v_map1[d][x+","+y] = true;
-//  if (done) {
-//    // Front path ran over reverse path.
-//    v_map2 = v_map2.slice(0, footsteps[footstep]+1);
-//    var a = Diff.path1(v_map1, text1.substring(0, x), text2.substring(0, y));
-//    return a.concat(Diff.path2(v_map2, text1.substring(x), text2.substring(y)));
-//  }
-//}
-//
-//// Walk the reverse path one step.
-//v_map2[d] = new Object();
-//for (var k=-d; k<=d; k+=2) {
-//  if (k == -d || k != d && v2[k-1] < v2[k+1])
-//    x = v2[k+1];
-//  else
-//    x = v2[k-1]+1;
-//  y = x - k;
-//  footstep = (text1.length-x)+","+(text2.length-y);
-//  if (!front && (hasOwnProperty ? footsteps.hasOwnProperty(footstep) : (footsteps[footstep] !== undefined)))
-//    done = true;
-//  if (front)
-//    footsteps[footstep] = d;
-//  while (!done && x < text1.length && y < text2.length && text1.charAt(text1.length-x-1) == text2.charAt(text2.length-y-1)) {
-//    x++; y++;
-//    footstep = (text1.length-x)+","+(text2.length-y);
-//    if (!front && (hasOwnProperty ? footsteps.hasOwnProperty(footstep) : (footsteps[footstep] !== undefined)))
-//      done = true;
-//    if (front)
-//      footsteps[footstep] = d;
-//  }
-//  v2[k] = x;
-//  v_map2[d][x+","+y] = true;
-//  if (done) {
-//    // Reverse path ran over front path.
-//    v_map1 = v_map1.slice(0, footsteps[footstep]+1);
-//    var a = Diff.path1(v_map1, text1.substring(0, text1.length-x), text2.substring(0, text2.length-y));
-//    return a.concat(Diff.path2(v_map2, text1.substring(text1.length-x), text2.substring(text2.length-y)));
-//  }
-//}
-//}
-////Number of diffs equals number of characters, no commonality at all.
-//return null;
-}
+      String longtext = text1.length() > text2.length() ? text1 : text2;
+      String shorttext = text1.length() > text2.length() ? text2 : text1;
+      int i = longtext.indexOf(shorttext);
+      if (i != -1) {
+        // Shorter text is inside the longer text (speedup)
+        EditType op = (text1.length() > text2.length()) ?
+                       EditType.DELETE : EditType.INSERT;
+        diffs.add(new Difference(op, longtext.substring(0, i)));
+        diffs.add(new Difference(EditType.EQUAL, shorttext));
+        diffs.add(new Difference(op, longtext.substring(i + shorttext.length())));
+      }
+      longtext = shorttext = null; // Garbage collect
 
+      // Check to see if the problem can be split in two.
+      CommonMiddle hm = halfMatch(text1, text2);
+      if (hm != null)
+      {
+        // A half-match was found, sort out the return data.
+        // Send both pairs off for separate processing.
+        List diffs_a = main(hm.getLeftStart(), hm.getRightStart(), checklines);
+        List diffs_b = main(hm.getLeftEnd(), hm.getRightEnd(), checklines);
+        // Merge the results.
+        diffs = diffs_a;
+        diffs.add(new Difference(EditType.EQUAL, hm.getCommonality()));
+        diffs.addAll(diffs_b);
+        return diffs;
+      }
 
-  // Work from the middle back to the start to determine the path.
-  public static List path1(Map v_map, String text1, String text2) {
-      return null;
-//var path = [];
-//var x = text1.length;
-//var y = text2.length;
-//var last_op = null;
-//for (var d=v_map.length-2; d>=0; d--) {
-//while(1) {
-//  if (v_map[d].hasOwnProperty ? v_map[d].hasOwnProperty((x-1)+","+y) : (v_map[d][(x-1)+","+y] !== undefined)) {
-//    x--;
-//    if (last_op === EditType.DELETE)
-//      path[0][1] = text1.charAt(x) + path[0][1];
-//    else
-//      path.unshift(new Difference(EditType.DELETE, text1.charAt(x)]);
-//    last_op = EditType.DELETE;
-//    break;
-//  } else if (v_map[d].hasOwnProperty ? v_map[d].hasOwnProperty(x+","+(y-1)) : (v_map[d][x+","+(y-1)] !== undefined)) {
-//    y--;
-//    if (last_op === EditType.INSERT)
-//      path[0][1] = text2.charAt(y) + path[0][1];
-//    else
-//      path.unshift(new Difference(EditType.INSERT, text2.charAt(y)]);
-//    last_op = EditType.INSERT;
-//    break;
-//  } else {
-//    x--;
-//    y--;
-//    //if (text1.charAt(x) != text2.charAt(y))
-//    //  return alert("No diagonal.  Can't happen. (Diff.path1)");
-//    if (last_op === EditType.EQUAL)
-//      path[0][1] = text1.charAt(x) + path[0][1];
-//    else
-//      path.unshift(new Difference(EditType.EQUAL, text1.charAt(x)]);
-//    last_op = EditType.EQUAL;
-//  }
-//}
-//}
-//return path;
-}
+      // Perform a real diff.
+      if (checklines && text1.length() + text2.length() < 250)
+      {
+        checklines = false; // Too trivial for the overhead.
+      }
 
+      ArrayList linearray = null;
+      if (checklines)
+      {
+        // Scan the text on a line-by-line basis first.
+        Object b[] = linesToChars(text1, text2);
+        text1 = (String) b[0];
+        text2 = (String) b[1];
+        linearray = (ArrayList) b[2];
+       }
 
-  // Work from the middle back to the end to determine the path.
-public static List path2(Map v_map, String text1, String text2) {
-    return null;
-//var path = [];
-//var x = text1.length;
-//var y = text2.length;
-//var last_op = null;
-//for (var d=v_map.length-2; d>=0; d--) {
-//while(1) {
-//  if (v_map[d].hasOwnProperty ? v_map[d].hasOwnProperty((x-1)+","+y) : (v_map[d][(x-1)+","+y] !== undefined)) {
-//    x--;
-//    if (last_op === EditType.DELETE)
-//      path[path.length-1][1] += text1.charAt(text1.length-x-1);
-//    else
-//      path.push(new Difference(EditType.DELETE, text1.charAt(text1.length-x-1)]);
-//    last_op = EditType.DELETE;
-//    break;
-//  } else if (v_map[d].hasOwnProperty ? v_map[d].hasOwnProperty(x+","+(y-1)) : (v_map[d][x+","+(y-1)] !== undefined)) {
-//    y--;
-//    if (last_op === EditType.INSERT)
-//      path[path.length-1][1] += text2.charAt(text2.length-y-1);
-//    else
-//      path.push(new Difference(EditType.INSERT, text2.charAt(text2.length-y-1)]);
-//    last_op = EditType.INSERT;
-//    break;
-//  } else {
-//    x--;
-//    y--;
-//    //if (text1.charAt(text1.length-x-1) != text2.charAt(text2.length-y-1))
-//    //  return alert("No diagonal.  Can't happen. (Diff.path2)");
-//    if (last_op === EditType.EQUAL)
-//      path[path.length-1][1] += text1.charAt(text1.length-x-1);
-//    else
-//      path.push(new Difference(EditType.EQUAL, text1.charAt(text1.length-x-1)]);
-//    last_op = EditType.EQUAL;
-//  }
-//}
-//}
-//return path;
-}
+      diffs = map(text1, text2);
+      if (diffs == null)
+      {
+        // No acceptable result.
+        diffs = new ArrayList();
+        diffs.add(new Difference(EditType.DELETE, text1));
+        diffs.add(new Difference(EditType.INSERT, text2));
+      }
 
-// Trim off common prefix
-public static CommonEnd prefix(String text1, String text2) {
-    int pointermin = 0;
-    int pointermax = Math.min(text1.length(), text2.length());
-    int pointermid = pointermax;
-    while (pointermin < pointermid)
-    {
-        if (text1.substring(0, pointermid) == text2.substring(0, pointermid))
+      if (checklines)
+      {
+        // Convert the diff back to original text.
+        charsToLines(diffs, linearray);
+        // Eliminate freak matches (e.g. blank lines)
+        cleanupSemantic(diffs);
+
+        // Rediff any replacement blocks, this time character-by-character.
+        // Add a dummy entry at the end.
+        diffs.add(new Difference(EditType.EQUAL, "")); //$NON-NLS-1$
+        int count_delete = 0;
+        int count_insert = 0;
+        String text_delete = ""; //$NON-NLS-1$
+        String text_insert = ""; //$NON-NLS-1$
+        ListIterator pointer = diffs.listIterator();
+        Difference thisDiff = (Difference) pointer.next();
+        while (thisDiff != null)
         {
-            pointermin = pointermid;
+          if (thisDiff.getEditType() == EditType.INSERT)
+          {
+            count_insert++;
+            text_insert += thisDiff.getText();
+          }
+          else if (thisDiff.getEditType() == EditType.DELETE)
+          {
+            count_delete++;
+            text_delete += thisDiff.getText();
+          }
+          else
+          {
+            // Upon reaching an equality, check for prior redundancies.
+            if (count_delete >= 1 && count_insert >= 1)
+            {
+              // Delete the offending records and add the merged ones.
+              pointer.previous();
+              for (int j = 0; j < count_delete + count_insert; j++) {
+                pointer.previous();
+                pointer.remove();
+              }
+              List newDiffs = main(text_delete, text_insert, false);
+              Iterator iter = newDiffs.iterator();
+              while (iter.hasNext())
+              {
+                pointer.add(iter.next());
+              }
+            }
+            count_insert = 0;
+            count_delete = 0;
+            text_delete = ""; //$NON-NLS-1$
+            text_insert = ""; //$NON-NLS-1$
+          }
+          thisDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
         }
-        else
-        {
-            pointermax = pointermid;
-        }
-        pointermid = (int) Math.floor((pointermax - pointermin) / 2 + pointermin);
+        diffs.remove(diffs.size() - 1);  // Remove the dummy entry at the end.
+      }
+      return diffs;
     }
-    String commonprefix = text1.substring(0, pointermid);
-    String left = text1.substring(pointermid);
-    String right = text2.substring(pointermid);
-    return new CommonPrefix(left, right, commonprefix);
-}
 
 
-    // Trim off common suffix
-public static CommonEnd suffix(String text1, String text2)
-{
-    int pointermin = 0;
-    int pointermax = Math.min(text1.length(), text2.length());
-    int pointermid = pointermax;
-    while (pointermin < pointermid)
+    /**
+     * Split two texts into a list of strings.  Reduce the texts to a string of
+     * hashes where each Unicode character represents one line.
+     * @param text1 First string
+     * @param text2 Second string
+     * @return Three element Object array, containing the encoded text1, the
+     *     encoded text2 and the List of unique strings.  The zeroth element
+     *     of the List of unique strings is intentionally blank.
+     */
+    public static Object[] linesToChars(String text1, String text2)
     {
-        if (text1.substring(text1.length() - pointermid) == text2.substring(text2.length() - pointermid))
+        List linearray = new ArrayList();
+        Map linehash = new HashMap();
+        // e.g. linearray[4] == "Hello\n"
+        // e.g. linehash.get("Hello\n") == 4
+
+        // "\x00" is a valid character, but various debuggers don't like it.
+        // So we'll insert a junk entry to avoid generating a null character.
+        linearray.add(""); //$NON-NLS-1$
+
+        String chars1 = linesToCharsMunge(text1, linearray, linehash);
+        String chars2 = linesToCharsMunge(text2, linearray, linehash);
+        return new Object[]{chars1, chars2, linearray};
+    }
+
+    /**
+     * Split a text into a list of strings.  Reduce the texts to a string of
+     * hashes where each Unicode character represents one line.
+     * @param text String to encode
+     * @param linearray List of unique strings
+     * @param linehash Map of strings to indices
+     * @return Encoded string
+     */
+    private static String linesToCharsMunge(String text, List linearray, Map linehash)
+    {
+        int i;
+        String line;
+        String chars = ""; //$NON-NLS-1$
+        String work = text;
+        // text.split('\n') would work fine, but would temporarily double our
+        // memory footprint for minimal speed improvement.
+        while (work.length() != 0)
         {
-            pointermin = pointermid;
+            i = work.indexOf('\n');
+            if (i == -1)
+            {
+                i = work.length() - 1;
+            }
+            line = work.substring(0, i + 1);
+            work = work.substring(i + 1);
+            if (linehash.containsKey(line))
+            {
+                Integer charInt = (Integer) linehash.get(line);
+                chars += String.valueOf((char) charInt.intValue());
+            }
+            else
+            {
+                linearray.add(line);
+                linehash.put(line, new Integer(linearray.size() - 1));
+                chars += String.valueOf((char) (linearray.size() - 1));
+            }
         }
-        else
+        return chars;
+    }
+
+    /**
+     * Rehydrate the text in a diff from a string of line hashes to real lines of
+     * text.
+     * @param diffs LinkedList of Diff objects
+     * @param linearray List of unique strings
+     */
+    public static void charsToLines(List diffs, List linearray)
+    {
+        String chars;
+        StringBuffer text = new StringBuffer();
+        for (int x = 0; x < diffs.size(); x++)
         {
-            pointermax = pointermid;
+            Difference diff = (Difference) diffs.get(x);
+            chars = diff.getText();
+
+            for (int y = 0; y < chars.length(); y++)
+            {
+                text.append(linearray.get(chars.charAt(y)));
+            }
+
+            diff.setText(text.toString());
         }
-        pointermid = (int) Math.floor((pointermax - pointermin) / 2 + pointermin);
     }
-    String commonsuffix = text1.substring(text1.length() - pointermid);
-    String left = text1.substring(0, text1.length() - pointermid);
-    String right = text2.substring(0, text2.length() - pointermid);
-    return new CommonSuffix(left, right, commonsuffix);
-}
 
-
-// Do the two texts share a substring which is at least half the length of the longer text?
-public static CommonMiddle halfmatch(String text1, String text2)
-{
-    String longtext = text1.length() > text2.length() ? text1 : text2;
-    String shorttext = text1.length() > text2.length() ? text2 : text1;
-    if (longtext.length() < 10 || shorttext.length() < 1)
+    /**
+     * Explore the intersection points between the two texts.
+     * @param text1 Old string to be diffed
+     * @param text2 New string to be diffed
+     * @return LinkedList of Diff objects or null if no diff available
+     */
+    public static List map(String text1, String text2)
     {
-        return null; // Pointless.
-    }
+        long ms_end = System.currentTimeMillis() + (long) (TIMEOUT * 1000);
+        int max_d = (text1.length() + text2.length()) / 2;
+        List v_map1 = new ArrayList();
+        List v_map2 = new ArrayList();
+        Map v1 = new HashMap();
+        Map v2 = new HashMap();
+        v1.put(new Integer(1), new Integer(0));
+        v2.put(new Integer(1), new Integer(0));
+        int x, y;
+        String footstep; // Used to track overlapping paths.
+        Map footsteps = new HashMap();
+        boolean done = false;
+        // If the total number of characters is odd, then the front path will
+        // collide with the reverse path.
+        boolean front = ((text1.length() + text2.length()) % 2 == 1);
+        for (int d = 0; d < max_d; d++)
+        {
+            // Bail out if timeout reached.
+            if (TIMEOUT > 0 && System.currentTimeMillis() > ms_end)
+            {
+                return null;
+            }
 
-    // First check if the second quarter is the seed for a half-match.
-    CommonMiddle hm1 = halfmatch_i(longtext, shorttext, (int) Math.ceil(longtext.length()/4));
-    // Check again based on the third quarter.
-    CommonMiddle hm2 = halfmatch_i(longtext, shorttext, (int) Math.ceil(longtext.length()/2));
-    CommonMiddle hm = null;
-    if (hm1 == null && hm2 == null)
-    {
+            // Walk the front path one step.
+            v_map1.add(new HashSet()); // Adds at index 'd'.
+            for (int k = -d; k <= d; k += 2)
+            {
+                Integer kPlus1Key = new Integer(k + 1);
+                Integer kPlus1Value = (Integer) v1.get(kPlus1Key);
+                Integer kMinus1Key = new Integer(k + 1);
+                Integer kMinus1Value = (Integer) v1.get(kMinus1Key);
+                if (k == -d || k != d && kMinus1Value.intValue() < kPlus1Value.intValue())
+                {
+                    x = kPlus1Value.intValue();
+                }
+                else
+                {
+                    x = kMinus1Value.intValue() + 1;
+                }
+                y = x - k;
+                footstep = x + "," + y; //$NON-NLS-1$
+                if (front && (footsteps.containsKey(footstep)))
+                {
+                    done = true;
+                }
+                if (!front)
+                {
+                    footsteps.put(footstep, new Integer(d));
+                }
+                while (!done && x < text1.length() && y < text2.length()
+                                && text1.charAt(x) == text2.charAt(y))
+                {
+                    x++;
+                    y++;
+                    footstep = x + "," + y; //$NON-NLS-1$
+                    if (front && (footsteps.containsKey(footstep)))
+                    {
+                        done = true;
+                    }
+                    if (!front)
+                    {
+                        footsteps.put(footstep, new Integer(d));
+                    }
+                }
+                v1.put(new Integer(k), new Integer(x));
+                Set s = (Set) v_map1.get(d);
+                s.add(x + "," + y); //$NON-NLS-1$
+                if (done)
+                {
+                    // Front path ran over reverse path.
+                    Integer footstepValue = (Integer) footsteps.get(footstep);
+                    v_map2 = v_map2.subList(0, footstepValue.intValue() + 1);
+                    List a = path1(v_map1, text1.substring(0, x),
+                                   text2.substring(0, y));
+                    a.addAll(path2(v_map2, text1.substring(x), text2.substring(y)));
+                    return a;
+                }
+            }
+
+            // Walk the reverse path one step.
+            v_map2.add(new HashSet()); // Adds at index 'd'.
+            for (int k = -d; k <= d; k += 2)
+            {
+                Integer kPlus1Key = new Integer(k + 1);
+                Integer kPlus1Value = (Integer) v2.get(kPlus1Key);
+                Integer kMinus1Key = new Integer(k + 1);
+                Integer kMinus1Value = (Integer) v2.get(kMinus1Key);
+                if (k == -d || k != d && kMinus1Value.intValue() < kPlus1Value.intValue())
+                {
+                    x = kPlus1Value.intValue();
+                }
+                else
+                {
+                    x = kMinus1Value.intValue() + 1;
+                }
+                y = x - k;
+                footstep = (text1.length() - x) + "," + (text2.length() - y); //$NON-NLS-1$
+                if (!front && (footsteps.containsKey(footstep)))
+                {
+                    done = true;
+                }
+                if (front)
+                {
+                    footsteps.put(footstep, new Integer(d));
+                }
+                while (!done && x < text1.length() && y < text2.length()
+                                && text1.charAt(text1.length() - x - 1)
+                                == text2.charAt(text2.length() - y - 1))
+                {
+                    x++;
+                    y++;
+                    footstep = (text1.length() - x) + "," + (text2.length() - y); //$NON-NLS-1$
+                    if (!front && (footsteps.containsKey(footstep)))
+                    {
+                        done = true;
+                    }
+                    if (front)
+                    {
+                        footsteps.put(footstep, new Integer(d));
+                    }
+                }
+
+                v2.put(new Integer(k), new Integer(x));
+                Set s = (Set) v_map2.get(d);
+                s.add(x + "," + y); //$NON-NLS-1$
+                if (done)
+                {
+                    // Reverse path ran over front path.
+                    Integer footstepValue = (Integer) footsteps.get(footstep);
+                    v_map1 = v_map1.subList(0, footstepValue.intValue() + 1);
+                    List a
+                    = path1(v_map1, text1.substring(0, text1.length() - x),
+                            text2.substring(0, text2.length() - y));
+                    a.addAll(path2(v_map2, text1.substring(text1.length() - x),
+                                   text2.substring(text2.length() - y)));
+                    return a;
+                }
+            }
+        }
+
+        // Number of diffs equals number of characters, no commonality at all.
         return null;
     }
-    else if (hm2 == null)
+
+    /**
+     * Work from the middle back to the start to determine the path.
+     * @param v_map List of path sets.
+     * @param text1 Old string fragment to be diffed
+     * @param text2 New string fragment to be diffed
+     * @return List of Diff objects
+     */
+    public static List path1(List v_map, String text1, String text2)
     {
-        hm = hm1;
+        List path = new ArrayList();
+        int x = text1.length();
+        int y = text2.length();
+        EditType last_op = null;
+        for (int d = v_map.size() - 2; d >= 0; d--)
+        {
+            while (true)
+            {
+                Set set = (Set) v_map.get(d);
+                if (set.contains((x - 1) + "," + y)) //$NON-NLS-1$
+                {
+                    x--;
+                    if (last_op == EditType.DELETE)
+                    {
+                        Difference firstDiff = (Difference) path.get(0);
+                        firstDiff.prependText(text1.charAt(x));
+                    }
+                    else
+                    {
+                        path.add(0, new Difference(EditType.DELETE, text1.substring(x, x + 1)));
+                    }
+                    last_op = EditType.DELETE;
+                    break;
+                }
+                else if (set.contains(x + "," + (y - 1))) //$NON-NLS-1$
+                {
+                    y--;
+                    if (last_op == EditType.INSERT)
+                    {
+                        Difference firstDiff = (Difference) path.get(0);
+                        firstDiff.prependText(text2.charAt(y));
+                    }
+                    else
+                    {
+                        path.add(0, new Difference(EditType.INSERT, text2.substring(y, y + 1)));
+                    }
+                    last_op = EditType.INSERT;
+                    break;
+                }
+                else
+                {
+                    x--;
+                    y--;
+                    assert (text1.charAt(x) == text2.charAt(y)) : "No diagonal.  Can't happen. (diff_path1)"; //$NON-NLS-1$
+                    if (last_op == EditType.EQUAL)
+                    {
+                        Difference firstDiff = (Difference) path.get(0);
+                        firstDiff.prependText(text1.charAt(x));
+                    }
+                    else
+                    {
+                        path.add(0, new Difference(EditType.EQUAL, text1.substring(x, x + 1)));
+                    }
+                    last_op = EditType.EQUAL;
+                }
+            }
+        }
+        return path;
     }
-    else if (hm1 == null)
+
+
+    /**
+     * Work from the middle back to the end to determine the path.
+     * @param v_map List of path sets.
+     * @param text1 Old string fragment to be diffed
+     * @param text2 New string fragment to be diffed
+     * @return List of Diff objects
+     */
+    public static List path2(List v_map, String text1, String text2)
     {
-        hm = hm2;
+        List path = new ArrayList();
+        int x = text1.length();
+        int y = text2.length();
+        EditType last_op = null;
+        for (int d = v_map.size() - 2; d >= 0; d--)
+        {
+            while (true)
+            {
+                Set set = (Set) v_map.get(d);
+                if (set.contains((x - 1) + "," + y)) //$NON-NLS-1$
+                {
+                    x--;
+                    if (last_op == EditType.DELETE)
+                    {
+                        Difference lastDiff = (Difference) path.get(path.size() - 1);
+                        lastDiff.appendText(text1.charAt(text1.length() - x - 1));
+                    }
+                    else
+                    {
+                        path.add(new Difference(EditType.DELETE,
+                                                text1.substring(text1.length() - x - 1, text1.length() - x)));
+                    }
+                    last_op = EditType.DELETE;
+                    break;
+                }
+                else if (set.contains(x + "," + (y - 1))) //$NON-NLS-1$
+                {
+                    y--;
+                    if (last_op == EditType.INSERT)
+                    {
+                        Difference lastDiff = (Difference) path.get(path.size() - 1);
+                        lastDiff.appendText(text2.charAt(text2.length() - y - 1));
+                    }
+                    else
+                    {
+                        path.add(new Difference(EditType.INSERT,
+                                                text2.substring(text2.length() - y - 1, text2.length() - y)));
+                    }
+                    last_op = EditType.INSERT;
+                    break;
+                }
+                else
+                {
+                    x--;
+                    y--;
+                    assert (text1.charAt(text1.length() - x - 1)
+                            == text2.charAt(text2.length() - y - 1))
+                            : "No diagonal.  Can't happen. (diff_path2)"; //$NON-NLS-1$
+
+                    if (last_op == EditType.EQUAL)
+                    {
+                        Difference lastDiff = (Difference) path.get(path.size() - 1);
+                        lastDiff.appendText(text1.charAt(text1.length() - x - 1));
+                    }
+                    else
+                    {
+                        path.add(new Difference(EditType.EQUAL,
+                                                text1.substring(text1.length() - x - 1, text1.length() - x)));
+                    }
+                    last_op = EditType.EQUAL;
+                }
+            }
+        }
+        return path;
     }
-    else // Both matched.  Select the longest.
+
+    /**
+     * Trim off common prefix
+     * @param text1 First string
+     * @param text2 Second string
+     * @return The number of characters common to the start of each string.
+     */
+    public static int commonPrefix(String text1, String text2)
     {
-        hm = hm1.getCommonality().length() > hm2.getCommonality().length() ? hm1 : hm2;
+        int pointermin = 0;
+        int pointermax = Math.min(text1.length(), text2.length());
+        int pointermid = pointermax;
+        while (pointermin < pointermid)
+        {
+            if (text1.substring(0, pointermid) == text2.substring(0, pointermid))
+            {
+                pointermin = pointermid;
+            }
+            else
+            {
+                pointermax = pointermid;
+            }
+            pointermid = (pointermax - pointermin) / 2 + pointermin;
+        }
+        return pointermid;
     }
 
-    String text1_a = ""; //$NON-NLS-1$
-    String text1_b = ""; //$NON-NLS-1$
-    String text2_a = ""; //$NON-NLS-1$
-    String text2_b = ""; //$NON-NLS-1$
 
-    // A half-match was found, sort out the return data.
-    if (text1.length() > text2.length())
+    /**
+     * Trim off common suffix
+     * @param text1 First string
+     * @param text2 Second string
+     * @return The number of characters common to the end of each string.
+     */
+    public static int commonSuffix(String text1, String text2)
     {
-        text1_a = hm.getLeftStart();
-        text1_b = hm.getRightStart();
-        text2_a = hm.getLeftEnd();
-        text2_b = hm.getRightEnd();
+        int pointermin = 0;
+        int pointermax = Math.min(text1.length(), text2.length());
+        int pointermid = pointermax;
+        while (pointermin < pointermid)
+        {
+            if (text1.substring(text1.length() - pointermid) == text2.substring(text2.length() - pointermid))
+            {
+                pointermin = pointermid;
+            }
+            else
+            {
+                pointermax = pointermid;
+            }
+            pointermid = (pointermax - pointermin) / 2 + pointermin;
+        }
+        return pointermid;
     }
-    else
+
+
+    /**
+     * Do the two texts share a substring which is at least half the length of the
+     * longer text?
+     * @param text1 First string
+     * @param text2 Second string
+     * @return Five element String array, containing the prefix of text1, the
+     *     suffix of text1, the prefix of text2, the suffix of text2 and the
+     *     common middle.  Or null if there was no match.
+     */
+    public static CommonMiddle halfMatch(String text1, String text2)
     {
-        text2_a = hm.getLeftStart();
-        text2_b = hm.getRightStart();
-        text1_a = hm.getRightEnd();
-        text1_b = hm.getRightEnd();
+        String longtext = text1.length() > text2.length() ? text1 : text2;
+        String shorttext = text1.length() > text2.length() ? text2 : text1;
+        int longtextLength = longtext.length();
+        if (longtextLength < 10 || shorttext.length() < 1)
+        {
+            return null; // Pointless.
+        }
+
+        // First check if the second quarter is the seed for a half-match.
+        CommonMiddle hm1 = halfMatch_i(longtext, shorttext, (int) Math.ceil(longtextLength/4));
+        // Check again based on the third quarter.
+        CommonMiddle hm2 = halfMatch_i(longtext, shorttext, (int) Math.ceil(longtextLength/2));
+        CommonMiddle hm = null;
+        if (hm1 == null && hm2 == null)
+        {
+            return null;
+        }
+        else if (hm2 == null)
+        {
+            hm = hm1;
+        }
+        else if (hm1 == null)
+        {
+            hm = hm2;
+        }
+        else // Both matched.  Select the longest.
+        {
+            hm = hm1.getCommonality().length() > hm2.getCommonality().length() ? hm1 : hm2;
+        }
+
+        // A half-match was found, sort out the return data.
+        if (text1.length() > text2.length())
+        {
+            return hm;
+        }
+        return new CommonMiddle(hm.getRightEnd(), hm.getRightEnd(), hm.getCommonality(), hm.getLeftStart(), hm.getRightStart());
     }
-    String mid_common = hm.getCommonality();
-    return new CommonMiddle(text1_a, text1_b, mid_common, text2_a, text2_b);
-}
 
-    private static CommonMiddle halfmatch_i(String longtext, String shorttext, int i)
+    /**
+     * Does a substring of shorttext exist within longtext such that the substring
+     * is at least half the length of longtext?
+     * @param longtext Longer string
+     * @param shorttext Shorter string
+     * @param i Start index of quarter length substring within longtext
+     * @return Five element String array, containing the prefix of longtext, the
+     *     suffix of longtext, the prefix of shorttext, the suffix of shorttext
+     *     and the common middle.  Or null if there was no match.
+     */
+    private static CommonMiddle halfMatch_i(String longtext, String shorttext, int i)
     {
         // Start with a 1/4 length substring at position i as a seed.
-        String seed = longtext.substring(i, i + (int) Math.floor(longtext.length()/4));
+        String seed = longtext.substring(i, i + (longtext.length()/4));
         int j = -1;
         String best_common = ""; //$NON-NLS-1$
         String best_longtext_a = ""; //$NON-NLS-1$
@@ -509,15 +759,15 @@
         String best_shorttext_b = ""; //$NON-NLS-1$
         while ((j = shorttext.indexOf(seed, j + 1)) != -1)
         {
-            CommonEnd my_prefix = Diff.prefix(longtext.substring(i), shorttext.substring(j));
-            CommonEnd my_suffix = Diff.suffix(longtext.substring(0, i), shorttext.substring(0, j));
-            if (best_common.length() < (my_suffix.getCommonality() + my_prefix.getCommonality()).length())
+            int prefixLength = Diff.commonPrefix(longtext.substring(i), shorttext.substring(j));
+            int suffixLength = Diff.commonSuffix(longtext.substring(0, i), shorttext.substring(0, j));
+            if (best_common.length() < (prefixLength + suffixLength))
             {
-                best_common = my_suffix.getCommonality() + my_prefix.getCommonality();
-                best_longtext_a = my_suffix.getLeft();
-                best_longtext_b = my_prefix.getLeft();
-                best_shorttext_a = my_suffix.getRight();
-                best_shorttext_b = my_prefix.getRight();
+                best_common = shorttext.substring(j - suffixLength, j) + shorttext.substring(j, j + prefixLength);
+                best_longtext_a = longtext.substring(0, i - suffixLength);
+                best_longtext_b = longtext.substring(i + prefixLength);
+                best_shorttext_a = shorttext.substring(0, j - suffixLength);
+                best_shorttext_b = shorttext.substring(j + prefixLength);
             }
         }
 
@@ -530,12 +780,15 @@
     }
 
 
-    //Reduce the number of edits by eliminating semantically trivial equalities.
-    public static void cleanup_semantic(List diffs)
+    /**
+     * Reduce the number of edits by eliminating semantically trivial equalities.
+     * @param diffs LinkedList of Diff objects
+     */
+    public static void cleanupSemantic(List diffs)
     {
         boolean changes = false;
         Stack equalities = new Stack(); // Stack of indices where equalities are found.
-        String lastequality = ""; //$NON-NLS-1$ // Always equal to diff[equalities[equalities.length-1]].getText()
+        String lastequality = ""; //$NON-NLS-1$ // Always equal to equalities.lastElement().getText()
         int length_changes1 = 0; // Number of characters that changed prior to the equality.
         int length_changes2 = 0; // Number of characters that changed after the equality.
         ListIterator pointer = diffs.listIterator();
@@ -555,7 +808,7 @@
             {
                 // an insertion or deletion
                 length_changes2 += curDiff.getText().length();
-                int lastLen = lastequality.length();
+                int lastLen = lastequality != null ? lastequality.length() : 0;
                 if (lastequality != null && (lastLen <= length_changes1) && (lastLen <= length_changes2))
                 {
                     // position pointer to the element after the one at the end of the stack
@@ -580,7 +833,7 @@
                         // There are no previous equalities, walk back to the start.
                         while (pointer.hasPrevious())
                         {
-                          pointer.previous();
+                            pointer.previous();
                         }
                     }
                     else
@@ -604,33 +857,42 @@
 
         if (changes)
         {
-            Diff.cleanup_merge(diffs);
+            Diff.cleanupMerge(diffs);
         }
     }
 
+    /**
+     * Reduce the number of edits by eliminating operationally trivial equalities.
+     * @param diffs LinkedList of Diff objects
+     */
+    public static void cleanupEfficiency(List diffs)
+    {
+        if (diffs.isEmpty())
+        {
+            return;
+        }
 
-    // Reduce the number of edits by eliminating operationally trivial equalities.
-    public static void cleanup_efficiency(List diff)
-    {
         boolean changes = false;
-        IntStack equalities = new IntStack(); // Stack of indices where equalities are found.
-        String lastequality = ""; //$NON-NLS-1$ // Always equal to diff[equalities[equalities.length-1]].getText();
-        int pointer = 0; // Index of current position.
+        Stack equalities = new Stack(); // Stack of indices where equalities are found.
+        String lastequality = null; // Always equal to equalities.lastElement().getText();
         int pre_ins = 0; // Is there an insertion operation before the last equality.
         int pre_del = 0; // Is there an deletion operation before the last equality.
         int post_ins = 0; // Is there an insertion operation after the last equality.
         int post_del = 0; // Is there an deletion operation after the last equality.
 
-        while (pointer < diff.size())
+        ListIterator pointer = diffs.listIterator();
+        Difference curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
+        Difference safeDiff = curDiff; // The last Diff that is known to be unsplitable.
+
+        while (curDiff != null)
         {
-            Difference curDiff = (Difference) diff.get(pointer);
             EditType diff_type = curDiff.getEditType();
             if (EditType.EQUAL.equals(diff_type)) // equality found
             {
-                if (curDiff.getText().length() < DIFF_EDIT_COST && (post_ins == 1 || post_del == 1))
+                if (curDiff.getText().length() < EDIT_COST && (post_ins + post_del) > 0)
                 {
                     // Candidate found.
-                    equalities.push(pointer);
+                    equalities.push(curDiff);
                     pre_ins = post_ins;
                     pre_del = post_del;
                     lastequality = curDiff.getText();
@@ -640,6 +902,7 @@
                     // Not a candidate, and can never become one.
                     equalities.clear();
                     lastequality = ""; //$NON-NLS-1$
+                    safeDiff = curDiff;
                 }
                 post_ins = 0;
                 post_del = 0;
@@ -662,166 +925,197 @@
                 // <ins>A</ins><del>B</del>X<ins>C</ins>
                 // <ins>A</del>X<ins>C</ins><del>D</del>
                 // <ins>A</ins><del>B</del>X<del>C</del>
-                if (lastequality.length() > 0 && (((pre_ins + pre_del + post_ins + post_del) > 0) || ((lastequality.length() < DIFF_EDIT_COST/2) && (pre_ins + pre_del + post_ins + post_del) == 3)))
+                if (lastequality != null && (((pre_ins + pre_del + post_ins + post_del) > 0) || ((lastequality.length() < EDIT_COST/2) && (pre_ins + pre_del + post_ins + post_del) == 3)))
                 {
-                    int newLoc = equalities.peek();
-                    Difference newLocDifference = (Difference) diff.get(newLoc);
-                    diff.add(newLoc, new Difference(EditType.DELETE, lastequality)); // Duplicate record
-                    newLocDifference.setEditType(EditType.INSERT); // Change following to insert.
+                    // position pointer to the element after the one at the end of the stack
+                    while (curDiff != equalities.lastElement()) {
+                        curDiff = (Difference) pointer.previous();
+                    }
+                    pointer.next();
+
+                    // Replace equality with a delete.
+                    pointer.set(new Difference(EditType.DELETE, lastequality));
+                    // Insert a coresponding an insert.
+                    curDiff = new Difference(EditType.INSERT, lastequality);
+                    pointer.add(curDiff);
+
                     equalities.pop();  // Throw away the equality we just deleted;
-                    lastequality = ""; //$NON-NLS-1$
+                    lastequality = null;
                     if (pre_ins == 1 && pre_del == 1)
                     {
                         // No changes made which could affect previous entry, keep going.
                         post_ins = 1;
                         post_del = 1;
                         equalities.clear();
+                        safeDiff = curDiff;
                     }
                     else
                     {
-                        equalities.pop();  // Throw away the previous equality;
-                        pointer = equalities.empty() ? -1 : equalities.peek();
+                        if (!equalities.empty())
+                        {
+                            // Throw away the previous equality;
+                            equalities.pop();
+                        }
+                        if (equalities.empty())
+                        {
+                            // There are no previous questionable equalities,
+                            // walk back to the last known safe diff.
+                            curDiff = safeDiff;
+                        }
+                        else
+                        {
+                            // There is an equality we can fall back to.
+                            curDiff = (Difference) equalities.lastElement();
+                        }
+                        while (curDiff != pointer.previous())
+                        {
+                            // Intentionally empty loop.
+                        }
+
                         post_ins = 0;
                         post_del = 0;
                     }
                     changes = true;
                 }
             }
-            pointer++;
+            curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
         }
 
         if (changes)
         {
-            Diff.cleanup_merge(diff);
+            Diff.cleanupMerge(diffs);
         }
     }
 
 
-    // Reorder and merge like edit sections.  Merge equalities.
-    // Any edit section can move as long as it doesn't cross an equality.
-    public static void cleanup_merge(List diff)
+    /**
+     * Reorder and merge like edit sections.  Merge equalities.
+     * Any edit section can move as long as it doesn't cross an equality.
+     * @param diffs LinkedList of Diff objects
+     */
+    public static void cleanupMerge(List diffs)
     {
         // Add a dummy entry at the end.
-        diff.add(new Difference(EditType.EQUAL, ""));  //$NON-NLS-1$
-        int pointer = 0;
+        diffs.add(new Difference(EditType.EQUAL, ""));  //$NON-NLS-1$
+
         int count_delete = 0;
         int count_insert = 0;
         String text_delete = ""; //$NON-NLS-1$
         String text_insert = ""; //$NON-NLS-1$
-        CommonEnd my_xfix = null;
-        while (pointer < diff.size())
+
+        int commonLength = 0;
+
+        ListIterator pointer = diffs.listIterator();
+        Difference curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
+        Difference prevEqual = null;
+        while (curDiff != null)
         {
-            Difference curDiff = (Difference) diff.get(pointer);
-            if (curDiff.getEditType() == EditType.INSERT)
+            EditType diff_type = curDiff.getEditType();
+            if (EditType.INSERT.equals(diff_type))
             {
                 count_insert++;
                 text_insert += curDiff.getText();
-                pointer++;
+                prevEqual = null;
             }
-            else if (curDiff.getEditType() == EditType.DELETE)
+            else if (EditType.DELETE.equals(diff_type))
             {
                 count_delete++;
                 text_delete += curDiff.getText();
-                pointer++;
+                prevEqual = null;
             }
-            else
+            else if (EditType.EQUAL.equals(diff_type))
             {
                 // Upon reaching an equality, check for prior redundancies.
                 if (count_delete != 0 || count_insert != 0)
                 {
-                    int modSize = count_delete + count_insert;
-                    int modStart = pointer - modSize;
+                    // Delete the offending records.
+                    pointer.previous(); // Reverse direction.
+                    while (count_delete-- > 0) {
+                      pointer.previous();
+                      pointer.remove();
+                    }
+                    while (count_insert-- > 0) {
+                      pointer.previous();
+                      pointer.remove();
+                    }
+
                     if (count_delete != 0 && count_insert != 0)
                     {
                         // Factor out any common prefixies.
-                        my_xfix = Diff.prefix(text_insert, text_delete);
-                        if (my_xfix.getCommonality().length() > 0)
+                        commonLength = Diff.commonPrefix(text_insert, text_delete);
+                        if (commonLength > 0)
                         {
-                            Difference preModDifference = null;
-                            if (modStart > 0)
+                            if (pointer.hasPrevious())
                             {
-                                preModDifference = (Difference) diff.get(modStart - 1);
+                                curDiff = (Difference) pointer.previous();
+                                assert curDiff.getEditType() == EditType.EQUAL : "Previous diff should have been an equality."; //$NON-NLS-1$
+                                curDiff.appendText(text_insert.substring(0, commonLength));
+                                pointer.next();
                             }
-                            if (preModDifference != null && EditType.EQUAL.equals(preModDifference.getEditType()))
-                            {
-                                preModDifference.appendText(my_xfix.getCommonality());
-                            }
                             else
                             {
-                                // TODO(DMS): Is this correct? shouldn't it be modStart??? was splice(0,0,...)
-                                diff.add(0, new Difference(EditType.EQUAL, my_xfix.getCommonality()));
-                                pointer++;
+                                pointer.add(new Difference(EditType.EQUAL, text_insert.substring(0, commonLength)));
                             }
-                            text_insert = my_xfix.getLeft();
-                            text_delete = my_xfix.getRight();
+                            text_insert = text_insert.substring(commonLength);
+                            text_delete = text_delete.substring(commonLength);
                         }
 
                         // Factor out any common suffixies.
-                        my_xfix = Diff.suffix(text_insert, text_delete);
-                        if (my_xfix.getCommonality().length() > 0)
+                        commonLength = Diff.commonSuffix(text_insert, text_delete);
+                        if (commonLength > 0)
                         {
-                            text_insert = my_xfix.getLeft();
-                            text_delete = my_xfix.getRight();
-                            curDiff.setText(my_xfix.getCommonality() + curDiff.getText());
+                            curDiff = (Difference) pointer.next();
+                            curDiff.prependText(text_insert.substring(text_insert.length() - commonLength));
+                            text_insert = text_insert.substring(0, text_insert.length() - commonLength);
+                            text_delete = text_delete.substring(0, text_delete.length() - commonLength);
+                            pointer.previous();
                         }
                     }
 
-                    // Delete the offending records and add the merged ones.
-                    diff.subList(modStart, modSize).clear();
-                    if (count_delete == 0)
+                    // Insert the merged records.
+                    if (text_delete.length() != 0)
                     {
-                        diff.add(modStart, new Difference(EditType.INSERT, text_insert));
+                        pointer.add(new Difference(EditType.DELETE, text_delete));
                     }
-                    else if (count_insert == 0)
+
+                    if (text_insert.length() != 0)
                     {
-                        diff.add(modStart, new Difference(EditType.DELETE, text_delete));
+                        pointer.add(new Difference(EditType.INSERT, text_insert));
                     }
-                    else
-                    {
-                        diff.add(modStart, new Difference(EditType.DELETE, text_delete));
-                        diff.add(modStart + 1, new Difference(EditType.INSERT, text_insert));
-                    }
 
-                    // Adjust pointer to account for the delete and the addition of one of two Differences
-                    // And to point to the next
-                    pointer = pointer - modSize + (count_delete > 0 ? 1 : 0) + (count_insert > 0 ? 1 : 0) + 1;
+                    // Step forward to the equality.
+                    curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
                 }
-                else
+                else if (prevEqual != null)
                 {
-                    Difference prevDiff = null;
-                    if (pointer > 0)
-                    {
-                        prevDiff = (Difference) diff.get(pointer - 1);
-                    }
-
-                    if (prevDiff != null && EditType.EQUAL.equals(prevDiff.getEditType()))
-                    {
-                        // Merge this equality with the previous one.
-                        prevDiff.appendText(curDiff.getText());
-                        diff.remove(pointer);
-                    }
-                    else
-                    {
-                        pointer++;
-                    }
+                    // Merge this equality with the previous one.
+                    prevEqual.appendText(curDiff.getText());
+                    pointer.remove();
+                    curDiff = (Difference) pointer.previous();
+                    pointer.next(); // Forward direction
                 }
 
                 count_insert = 0;
                 count_delete = 0;
                 text_delete = ""; //$NON-NLS-1$
                 text_insert = ""; //$NON-NLS-1$
+                prevEqual = curDiff;
             }
+            curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
         }
 
-        Difference lastDiff = (Difference) diff.get(diff.size() - 1);
+        Difference lastDiff = (Difference) diffs.get(diffs.size() - 1);
         if (lastDiff.getText().length() == 0)
         {
-            diff.remove(diff.size() - 1);  // Remove the dummy entry at the end.
+            diffs.remove(diffs.size() - 1);  // Remove the dummy entry at the end.
         }
     }
 
-    //Add an index to each tuple, represents where the tuple is located in text2.
-    //e.g. [new Difference(EditType.DELETE, 'h', 0], new Difference(EditType.INSERT, 'c', 0], new Difference(EditType.EQUAL, 'at', 1]]
+    /**
+     * Add an index to each Diff, represents where the Diff is located in text2.
+     * e.g. [(DELETE, "h", 0), (INSERT, "c", 0), (EQUAL, "at", 1)]
+     * @param diffs LinkedList of Diff objects
+     */
     private static void addindex(List diffs)
     {
         int i = 0;
@@ -836,9 +1130,15 @@
         }
     }
 
-
-    //loc is a location in text1, compute and return the equivalent location in text2.
-    public static int xindex(List diffs, int loc)
+    /**
+     * loc is a location in text1, compute and return the equivalent location in
+     * text2.
+     * e.g. "The cat" vs "The big cat", 1->1, 5->8
+     * @param diffs LinkedList of Diff objects
+     * @param loc Location within text1
+     * @return Location within text2
+     */
+    public static int xIndex(List diffs, int loc)
     {
         // e.g. "The cat" vs "The big cat", 1->1, 5->8
         int chars1 = 0;
@@ -879,9 +1179,12 @@
         return last_chars2 + (loc - last_chars1);
     }
 
-
-    //Convert a diff array into a pretty HTML report.
-    public static String prettyhtml(List diffs)
+    /**
+     * Convert a Diff list into a pretty HTML report.
+     * @param diffs LinkedList of Diff objects
+     * @return HTML representation
+     */
+    public static String prettyHtml(List diffs)
     {
         Diff.addindex(diffs);
         StringBuffer buf = new StringBuffer();
@@ -923,85 +1226,6 @@
     }
 
     /**
-     * Represents a common prefix or a common suffix.
-     */
-    public static class CommonEnd
-    {
-        /**
-         * @param left
-         * @param right
-         * @param commonality
-         */
-        public CommonEnd(String left, String right, String commonality)
-        {
-            this.left = left;
-            this.right = right;
-            this.commonality = commonality;
-        }
-
-        /**
-         * @return the left
-         */
-        public String getLeft()
-        {
-            return left;
-        }
-
-        /**
-         * @return the right
-         */
-        public String getRight()
-        {
-            return right;
-        }
-
-        /**
-         * @return the commonality
-         */
-        public String getCommonality()
-        {
-            return commonality;
-        }
-
-        private String left;
-        private String right;
-        private String commonality;
-    }
-
-    /**
-     * Represents a common prefix
-     */
-    public static class CommonPrefix extends CommonEnd
-    {
-        /**
-         * @param left
-         * @param right
-         * @param commonality
-         */
-        public CommonPrefix(String left, String right, String commonality)
-        {
-            super(left, right, commonality);
-        }
-        
-    }
-
-    /**
-     * Represents a common suffix.
-     */
-    public static class CommonSuffix extends CommonEnd
-    {
-        /**
-         * @param left
-         * @param right
-         * @param commonality
-         */
-        public CommonSuffix(String left, String right, String commonality)
-        {
-            super(left, right, commonality);
-        }
-    }
-
-    /**
      * Represents a common middle.
      */
     public static class CommonMiddle
@@ -1073,11 +1297,11 @@
     /**
      * Number of seconds to map a diff before giving up.  (0 for infinity)
      */
-    public static final double DIFF_TIMEOUT = 1.0;
+    public static final double TIMEOUT = 1.0;
 
     /**
      * Cost of an empty edit operation in terms of edit characters.
      */
-    public static final int DIFF_EDIT_COST = 4;
+    public static final int EDIT_COST = 4;
 
 }

Modified: trunk/common/src/main/java/org/crosswire/common/diff/Difference.java
===================================================================
--- trunk/common/src/main/java/org/crosswire/common/diff/Difference.java	2007-05-21 20:43:38 UTC (rev 1336)
+++ trunk/common/src/main/java/org/crosswire/common/diff/Difference.java	2007-05-22 01:05:57 UTC (rev 1337)
@@ -50,7 +50,7 @@
      */
     public void setEditType(EditType newEditType)
     {
-        this.editType = newEditType;
+        editType = newEditType;
     }
 
     /**
@@ -66,7 +66,7 @@
      */
     public void setText(String newText)
     {
-        this.text = newText;
+        text = newText;
     }
 
     /**
@@ -82,7 +82,7 @@
      */
     public void setIndex(int newIndex)
     {
-        this.index = newIndex;
+        index = newIndex;
     }
 
     /**
@@ -90,10 +90,34 @@
      */
     public void appendText(String addText)
     {
-        this.text += addText;
+        text += addText;
     }
 
     /**
+     * @param text the text to set
+     */
+    public void appendText(char addText)
+    {
+        text += addText;
+    }
+
+    /**
+     * @param text the text to set
+     */
+    public void prependText(String addText)
+    {
+        text = addText + text;
+    }
+
+    /**
+     * @param text the text to set
+     */
+    public void prependText(char addText)
+    {
+        text = addText + text;
+    }
+
+    /**
      * The edit to perform
      */
     private EditType editType;

Modified: trunk/common/src/main/java/org/crosswire/common/diff/Patch.java
===================================================================
--- trunk/common/src/main/java/org/crosswire/common/diff/Patch.java	2007-05-21 20:43:38 UTC (rev 1336)
+++ trunk/common/src/main/java/org/crosswire/common/diff/Patch.java	2007-05-22 01:05:57 UTC (rev 1337)
@@ -21,8 +21,8 @@
         List diffs = Diff.main(text1, text2, true);
         if (diffs.size() > 2)
         {
-            Diff.cleanup_semantic(diffs);
-            Diff.cleanup_efficiency(diffs);
+            Diff.cleanupSemantic(diffs);
+            Diff.cleanupEfficiency(diffs);
         }
         return make(text1, text2, diffs);
     }
@@ -183,7 +183,7 @@
                         EditType editType = aDiff.getEditType();
                         if (!EditType.EQUAL.equals(editType))
                         {
-                            index2 = Diff.xindex(diffs, index1);
+                            index2 = Diff.xIndex(diffs, index1);
                         }
 
                         if (EditType.INSERT.equals(editType)) // Insertion
@@ -192,7 +192,7 @@
                         }
                         else if (EditType.DELETE.equals(editType)) // Deletion
                         {
-                            resultText = resultText.substring(0, start_loc + index2) + resultText.substring(start_loc + Diff.xindex(diffs, index1 + aDiff.getText().length()));
+                            resultText = resultText.substring(0, start_loc + index2) + resultText.substring(start_loc + Diff.xIndex(diffs, index1 + aDiff.getText().length()));
                         }
 
                         if (!EditType.DELETE.equals(editType))




More information about the jsword-svn mailing list