Coverage Report - org.crosswire.common.diff.DiffCleanup
 
Classes in this File Line Coverage Branch Coverage Complexity
DiffCleanup
0%
0/168
0%
0/110
11.8
 
 1  
 /**
 2  
  * Distribution License:
 3  
  * JSword is free software; you can redistribute it and/or modify it under
 4  
  * the terms of the GNU Lesser General Public License, version 2.1 or later
 5  
  * as published by the Free Software Foundation. This program is distributed
 6  
  * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 7  
  * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 8  
  * See the GNU Lesser General Public License for more details.
 9  
  *
 10  
  * The License is available on the internet at:
 11  
  *      http://www.gnu.org/copyleft/lgpl.html
 12  
  * or by writing to:
 13  
  *      Free Software Foundation, Inc.
 14  
  *      59 Temple Place - Suite 330
 15  
  *      Boston, MA 02111-1307, USA
 16  
  *
 17  
  * © CrossWire Bible Society, 2005 - 2016
 18  
  *
 19  
  */
 20  
 package org.crosswire.common.diff;
 21  
 
 22  
 import java.util.List;
 23  
 import java.util.ListIterator;
 24  
 import java.util.Stack;
 25  
 
 26  
 /**
 27  
  * Various Strategies to cleanup a diff list.
 28  
  * 
 29  
  * Based on the LGPL Diff_Match_Patch v1.5 javascript of Neil Fraser, Copyright (C) 2006<br>
 30  
  * <a href="http://neil.fraser.name/software/diff_match_patch/">http://neil.fraser.name/software/diff_match_patch/</a>
 31  
  * 
 32  
  * @see gnu.lgpl.License The GNU Lesser General Public License for details.
 33  
  * @author DM Smith
 34  
  */
 35  0
 public final class DiffCleanup {
 36  
     /**
 37  
      * Utility class constructor.
 38  
      */
 39  0
     private DiffCleanup() {
 40  0
     }
 41  
 
 42  
     /**
 43  
      * Reduce the number of edits by eliminating semantically trivial
 44  
      * equalities.
 45  
      * 
 46  
      * @param diffs
 47  
      *            List of Difference objects
 48  
      */
 49  
     public static void cleanupSemantic(final List<Difference> diffs) {
 50  0
         boolean changes = false;
 51  0
         Stack<Difference> equalities = new Stack<Difference>(); // Stack of indices where equalities are
 52  
         // found.
 53  0
         String lastEquality = null; // Always equal to
 54  
         // equalities.lastElement().getText()
 55  0
         int lengthChangesPre = 0; // Number of characters that changed prior to
 56  
         // the equality.
 57  0
         int lengthChangesPost = 0; // Number of characters that changed after
 58  
         // the equality.
 59  0
         ListIterator<Difference> pointer = diffs.listIterator();
 60  0
         Difference curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
 61  0
         while (curDiff != null) {
 62  0
             EditType editType = curDiff.getEditType();
 63  0
             if (EditType.EQUAL.equals(editType)) {
 64  
                 // equality found
 65  0
                 equalities.push(curDiff);
 66  0
                 lengthChangesPre = lengthChangesPost;
 67  0
                 lengthChangesPost = 0;
 68  0
                 lastEquality = curDiff.getText();
 69  
             } else {
 70  
                 // an insertion or deletion
 71  0
                 lengthChangesPost += curDiff.getText().length();
 72  0
                 int lastLen = lastEquality != null ? lastEquality.length() : 0;
 73  0
                 if (lastEquality != null && lastLen <= lengthChangesPre && lastLen <= lengthChangesPost) {
 74  
                     // position pointer to the element after the one at the end
 75  
                     // of the stack
 76  0
                     while (curDiff != equalities.lastElement()) {
 77  0
                         curDiff = pointer.previous();
 78  
                     }
 79  0
                     pointer.next();
 80  
 
 81  
                     // Replace equality with a delete.
 82  0
                     pointer.set(new Difference(EditType.DELETE, lastEquality));
 83  
                     // Insert a corresponding an insert.
 84  0
                     pointer.add(new Difference(EditType.INSERT, lastEquality));
 85  0
                     equalities.pop(); // Throw away the equality we just
 86  
                     // deleted;
 87  0
                     if (!equalities.empty()) {
 88  
                         // Throw away the previous equality (it needs to be
 89  
                         // reevaluated).
 90  0
                         equalities.pop();
 91  
                     }
 92  0
                     if (equalities.empty()) {
 93  
                         // There are no previous equalities, walk back to the
 94  
                         // start.
 95  0
                         while (pointer.hasPrevious()) {
 96  0
                             pointer.previous();
 97  
                         }
 98  
                     } else {
 99  
                         // There is a safe equality we can fall back to.
 100  0
                         curDiff = equalities.lastElement();
 101  0
                         while (curDiff != pointer.previous()) {
 102  
                             // Intentionally empty loop.
 103  
                         }
 104  
                     }
 105  
 
 106  0
                     lengthChangesPre = 0; // Reset the counters.
 107  0
                     lengthChangesPost = 0;
 108  0
                     lastEquality = null;
 109  0
                     changes = true;
 110  
                 }
 111  
             }
 112  0
             curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
 113  0
         }
 114  
 
 115  0
         if (changes) {
 116  0
             cleanupMerge(diffs);
 117  
         }
 118  0
     }
 119  
 
 120  
     /**
 121  
      * Reduce the number of edits by eliminating operationally trivial
 122  
      * equalities.
 123  
      * 
 124  
      * @param diffs
 125  
      *            List of Difference objects
 126  
      */
 127  
     public static void cleanupEfficiency(final List<Difference> diffs) {
 128  0
         if (diffs.isEmpty()) {
 129  0
             return;
 130  
         }
 131  
 
 132  0
         boolean changes = false;
 133  0
         Stack<Difference> equalities = new Stack<Difference>(); // Stack of indices where equalities are
 134  
         // found.
 135  0
         String lastEquality = null; // Always equal to
 136  
         // equalities.lastElement().getText();
 137  0
         int preInsert = 0; // Is there an insertion operation before the last
 138  
         // equality.
 139  0
         int preDelete = 0; // Is there an deletion operation before the last
 140  
         // equality.
 141  0
         int postInsert = 0; // Is there an insertion operation after the last
 142  
         // equality.
 143  0
         int postDelete = 0; // Is there an deletion operation after the last
 144  
         // equality.
 145  
 
 146  0
         ListIterator<Difference> pointer = diffs.listIterator();
 147  0
         Difference curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
 148  0
         Difference safeDiff = curDiff; // The last Difference that is known to
 149  
         // be unsplitable.
 150  
 
 151  0
         while (curDiff != null) {
 152  0
             EditType editType = curDiff.getEditType();
 153  0
             if (EditType.EQUAL.equals(editType)) {
 154  
                 // equality found
 155  0
                 if (curDiff.getText().length() < editCost && (postInsert + postDelete) > 0) {
 156  
                     // Candidate found.
 157  0
                     equalities.push(curDiff);
 158  0
                     preInsert = postInsert;
 159  0
                     preDelete = postDelete;
 160  0
                     lastEquality = curDiff.getText();
 161  
                 } else {
 162  
                     // Not a candidate, and can never become one.
 163  0
                     equalities.clear();
 164  0
                     lastEquality = null;
 165  0
                     safeDiff = curDiff;
 166  
                 }
 167  0
                 postInsert = 0;
 168  0
                 postDelete = 0;
 169  
             } else {
 170  
                 // an insertion or deletion
 171  0
                 if (EditType.DELETE.equals(editType)) {
 172  0
                     postDelete = 1;
 173  
                 } else {
 174  0
                     postInsert = 1;
 175  
                 }
 176  
 
 177  
                 // Five types to be split:
 178  
                 // <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
 179  
                 // <ins>A</ins>X<ins>C</ins><del>D</del>
 180  
                 // <ins>A</ins><del>B</del>X<ins>C</ins>
 181  
                 // <ins>A</del>X<ins>C</ins><del>D</del>
 182  
                 // <ins>A</ins><del>B</del>X<del>C</del>
 183  0
                 if (lastEquality != null
 184  
                         && (((preInsert + preDelete + postInsert + postDelete) > 0)
 185  
                                 || ((lastEquality.length() < editCost / 2)
 186  
                                         && (preInsert + preDelete + postInsert + postDelete) == 3)))
 187  
                 {
 188  
                     // position pointer to the element after the one at the end
 189  
                     // of the stack
 190  0
                     while (curDiff != equalities.lastElement()) {
 191  0
                         curDiff = pointer.previous();
 192  
                     }
 193  0
                     pointer.next();
 194  
 
 195  
                     // Replace equality with a delete.
 196  0
                     pointer.set(new Difference(EditType.DELETE, lastEquality));
 197  
                     // Insert a corresponding an insert.
 198  0
                     curDiff = new Difference(EditType.INSERT, lastEquality);
 199  0
                     pointer.add(curDiff);
 200  
 
 201  0
                     equalities.pop(); // Throw away the equality we just
 202  
                     // deleted;
 203  0
                     lastEquality = null;
 204  0
                     if (preInsert == 1 && preDelete == 1) {
 205  
                         // No changes made which could affect previous entry,
 206  
                         // keep going.
 207  0
                         postInsert = 1;
 208  0
                         postDelete = 1;
 209  0
                         equalities.clear();
 210  0
                         safeDiff = curDiff;
 211  
                     } else {
 212  0
                         if (!equalities.empty()) {
 213  
                             // Throw away the previous equality;
 214  0
                             equalities.pop();
 215  
                         }
 216  0
                         if (equalities.empty()) {
 217  
                             // There are no previous questionable equalities,
 218  
                             // walk back to the last known safe diff.
 219  0
                             curDiff = safeDiff;
 220  
                         } else {
 221  
                             // There is an equality we can fall back to.
 222  0
                             curDiff = equalities.lastElement();
 223  
                         }
 224  0
                         while (curDiff != pointer.previous()) {
 225  
                             // Intentionally empty loop.
 226  
                         }
 227  
 
 228  0
                         postInsert = 0;
 229  0
                         postDelete = 0;
 230  
                     }
 231  0
                     changes = true;
 232  
                 }
 233  
             }
 234  0
             curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
 235  0
         }
 236  
 
 237  0
         if (changes) {
 238  0
             cleanupMerge(diffs);
 239  
         }
 240  0
     }
 241  
 
 242  
     /**
 243  
      * Reorder and merge like edit sections. Merge equalities. Any edit section
 244  
      * can move as long as it doesn't cross an equality.
 245  
      * 
 246  
      * @param diffs
 247  
      *            List of Difference objects
 248  
      */
 249  
     public static void cleanupMerge(final List<Difference> diffs) {
 250  
         // Add a dummy entry at the end.
 251  0
         diffs.add(new Difference(EditType.EQUAL, ""));
 252  
 
 253  0
         int countDelete = 0;
 254  0
         int countInsert = 0;
 255  0
         StringBuilder textDelete = new StringBuilder();
 256  0
         StringBuilder textInsert = new StringBuilder();
 257  
 
 258  0
         int commonLength = 0;
 259  
 
 260  0
         ListIterator<Difference> pointer = diffs.listIterator();
 261  0
         Difference curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
 262  0
         Difference prevEqual = null;
 263  0
         while (curDiff != null) {
 264  0
             EditType editType = curDiff.getEditType();
 265  0
             if (EditType.INSERT.equals(editType)) {
 266  0
                 countInsert++;
 267  0
                 textInsert.append(curDiff.getText());
 268  0
                 prevEqual = null;
 269  0
             } else if (EditType.DELETE.equals(editType)) {
 270  0
                 countDelete++;
 271  0
                 textDelete.append(curDiff.getText());
 272  0
                 prevEqual = null;
 273  0
             } else if (EditType.EQUAL.equals(editType)) {
 274  
                 // Upon reaching an equality, check for prior redundancies.
 275  0
                 if (countDelete != 0 || countInsert != 0) {
 276  
                     // Delete the offending records.
 277  0
                     pointer.previous(); // Reverse direction.
 278  0
                     while (countDelete-- > 0) {
 279  0
                         pointer.previous();
 280  0
                         pointer.remove();
 281  
                     }
 282  0
                     while (countInsert-- > 0) {
 283  0
                         pointer.previous();
 284  0
                         pointer.remove();
 285  
                     }
 286  
 
 287  0
                     if (countDelete != 0 && countInsert != 0) {
 288  
                         // Factor out any common prefixes.
 289  0
                         commonLength = Commonality.prefix(textInsert.toString(), textDelete.toString());
 290  0
                         if (commonLength > 0) {
 291  0
                             if (pointer.hasPrevious()) {
 292  0
                                 curDiff = pointer.previous();
 293  0
                                 assert EditType.EQUAL.equals(curDiff.getEditType()) : "Previous diff should have been an equality.";
 294  0
                                 curDiff.appendText(textInsert.substring(0, commonLength));
 295  0
                                 pointer.next();
 296  
                             } else {
 297  0
                                 pointer.add(new Difference(EditType.EQUAL, textInsert.substring(0, commonLength)));
 298  
                             }
 299  0
                             textInsert.replace(0, textInsert.length(), textInsert.substring(commonLength));
 300  0
                             textDelete.replace(0, textDelete.length(), textDelete.substring(commonLength));
 301  
                         }
 302  
 
 303  
                         // Factor out any common suffixes.
 304  0
                         commonLength = Commonality.suffix(textInsert.toString(), textDelete.toString());
 305  0
                         if (commonLength > 0) {
 306  0
                             curDiff = pointer.next();
 307  0
                             curDiff.prependText(textInsert.substring(textInsert.length() - commonLength));
 308  0
                             textInsert.replace(0, textInsert.length(), textInsert.substring(0, textInsert.length() - commonLength));
 309  0
                             textDelete.replace(0, textDelete.length(), textDelete.substring(0, textDelete.length() - commonLength));
 310  0
                             pointer.previous();
 311  
                         }
 312  
                     }
 313  
 
 314  
                     // Insert the merged records.
 315  0
                     if (textDelete.length() != 0) {
 316  0
                         pointer.add(new Difference(EditType.DELETE, textDelete.toString()));
 317  
                     }
 318  
 
 319  0
                     if (textInsert.length() != 0) {
 320  0
                         pointer.add(new Difference(EditType.INSERT, textInsert.toString()));
 321  
                     }
 322  
 
 323  
                     // Step forward to the equality.
 324  0
                     curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
 325  0
                 } else if (prevEqual != null) {
 326  
                     // Merge this equality with the previous one.
 327  0
                     prevEqual.appendText(curDiff.getText());
 328  0
                     pointer.remove();
 329  0
                     curDiff = pointer.previous();
 330  0
                     pointer.next(); // Forward direction
 331  
                 }
 332  
 
 333  0
                 countInsert = 0;
 334  0
                 countDelete = 0;
 335  0
                 textDelete.delete(0, textDelete.length());
 336  0
                 textInsert.delete(0, textInsert.length());
 337  0
                 prevEqual = curDiff;
 338  
             }
 339  0
             curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
 340  0
         }
 341  
 
 342  0
         Difference lastDiff = diffs.get(diffs.size() - 1);
 343  0
         if (lastDiff.getText().length() == 0) {
 344  0
             diffs.remove(diffs.size() - 1); // Remove the dummy entry at the
 345  
             // end.
 346  
         }
 347  0
     }
 348  
 
 349  
     /**
 350  
      * Set the edit cost for efficiency
 351  
      * 
 352  
      * @param newEditCost the edit cost
 353  
      */
 354  
     public static void setEditCost(int newEditCost) {
 355  0
         editCost = newEditCost;
 356  0
     }
 357  
 
 358  
     /**
 359  
      * Cost of an empty edit operation in terms of edit characters.
 360  
      */
 361  
     private static final int EDIT_COST = 4;
 362  0
     private static int editCost = EDIT_COST;
 363  
 }