1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 as published by
5    * the Free Software Foundation. This program is distributed in the hope
6    * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7    * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *       http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * Copyright: 2005
18   *     The copyright to this program is held by it's authors.
19   *
20   * ID: $Id: DiffCleanup.java 2149 2011-04-09 15:51:45Z dmsmith $
21   */
22  package org.crosswire.common.diff;
23  
24  import java.util.List;
25  import java.util.ListIterator;
26  import java.util.Stack;
27  
28  /**
29   * Various Strategies to cleanup a diff list.
30   * 
31   * Based on the LGPL Diff_Match_Patch v1.5 javascript of Neil Fraser, Copyright (C) 2006<br>
32   * <a href="http://neil.fraser.name/software/diff_match_patch/">http://neil.fraser.name/software/diff_match_patch/</a>
33   * 
34   * @see gnu.lgpl.License for license details.<br>
35   *      The copyright to this program is held by it's authors.
36   * @author DM Smith [dmsmith555 at yahoo dot com]
37   */
38  public final class DiffCleanup {
39      /**
40       * Utility class constructor.
41       */
42      private DiffCleanup() {
43      }
44  
45      /**
46       * Reduce the number of edits by eliminating semantically trivial
47       * equalities.
48       * 
49       * @param diffs
50       *            List of Difference objects
51       */
52      public static void cleanupSemantic(final List<Difference> diffs) {
53          boolean changes = false;
54          Stack<Difference> equalities = new Stack<Difference>(); // Stack of indices where equalities are
55          // found.
56          String lastEquality = null; // Always equal to
57          // equalities.lastElement().getText()
58          int lengthChangesPre = 0; // Number of characters that changed prior to
59          // the equality.
60          int lengthChangesPost = 0; // Number of characters that changed after
61          // the equality.
62          ListIterator<Difference> pointer = diffs.listIterator();
63          Difference curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
64          while (curDiff != null) {
65              EditType editType = curDiff.getEditType();
66              if (EditType.EQUAL.equals(editType)) {
67                  // equality found
68                  equalities.push(curDiff);
69                  lengthChangesPre = lengthChangesPost;
70                  lengthChangesPost = 0;
71                  lastEquality = curDiff.getText();
72              } else {
73                  // an insertion or deletion
74                  lengthChangesPost += curDiff.getText().length();
75                  int lastLen = lastEquality != null ? lastEquality.length() : 0;
76                  if (lastEquality != null && lastLen <= lengthChangesPre && lastLen <= lengthChangesPost) {
77                      // position pointer to the element after the one at the end
78                      // of the stack
79                      while (curDiff != equalities.lastElement()) {
80                          curDiff = pointer.previous();
81                      }
82                      pointer.next();
83  
84                      // Replace equality with a delete.
85                      pointer.set(new Difference(EditType.DELETE, lastEquality));
86                      // Insert a corresponding an insert.
87                      pointer.add(new Difference(EditType.INSERT, lastEquality));
88                      equalities.pop(); // Throw away the equality we just
89                      // deleted;
90                      if (!equalities.empty()) {
91                          // Throw away the previous equality (it needs to be
92                          // reevaluated).
93                          equalities.pop();
94                      }
95                      if (equalities.empty()) {
96                          // There are no previous equalities, walk back to the
97                          // start.
98                          while (pointer.hasPrevious()) {
99                              pointer.previous();
100                         }
101                     } else {
102                         // There is a safe equality we can fall back to.
103                         curDiff = equalities.lastElement();
104                         while (curDiff != pointer.previous()) {
105                             // Intentionally empty loop.
106                         }
107                     }
108 
109                     lengthChangesPre = 0; // Reset the counters.
110                     lengthChangesPost = 0;
111                     lastEquality = null;
112                     changes = true;
113                 }
114             }
115             curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
116         }
117 
118         if (changes) {
119             cleanupMerge(diffs);
120         }
121     }
122 
123     /**
124      * Reduce the number of edits by eliminating operationally trivial
125      * equalities.
126      * 
127      * @param diffs
128      *            List of Difference objects
129      */
130     public static void cleanupEfficiency(final List<Difference> diffs) {
131         if (diffs.isEmpty()) {
132             return;
133         }
134 
135         boolean changes = false;
136         Stack<Difference> equalities = new Stack<Difference>(); // Stack of indices where equalities are
137         // found.
138         String lastEquality = null; // Always equal to
139         // equalities.lastElement().getText();
140         int preInsert = 0; // Is there an insertion operation before the last
141         // equality.
142         int preDelete = 0; // Is there an deletion operation before the last
143         // equality.
144         int postInsert = 0; // Is there an insertion operation after the last
145         // equality.
146         int postDelete = 0; // Is there an deletion operation after the last
147         // equality.
148 
149         ListIterator<Difference> pointer = diffs.listIterator();
150         Difference curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
151         Difference safeDiff = curDiff; // The last Difference that is known to
152         // be unsplitable.
153 
154         while (curDiff != null) {
155             EditType editType = curDiff.getEditType();
156             if (EditType.EQUAL.equals(editType)) {
157                 // equality found
158                 if (curDiff.getText().length() < editCost && (postInsert + postDelete) > 0) {
159                     // Candidate found.
160                     equalities.push(curDiff);
161                     preInsert = postInsert;
162                     preDelete = postDelete;
163                     lastEquality = curDiff.getText();
164                 } else {
165                     // Not a candidate, and can never become one.
166                     equalities.clear();
167                     lastEquality = null;
168                     safeDiff = curDiff;
169                 }
170                 postInsert = 0;
171                 postDelete = 0;
172             } else {
173                 // an insertion or deletion
174                 if (EditType.DELETE.equals(editType)) {
175                     postDelete = 1;
176                 } else {
177                     postInsert = 1;
178                 }
179 
180                 // Five types to be split:
181                 // <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
182                 // <ins>A</ins>X<ins>C</ins><del>D</del>
183                 // <ins>A</ins><del>B</del>X<ins>C</ins>
184                 // <ins>A</del>X<ins>C</ins><del>D</del>
185                 // <ins>A</ins><del>B</del>X<del>C</del>
186                 if (lastEquality != null
187                         && (((preInsert + preDelete + postInsert + postDelete) > 0)
188                                 || ((lastEquality.length() < editCost / 2)
189                                         && (preInsert + preDelete + postInsert + postDelete) == 3)))
190                 {
191                     // position pointer to the element after the one at the end
192                     // of the stack
193                     while (curDiff != equalities.lastElement()) {
194                         curDiff = pointer.previous();
195                     }
196                     pointer.next();
197 
198                     // Replace equality with a delete.
199                     pointer.set(new Difference(EditType.DELETE, lastEquality));
200                     // Insert a corresponding an insert.
201                     curDiff = new Difference(EditType.INSERT, lastEquality);
202                     pointer.add(curDiff);
203 
204                     equalities.pop(); // Throw away the equality we just
205                     // deleted;
206                     lastEquality = null;
207                     if (preInsert == 1 && preDelete == 1) {
208                         // No changes made which could affect previous entry,
209                         // keep going.
210                         postInsert = 1;
211                         postDelete = 1;
212                         equalities.clear();
213                         safeDiff = curDiff;
214                     } else {
215                         if (!equalities.empty()) {
216                             // Throw away the previous equality;
217                             equalities.pop();
218                         }
219                         if (equalities.empty()) {
220                             // There are no previous questionable equalities,
221                             // walk back to the last known safe diff.
222                             curDiff = safeDiff;
223                         } else {
224                             // There is an equality we can fall back to.
225                             curDiff = equalities.lastElement();
226                         }
227                         while (curDiff != pointer.previous()) {
228                             // Intentionally empty loop.
229                         }
230 
231                         postInsert = 0;
232                         postDelete = 0;
233                     }
234                     changes = true;
235                 }
236             }
237             curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
238         }
239 
240         if (changes) {
241             cleanupMerge(diffs);
242         }
243     }
244 
245     /**
246      * Reorder and merge like edit sections. Merge equalities. Any edit section
247      * can move as long as it doesn't cross an equality.
248      * 
249      * @param diffs
250      *            List of Difference objects
251      */
252     public static void cleanupMerge(final List<Difference> diffs) {
253         // Add a dummy entry at the end.
254         diffs.add(new Difference(EditType.EQUAL, ""));
255 
256         int countDelete = 0;
257         int countInsert = 0;
258         StringBuilder textDelete = new StringBuilder();
259         StringBuilder textInsert = new StringBuilder();
260 
261         int commonLength = 0;
262 
263         ListIterator<Difference> pointer = diffs.listIterator();
264         Difference curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
265         Difference prevEqual = null;
266         while (curDiff != null) {
267             EditType editType = curDiff.getEditType();
268             if (EditType.INSERT.equals(editType)) {
269                 countInsert++;
270                 textInsert.append(curDiff.getText());
271                 prevEqual = null;
272             } else if (EditType.DELETE.equals(editType)) {
273                 countDelete++;
274                 textDelete.append(curDiff.getText());
275                 prevEqual = null;
276             } else if (EditType.EQUAL.equals(editType)) {
277                 // Upon reaching an equality, check for prior redundancies.
278                 if (countDelete != 0 || countInsert != 0) {
279                     // Delete the offending records.
280                     pointer.previous(); // Reverse direction.
281                     while (countDelete-- > 0) {
282                         pointer.previous();
283                         pointer.remove();
284                     }
285                     while (countInsert-- > 0) {
286                         pointer.previous();
287                         pointer.remove();
288                     }
289 
290                     if (countDelete != 0 && countInsert != 0) {
291                         // Factor out any common prefixes.
292                         commonLength = Commonality.prefix(textInsert.toString(), textDelete.toString());
293                         if (commonLength > 0) {
294                             if (pointer.hasPrevious()) {
295                                 curDiff = pointer.previous();
296                                 assert EditType.EQUAL.equals(curDiff.getEditType()) : "Previous diff should have been an equality.";
297                                 curDiff.appendText(textInsert.substring(0, commonLength));
298                                 pointer.next();
299                             } else {
300                                 pointer.add(new Difference(EditType.EQUAL, textInsert.substring(0, commonLength)));
301                             }
302                             textInsert.replace(0, textInsert.length(), textInsert.substring(commonLength));
303                             textDelete.replace(0, textDelete.length(), textDelete.substring(commonLength));
304                         }
305 
306                         // Factor out any common suffixes.
307                         commonLength = Commonality.suffix(textInsert.toString(), textDelete.toString());
308                         if (commonLength > 0) {
309                             curDiff = pointer.next();
310                             curDiff.prependText(textInsert.substring(textInsert.length() - commonLength));
311                             textInsert.replace(0, textInsert.length(), textInsert.substring(0, textInsert.length() - commonLength));
312                             textDelete.replace(0, textDelete.length(), textDelete.substring(0, textDelete.length() - commonLength));
313                             pointer.previous();
314                         }
315                     }
316 
317                     // Insert the merged records.
318                     if (textDelete.length() != 0) {
319                         pointer.add(new Difference(EditType.DELETE, textDelete.toString()));
320                     }
321 
322                     if (textInsert.length() != 0) {
323                         pointer.add(new Difference(EditType.INSERT, textInsert.toString()));
324                     }
325 
326                     // Step forward to the equality.
327                     curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
328                 } else if (prevEqual != null) {
329                     // Merge this equality with the previous one.
330                     prevEqual.appendText(curDiff.getText());
331                     pointer.remove();
332                     curDiff = pointer.previous();
333                     pointer.next(); // Forward direction
334                 }
335 
336                 countInsert = 0;
337                 countDelete = 0;
338                 textDelete.delete(0, textDelete.length());
339                 textInsert.delete(0, textInsert.length());
340                 prevEqual = curDiff;
341             }
342             curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
343         }
344 
345         Difference lastDiff = diffs.get(diffs.size() - 1);
346         if (lastDiff.getText().length() == 0) {
347             diffs.remove(diffs.size() - 1); // Remove the dummy entry at the
348             // end.
349         }
350     }
351 
352     /**
353      * Set the edit cost for efficiency
354      * 
355      * @param newEditCost
356      */
357     public static void setEditCost(int newEditCost) {
358         editCost = newEditCost;
359     }
360 
361     /**
362      * Cost of an empty edit operation in terms of edit characters.
363      */
364     private static final int EDIT_COST = 4;
365     private static int editCost = EDIT_COST;
366 }
367