1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 or later
5    * as published by the Free Software Foundation. This program is distributed
6    * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
7    * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *      http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * © CrossWire Bible Society, 2005 - 2016
18   *
19   */
20  package org.crosswire.common.diff;
21  
22  import java.util.List;
23  import java.util.ListIterator;
24  import java.util.Stack;
25  
26  /**
27   * Various Strategies to cleanup a diff list.
28   * 
29   * Based on the LGPL Diff_Match_Patch v1.5 javascript of Neil Fraser, Copyright (C) 2006<br>
30   * <a href="http://neil.fraser.name/software/diff_match_patch/">http://neil.fraser.name/software/diff_match_patch/</a>
31   * 
32   * @see gnu.lgpl.License The GNU Lesser General Public License for details.
33   * @author DM Smith
34   */
35  public final class DiffCleanup {
36      /**
37       * Utility class constructor.
38       */
39      private DiffCleanup() {
40      }
41  
42      /**
43       * Reduce the number of edits by eliminating semantically trivial
44       * equalities.
45       * 
46       * @param diffs
47       *            List of Difference objects
48       */
49      public static void cleanupSemantic(final List<Difference> diffs) {
50          boolean changes = false;
51          Stack<Difference> equalities = new Stack<Difference>(); // Stack of indices where equalities are
52          // found.
53          String lastEquality = null; // Always equal to
54          // equalities.lastElement().getText()
55          int lengthChangesPre = 0; // Number of characters that changed prior to
56          // the equality.
57          int lengthChangesPost = 0; // Number of characters that changed after
58          // the equality.
59          ListIterator<Difference> pointer = diffs.listIterator();
60          Difference curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
61          while (curDiff != null) {
62              EditType editType = curDiff.getEditType();
63              if (EditType.EQUAL.equals(editType)) {
64                  // equality found
65                  equalities.push(curDiff);
66                  lengthChangesPre = lengthChangesPost;
67                  lengthChangesPost = 0;
68                  lastEquality = curDiff.getText();
69              } else {
70                  // an insertion or deletion
71                  lengthChangesPost += curDiff.getText().length();
72                  int lastLen = lastEquality != null ? lastEquality.length() : 0;
73                  if (lastEquality != null && lastLen <= lengthChangesPre && lastLen <= lengthChangesPost) {
74                      // position pointer to the element after the one at the end
75                      // of the stack
76                      while (curDiff != equalities.lastElement()) {
77                          curDiff = pointer.previous();
78                      }
79                      pointer.next();
80  
81                      // Replace equality with a delete.
82                      pointer.set(new Difference(EditType.DELETE, lastEquality));
83                      // Insert a corresponding an insert.
84                      pointer.add(new Difference(EditType.INSERT, lastEquality));
85                      equalities.pop(); // Throw away the equality we just
86                      // deleted;
87                      if (!equalities.empty()) {
88                          // Throw away the previous equality (it needs to be
89                          // reevaluated).
90                          equalities.pop();
91                      }
92                      if (equalities.empty()) {
93                          // There are no previous equalities, walk back to the
94                          // start.
95                          while (pointer.hasPrevious()) {
96                              pointer.previous();
97                          }
98                      } else {
99                          // There is a safe equality we can fall back to.
100                         curDiff = equalities.lastElement();
101                         while (curDiff != pointer.previous()) {
102                             // Intentionally empty loop.
103                         }
104                     }
105 
106                     lengthChangesPre = 0; // Reset the counters.
107                     lengthChangesPost = 0;
108                     lastEquality = null;
109                     changes = true;
110                 }
111             }
112             curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
113         }
114 
115         if (changes) {
116             cleanupMerge(diffs);
117         }
118     }
119 
120     /**
121      * Reduce the number of edits by eliminating operationally trivial
122      * equalities.
123      * 
124      * @param diffs
125      *            List of Difference objects
126      */
127     public static void cleanupEfficiency(final List<Difference> diffs) {
128         if (diffs.isEmpty()) {
129             return;
130         }
131 
132         boolean changes = false;
133         Stack<Difference> equalities = new Stack<Difference>(); // Stack of indices where equalities are
134         // found.
135         String lastEquality = null; // Always equal to
136         // equalities.lastElement().getText();
137         int preInsert = 0; // Is there an insertion operation before the last
138         // equality.
139         int preDelete = 0; // Is there an deletion operation before the last
140         // equality.
141         int postInsert = 0; // Is there an insertion operation after the last
142         // equality.
143         int postDelete = 0; // Is there an deletion operation after the last
144         // equality.
145 
146         ListIterator<Difference> pointer = diffs.listIterator();
147         Difference curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
148         Difference safeDiff = curDiff; // The last Difference that is known to
149         // be unsplitable.
150 
151         while (curDiff != null) {
152             EditType editType = curDiff.getEditType();
153             if (EditType.EQUAL.equals(editType)) {
154                 // equality found
155                 if (curDiff.getText().length() < editCost && (postInsert + postDelete) > 0) {
156                     // Candidate found.
157                     equalities.push(curDiff);
158                     preInsert = postInsert;
159                     preDelete = postDelete;
160                     lastEquality = curDiff.getText();
161                 } else {
162                     // Not a candidate, and can never become one.
163                     equalities.clear();
164                     lastEquality = null;
165                     safeDiff = curDiff;
166                 }
167                 postInsert = 0;
168                 postDelete = 0;
169             } else {
170                 // an insertion or deletion
171                 if (EditType.DELETE.equals(editType)) {
172                     postDelete = 1;
173                 } else {
174                     postInsert = 1;
175                 }
176 
177                 // Five types to be split:
178                 // <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
179                 // <ins>A</ins>X<ins>C</ins><del>D</del>
180                 // <ins>A</ins><del>B</del>X<ins>C</ins>
181                 // <ins>A</del>X<ins>C</ins><del>D</del>
182                 // <ins>A</ins><del>B</del>X<del>C</del>
183                 if (lastEquality != null
184                         && (((preInsert + preDelete + postInsert + postDelete) > 0)
185                                 || ((lastEquality.length() < editCost / 2)
186                                         && (preInsert + preDelete + postInsert + postDelete) == 3)))
187                 {
188                     // position pointer to the element after the one at the end
189                     // of the stack
190                     while (curDiff != equalities.lastElement()) {
191                         curDiff = pointer.previous();
192                     }
193                     pointer.next();
194 
195                     // Replace equality with a delete.
196                     pointer.set(new Difference(EditType.DELETE, lastEquality));
197                     // Insert a corresponding an insert.
198                     curDiff = new Difference(EditType.INSERT, lastEquality);
199                     pointer.add(curDiff);
200 
201                     equalities.pop(); // Throw away the equality we just
202                     // deleted;
203                     lastEquality = null;
204                     if (preInsert == 1 && preDelete == 1) {
205                         // No changes made which could affect previous entry,
206                         // keep going.
207                         postInsert = 1;
208                         postDelete = 1;
209                         equalities.clear();
210                         safeDiff = curDiff;
211                     } else {
212                         if (!equalities.empty()) {
213                             // Throw away the previous equality;
214                             equalities.pop();
215                         }
216                         if (equalities.empty()) {
217                             // There are no previous questionable equalities,
218                             // walk back to the last known safe diff.
219                             curDiff = safeDiff;
220                         } else {
221                             // There is an equality we can fall back to.
222                             curDiff = equalities.lastElement();
223                         }
224                         while (curDiff != pointer.previous()) {
225                             // Intentionally empty loop.
226                         }
227 
228                         postInsert = 0;
229                         postDelete = 0;
230                     }
231                     changes = true;
232                 }
233             }
234             curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
235         }
236 
237         if (changes) {
238             cleanupMerge(diffs);
239         }
240     }
241 
242     /**
243      * Reorder and merge like edit sections. Merge equalities. Any edit section
244      * can move as long as it doesn't cross an equality.
245      * 
246      * @param diffs
247      *            List of Difference objects
248      */
249     public static void cleanupMerge(final List<Difference> diffs) {
250         // Add a dummy entry at the end.
251         diffs.add(new Difference(EditType.EQUAL, ""));
252 
253         int countDelete = 0;
254         int countInsert = 0;
255         StringBuilder textDelete = new StringBuilder();
256         StringBuilder textInsert = new StringBuilder();
257 
258         int commonLength = 0;
259 
260         ListIterator<Difference> pointer = diffs.listIterator();
261         Difference curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
262         Difference prevEqual = null;
263         while (curDiff != null) {
264             EditType editType = curDiff.getEditType();
265             if (EditType.INSERT.equals(editType)) {
266                 countInsert++;
267                 textInsert.append(curDiff.getText());
268                 prevEqual = null;
269             } else if (EditType.DELETE.equals(editType)) {
270                 countDelete++;
271                 textDelete.append(curDiff.getText());
272                 prevEqual = null;
273             } else if (EditType.EQUAL.equals(editType)) {
274                 // Upon reaching an equality, check for prior redundancies.
275                 if (countDelete != 0 || countInsert != 0) {
276                     // Delete the offending records.
277                     pointer.previous(); // Reverse direction.
278                     while (countDelete-- > 0) {
279                         pointer.previous();
280                         pointer.remove();
281                     }
282                     while (countInsert-- > 0) {
283                         pointer.previous();
284                         pointer.remove();
285                     }
286 
287                     if (countDelete != 0 && countInsert != 0) {
288                         // Factor out any common prefixes.
289                         commonLength = Commonality.prefix(textInsert.toString(), textDelete.toString());
290                         if (commonLength > 0) {
291                             if (pointer.hasPrevious()) {
292                                 curDiff = pointer.previous();
293                                 assert EditType.EQUAL.equals(curDiff.getEditType()) : "Previous diff should have been an equality.";
294                                 curDiff.appendText(textInsert.substring(0, commonLength));
295                                 pointer.next();
296                             } else {
297                                 pointer.add(new Difference(EditType.EQUAL, textInsert.substring(0, commonLength)));
298                             }
299                             textInsert.replace(0, textInsert.length(), textInsert.substring(commonLength));
300                             textDelete.replace(0, textDelete.length(), textDelete.substring(commonLength));
301                         }
302 
303                         // Factor out any common suffixes.
304                         commonLength = Commonality.suffix(textInsert.toString(), textDelete.toString());
305                         if (commonLength > 0) {
306                             curDiff = pointer.next();
307                             curDiff.prependText(textInsert.substring(textInsert.length() - commonLength));
308                             textInsert.replace(0, textInsert.length(), textInsert.substring(0, textInsert.length() - commonLength));
309                             textDelete.replace(0, textDelete.length(), textDelete.substring(0, textDelete.length() - commonLength));
310                             pointer.previous();
311                         }
312                     }
313 
314                     // Insert the merged records.
315                     if (textDelete.length() != 0) {
316                         pointer.add(new Difference(EditType.DELETE, textDelete.toString()));
317                     }
318 
319                     if (textInsert.length() != 0) {
320                         pointer.add(new Difference(EditType.INSERT, textInsert.toString()));
321                     }
322 
323                     // Step forward to the equality.
324                     curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
325                 } else if (prevEqual != null) {
326                     // Merge this equality with the previous one.
327                     prevEqual.appendText(curDiff.getText());
328                     pointer.remove();
329                     curDiff = pointer.previous();
330                     pointer.next(); // Forward direction
331                 }
332 
333                 countInsert = 0;
334                 countDelete = 0;
335                 textDelete.delete(0, textDelete.length());
336                 textInsert.delete(0, textInsert.length());
337                 prevEqual = curDiff;
338             }
339             curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
340         }
341 
342         Difference lastDiff = diffs.get(diffs.size() - 1);
343         if (lastDiff.getText().length() == 0) {
344             diffs.remove(diffs.size() - 1); // Remove the dummy entry at the
345             // end.
346         }
347     }
348 
349     /**
350      * Set the edit cost for efficiency
351      * 
352      * @param newEditCost the edit cost
353      */
354     public static void setEditCost(int newEditCost) {
355         editCost = newEditCost;
356     }
357 
358     /**
359      * Cost of an empty edit operation in terms of edit characters.
360      */
361     private static final int EDIT_COST = 4;
362     private static int editCost = EDIT_COST;
363 }
364