| DiffCleanup.java |
1 /**
2 * Distribution License:
3 * JSword is free software; you can redistribute it and/or modify it under
4 * the terms of the GNU Lesser General Public License, version 2.1 as published by
5 * the Free Software Foundation. This program is distributed in the hope
6 * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8 * See the GNU Lesser General Public License for more details.
9 *
10 * The License is available on the internet at:
11 * http://www.gnu.org/copyleft/lgpl.html
12 * or by writing to:
13 * Free Software Foundation, Inc.
14 * 59 Temple Place - Suite 330
15 * Boston, MA 02111-1307, USA
16 *
17 * Copyright: 2005
18 * The copyright to this program is held by it's authors.
19 *
20 * ID: $Id: DiffCleanup.java 2149 2011-04-09 15:51:45Z dmsmith $
21 */
22 package org.crosswire.common.diff;
23
24 import java.util.List;
25 import java.util.ListIterator;
26 import java.util.Stack;
27
28 /**
29 * Various Strategies to cleanup a diff list.
30 *
31 * Based on the LGPL Diff_Match_Patch v1.5 javascript of Neil Fraser, Copyright (C) 2006<br>
32 * <a href="http://neil.fraser.name/software/diff_match_patch/">http://neil.fraser.name/software/diff_match_patch/</a>
33 *
34 * @see gnu.lgpl.License for license details.<br>
35 * The copyright to this program is held by it's authors.
36 * @author DM Smith [dmsmith555 at yahoo dot com]
37 */
38 public final class DiffCleanup {
39 /**
40 * Utility class constructor.
41 */
42 private DiffCleanup() {
43 }
44
45 /**
46 * Reduce the number of edits by eliminating semantically trivial
47 * equalities.
48 *
49 * @param diffs
50 * List of Difference objects
51 */
52 public static void cleanupSemantic(final List<Difference> diffs) {
53 boolean changes = false;
54 Stack<Difference> equalities = new Stack<Difference>(); // Stack of indices where equalities are
55 // found.
56 String lastEquality = null; // Always equal to
57 // equalities.lastElement().getText()
58 int lengthChangesPre = 0; // Number of characters that changed prior to
59 // the equality.
60 int lengthChangesPost = 0; // Number of characters that changed after
61 // the equality.
62 ListIterator<Difference> pointer = diffs.listIterator();
63 Difference curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
64 while (curDiff != null) {
65 EditType editType = curDiff.getEditType();
66 if (EditType.EQUAL.equals(editType)) {
67 // equality found
68 equalities.push(curDiff);
69 lengthChangesPre = lengthChangesPost;
70 lengthChangesPost = 0;
71 lastEquality = curDiff.getText();
72 } else {
73 // an insertion or deletion
74 lengthChangesPost += curDiff.getText().length();
75 int lastLen = lastEquality != null ? lastEquality.length() : 0;
76 if (lastEquality != null && lastLen <= lengthChangesPre && lastLen <= lengthChangesPost) {
77 // position pointer to the element after the one at the end
78 // of the stack
79 while (curDiff != equalities.lastElement()) {
80 curDiff = pointer.previous();
81 }
82 pointer.next();
83
84 // Replace equality with a delete.
85 pointer.set(new Difference(EditType.DELETE, lastEquality));
86 // Insert a corresponding an insert.
87 pointer.add(new Difference(EditType.INSERT, lastEquality));
88 equalities.pop(); // Throw away the equality we just
89 // deleted;
90 if (!equalities.empty()) {
91 // Throw away the previous equality (it needs to be
92 // reevaluated).
93 equalities.pop();
94 }
95 if (equalities.empty()) {
96 // There are no previous equalities, walk back to the
97 // start.
98 while (pointer.hasPrevious()) {
99 pointer.previous();
100 }
101 } else {
102 // There is a safe equality we can fall back to.
103 curDiff = equalities.lastElement();
104 while (curDiff != pointer.previous()) {
105 // Intentionally empty loop.
106 }
107 }
108
109 lengthChangesPre = 0; // Reset the counters.
110 lengthChangesPost = 0;
111 lastEquality = null;
112 changes = true;
113 }
114 }
115 curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
116 }
117
118 if (changes) {
119 cleanupMerge(diffs);
120 }
121 }
122
123 /**
124 * Reduce the number of edits by eliminating operationally trivial
125 * equalities.
126 *
127 * @param diffs
128 * List of Difference objects
129 */
130 public static void cleanupEfficiency(final List<Difference> diffs) {
131 if (diffs.isEmpty()) {
132 return;
133 }
134
135 boolean changes = false;
136 Stack<Difference> equalities = new Stack<Difference>(); // Stack of indices where equalities are
137 // found.
138 String lastEquality = null; // Always equal to
139 // equalities.lastElement().getText();
140 int preInsert = 0; // Is there an insertion operation before the last
141 // equality.
142 int preDelete = 0; // Is there an deletion operation before the last
143 // equality.
144 int postInsert = 0; // Is there an insertion operation after the last
145 // equality.
146 int postDelete = 0; // Is there an deletion operation after the last
147 // equality.
148
149 ListIterator<Difference> pointer = diffs.listIterator();
150 Difference curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
151 Difference safeDiff = curDiff; // The last Difference that is known to
152 // be unsplitable.
153
154 while (curDiff != null) {
155 EditType editType = curDiff.getEditType();
156 if (EditType.EQUAL.equals(editType)) {
157 // equality found
158 if (curDiff.getText().length() < editCost && (postInsert + postDelete) > 0) {
159 // Candidate found.
160 equalities.push(curDiff);
161 preInsert = postInsert;
162 preDelete = postDelete;
163 lastEquality = curDiff.getText();
164 } else {
165 // Not a candidate, and can never become one.
166 equalities.clear();
167 lastEquality = null;
168 safeDiff = curDiff;
169 }
170 postInsert = 0;
171 postDelete = 0;
172 } else {
173 // an insertion or deletion
174 if (EditType.DELETE.equals(editType)) {
175 postDelete = 1;
176 } else {
177 postInsert = 1;
178 }
179
180 // Five types to be split:
181 // <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
182 // <ins>A</ins>X<ins>C</ins><del>D</del>
183 // <ins>A</ins><del>B</del>X<ins>C</ins>
184 // <ins>A</del>X<ins>C</ins><del>D</del>
185 // <ins>A</ins><del>B</del>X<del>C</del>
186 if (lastEquality != null
187 && (((preInsert + preDelete + postInsert + postDelete) > 0)
188 || ((lastEquality.length() < editCost / 2)
189 && (preInsert + preDelete + postInsert + postDelete) == 3)))
190 {
191 // position pointer to the element after the one at the end
192 // of the stack
193 while (curDiff != equalities.lastElement()) {
194 curDiff = pointer.previous();
195 }
196 pointer.next();
197
198 // Replace equality with a delete.
199 pointer.set(new Difference(EditType.DELETE, lastEquality));
200 // Insert a corresponding an insert.
201 curDiff = new Difference(EditType.INSERT, lastEquality);
202 pointer.add(curDiff);
203
204 equalities.pop(); // Throw away the equality we just
205 // deleted;
206 lastEquality = null;
207 if (preInsert == 1 && preDelete == 1) {
208 // No changes made which could affect previous entry,
209 // keep going.
210 postInsert = 1;
211 postDelete = 1;
212 equalities.clear();
213 safeDiff = curDiff;
214 } else {
215 if (!equalities.empty()) {
216 // Throw away the previous equality;
217 equalities.pop();
218 }
219 if (equalities.empty()) {
220 // There are no previous questionable equalities,
221 // walk back to the last known safe diff.
222 curDiff = safeDiff;
223 } else {
224 // There is an equality we can fall back to.
225 curDiff = equalities.lastElement();
226 }
227 while (curDiff != pointer.previous()) {
228 // Intentionally empty loop.
229 }
230
231 postInsert = 0;
232 postDelete = 0;
233 }
234 changes = true;
235 }
236 }
237 curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
238 }
239
240 if (changes) {
241 cleanupMerge(diffs);
242 }
243 }
244
245 /**
246 * Reorder and merge like edit sections. Merge equalities. Any edit section
247 * can move as long as it doesn't cross an equality.
248 *
249 * @param diffs
250 * List of Difference objects
251 */
252 public static void cleanupMerge(final List<Difference> diffs) {
253 // Add a dummy entry at the end.
254 diffs.add(new Difference(EditType.EQUAL, ""));
255
256 int countDelete = 0;
257 int countInsert = 0;
258 StringBuilder textDelete = new StringBuilder();
259 StringBuilder textInsert = new StringBuilder();
260
261 int commonLength = 0;
262
263 ListIterator<Difference> pointer = diffs.listIterator();
264 Difference curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
265 Difference prevEqual = null;
266 while (curDiff != null) {
267 EditType editType = curDiff.getEditType();
268 if (EditType.INSERT.equals(editType)) {
269 countInsert++;
270 textInsert.append(curDiff.getText());
271 prevEqual = null;
272 } else if (EditType.DELETE.equals(editType)) {
273 countDelete++;
274 textDelete.append(curDiff.getText());
275 prevEqual = null;
276 } else if (EditType.EQUAL.equals(editType)) {
277 // Upon reaching an equality, check for prior redundancies.
278 if (countDelete != 0 || countInsert != 0) {
279 // Delete the offending records.
280 pointer.previous(); // Reverse direction.
281 while (countDelete-- > 0) {
282 pointer.previous();
283 pointer.remove();
284 }
285 while (countInsert-- > 0) {
286 pointer.previous();
287 pointer.remove();
288 }
289
290 if (countDelete != 0 && countInsert != 0) {
291 // Factor out any common prefixes.
292 commonLength = Commonality.prefix(textInsert.toString(), textDelete.toString());
293 if (commonLength > 0) {
294 if (pointer.hasPrevious()) {
295 curDiff = pointer.previous();
296 assert EditType.EQUAL.equals(curDiff.getEditType()) : "Previous diff should have been an equality.";
297 curDiff.appendText(textInsert.substring(0, commonLength));
298 pointer.next();
299 } else {
300 pointer.add(new Difference(EditType.EQUAL, textInsert.substring(0, commonLength)));
301 }
302 textInsert.replace(0, textInsert.length(), textInsert.substring(commonLength));
303 textDelete.replace(0, textDelete.length(), textDelete.substring(commonLength));
304 }
305
306 // Factor out any common suffixes.
307 commonLength = Commonality.suffix(textInsert.toString(), textDelete.toString());
308 if (commonLength > 0) {
309 curDiff = pointer.next();
310 curDiff.prependText(textInsert.substring(textInsert.length() - commonLength));
311 textInsert.replace(0, textInsert.length(), textInsert.substring(0, textInsert.length() - commonLength));
312 textDelete.replace(0, textDelete.length(), textDelete.substring(0, textDelete.length() - commonLength));
313 pointer.previous();
314 }
315 }
316
317 // Insert the merged records.
318 if (textDelete.length() != 0) {
319 pointer.add(new Difference(EditType.DELETE, textDelete.toString()));
320 }
321
322 if (textInsert.length() != 0) {
323 pointer.add(new Difference(EditType.INSERT, textInsert.toString()));
324 }
325
326 // Step forward to the equality.
327 curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
328 } else if (prevEqual != null) {
329 // Merge this equality with the previous one.
330 prevEqual.appendText(curDiff.getText());
331 pointer.remove();
332 curDiff = pointer.previous();
333 pointer.next(); // Forward direction
334 }
335
336 countInsert = 0;
337 countDelete = 0;
338 textDelete.delete(0, textDelete.length());
339 textInsert.delete(0, textInsert.length());
340 prevEqual = curDiff;
341 }
342 curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
343 }
344
345 Difference lastDiff = diffs.get(diffs.size() - 1);
346 if (lastDiff.getText().length() == 0) {
347 diffs.remove(diffs.size() - 1); // Remove the dummy entry at the
348 // end.
349 }
350 }
351
352 /**
353 * Set the edit cost for efficiency
354 *
355 * @param newEditCost
356 */
357 public static void setEditCost(int newEditCost) {
358 editCost = newEditCost;
359 }
360
361 /**
362 * Cost of an empty edit operation in terms of edit characters.
363 */
364 private static final int EDIT_COST = 4;
365 private static int editCost = EDIT_COST;
366 }
367