| DiffCleanup.java |
1 /**
2 * Distribution License:
3 * JSword is free software; you can redistribute it and/or modify it under
4 * the terms of the GNU Lesser General Public License, version 2.1 or later
5 * as published by the Free Software Foundation. This program is distributed
6 * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
7 * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8 * See the GNU Lesser General Public License for more details.
9 *
10 * The License is available on the internet at:
11 * http://www.gnu.org/copyleft/lgpl.html
12 * or by writing to:
13 * Free Software Foundation, Inc.
14 * 59 Temple Place - Suite 330
15 * Boston, MA 02111-1307, USA
16 *
17 * © CrossWire Bible Society, 2005 - 2016
18 *
19 */
20 package org.crosswire.common.diff;
21
22 import java.util.List;
23 import java.util.ListIterator;
24 import java.util.Stack;
25
26 /**
27 * Various Strategies to cleanup a diff list.
28 *
29 * Based on the LGPL Diff_Match_Patch v1.5 javascript of Neil Fraser, Copyright (C) 2006<br>
30 * <a href="http://neil.fraser.name/software/diff_match_patch/">http://neil.fraser.name/software/diff_match_patch/</a>
31 *
32 * @see gnu.lgpl.License The GNU Lesser General Public License for details.
33 * @author DM Smith
34 */
35 public final class DiffCleanup {
36 /**
37 * Utility class constructor.
38 */
39 private DiffCleanup() {
40 }
41
42 /**
43 * Reduce the number of edits by eliminating semantically trivial
44 * equalities.
45 *
46 * @param diffs
47 * List of Difference objects
48 */
49 public static void cleanupSemantic(final List<Difference> diffs) {
50 boolean changes = false;
51 Stack<Difference> equalities = new Stack<Difference>(); // Stack of indices where equalities are
52 // found.
53 String lastEquality = null; // Always equal to
54 // equalities.lastElement().getText()
55 int lengthChangesPre = 0; // Number of characters that changed prior to
56 // the equality.
57 int lengthChangesPost = 0; // Number of characters that changed after
58 // the equality.
59 ListIterator<Difference> pointer = diffs.listIterator();
60 Difference curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
61 while (curDiff != null) {
62 EditType editType = curDiff.getEditType();
63 if (EditType.EQUAL.equals(editType)) {
64 // equality found
65 equalities.push(curDiff);
66 lengthChangesPre = lengthChangesPost;
67 lengthChangesPost = 0;
68 lastEquality = curDiff.getText();
69 } else {
70 // an insertion or deletion
71 lengthChangesPost += curDiff.getText().length();
72 int lastLen = lastEquality != null ? lastEquality.length() : 0;
73 if (lastEquality != null && lastLen <= lengthChangesPre && lastLen <= lengthChangesPost) {
74 // position pointer to the element after the one at the end
75 // of the stack
76 while (curDiff != equalities.lastElement()) {
77 curDiff = pointer.previous();
78 }
79 pointer.next();
80
81 // Replace equality with a delete.
82 pointer.set(new Difference(EditType.DELETE, lastEquality));
83 // Insert a corresponding an insert.
84 pointer.add(new Difference(EditType.INSERT, lastEquality));
85 equalities.pop(); // Throw away the equality we just
86 // deleted;
87 if (!equalities.empty()) {
88 // Throw away the previous equality (it needs to be
89 // reevaluated).
90 equalities.pop();
91 }
92 if (equalities.empty()) {
93 // There are no previous equalities, walk back to the
94 // start.
95 while (pointer.hasPrevious()) {
96 pointer.previous();
97 }
98 } else {
99 // There is a safe equality we can fall back to.
100 curDiff = equalities.lastElement();
101 while (curDiff != pointer.previous()) {
102 // Intentionally empty loop.
103 }
104 }
105
106 lengthChangesPre = 0; // Reset the counters.
107 lengthChangesPost = 0;
108 lastEquality = null;
109 changes = true;
110 }
111 }
112 curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
113 }
114
115 if (changes) {
116 cleanupMerge(diffs);
117 }
118 }
119
120 /**
121 * Reduce the number of edits by eliminating operationally trivial
122 * equalities.
123 *
124 * @param diffs
125 * List of Difference objects
126 */
127 public static void cleanupEfficiency(final List<Difference> diffs) {
128 if (diffs.isEmpty()) {
129 return;
130 }
131
132 boolean changes = false;
133 Stack<Difference> equalities = new Stack<Difference>(); // Stack of indices where equalities are
134 // found.
135 String lastEquality = null; // Always equal to
136 // equalities.lastElement().getText();
137 int preInsert = 0; // Is there an insertion operation before the last
138 // equality.
139 int preDelete = 0; // Is there an deletion operation before the last
140 // equality.
141 int postInsert = 0; // Is there an insertion operation after the last
142 // equality.
143 int postDelete = 0; // Is there an deletion operation after the last
144 // equality.
145
146 ListIterator<Difference> pointer = diffs.listIterator();
147 Difference curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
148 Difference safeDiff = curDiff; // The last Difference that is known to
149 // be unsplitable.
150
151 while (curDiff != null) {
152 EditType editType = curDiff.getEditType();
153 if (EditType.EQUAL.equals(editType)) {
154 // equality found
155 if (curDiff.getText().length() < editCost && (postInsert + postDelete) > 0) {
156 // Candidate found.
157 equalities.push(curDiff);
158 preInsert = postInsert;
159 preDelete = postDelete;
160 lastEquality = curDiff.getText();
161 } else {
162 // Not a candidate, and can never become one.
163 equalities.clear();
164 lastEquality = null;
165 safeDiff = curDiff;
166 }
167 postInsert = 0;
168 postDelete = 0;
169 } else {
170 // an insertion or deletion
171 if (EditType.DELETE.equals(editType)) {
172 postDelete = 1;
173 } else {
174 postInsert = 1;
175 }
176
177 // Five types to be split:
178 // <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
179 // <ins>A</ins>X<ins>C</ins><del>D</del>
180 // <ins>A</ins><del>B</del>X<ins>C</ins>
181 // <ins>A</del>X<ins>C</ins><del>D</del>
182 // <ins>A</ins><del>B</del>X<del>C</del>
183 if (lastEquality != null
184 && (((preInsert + preDelete + postInsert + postDelete) > 0)
185 || ((lastEquality.length() < editCost / 2)
186 && (preInsert + preDelete + postInsert + postDelete) == 3)))
187 {
188 // position pointer to the element after the one at the end
189 // of the stack
190 while (curDiff != equalities.lastElement()) {
191 curDiff = pointer.previous();
192 }
193 pointer.next();
194
195 // Replace equality with a delete.
196 pointer.set(new Difference(EditType.DELETE, lastEquality));
197 // Insert a corresponding an insert.
198 curDiff = new Difference(EditType.INSERT, lastEquality);
199 pointer.add(curDiff);
200
201 equalities.pop(); // Throw away the equality we just
202 // deleted;
203 lastEquality = null;
204 if (preInsert == 1 && preDelete == 1) {
205 // No changes made which could affect previous entry,
206 // keep going.
207 postInsert = 1;
208 postDelete = 1;
209 equalities.clear();
210 safeDiff = curDiff;
211 } else {
212 if (!equalities.empty()) {
213 // Throw away the previous equality;
214 equalities.pop();
215 }
216 if (equalities.empty()) {
217 // There are no previous questionable equalities,
218 // walk back to the last known safe diff.
219 curDiff = safeDiff;
220 } else {
221 // There is an equality we can fall back to.
222 curDiff = equalities.lastElement();
223 }
224 while (curDiff != pointer.previous()) {
225 // Intentionally empty loop.
226 }
227
228 postInsert = 0;
229 postDelete = 0;
230 }
231 changes = true;
232 }
233 }
234 curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
235 }
236
237 if (changes) {
238 cleanupMerge(diffs);
239 }
240 }
241
242 /**
243 * Reorder and merge like edit sections. Merge equalities. Any edit section
244 * can move as long as it doesn't cross an equality.
245 *
246 * @param diffs
247 * List of Difference objects
248 */
249 public static void cleanupMerge(final List<Difference> diffs) {
250 // Add a dummy entry at the end.
251 diffs.add(new Difference(EditType.EQUAL, ""));
252
253 int countDelete = 0;
254 int countInsert = 0;
255 StringBuilder textDelete = new StringBuilder();
256 StringBuilder textInsert = new StringBuilder();
257
258 int commonLength = 0;
259
260 ListIterator<Difference> pointer = diffs.listIterator();
261 Difference curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
262 Difference prevEqual = null;
263 while (curDiff != null) {
264 EditType editType = curDiff.getEditType();
265 if (EditType.INSERT.equals(editType)) {
266 countInsert++;
267 textInsert.append(curDiff.getText());
268 prevEqual = null;
269 } else if (EditType.DELETE.equals(editType)) {
270 countDelete++;
271 textDelete.append(curDiff.getText());
272 prevEqual = null;
273 } else if (EditType.EQUAL.equals(editType)) {
274 // Upon reaching an equality, check for prior redundancies.
275 if (countDelete != 0 || countInsert != 0) {
276 // Delete the offending records.
277 pointer.previous(); // Reverse direction.
278 while (countDelete-- > 0) {
279 pointer.previous();
280 pointer.remove();
281 }
282 while (countInsert-- > 0) {
283 pointer.previous();
284 pointer.remove();
285 }
286
287 if (countDelete != 0 && countInsert != 0) {
288 // Factor out any common prefixes.
289 commonLength = Commonality.prefix(textInsert.toString(), textDelete.toString());
290 if (commonLength > 0) {
291 if (pointer.hasPrevious()) {
292 curDiff = pointer.previous();
293 assert EditType.EQUAL.equals(curDiff.getEditType()) : "Previous diff should have been an equality.";
294 curDiff.appendText(textInsert.substring(0, commonLength));
295 pointer.next();
296 } else {
297 pointer.add(new Difference(EditType.EQUAL, textInsert.substring(0, commonLength)));
298 }
299 textInsert.replace(0, textInsert.length(), textInsert.substring(commonLength));
300 textDelete.replace(0, textDelete.length(), textDelete.substring(commonLength));
301 }
302
303 // Factor out any common suffixes.
304 commonLength = Commonality.suffix(textInsert.toString(), textDelete.toString());
305 if (commonLength > 0) {
306 curDiff = pointer.next();
307 curDiff.prependText(textInsert.substring(textInsert.length() - commonLength));
308 textInsert.replace(0, textInsert.length(), textInsert.substring(0, textInsert.length() - commonLength));
309 textDelete.replace(0, textDelete.length(), textDelete.substring(0, textDelete.length() - commonLength));
310 pointer.previous();
311 }
312 }
313
314 // Insert the merged records.
315 if (textDelete.length() != 0) {
316 pointer.add(new Difference(EditType.DELETE, textDelete.toString()));
317 }
318
319 if (textInsert.length() != 0) {
320 pointer.add(new Difference(EditType.INSERT, textInsert.toString()));
321 }
322
323 // Step forward to the equality.
324 curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
325 } else if (prevEqual != null) {
326 // Merge this equality with the previous one.
327 prevEqual.appendText(curDiff.getText());
328 pointer.remove();
329 curDiff = pointer.previous();
330 pointer.next(); // Forward direction
331 }
332
333 countInsert = 0;
334 countDelete = 0;
335 textDelete.delete(0, textDelete.length());
336 textInsert.delete(0, textInsert.length());
337 prevEqual = curDiff;
338 }
339 curDiff = pointer.hasNext() ? (Difference) pointer.next() : null;
340 }
341
342 Difference lastDiff = diffs.get(diffs.size() - 1);
343 if (lastDiff.getText().length() == 0) {
344 diffs.remove(diffs.size() - 1); // Remove the dummy entry at the
345 // end.
346 }
347 }
348
349 /**
350 * Set the edit cost for efficiency
351 *
352 * @param newEditCost the edit cost
353 */
354 public static void setEditCost(int newEditCost) {
355 editCost = newEditCost;
356 }
357
358 /**
359 * Cost of an empty edit operation in terms of edit characters.
360 */
361 private static final int EDIT_COST = 4;
362 private static int editCost = EDIT_COST;
363 }
364