| Diff.java |
1 /**
2 * Distribution License:
3 * JSword is free software; you can redistribute it and/or modify it under
4 * the terms of the GNU Lesser General Public License, version 2.1 as published by
5 * the Free Software Foundation. This program is distributed in the hope
6 * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8 * See the GNU Lesser General Public License for more details.
9 *
10 * The License is available on the internet at:
11 * http://www.gnu.org/copyleft/lgpl.html
12 * or by writing to:
13 * Free Software Foundation, Inc.
14 * 59 Temple Place - Suite 330
15 * Boston, MA 02111-1307, USA
16 *
17 * Copyright: 2007
18 * The copyright to this program is held by it's authors.
19 *
20 * ID: $Id: Diff.java 2149 2011-04-09 15:51:45Z dmsmith $
21 */
22 package org.crosswire.common.diff;
23
24 import java.util.ArrayList;
25 import java.util.List;
26 import java.util.ListIterator;
27
28 /**
29 * Computes the difference between two texts to create a list of differences.
30 *
31 * Based on the LGPL Diff_Match_Patch v1.5 javascript of Neil Fraser, Copyright (C) 2006<br>
32 * <a href="http://neil.fraser.name/software/diff_match_patch/">http://neil.fraser.name/software/diff_match_patch/</a>
33 *
34 * @see gnu.lgpl.License for license details.<br>
35 * The copyright to this program is held by it's authors.
36 * @author DM Smith [dmsmith555 at yahoo dot com]
37 */
38 public class Diff {
39 /**
40 * Construct an object that can find the differences between two texts. Run
41 * a faster slightly less optimal diff. This constructor allows the
42 * 'checkLines' to be optional. Most of the time checkLines is wanted, so
43 * default to true.
44 *
45 * @param source
46 * Old string to be diffed
47 * @param target
48 * New string to be diffed
49 */
50 public Diff(final String source, final String target) {
51 this(source, target, true);
52 }
53
54 /**
55 * Construct an object that can find the differences between two texts.
56 *
57 * @param source
58 * Old string to be diffed
59 * @param target
60 * New string to be diffed
61 * @param checkLines
62 * Speedup flag. If false, then don't run a line-level diff first
63 * to identify the changed areas. If true, then run a faster
64 * slightly less optimal diff
65 */
66 public Diff(final String source, final String target, final boolean checkLines) {
67 this.source = source;
68 this.target = target;
69 this.checkLines = checkLines;
70 }
71
72 /**
73 * Find the differences between two texts. Simplifies the problem by
74 * stripping any common prefix or suffix off the texts before diffing.
75 *
76 * @return List of Difference objects
77 */
78 public List<Difference> compare() {
79 // Check for equality (speedup)
80 List<Difference> diffs;
81 if (source.equals(target)) {
82 diffs = new ArrayList<Difference>();
83 diffs.add(new Difference(EditType.EQUAL, source));
84 return diffs;
85 }
86
87 // Trim off common prefix (speedup)
88 int commonLength = Commonality.prefix(source, target);
89 String commonPrefix = source.substring(0, commonLength);
90 source = source.substring(commonLength);
91 target = target.substring(commonLength);
92
93 // Trim off common suffix (speedup)
94 commonLength = Commonality.suffix(source, target);
95 String commonSuffix = source.substring(source.length() - commonLength);
96 source = source.substring(0, source.length() - commonLength);
97 target = target.substring(0, target.length() - commonLength);
98
99 // Compute the diff on the middle block
100 diffs = compute();
101
102 // Restore the prefix and suffix
103 if (!"".equals(commonPrefix)) {
104 diffs.add(0, new Difference(EditType.EQUAL, commonPrefix));
105 }
106
107 if (!"".equals(commonSuffix)) {
108 diffs.add(new Difference(EditType.EQUAL, commonSuffix));
109 }
110
111 DiffCleanup.cleanupMerge(diffs);
112
113 return diffs;
114 }
115
116 /**
117 * Find the differences between two texts.
118 *
119 * @return List of Difference objects
120 */
121 private List<Difference> compute() {
122 List<Difference> diffs = new ArrayList<Difference>();
123
124 if ("".equals(source)) {
125 // Just add some text (speedup)
126 diffs.add(new Difference(EditType.INSERT, target));
127 return diffs;
128 }
129
130 if ("".equals(target)) {
131 // Just delete some text (speedup)
132 diffs.add(new Difference(EditType.DELETE, source));
133 return diffs;
134 }
135
136 String longText = source.length() > target.length() ? source : target;
137 String shortText = source.length() > target.length() ? target : source;
138 int i = longText.indexOf(shortText);
139 if (i != -1) {
140 // Shorter text is inside the longer text (speedup)
141 EditType editType = (source.length() > target.length()) ? EditType.DELETE : EditType.INSERT;
142 diffs.add(new Difference(editType, longText.substring(0, i)));
143 diffs.add(new Difference(EditType.EQUAL, shortText));
144 diffs.add(new Difference(editType, longText.substring(i + shortText.length())));
145 return diffs;
146 }
147
148 // Check to see if the problem can be split in two.
149 CommonMiddle middleMatch = Commonality.halfMatch(source, target);
150 if (middleMatch != null) {
151 // A half-match was found, sort out the return data.
152 // Send both pairs off for separate processing.
153 Diff startDiff = new Diff(middleMatch.getSourcePrefix(), middleMatch.getTargetPrefix(), checkLines);
154 Diff endDiff = new Diff(middleMatch.getSourceSuffix(), middleMatch.getTargetSuffix(), checkLines);
155 // Merge the results.
156 diffs = startDiff.compare();
157 diffs.add(new Difference(EditType.EQUAL, middleMatch.getCommonality()));
158 diffs.addAll(endDiff.compare());
159 return diffs;
160 }
161
162 // Perform a real diff.
163 if (checkLines && source.length() + target.length() < 250) {
164 checkLines = false; // Too trivial for the overhead.
165 }
166
167 LineMap lineMap = null;
168 if (checkLines) {
169 // Scan the text on a line-by-line basis first.
170 lineMap = new LineMap(source, target);
171 source = lineMap.getSourceMap();
172 target = lineMap.getTargetMap();
173 }
174
175 diffs = new DifferenceEngine(source, target).generate();
176
177 if (diffs == null) {
178 // No acceptable result.
179 diffs = new ArrayList<Difference>();
180 diffs.add(new Difference(EditType.DELETE, source));
181 diffs.add(new Difference(EditType.INSERT, target));
182 }
183
184 if (checkLines && lineMap != null) {
185 // Convert the diff back to original text.
186 lineMap.restore(diffs);
187 // Eliminate freak matches (e.g. blank lines)
188 DiffCleanup.cleanupSemantic(diffs);
189
190 // Rediff any replacement blocks, this time character-by-character.
191 // Add a dummy entry at the end.
192 diffs.add(new Difference(EditType.EQUAL, ""));
193 int countDeletes = 0;
194 int countInserts = 0;
195 StringBuilder textDelete = new StringBuilder();
196 StringBuilder textInsert = new StringBuilder();
197 ListIterator<Difference> pointer = diffs.listIterator();
198 Difference curDiff = pointer.next();
199 while (curDiff != null) {
200 EditType editType = curDiff.getEditType();
201 if (EditType.INSERT.equals(editType)) {
202 countInserts++;
203 textInsert.append(curDiff.getText());
204 } else if (EditType.DELETE.equals(editType)) {
205 countDeletes++;
206 textDelete.append(curDiff.getText());
207 } else {
208 // Upon reaching an equality, check for prior redundancies.
209 if (countDeletes >= 1 && countInserts >= 1) {
210 // Delete the offending records and add the merged ones.
211 pointer.previous();
212 for (int j = 0; j < countDeletes + countInserts; j++) {
213 pointer.previous();
214 pointer.remove();
215 }
216 Diff newDiff = new Diff(textDelete.toString(), textInsert.toString(), false);
217 for (Difference diff : newDiff.compare()) {
218 pointer.add(diff);
219 }
220 }
221 countInserts = 0;
222 countDeletes = 0;
223 textDelete.delete(0, textDelete.length());
224 textInsert.delete(0, textInsert.length());
225 }
226 curDiff = pointer.hasNext() ? pointer.next() : null;
227 }
228 diffs.remove(diffs.size() - 1); // Remove the dummy entry at the
229 // end.
230 }
231 return diffs;
232 }
233
234 /**
235 * loc is a location in source, compute and return the equivalent location
236 * in target. e.g. "The cat" vs "The big cat", 1->1, 5->8
237 *
238 * @param diffs
239 * List of Difference objects
240 * @param loc
241 * Location within source
242 * @return Location within target
243 */
244 public int xIndex(final List<Difference> diffs, final int loc) {
245 int chars1 = 0;
246 int chars2 = 0;
247 int lastChars1 = 0;
248 int lastChars2 = 0;
249 Difference lastDiff = null;
250 for (Difference diff : diffs) {
251 EditType editType = diff.getEditType();
252
253 // Equality or deletion?
254 if (!EditType.INSERT.equals(editType)) {
255 chars1 += diff.getText().length();
256 }
257
258 // Equality or insertion?
259 if (!EditType.DELETE.equals(editType)) {
260 chars2 += diff.getText().length();
261 }
262
263 // Overshot the location?
264 if (chars1 > loc) {
265 lastDiff = diff;
266 break;
267 }
268 lastChars1 = chars1;
269 lastChars2 = chars2;
270 }
271
272 // Was the location was deleted?
273 if (lastDiff != null && EditType.DELETE.equals(lastDiff.getEditType())) {
274 return lastChars2;
275 }
276
277 // Add the remaining character length.
278 return lastChars2 + (loc - lastChars1);
279 }
280
281 /**
282 * Convert a Difference list into a pretty HTML report.
283 *
284 * @param diffs
285 * List of Difference objects
286 * @return HTML representation
287 */
288 public String prettyHtml(List<Difference> diffs) {
289 StringBuilder buf = new StringBuilder();
290 for (int x = 0; x < diffs.size(); x++) {
291 Difference diff = diffs.get(x);
292 EditType editType = diff.getEditType(); // Mode (delete, equal,
293 // insert)
294 String text = diff.getText(); // Text of change.
295 // TODO(DMS): Do replacements
296 // text = text.replace(/&/g, "&");
297 // text = text.replace(/</g, "<");
298 // text = text.replace(/>/g, ">");
299 // text = text.replace(/\n/g, "¶<BR>");
300 if (EditType.DELETE.equals(editType)) {
301 buf.append("<del style=\"background:#FFE6E6;\">");
302 buf.append(text);
303 buf.append("</del>");
304 } else if (EditType.INSERT.equals(editType)) {
305 buf.append("<ins style=\"background:#E6FFE6;\">");
306 buf.append(text);
307 buf.append("</ins>");
308 } else {
309 buf.append("<span>");
310 buf.append(text);
311 buf.append("</span>");
312 }
313 }
314 return buf.toString();
315 }
316
317 /**
318 * The baseline text.
319 */
320 private String source;
321
322 /**
323 * The changed text.
324 */
325 private String target;
326
327 /**
328 * Whether a slightly faster less optimal diff should be run.
329 */
330 private boolean checkLines;
331 }
332