| Diff.java |
1 /**
2 * Distribution License:
3 * JSword is free software; you can redistribute it and/or modify it under
4 * the terms of the GNU Lesser General Public License, version 2.1 or later
5 * as published by the Free Software Foundation. This program is distributed
6 * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
7 * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8 * See the GNU Lesser General Public License for more details.
9 *
10 * The License is available on the internet at:
11 * http://www.gnu.org/copyleft/lgpl.html
12 * or by writing to:
13 * Free Software Foundation, Inc.
14 * 59 Temple Place - Suite 330
15 * Boston, MA 02111-1307, USA
16 *
17 * © CrossWire Bible Society, 2007 - 2016
18 *
19 */
20 package org.crosswire.common.diff;
21
22 import java.util.ArrayList;
23 import java.util.List;
24 import java.util.ListIterator;
25
26 /**
27 * Computes the difference between two texts to create a list of differences.
28 *
29 * Based on the LGPL Diff_Match_Patch v1.5 javascript of Neil Fraser, Copyright (C) 2006<br>
30 * <a href="http://neil.fraser.name/software/diff_match_patch/">http://neil.fraser.name/software/diff_match_patch/</a>
31 *
32 * @see gnu.lgpl.License The GNU Lesser General Public License for details.
33 * @author DM Smith
34 */
35 public class Diff {
36 /**
37 * Construct an object that can find the differences between two texts. Run
38 * a faster slightly less optimal diff. This constructor allows the
39 * 'checkLines' to be optional. Most of the time checkLines is wanted, so
40 * default to true.
41 *
42 * @param source
43 * Old string to be diffed
44 * @param target
45 * New string to be diffed
46 */
47 public Diff(final String source, final String target) {
48 this(source, target, true);
49 }
50
51 /**
52 * Construct an object that can find the differences between two texts.
53 *
54 * @param source
55 * Old string to be diffed
56 * @param target
57 * New string to be diffed
58 * @param checkLines
59 * Speedup flag. If false, then don't run a line-level diff first
60 * to identify the changed areas. If true, then run a faster
61 * slightly less optimal diff
62 */
63 public Diff(final String source, final String target, final boolean checkLines) {
64 this.source = source;
65 this.target = target;
66 this.checkLines = checkLines;
67 }
68
69 /**
70 * Find the differences between two texts. Simplifies the problem by
71 * stripping any common prefix or suffix off the texts before diffing.
72 *
73 * @return List of Difference objects
74 */
75 public List<Difference> compare() {
76 // Check for equality (speedup)
77 List<Difference> diffs;
78 if (source.equals(target)) {
79 diffs = new ArrayList<Difference>();
80 diffs.add(new Difference(EditType.EQUAL, source));
81 return diffs;
82 }
83
84 // Trim off common prefix (speedup)
85 int commonLength = Commonality.prefix(source, target);
86 String commonPrefix = source.substring(0, commonLength);
87 source = source.substring(commonLength);
88 target = target.substring(commonLength);
89
90 // Trim off common suffix (speedup)
91 commonLength = Commonality.suffix(source, target);
92 String commonSuffix = source.substring(source.length() - commonLength);
93 source = source.substring(0, source.length() - commonLength);
94 target = target.substring(0, target.length() - commonLength);
95
96 // Compute the diff on the middle block
97 diffs = compute();
98
99 // Restore the prefix and suffix
100 if (!"".equals(commonPrefix)) {
101 diffs.add(0, new Difference(EditType.EQUAL, commonPrefix));
102 }
103
104 if (!"".equals(commonSuffix)) {
105 diffs.add(new Difference(EditType.EQUAL, commonSuffix));
106 }
107
108 DiffCleanup.cleanupMerge(diffs);
109
110 return diffs;
111 }
112
113 /**
114 * Find the differences between two texts.
115 *
116 * @return List of Difference objects
117 */
118 private List<Difference> compute() {
119 List<Difference> diffs = new ArrayList<Difference>();
120
121 if ("".equals(source)) {
122 // Just add some text (speedup)
123 diffs.add(new Difference(EditType.INSERT, target));
124 return diffs;
125 }
126
127 if ("".equals(target)) {
128 // Just delete some text (speedup)
129 diffs.add(new Difference(EditType.DELETE, source));
130 return diffs;
131 }
132
133 String longText = source.length() > target.length() ? source : target;
134 String shortText = source.length() > target.length() ? target : source;
135 int i = longText.indexOf(shortText);
136 if (i != -1) {
137 // Shorter text is inside the longer text (speedup)
138 EditType editType = (source.length() > target.length()) ? EditType.DELETE : EditType.INSERT;
139 diffs.add(new Difference(editType, longText.substring(0, i)));
140 diffs.add(new Difference(EditType.EQUAL, shortText));
141 diffs.add(new Difference(editType, longText.substring(i + shortText.length())));
142 return diffs;
143 }
144
145 // Check to see if the problem can be split in two.
146 CommonMiddle middleMatch = Commonality.halfMatch(source, target);
147 if (middleMatch != null) {
148 // A half-match was found, sort out the return data.
149 // Send both pairs off for separate processing.
150 Diff startDiff = new Diff(middleMatch.getSourcePrefix(), middleMatch.getTargetPrefix(), checkLines);
151 Diff endDiff = new Diff(middleMatch.getSourceSuffix(), middleMatch.getTargetSuffix(), checkLines);
152 // Merge the results.
153 diffs = startDiff.compare();
154 diffs.add(new Difference(EditType.EQUAL, middleMatch.getCommonality()));
155 diffs.addAll(endDiff.compare());
156 return diffs;
157 }
158
159 // Perform a real diff.
160 if (checkLines && source.length() + target.length() < 250) {
161 checkLines = false; // Too trivial for the overhead.
162 }
163
164 LineMap lineMap = null;
165 if (checkLines) {
166 // Scan the text on a line-by-line basis first.
167 lineMap = new LineMap(source, target);
168 source = lineMap.getSourceMap();
169 target = lineMap.getTargetMap();
170 }
171
172 diffs = new DifferenceEngine(source, target).generate();
173
174 if (diffs == null) {
175 // No acceptable result.
176 diffs = new ArrayList<Difference>();
177 diffs.add(new Difference(EditType.DELETE, source));
178 diffs.add(new Difference(EditType.INSERT, target));
179 }
180
181 if (checkLines && lineMap != null) {
182 // Convert the diff back to original text.
183 lineMap.restore(diffs);
184 // Eliminate freak matches (e.g. blank lines)
185 DiffCleanup.cleanupSemantic(diffs);
186
187 // Rediff any replacement blocks, this time character-by-character.
188 // Add a dummy entry at the end.
189 diffs.add(new Difference(EditType.EQUAL, ""));
190 int countDeletes = 0;
191 int countInserts = 0;
192 StringBuilder textDelete = new StringBuilder();
193 StringBuilder textInsert = new StringBuilder();
194 ListIterator<Difference> pointer = diffs.listIterator();
195 Difference curDiff = pointer.next();
196 while (curDiff != null) {
197 EditType editType = curDiff.getEditType();
198 if (EditType.INSERT.equals(editType)) {
199 countInserts++;
200 textInsert.append(curDiff.getText());
201 } else if (EditType.DELETE.equals(editType)) {
202 countDeletes++;
203 textDelete.append(curDiff.getText());
204 } else {
205 // Upon reaching an equality, check for prior redundancies.
206 if (countDeletes >= 1 && countInserts >= 1) {
207 // Delete the offending records and add the merged ones.
208 pointer.previous();
209 for (int j = 0; j < countDeletes + countInserts; j++) {
210 pointer.previous();
211 pointer.remove();
212 }
213 Diff newDiff = new Diff(textDelete.toString(), textInsert.toString(), false);
214 for (Difference diff : newDiff.compare()) {
215 pointer.add(diff);
216 }
217 }
218 countInserts = 0;
219 countDeletes = 0;
220 textDelete.delete(0, textDelete.length());
221 textInsert.delete(0, textInsert.length());
222 }
223 curDiff = pointer.hasNext() ? pointer.next() : null;
224 }
225 diffs.remove(diffs.size() - 1); // Remove the dummy entry at the
226 // end.
227 }
228 return diffs;
229 }
230
231 /**
232 * loc is a location in source, compute and return the equivalent location
233 * in target. e.g. "The cat" vs "The big cat", 1->1, 5->8
234 *
235 * @param diffs
236 * List of Difference objects
237 * @param loc
238 * Location within source
239 * @return Location within target
240 */
241 public int xIndex(final List<Difference> diffs, final int loc) {
242 int chars1 = 0;
243 int chars2 = 0;
244 int lastChars1 = 0;
245 int lastChars2 = 0;
246 Difference lastDiff = null;
247 for (Difference diff : diffs) {
248 EditType editType = diff.getEditType();
249
250 // Equality or deletion?
251 if (!EditType.INSERT.equals(editType)) {
252 chars1 += diff.getText().length();
253 }
254
255 // Equality or insertion?
256 if (!EditType.DELETE.equals(editType)) {
257 chars2 += diff.getText().length();
258 }
259
260 // Overshot the location?
261 if (chars1 > loc) {
262 lastDiff = diff;
263 break;
264 }
265 lastChars1 = chars1;
266 lastChars2 = chars2;
267 }
268
269 // Was the location was deleted?
270 if (lastDiff != null && EditType.DELETE.equals(lastDiff.getEditType())) {
271 return lastChars2;
272 }
273
274 // Add the remaining character length.
275 return lastChars2 + (loc - lastChars1);
276 }
277
278 /**
279 * Convert a Difference list into a pretty HTML report.
280 *
281 * @param diffs
282 * List of Difference objects
283 * @return HTML representation
284 */
285 public String prettyHtml(List<Difference> diffs) {
286 StringBuilder buf = new StringBuilder();
287 for (int x = 0; x < diffs.size(); x++) {
288 Difference diff = diffs.get(x);
289 EditType editType = diff.getEditType(); // Mode (delete, equal,
290 // insert)
291 String text = diff.getText(); // Text of change.
292 // TODO(DMS): Do replacements
293 // text = text.replace(/&/g, "&");
294 // text = text.replace(/</g, "<");
295 // text = text.replace(/>/g, ">");
296 // text = text.replace(/\n/g, "¶<BR>");
297 if (EditType.DELETE.equals(editType)) {
298 buf.append("<del style=\"background:#FFE6E6;\">");
299 buf.append(text);
300 buf.append("</del>");
301 } else if (EditType.INSERT.equals(editType)) {
302 buf.append("<ins style=\"background:#E6FFE6;\">");
303 buf.append(text);
304 buf.append("</ins>");
305 } else {
306 buf.append("<span>");
307 buf.append(text);
308 buf.append("</span>");
309 }
310 }
311 return buf.toString();
312 }
313
314 /**
315 * The baseline text.
316 */
317 private String source;
318
319 /**
320 * The changed text.
321 */
322 private String target;
323
324 /**
325 * Whether a slightly faster less optimal diff should be run.
326 */
327 private boolean checkLines;
328 }
329