Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
Diff |
|
| 7.5;7.5 |
1 | /** | |
2 | * Distribution License: | |
3 | * JSword is free software; you can redistribute it and/or modify it under | |
4 | * the terms of the GNU Lesser General Public License, version 2.1 or later | |
5 | * as published by the Free Software Foundation. This program is distributed | |
6 | * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even | |
7 | * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
8 | * See the GNU Lesser General Public License for more details. | |
9 | * | |
10 | * The License is available on the internet at: | |
11 | * http://www.gnu.org/copyleft/lgpl.html | |
12 | * or by writing to: | |
13 | * Free Software Foundation, Inc. | |
14 | * 59 Temple Place - Suite 330 | |
15 | * Boston, MA 02111-1307, USA | |
16 | * | |
17 | * © CrossWire Bible Society, 2007 - 2016 | |
18 | * | |
19 | */ | |
20 | package org.crosswire.common.diff; | |
21 | ||
22 | import java.util.ArrayList; | |
23 | import java.util.List; | |
24 | import java.util.ListIterator; | |
25 | ||
26 | /** | |
27 | * Computes the difference between two texts to create a list of differences. | |
28 | * | |
29 | * Based on the LGPL Diff_Match_Patch v1.5 javascript of Neil Fraser, Copyright (C) 2006<br> | |
30 | * <a href="http://neil.fraser.name/software/diff_match_patch/">http://neil.fraser.name/software/diff_match_patch/</a> | |
31 | * | |
32 | * @see gnu.lgpl.License The GNU Lesser General Public License for details. | |
33 | * @author DM Smith | |
34 | */ | |
35 | public class Diff { | |
36 | /** | |
37 | * Construct an object that can find the differences between two texts. Run | |
38 | * a faster slightly less optimal diff. This constructor allows the | |
39 | * 'checkLines' to be optional. Most of the time checkLines is wanted, so | |
40 | * default to true. | |
41 | * | |
42 | * @param source | |
43 | * Old string to be diffed | |
44 | * @param target | |
45 | * New string to be diffed | |
46 | */ | |
47 | public Diff(final String source, final String target) { | |
48 | 0 | this(source, target, true); |
49 | 0 | } |
50 | ||
51 | /** | |
52 | * Construct an object that can find the differences between two texts. | |
53 | * | |
54 | * @param source | |
55 | * Old string to be diffed | |
56 | * @param target | |
57 | * New string to be diffed | |
58 | * @param checkLines | |
59 | * Speedup flag. If false, then don't run a line-level diff first | |
60 | * to identify the changed areas. If true, then run a faster | |
61 | * slightly less optimal diff | |
62 | */ | |
63 | 0 | public Diff(final String source, final String target, final boolean checkLines) { |
64 | 0 | this.source = source; |
65 | 0 | this.target = target; |
66 | 0 | this.checkLines = checkLines; |
67 | 0 | } |
68 | ||
69 | /** | |
70 | * Find the differences between two texts. Simplifies the problem by | |
71 | * stripping any common prefix or suffix off the texts before diffing. | |
72 | * | |
73 | * @return List of Difference objects | |
74 | */ | |
75 | public List<Difference> compare() { | |
76 | // Check for equality (speedup) | |
77 | List<Difference> diffs; | |
78 | 0 | if (source.equals(target)) { |
79 | 0 | diffs = new ArrayList<Difference>(); |
80 | 0 | diffs.add(new Difference(EditType.EQUAL, source)); |
81 | 0 | return diffs; |
82 | } | |
83 | ||
84 | // Trim off common prefix (speedup) | |
85 | 0 | int commonLength = Commonality.prefix(source, target); |
86 | 0 | String commonPrefix = source.substring(0, commonLength); |
87 | 0 | source = source.substring(commonLength); |
88 | 0 | target = target.substring(commonLength); |
89 | ||
90 | // Trim off common suffix (speedup) | |
91 | 0 | commonLength = Commonality.suffix(source, target); |
92 | 0 | String commonSuffix = source.substring(source.length() - commonLength); |
93 | 0 | source = source.substring(0, source.length() - commonLength); |
94 | 0 | target = target.substring(0, target.length() - commonLength); |
95 | ||
96 | // Compute the diff on the middle block | |
97 | 0 | diffs = compute(); |
98 | ||
99 | // Restore the prefix and suffix | |
100 | 0 | if (!"".equals(commonPrefix)) { |
101 | 0 | diffs.add(0, new Difference(EditType.EQUAL, commonPrefix)); |
102 | } | |
103 | ||
104 | 0 | if (!"".equals(commonSuffix)) { |
105 | 0 | diffs.add(new Difference(EditType.EQUAL, commonSuffix)); |
106 | } | |
107 | ||
108 | 0 | DiffCleanup.cleanupMerge(diffs); |
109 | ||
110 | 0 | return diffs; |
111 | } | |
112 | ||
113 | /** | |
114 | * Find the differences between two texts. | |
115 | * | |
116 | * @return List of Difference objects | |
117 | */ | |
118 | private List<Difference> compute() { | |
119 | 0 | List<Difference> diffs = new ArrayList<Difference>(); |
120 | ||
121 | 0 | if ("".equals(source)) { |
122 | // Just add some text (speedup) | |
123 | 0 | diffs.add(new Difference(EditType.INSERT, target)); |
124 | 0 | return diffs; |
125 | } | |
126 | ||
127 | 0 | if ("".equals(target)) { |
128 | // Just delete some text (speedup) | |
129 | 0 | diffs.add(new Difference(EditType.DELETE, source)); |
130 | 0 | return diffs; |
131 | } | |
132 | ||
133 | 0 | String longText = source.length() > target.length() ? source : target; |
134 | 0 | String shortText = source.length() > target.length() ? target : source; |
135 | 0 | int i = longText.indexOf(shortText); |
136 | 0 | if (i != -1) { |
137 | // Shorter text is inside the longer text (speedup) | |
138 | 0 | EditType editType = (source.length() > target.length()) ? EditType.DELETE : EditType.INSERT; |
139 | 0 | diffs.add(new Difference(editType, longText.substring(0, i))); |
140 | 0 | diffs.add(new Difference(EditType.EQUAL, shortText)); |
141 | 0 | diffs.add(new Difference(editType, longText.substring(i + shortText.length()))); |
142 | 0 | return diffs; |
143 | } | |
144 | ||
145 | // Check to see if the problem can be split in two. | |
146 | 0 | CommonMiddle middleMatch = Commonality.halfMatch(source, target); |
147 | 0 | if (middleMatch != null) { |
148 | // A half-match was found, sort out the return data. | |
149 | // Send both pairs off for separate processing. | |
150 | 0 | Diff startDiff = new Diff(middleMatch.getSourcePrefix(), middleMatch.getTargetPrefix(), checkLines); |
151 | 0 | Diff endDiff = new Diff(middleMatch.getSourceSuffix(), middleMatch.getTargetSuffix(), checkLines); |
152 | // Merge the results. | |
153 | 0 | diffs = startDiff.compare(); |
154 | 0 | diffs.add(new Difference(EditType.EQUAL, middleMatch.getCommonality())); |
155 | 0 | diffs.addAll(endDiff.compare()); |
156 | 0 | return diffs; |
157 | } | |
158 | ||
159 | // Perform a real diff. | |
160 | 0 | if (checkLines && source.length() + target.length() < 250) { |
161 | 0 | checkLines = false; // Too trivial for the overhead. |
162 | } | |
163 | ||
164 | 0 | LineMap lineMap = null; |
165 | 0 | if (checkLines) { |
166 | // Scan the text on a line-by-line basis first. | |
167 | 0 | lineMap = new LineMap(source, target); |
168 | 0 | source = lineMap.getSourceMap(); |
169 | 0 | target = lineMap.getTargetMap(); |
170 | } | |
171 | ||
172 | 0 | diffs = new DifferenceEngine(source, target).generate(); |
173 | ||
174 | 0 | if (diffs == null) { |
175 | // No acceptable result. | |
176 | 0 | diffs = new ArrayList<Difference>(); |
177 | 0 | diffs.add(new Difference(EditType.DELETE, source)); |
178 | 0 | diffs.add(new Difference(EditType.INSERT, target)); |
179 | } | |
180 | ||
181 | 0 | if (checkLines && lineMap != null) { |
182 | // Convert the diff back to original text. | |
183 | 0 | lineMap.restore(diffs); |
184 | // Eliminate freak matches (e.g. blank lines) | |
185 | 0 | DiffCleanup.cleanupSemantic(diffs); |
186 | ||
187 | // Rediff any replacement blocks, this time character-by-character. | |
188 | // Add a dummy entry at the end. | |
189 | 0 | diffs.add(new Difference(EditType.EQUAL, "")); |
190 | 0 | int countDeletes = 0; |
191 | 0 | int countInserts = 0; |
192 | 0 | StringBuilder textDelete = new StringBuilder(); |
193 | 0 | StringBuilder textInsert = new StringBuilder(); |
194 | 0 | ListIterator<Difference> pointer = diffs.listIterator(); |
195 | 0 | Difference curDiff = pointer.next(); |
196 | 0 | while (curDiff != null) { |
197 | 0 | EditType editType = curDiff.getEditType(); |
198 | 0 | if (EditType.INSERT.equals(editType)) { |
199 | 0 | countInserts++; |
200 | 0 | textInsert.append(curDiff.getText()); |
201 | 0 | } else if (EditType.DELETE.equals(editType)) { |
202 | 0 | countDeletes++; |
203 | 0 | textDelete.append(curDiff.getText()); |
204 | } else { | |
205 | // Upon reaching an equality, check for prior redundancies. | |
206 | 0 | if (countDeletes >= 1 && countInserts >= 1) { |
207 | // Delete the offending records and add the merged ones. | |
208 | 0 | pointer.previous(); |
209 | 0 | for (int j = 0; j < countDeletes + countInserts; j++) { |
210 | 0 | pointer.previous(); |
211 | 0 | pointer.remove(); |
212 | } | |
213 | 0 | Diff newDiff = new Diff(textDelete.toString(), textInsert.toString(), false); |
214 | 0 | for (Difference diff : newDiff.compare()) { |
215 | 0 | pointer.add(diff); |
216 | } | |
217 | } | |
218 | 0 | countInserts = 0; |
219 | 0 | countDeletes = 0; |
220 | 0 | textDelete.delete(0, textDelete.length()); |
221 | 0 | textInsert.delete(0, textInsert.length()); |
222 | } | |
223 | 0 | curDiff = pointer.hasNext() ? pointer.next() : null; |
224 | 0 | } |
225 | 0 | diffs.remove(diffs.size() - 1); // Remove the dummy entry at the |
226 | // end. | |
227 | } | |
228 | 0 | return diffs; |
229 | } | |
230 | ||
231 | /** | |
232 | * loc is a location in source, compute and return the equivalent location | |
233 | * in target. e.g. "The cat" vs "The big cat", 1->1, 5->8 | |
234 | * | |
235 | * @param diffs | |
236 | * List of Difference objects | |
237 | * @param loc | |
238 | * Location within source | |
239 | * @return Location within target | |
240 | */ | |
241 | public int xIndex(final List<Difference> diffs, final int loc) { | |
242 | 0 | int chars1 = 0; |
243 | 0 | int chars2 = 0; |
244 | 0 | int lastChars1 = 0; |
245 | 0 | int lastChars2 = 0; |
246 | 0 | Difference lastDiff = null; |
247 | 0 | for (Difference diff : diffs) { |
248 | 0 | EditType editType = diff.getEditType(); |
249 | ||
250 | // Equality or deletion? | |
251 | 0 | if (!EditType.INSERT.equals(editType)) { |
252 | 0 | chars1 += diff.getText().length(); |
253 | } | |
254 | ||
255 | // Equality or insertion? | |
256 | 0 | if (!EditType.DELETE.equals(editType)) { |
257 | 0 | chars2 += diff.getText().length(); |
258 | } | |
259 | ||
260 | // Overshot the location? | |
261 | 0 | if (chars1 > loc) { |
262 | 0 | lastDiff = diff; |
263 | 0 | break; |
264 | } | |
265 | 0 | lastChars1 = chars1; |
266 | 0 | lastChars2 = chars2; |
267 | 0 | } |
268 | ||
269 | // Was the location was deleted? | |
270 | 0 | if (lastDiff != null && EditType.DELETE.equals(lastDiff.getEditType())) { |
271 | 0 | return lastChars2; |
272 | } | |
273 | ||
274 | // Add the remaining character length. | |
275 | 0 | return lastChars2 + (loc - lastChars1); |
276 | } | |
277 | ||
278 | /** | |
279 | * Convert a Difference list into a pretty HTML report. | |
280 | * | |
281 | * @param diffs | |
282 | * List of Difference objects | |
283 | * @return HTML representation | |
284 | */ | |
285 | public String prettyHtml(List<Difference> diffs) { | |
286 | 0 | StringBuilder buf = new StringBuilder(); |
287 | 0 | for (int x = 0; x < diffs.size(); x++) { |
288 | 0 | Difference diff = diffs.get(x); |
289 | 0 | EditType editType = diff.getEditType(); // Mode (delete, equal, |
290 | // insert) | |
291 | 0 | String text = diff.getText(); // Text of change. |
292 | // TODO(DMS): Do replacements | |
293 | // text = text.replace(/&/g, "&"); | |
294 | // text = text.replace(/</g, "<"); | |
295 | // text = text.replace(/>/g, ">"); | |
296 | // text = text.replace(/\n/g, "¶<BR>"); | |
297 | 0 | if (EditType.DELETE.equals(editType)) { |
298 | 0 | buf.append("<del style=\"background:#FFE6E6;\">"); |
299 | 0 | buf.append(text); |
300 | 0 | buf.append("</del>"); |
301 | 0 | } else if (EditType.INSERT.equals(editType)) { |
302 | 0 | buf.append("<ins style=\"background:#E6FFE6;\">"); |
303 | 0 | buf.append(text); |
304 | 0 | buf.append("</ins>"); |
305 | } else { | |
306 | 0 | buf.append("<span>"); |
307 | 0 | buf.append(text); |
308 | 0 | buf.append("</span>"); |
309 | } | |
310 | } | |
311 | 0 | return buf.toString(); |
312 | } | |
313 | ||
314 | /** | |
315 | * The baseline text. | |
316 | */ | |
317 | private String source; | |
318 | ||
319 | /** | |
320 | * The changed text. | |
321 | */ | |
322 | private String target; | |
323 | ||
324 | /** | |
325 | * Whether a slightly faster less optimal diff should be run. | |
326 | */ | |
327 | private boolean checkLines; | |
328 | } |