| Patch.java |
1 /**
2 * Distribution License:
3 * JSword is free software; you can redistribute it and/or modify it under
4 * the terms of the GNU Lesser General Public License, version 2.1 as published by
5 * the Free Software Foundation. This program is distributed in the hope
6 * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8 * See the GNU Lesser General Public License for more details.
9 *
10 * The License is available on the internet at:
11 * http://www.gnu.org/copyleft/lgpl.html
12 * or by writing to:
13 * Free Software Foundation, Inc.
14 * 59 Temple Place - Suite 330
15 * Boston, MA 02111-1307, USA
16 *
17 * Copyright: 2007
18 * The copyright to this program is held by it's authors.
19 *
20 * ID: $Id: Patch.java 2149 2011-04-09 15:51:45Z dmsmith $
21 */
22 package org.crosswire.common.diff;
23
24 import java.util.ArrayList;
25 import java.util.List;
26 import java.util.ListIterator;
27 import java.util.regex.Matcher;
28 import java.util.regex.Pattern;
29
30 /**
31 * Marshals a patch to a list of Differences, Differences to a patch and applies
32 * a list of differences to text to patch it.
33 *
34 * Based on the LGPL Diff_Match_Patch v1.5 javascript of Neil Fraser, Copyright (C) 2006<br>
35 * <a href="http://neil.fraser.name/software/diff_match_patch/">http://neil.fraser.name/software/diff_match_patch/</a>
36 *
37 * @see gnu.lgpl.License for license details.<br>
38 * The copyright to this program is held by it's authors.
39 * @author DM Smith [dmsmith555 at yahoo dot com]
40 */
41 public class Patch {
42 /**
43 * Create an empty patch.
44 */
45 public Patch() {
46 patches = new ArrayList<PatchEntry>();
47 margin = PatchEntry.getMargin();
48 }
49
50 /**
51 * Create a Patch from a textual representation,
52 *
53 * @param input
54 * Text representation of patches
55 */
56 public Patch(String input) {
57 this();
58 fromText(input);
59 }
60
61 /**
62 * Create a patch that can turn text1 into text2.
63 *
64 * @param source
65 * Old text
66 * @param target
67 * New text
68 */
69 public Patch(String source, String target) {
70 this(source, target, null);
71 }
72
73 /**
74 * Create a patch that can turn text1 into text2. Use the diffs provided, if
75 * not null. Compute diffs otherwise.
76 *
77 * @param source
78 * Old text
79 * @param target
80 * New text
81 * @param diffs
82 * Optional array of diff tuples for text1 to text2.
83 */
84 public Patch(String source, String target, List<Difference> diffs) {
85 this();
86 make(source, target, diffs);
87 }
88
89 /**
90 * Compute a list of patches to turn text1 into text2. Use the diffs
91 * provided.
92 *
93 * @param source
94 * Old text
95 * @param target
96 * New text
97 * @param diffList
98 * Optional array of diff tuples for text1 to text2.
99 * @return this patch
100 */
101 public Patch make(String source, String target, List<Difference> diffList) {
102 List<Difference> diffs = diffList;
103 if (diffs == null) {
104 Diff diff = new Diff(source, target);
105 diffs = diff.compare();
106 if (diffs.size() > 2) {
107 DiffCleanup.cleanupSemantic(diffs);
108 DiffCleanup.cleanupEfficiency(diffs);
109 }
110 }
111
112 patches.clear();
113
114 if (diffs.isEmpty()) {
115 return this; // Get rid of the null case.
116 }
117
118 PatchEntry patch = new PatchEntry();
119 int charCount1 = 0; // Number of characters into the text1 string.
120 int charCount2 = 0; // Number of characters into the text2 string.
121 // Recreate the patches to determine context info.
122 String prePatchText = source;
123 String postPatchText = source;
124 int x = 0;
125 for (Difference diff : diffs) {
126 EditType editType = diff.getEditType();
127 String diffText = diff.getText();
128 int len = diffText.length();
129
130 if (!patch.hasDifferences() && !EditType.EQUAL.equals(editType)) {
131 // A new patch starts here.
132 patch.setSourceStart(charCount1);
133 patch.setTargetStart(charCount2);
134 }
135
136 if (EditType.INSERT.equals(editType)) {
137 // Insertion
138 patch.addDifference(diff);
139 patch.adjustTargetLength(len);
140 postPatchText = postPatchText.substring(0, charCount2) + diffText + postPatchText.substring(charCount2);
141 } else if (EditType.DELETE.equals(editType)) {
142 // Deletion.
143 patch.adjustSourceLength(len);
144 patch.addDifference(diff);
145 postPatchText = postPatchText.substring(0, charCount2) + postPatchText.substring(charCount2 + len);
146 } else if (EditType.EQUAL.equals(editType) && len <= 2 * margin && patch.hasDifferences() && diffs.size() != x + 1) {
147 // Small equality inside a patch.
148 patch.addDifference(diff);
149 patch.adjustSourceLength(len);
150 patch.adjustTargetLength(len);
151 }
152
153 // Time for a new patch.
154 if (EditType.EQUAL.equals(editType) && len >= 2 * margin && patch.hasDifferences()) {
155 patch.addContext(prePatchText);
156 patches.add(patch);
157 patch = new PatchEntry();
158 prePatchText = postPatchText;
159 }
160
161 // Update the current character count.
162 if (!EditType.INSERT.equals(editType)) {
163 charCount1 += len;
164 }
165
166 if (!EditType.DELETE.equals(editType)) {
167 charCount2 += len;
168 }
169
170 x++;
171 }
172
173 // Pick up the leftover patch if not empty.
174 if (patch.hasDifferences()) {
175 patch.addContext(prePatchText);
176 patches.add(patch);
177 }
178
179 return this;
180 }
181
182 /**
183 * Merge this patch onto the text. Return a patched text, as well as an
184 * array of true/false values indicating which patches were applied.
185 *
186 * @param text
187 * Old text
188 * @return the patch result
189 */
190 public PatchResults apply(String text) {
191 splitMax();
192 boolean[] results = new boolean[patches.size()];
193 String resultText = text;
194 int delta = 0;
195 int expectedLoc = 0;
196 int startLoc = -1;
197 String text1 = "";
198 String text2 = "";
199 List<Difference> diffs;
200 int index1 = 0;
201 int index2 = 0;
202 int x = 0;
203 for (PatchEntry aPatch : patches) {
204 expectedLoc = aPatch.getTargetStart() + delta;
205 text1 = aPatch.getSourceText();
206 Match match = new Match(resultText, text1, expectedLoc);
207 startLoc = match.locate();
208 if (startLoc == -1) {
209 // No match found. :(
210 results[x] = false;
211 } else {
212 // Found a match. :)
213 results[x] = true;
214 delta = startLoc - expectedLoc;
215 text2 = resultText.substring(startLoc, startLoc + text1.length());
216 if (text1.equals(text2)) {
217 // Perfect match, just shove the replacement text in.
218 resultText = resultText.substring(0, startLoc) + aPatch.getTargetText() + resultText.substring(startLoc + text1.length());
219 } else {
220 // Imperfect match. Run a diff to get a framework of
221 // equivalent indicies.
222 Diff diff = new Diff(text1, text2, false);
223 diffs = diff.compare();
224 index1 = 0;
225 for (Difference aDiff : aPatch) {
226 EditType editType = aDiff.getEditType();
227 if (!EditType.EQUAL.equals(editType)) {
228 index2 = diff.xIndex(diffs, index1);
229 }
230
231 if (EditType.INSERT.equals(editType)) {
232 // Insertion
233 resultText = resultText.substring(0, startLoc + index2) + aDiff.getText() + resultText.substring(startLoc + index2);
234 } else if (EditType.DELETE.equals(editType)) {
235 // Deletion
236 resultText = resultText.substring(0, startLoc + index2)
237 + resultText.substring(startLoc + diff.xIndex(diffs, index1 + aDiff.getText().length()));
238 }
239
240 if (!EditType.DELETE.equals(editType)) {
241 index1 += aDiff.getText().length();
242 }
243 }
244 }
245 }
246 x++;
247 }
248 return new PatchResults(resultText, results);
249 }
250
251 /**
252 * Look through the patches and break up any which are longer than the
253 * maximum limit of the match algorithm.
254 */
255 public void splitMax() {
256 int maxPatternLength = new Match().maxPatternLength();
257 ListIterator<PatchEntry> pointer = patches.listIterator();
258 PatchEntry bigPatch = pointer.hasNext() ? pointer.next() : null;
259 while (bigPatch != null) {
260 if (bigPatch.getSourceLength() <= maxPatternLength) {
261 if (!pointer.hasNext()) {
262 break;
263 }
264
265 bigPatch = pointer.next();
266 }
267
268 // Remove the big old patch.
269 pointer.remove();
270 int patchSize = maxPatternLength;
271 int start1 = bigPatch.getSourceStart();
272 int start2 = bigPatch.getTargetStart();
273 String preContext = "";
274 while (bigPatch.hasDifferences()) {
275 // Create one of several smaller patches.
276 PatchEntry patch = new PatchEntry();
277 boolean empty = true;
278
279 int len = preContext.length();
280 patch.setSourceStart(start1 - len);
281 patch.setTargetStart(start2 - len);
282 if (len > 0) {
283 patch.setSourceLength(len);
284 patch.setTargetLength(len);
285 patch.addDifference(new Difference(EditType.EQUAL, preContext));
286 }
287
288 while (bigPatch.hasDifferences() && patch.getSourceLength() < patchSize - margin) {
289 Difference bigDiff = bigPatch.getFirstDifference();
290 EditType editType = bigDiff.getEditType();
291 String diffText = bigDiff.getText();
292 if (EditType.INSERT.equals(editType)) {
293 // Insertions are harmless.
294 len = diffText.length();
295 patch.adjustTargetLength(len);
296 start2 += len;
297 patch.addDifference(bigPatch.removeFirstDifference());
298 empty = false;
299 } else {
300 // Deletion or equality. Only take as much as we can
301 // stomach.
302 diffText = diffText.substring(0, Math.min(diffText.length(), patchSize - patch.getSourceLength() - margin));
303 len = diffText.length();
304 patch.adjustSourceLength(len);
305 start1 += len;
306 if (EditType.EQUAL.equals(editType)) {
307 patch.adjustTargetLength(len);
308 start2 += len;
309 } else {
310 empty = false;
311 }
312
313 patch.addDifference(new Difference(editType, diffText));
314
315 if (diffText.equals(bigDiff.getText())) {
316 bigPatch.removeFirstDifference();
317 } else {
318 bigDiff.setText(bigDiff.getText().substring(len));
319 }
320 }
321 }
322
323 // Compute the head context for the next patch.
324 preContext = patch.getTargetText();
325 preContext = preContext.substring(Math.max(0, preContext.length() - margin));
326
327 // Append the end context for this patch.
328 String postcontext = null;
329 if (bigPatch.getSourceText().length() > margin) {
330 postcontext = bigPatch.getSourceText().substring(0, margin);
331 } else {
332 postcontext = bigPatch.getSourceText();
333 }
334 if (postcontext.length() > 0) {
335 patch.adjustSourceLength(postcontext.length());
336 patch.adjustTargetLength(postcontext.length());
337 if (patch.getDifferenceCount() > 0 && EditType.EQUAL.equals(patch.getLastDifference().getEditType())) {
338 Difference diff = patch.getLastDifference();
339 diff.appendText(postcontext);
340 } else {
341 patch.addDifference(new Difference(EditType.EQUAL, postcontext));
342 }
343 }
344
345 if (!empty) {
346 pointer.add(patch);
347 }
348 }
349
350 bigPatch = pointer.hasNext() ? pointer.next() : null;
351 }
352 }
353
354 /**
355 * Take a list of patches and return a textual representation.
356 *
357 * @return Text representation of patches.
358 */
359 public String toText() {
360 StringBuilder text = new StringBuilder();
361 for (PatchEntry entry : patches) {
362 text.append(entry);
363 }
364 return text.toString();
365 }
366
367 /**
368 * Parse a textual representation of patches and return a List of Patch
369 * objects.
370 *
371 * @param input
372 * Text representation of patches
373 * @return List of Patch objects
374 */
375 public Patch fromText(String input) {
376 patches.clear();
377
378 Matcher m = patchBoundaryPattern.matcher(input);
379
380 // Add segments before each match found
381 int index = 0;
382 while (m.find()) {
383 int start = m.start();
384 String match = input.substring(index, start);
385 patches.add(new PatchEntry(match));
386 index = start + 1;
387 }
388
389 if (index == 0) {
390 // No match was found, the patch consists of the entire string
391 patches.add(new PatchEntry(input));
392 } else {
393 // Add remaining segment
394 patches.add(new PatchEntry(input.substring(index)));
395 }
396
397 return this;
398 }
399
400 /**
401 * A holder of the results of a patch, with a results indicating which patch
402 * entries were able to be applied.
403 */
404 public static class PatchResults {
405 /**
406 * @param text
407 * @param results
408 */
409 public PatchResults(String text, boolean[] results) {
410 this.text = text;
411 this.results = results.clone();
412 }
413
414 /**
415 * @return the results
416 */
417 public boolean[] getResults() {
418 return results.clone();
419 }
420
421 /**
422 * @return the text
423 */
424 public String getText() {
425 return text;
426 }
427
428 private String text;
429 private boolean[] results;
430 }
431
432 // Ideally we'd like to have the @@ be merely a look-ahead, but it doesn't
433 // work that way with split.
434 private static Pattern patchBoundaryPattern = Pattern.compile("\n@@");
435
436 private List<PatchEntry> patches;
437 private int margin;
438 }
439