1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 as published by
5    * the Free Software Foundation. This program is distributed in the hope
6    * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7    * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *       http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * Copyright: 2007
18   *     The copyright to this program is held by it's authors.
19   *
20   * ID: $Id: Diff.java 2149 2011-04-09 15:51:45Z dmsmith $
21   */
22  package org.crosswire.common.diff;
23  
24  import java.util.ArrayList;
25  import java.util.List;
26  import java.util.ListIterator;
27  
28  /**
29   * Computes the difference between two texts to create a list of differences.
30   * 
31   * Based on the LGPL Diff_Match_Patch v1.5 javascript of Neil Fraser, Copyright (C) 2006<br>
32   * <a href="http://neil.fraser.name/software/diff_match_patch/">http://neil.fraser.name/software/diff_match_patch/</a>
33   * 
34   * @see gnu.lgpl.License for license details.<br>
35   *      The copyright to this program is held by it's authors.
36   * @author DM Smith [dmsmith555 at yahoo dot com]
37   */
38  public class Diff {
39      /**
40       * Construct an object that can find the differences between two texts. Run
41       * a faster slightly less optimal diff. This constructor allows the
42       * 'checkLines' to be optional. Most of the time checkLines is wanted, so
43       * default to true.
44       * 
45       * @param source
46       *            Old string to be diffed
47       * @param target
48       *            New string to be diffed
49       */
50      public Diff(final String source, final String target) {
51          this(source, target, true);
52      }
53  
54      /**
55       * Construct an object that can find the differences between two texts.
56       * 
57       * @param source
58       *            Old string to be diffed
59       * @param target
60       *            New string to be diffed
61       * @param checkLines
62       *            Speedup flag. If false, then don't run a line-level diff first
63       *            to identify the changed areas. If true, then run a faster
64       *            slightly less optimal diff
65       */
66      public Diff(final String source, final String target, final boolean checkLines) {
67          this.source = source;
68          this.target = target;
69          this.checkLines = checkLines;
70      }
71  
72      /**
73       * Find the differences between two texts. Simplifies the problem by
74       * stripping any common prefix or suffix off the texts before diffing.
75       * 
76       * @return List of Difference objects
77       */
78      public List<Difference> compare() {
79          // Check for equality (speedup)
80          List<Difference> diffs;
81          if (source.equals(target)) {
82              diffs = new ArrayList<Difference>();
83              diffs.add(new Difference(EditType.EQUAL, source));
84              return diffs;
85          }
86  
87          // Trim off common prefix (speedup)
88          int commonLength = Commonality.prefix(source, target);
89          String commonPrefix = source.substring(0, commonLength);
90          source = source.substring(commonLength);
91          target = target.substring(commonLength);
92  
93          // Trim off common suffix (speedup)
94          commonLength = Commonality.suffix(source, target);
95          String commonSuffix = source.substring(source.length() - commonLength);
96          source = source.substring(0, source.length() - commonLength);
97          target = target.substring(0, target.length() - commonLength);
98  
99          // Compute the diff on the middle block
100         diffs = compute();
101 
102         // Restore the prefix and suffix
103         if (!"".equals(commonPrefix)) {
104             diffs.add(0, new Difference(EditType.EQUAL, commonPrefix));
105         }
106 
107         if (!"".equals(commonSuffix)) {
108             diffs.add(new Difference(EditType.EQUAL, commonSuffix));
109         }
110 
111         DiffCleanup.cleanupMerge(diffs);
112 
113         return diffs;
114     }
115 
116     /**
117      * Find the differences between two texts.
118      * 
119      * @return List of Difference objects
120      */
121     private List<Difference> compute() {
122         List<Difference> diffs = new ArrayList<Difference>();
123 
124         if ("".equals(source)) {
125             // Just add some text (speedup)
126             diffs.add(new Difference(EditType.INSERT, target));
127             return diffs;
128         }
129 
130         if ("".equals(target)) {
131             // Just delete some text (speedup)
132             diffs.add(new Difference(EditType.DELETE, source));
133             return diffs;
134         }
135 
136         String longText = source.length() > target.length() ? source : target;
137         String shortText = source.length() > target.length() ? target : source;
138         int i = longText.indexOf(shortText);
139         if (i != -1) {
140             // Shorter text is inside the longer text (speedup)
141             EditType editType = (source.length() > target.length()) ? EditType.DELETE : EditType.INSERT;
142             diffs.add(new Difference(editType, longText.substring(0, i)));
143             diffs.add(new Difference(EditType.EQUAL, shortText));
144             diffs.add(new Difference(editType, longText.substring(i + shortText.length())));
145             return diffs;
146         }
147 
148         // Check to see if the problem can be split in two.
149         CommonMiddle middleMatch = Commonality.halfMatch(source, target);
150         if (middleMatch != null) {
151             // A half-match was found, sort out the return data.
152             // Send both pairs off for separate processing.
153             Diff startDiff = new Diff(middleMatch.getSourcePrefix(), middleMatch.getTargetPrefix(), checkLines);
154             Diff endDiff = new Diff(middleMatch.getSourceSuffix(), middleMatch.getTargetSuffix(), checkLines);
155             // Merge the results.
156             diffs = startDiff.compare();
157             diffs.add(new Difference(EditType.EQUAL, middleMatch.getCommonality()));
158             diffs.addAll(endDiff.compare());
159             return diffs;
160         }
161 
162         // Perform a real diff.
163         if (checkLines && source.length() + target.length() < 250) {
164             checkLines = false; // Too trivial for the overhead.
165         }
166 
167         LineMap lineMap = null;
168         if (checkLines) {
169             // Scan the text on a line-by-line basis first.
170             lineMap = new LineMap(source, target);
171             source = lineMap.getSourceMap();
172             target = lineMap.getTargetMap();
173         }
174 
175         diffs = new DifferenceEngine(source, target).generate();
176 
177         if (diffs == null) {
178             // No acceptable result.
179             diffs = new ArrayList<Difference>();
180             diffs.add(new Difference(EditType.DELETE, source));
181             diffs.add(new Difference(EditType.INSERT, target));
182         }
183 
184         if (checkLines && lineMap != null) {
185             // Convert the diff back to original text.
186             lineMap.restore(diffs);
187             // Eliminate freak matches (e.g. blank lines)
188             DiffCleanup.cleanupSemantic(diffs);
189 
190             // Rediff any replacement blocks, this time character-by-character.
191             // Add a dummy entry at the end.
192             diffs.add(new Difference(EditType.EQUAL, ""));
193             int countDeletes = 0;
194             int countInserts = 0;
195             StringBuilder textDelete = new StringBuilder();
196             StringBuilder textInsert = new StringBuilder();
197             ListIterator<Difference> pointer = diffs.listIterator();
198             Difference curDiff = pointer.next();
199             while (curDiff != null) {
200                 EditType editType = curDiff.getEditType();
201                 if (EditType.INSERT.equals(editType)) {
202                     countInserts++;
203                     textInsert.append(curDiff.getText());
204                 } else if (EditType.DELETE.equals(editType)) {
205                     countDeletes++;
206                     textDelete.append(curDiff.getText());
207                 } else {
208                     // Upon reaching an equality, check for prior redundancies.
209                     if (countDeletes >= 1 && countInserts >= 1) {
210                         // Delete the offending records and add the merged ones.
211                         pointer.previous();
212                         for (int j = 0; j < countDeletes + countInserts; j++) {
213                             pointer.previous();
214                             pointer.remove();
215                         }
216                         Diff newDiff = new Diff(textDelete.toString(), textInsert.toString(), false);
217                         for (Difference diff : newDiff.compare()) {
218                             pointer.add(diff);
219                         }
220                     }
221                     countInserts = 0;
222                     countDeletes = 0;
223                     textDelete.delete(0, textDelete.length());
224                     textInsert.delete(0, textInsert.length());
225                 }
226                 curDiff = pointer.hasNext() ? pointer.next() : null;
227             }
228             diffs.remove(diffs.size() - 1); // Remove the dummy entry at the
229             // end.
230         }
231         return diffs;
232     }
233 
234     /**
235      * loc is a location in source, compute and return the equivalent location
236      * in target. e.g. "The cat" vs "The big cat", 1->1, 5->8
237      * 
238      * @param diffs
239      *            List of Difference objects
240      * @param loc
241      *            Location within source
242      * @return Location within target
243      */
244     public int xIndex(final List<Difference> diffs, final int loc) {
245         int chars1 = 0;
246         int chars2 = 0;
247         int lastChars1 = 0;
248         int lastChars2 = 0;
249         Difference lastDiff = null;
250         for (Difference diff : diffs) {
251             EditType editType = diff.getEditType();
252 
253             // Equality or deletion?
254             if (!EditType.INSERT.equals(editType)) {
255                 chars1 += diff.getText().length();
256             }
257 
258             // Equality or insertion?
259             if (!EditType.DELETE.equals(editType)) {
260                 chars2 += diff.getText().length();
261             }
262 
263             // Overshot the location?
264             if (chars1 > loc) {
265                 lastDiff = diff;
266                 break;
267             }
268             lastChars1 = chars1;
269             lastChars2 = chars2;
270         }
271 
272         // Was the location was deleted?
273         if (lastDiff != null && EditType.DELETE.equals(lastDiff.getEditType())) {
274             return lastChars2;
275         }
276 
277         // Add the remaining character length.
278         return lastChars2 + (loc - lastChars1);
279     }
280 
281     /**
282      * Convert a Difference list into a pretty HTML report.
283      * 
284      * @param diffs
285      *            List of Difference objects
286      * @return HTML representation
287      */
288     public String prettyHtml(List<Difference> diffs) {
289         StringBuilder buf = new StringBuilder();
290         for (int x = 0; x < diffs.size(); x++) {
291             Difference diff = diffs.get(x);
292             EditType editType = diff.getEditType(); // Mode (delete, equal,
293             // insert)
294             String text = diff.getText(); // Text of change.
295             // TODO(DMS): Do replacements
296             // text = text.replace(/&/g, "&amp;");
297             // text = text.replace(/</g, "&lt;");
298             // text = text.replace(/>/g, "&gt;");
299             // text = text.replace(/\n/g, "&para;<BR>");
300             if (EditType.DELETE.equals(editType)) {
301                 buf.append("<del style=\"background:#FFE6E6;\">");
302                 buf.append(text);
303                 buf.append("</del>");
304             } else if (EditType.INSERT.equals(editType)) {
305                 buf.append("<ins style=\"background:#E6FFE6;\">");
306                 buf.append(text);
307                 buf.append("</ins>");
308             } else {
309                 buf.append("<span>");
310                 buf.append(text);
311                 buf.append("</span>");
312             }
313         }
314         return buf.toString();
315     }
316 
317     /**
318      * The baseline text.
319      */
320     private String source;
321 
322     /**
323      * The changed text.
324      */
325     private String target;
326 
327     /**
328      * Whether a slightly faster less optimal diff should be run.
329      */
330     private boolean checkLines;
331 }
332