1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 or later
5    * as published by the Free Software Foundation. This program is distributed
6    * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
7    * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *      http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * © CrossWire Bible Society, 2007 - 2016
18   *
19   */
20  package org.crosswire.common.diff;
21  
22  import java.util.ArrayList;
23  import java.util.List;
24  import java.util.ListIterator;
25  
26  /**
27   * Computes the difference between two texts to create a list of differences.
28   * 
29   * Based on the LGPL Diff_Match_Patch v1.5 javascript of Neil Fraser, Copyright (C) 2006<br>
30   * <a href="http://neil.fraser.name/software/diff_match_patch/">http://neil.fraser.name/software/diff_match_patch/</a>
31   * 
32   * @see gnu.lgpl.License The GNU Lesser General Public License for details.
33   * @author DM Smith
34   */
35  public class Diff {
36      /**
37       * Construct an object that can find the differences between two texts. Run
38       * a faster slightly less optimal diff. This constructor allows the
39       * 'checkLines' to be optional. Most of the time checkLines is wanted, so
40       * default to true.
41       * 
42       * @param source
43       *            Old string to be diffed
44       * @param target
45       *            New string to be diffed
46       */
47      public Diff(final String source, final String target) {
48          this(source, target, true);
49      }
50  
51      /**
52       * Construct an object that can find the differences between two texts.
53       * 
54       * @param source
55       *            Old string to be diffed
56       * @param target
57       *            New string to be diffed
58       * @param checkLines
59       *            Speedup flag. If false, then don't run a line-level diff first
60       *            to identify the changed areas. If true, then run a faster
61       *            slightly less optimal diff
62       */
63      public Diff(final String source, final String target, final boolean checkLines) {
64          this.source = source;
65          this.target = target;
66          this.checkLines = checkLines;
67      }
68  
69      /**
70       * Find the differences between two texts. Simplifies the problem by
71       * stripping any common prefix or suffix off the texts before diffing.
72       * 
73       * @return List of Difference objects
74       */
75      public List<Difference> compare() {
76          // Check for equality (speedup)
77          List<Difference> diffs;
78          if (source.equals(target)) {
79              diffs = new ArrayList<Difference>();
80              diffs.add(new Difference(EditType.EQUAL, source));
81              return diffs;
82          }
83  
84          // Trim off common prefix (speedup)
85          int commonLength = Commonality.prefix(source, target);
86          String commonPrefix = source.substring(0, commonLength);
87          source = source.substring(commonLength);
88          target = target.substring(commonLength);
89  
90          // Trim off common suffix (speedup)
91          commonLength = Commonality.suffix(source, target);
92          String commonSuffix = source.substring(source.length() - commonLength);
93          source = source.substring(0, source.length() - commonLength);
94          target = target.substring(0, target.length() - commonLength);
95  
96          // Compute the diff on the middle block
97          diffs = compute();
98  
99          // Restore the prefix and suffix
100         if (!"".equals(commonPrefix)) {
101             diffs.add(0, new Difference(EditType.EQUAL, commonPrefix));
102         }
103 
104         if (!"".equals(commonSuffix)) {
105             diffs.add(new Difference(EditType.EQUAL, commonSuffix));
106         }
107 
108         DiffCleanup.cleanupMerge(diffs);
109 
110         return diffs;
111     }
112 
113     /**
114      * Find the differences between two texts.
115      * 
116      * @return List of Difference objects
117      */
118     private List<Difference> compute() {
119         List<Difference> diffs = new ArrayList<Difference>();
120 
121         if ("".equals(source)) {
122             // Just add some text (speedup)
123             diffs.add(new Difference(EditType.INSERT, target));
124             return diffs;
125         }
126 
127         if ("".equals(target)) {
128             // Just delete some text (speedup)
129             diffs.add(new Difference(EditType.DELETE, source));
130             return diffs;
131         }
132 
133         String longText = source.length() > target.length() ? source : target;
134         String shortText = source.length() > target.length() ? target : source;
135         int i = longText.indexOf(shortText);
136         if (i != -1) {
137             // Shorter text is inside the longer text (speedup)
138             EditType editType = (source.length() > target.length()) ? EditType.DELETE : EditType.INSERT;
139             diffs.add(new Difference(editType, longText.substring(0, i)));
140             diffs.add(new Difference(EditType.EQUAL, shortText));
141             diffs.add(new Difference(editType, longText.substring(i + shortText.length())));
142             return diffs;
143         }
144 
145         // Check to see if the problem can be split in two.
146         CommonMiddle middleMatch = Commonality.halfMatch(source, target);
147         if (middleMatch != null) {
148             // A half-match was found, sort out the return data.
149             // Send both pairs off for separate processing.
150             Diff startDiff = new Diff(middleMatch.getSourcePrefix(), middleMatch.getTargetPrefix(), checkLines);
151             Diff endDiff = new Diff(middleMatch.getSourceSuffix(), middleMatch.getTargetSuffix(), checkLines);
152             // Merge the results.
153             diffs = startDiff.compare();
154             diffs.add(new Difference(EditType.EQUAL, middleMatch.getCommonality()));
155             diffs.addAll(endDiff.compare());
156             return diffs;
157         }
158 
159         // Perform a real diff.
160         if (checkLines && source.length() + target.length() < 250) {
161             checkLines = false; // Too trivial for the overhead.
162         }
163 
164         LineMap lineMap = null;
165         if (checkLines) {
166             // Scan the text on a line-by-line basis first.
167             lineMap = new LineMap(source, target);
168             source = lineMap.getSourceMap();
169             target = lineMap.getTargetMap();
170         }
171 
172         diffs = new DifferenceEngine(source, target).generate();
173 
174         if (diffs == null) {
175             // No acceptable result.
176             diffs = new ArrayList<Difference>();
177             diffs.add(new Difference(EditType.DELETE, source));
178             diffs.add(new Difference(EditType.INSERT, target));
179         }
180 
181         if (checkLines && lineMap != null) {
182             // Convert the diff back to original text.
183             lineMap.restore(diffs);
184             // Eliminate freak matches (e.g. blank lines)
185             DiffCleanup.cleanupSemantic(diffs);
186 
187             // Rediff any replacement blocks, this time character-by-character.
188             // Add a dummy entry at the end.
189             diffs.add(new Difference(EditType.EQUAL, ""));
190             int countDeletes = 0;
191             int countInserts = 0;
192             StringBuilder textDelete = new StringBuilder();
193             StringBuilder textInsert = new StringBuilder();
194             ListIterator<Difference> pointer = diffs.listIterator();
195             Difference curDiff = pointer.next();
196             while (curDiff != null) {
197                 EditType editType = curDiff.getEditType();
198                 if (EditType.INSERT.equals(editType)) {
199                     countInserts++;
200                     textInsert.append(curDiff.getText());
201                 } else if (EditType.DELETE.equals(editType)) {
202                     countDeletes++;
203                     textDelete.append(curDiff.getText());
204                 } else {
205                     // Upon reaching an equality, check for prior redundancies.
206                     if (countDeletes >= 1 && countInserts >= 1) {
207                         // Delete the offending records and add the merged ones.
208                         pointer.previous();
209                         for (int j = 0; j < countDeletes + countInserts; j++) {
210                             pointer.previous();
211                             pointer.remove();
212                         }
213                         Diff newDiff = new Diff(textDelete.toString(), textInsert.toString(), false);
214                         for (Difference diff : newDiff.compare()) {
215                             pointer.add(diff);
216                         }
217                     }
218                     countInserts = 0;
219                     countDeletes = 0;
220                     textDelete.delete(0, textDelete.length());
221                     textInsert.delete(0, textInsert.length());
222                 }
223                 curDiff = pointer.hasNext() ? pointer.next() : null;
224             }
225             diffs.remove(diffs.size() - 1); // Remove the dummy entry at the
226             // end.
227         }
228         return diffs;
229     }
230 
231     /**
232      * loc is a location in source, compute and return the equivalent location
233      * in target. e.g. "The cat" vs "The big cat", 1-&gt;1, 5-&gt;8
234      * 
235      * @param diffs
236      *            List of Difference objects
237      * @param loc
238      *            Location within source
239      * @return Location within target
240      */
241     public int xIndex(final List<Difference> diffs, final int loc) {
242         int chars1 = 0;
243         int chars2 = 0;
244         int lastChars1 = 0;
245         int lastChars2 = 0;
246         Difference lastDiff = null;
247         for (Difference diff : diffs) {
248             EditType editType = diff.getEditType();
249 
250             // Equality or deletion?
251             if (!EditType.INSERT.equals(editType)) {
252                 chars1 += diff.getText().length();
253             }
254 
255             // Equality or insertion?
256             if (!EditType.DELETE.equals(editType)) {
257                 chars2 += diff.getText().length();
258             }
259 
260             // Overshot the location?
261             if (chars1 > loc) {
262                 lastDiff = diff;
263                 break;
264             }
265             lastChars1 = chars1;
266             lastChars2 = chars2;
267         }
268 
269         // Was the location was deleted?
270         if (lastDiff != null && EditType.DELETE.equals(lastDiff.getEditType())) {
271             return lastChars2;
272         }
273 
274         // Add the remaining character length.
275         return lastChars2 + (loc - lastChars1);
276     }
277 
278     /**
279      * Convert a Difference list into a pretty HTML report.
280      * 
281      * @param diffs
282      *            List of Difference objects
283      * @return HTML representation
284      */
285     public String prettyHtml(List<Difference> diffs) {
286         StringBuilder buf = new StringBuilder();
287         for (int x = 0; x < diffs.size(); x++) {
288             Difference diff = diffs.get(x);
289             EditType editType = diff.getEditType(); // Mode (delete, equal,
290             // insert)
291             String text = diff.getText(); // Text of change.
292             // TODO(DMS): Do replacements
293             // text = text.replace(/&/g, "&amp;");
294             // text = text.replace(/</g, "&lt;");
295             // text = text.replace(/>/g, "&gt;");
296             // text = text.replace(/\n/g, "&para;<BR>");
297             if (EditType.DELETE.equals(editType)) {
298                 buf.append("<del style=\"background:#FFE6E6;\">");
299                 buf.append(text);
300                 buf.append("</del>");
301             } else if (EditType.INSERT.equals(editType)) {
302                 buf.append("<ins style=\"background:#E6FFE6;\">");
303                 buf.append(text);
304                 buf.append("</ins>");
305             } else {
306                 buf.append("<span>");
307                 buf.append(text);
308                 buf.append("</span>");
309             }
310         }
311         return buf.toString();
312     }
313 
314     /**
315      * The baseline text.
316      */
317     private String source;
318 
319     /**
320      * The changed text.
321      */
322     private String target;
323 
324     /**
325      * Whether a slightly faster less optimal diff should be run.
326      */
327     private boolean checkLines;
328 }
329