1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 or later
5    * as published by the Free Software Foundation. This program is distributed
6    * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
7    * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *      http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * © CrossWire Bible Society, 2007 - 2016
18   *
19   */
20  package org.crosswire.common.diff;
21  
22  import java.util.ArrayList;
23  import java.util.HashMap;
24  import java.util.HashSet;
25  import java.util.List;
26  import java.util.Map;
27  import java.util.Set;
28  
29  /**
30   * Builds a map of a baseline/source text and a changed/target text, navigating
31   * it to determine differences.
32   * 
33   * Based on the LGPL Diff_Match_Patch v1.5 javascript of Neil Fraser, Copyright (C) 2006<br>
34   * <a href="http://neil.fraser.name/software/diff_match_patch/">http://neil.fraser.name/software/diff_match_patch/</a>
35   * 
36   * @see gnu.lgpl.License The GNU Lesser General Public License for details.
37   * @author DM Smith
38   */
39  public class DifferenceEngine {
40  
41      /**
42       * Empty Difference Engine, which won't find anything.
43       */
44      public DifferenceEngine() {
45          this("", "");
46      }
47  
48      /**
49       * Find the differences between two texts. Simplifies the problem by
50       * stripping any common prefix or suffix off the texts before diffing.
51       * 
52       * @param source
53       *            Old string to be diffed
54       * @param target
55       *            New string to be diffed
56       */
57      public DifferenceEngine(final String source, final String target) {
58          this.source = source;
59          this.target = target;
60          this.sourceLength = source.length();
61          this.targetLength = target.length();
62      }
63  
64      /**
65       * Explore the intersection points between the two texts.
66       * 
67       * @return List of Difference objects or null if no diff available
68       */
69      public List<Difference> generate() {
70          long msEnd = System.currentTimeMillis() + (long) (timeout * 1000);
71          int maxD = (this.sourceLength + this.targetLength) / 2;
72          List<Set<String>> vMap1 = new ArrayList<Set<String>>();
73          List<Set<String>> vMap2 = new ArrayList<Set<String>>();
74          Map<Integer, Integer> v1 = new HashMap<Integer, Integer>();
75          Map<Integer, Integer> v2 = new HashMap<Integer, Integer>();
76          v1.put(Integer.valueOf(1), Integer.valueOf(0));
77          v2.put(Integer.valueOf(1), Integer.valueOf(0));
78          int x;
79          int y;
80          String footstep; // Used to track overlapping paths.
81          Map<String, Integer> footsteps = new HashMap<String, Integer>();
82          boolean done = false;
83          // If the total number of characters is odd, then the front path will
84          // collide with the reverse path.
85          boolean front = (this.sourceLength + this.targetLength) % 2 != 0;
86          for (int d = 0; d < maxD; d++) {
87              // Bail out if timeout reached.
88              if (timeout > 0 && System.currentTimeMillis() > msEnd) {
89                  return null;
90              }
91  
92              // Walk the front path one step.
93              vMap1.add(new HashSet<String>()); // Adds at index 'd'.
94              for (int k = -d; k <= d; k += 2) {
95                  Integer kPlus1Key = Integer.valueOf(k + 1);
96                  Integer kPlus1Value = v1.get(kPlus1Key);
97                  Integer kMinus1Key = Integer.valueOf(k - 1);
98                  Integer kMinus1Value = v1.get(kMinus1Key);
99                  if (k == -d || k != d && kMinus1Value.intValue() < kPlus1Value.intValue()) {
100                     x = kPlus1Value.intValue();
101                 } else {
102                     x = kMinus1Value.intValue() + 1;
103                 }
104                 y = x - k;
105                 footstep = x + "," + y;
106                 if (front && (footsteps.containsKey(footstep))) {
107                     done = true;
108                 }
109                 if (!front) {
110                     footsteps.put(footstep, Integer.valueOf(d));
111                 }
112                 while (!done && x < this.sourceLength && y < this.targetLength && source.charAt(x) == target.charAt(y)) {
113                     x++;
114                     y++;
115                     footstep = x + "," + y;
116                     if (front && footsteps.containsKey(footstep)) {
117                         done = true;
118                     }
119                     if (!front) {
120                         footsteps.put(footstep, Integer.valueOf(d));
121                     }
122                 }
123                 v1.put(Integer.valueOf(k), Integer.valueOf(x));
124                 Set<String> s = vMap1.get(d);
125                 s.add(x + "," + y);
126                 if (done) {
127                     // Front path ran over reverse path.
128                     Integer footstepValue = footsteps.get(footstep);
129                     vMap2 = vMap2.subList(0, footstepValue.intValue() + 1);
130                     List<Difference> a = path1(vMap1, source.substring(0, x), target.substring(0, y));
131                     a.addAll(path2(vMap2, source.substring(x), target.substring(y)));
132                     return a;
133                 }
134             }
135 
136             // Walk the reverse path one step.
137             vMap2.add(new HashSet<String>()); // Adds at index 'd'.
138             for (int k = -d; k <= d; k += 2) {
139                 Integer kPlus1Key = Integer.valueOf(k + 1);
140                 Integer kPlus1Value = v2.get(kPlus1Key);
141                 Integer kMinus1Key = Integer.valueOf(k - 1);
142                 Integer kMinus1Value = v2.get(kMinus1Key);
143                 if (k == -d || k != d && kMinus1Value.intValue() < kPlus1Value.intValue()) {
144                     x = kPlus1Value.intValue();
145                 } else {
146                     x = kMinus1Value.intValue() + 1;
147                 }
148                 y = x - k;
149                 footstep = (this.sourceLength - x) + "," + (this.targetLength - y);
150                 if (!front && (footsteps.containsKey(footstep))) {
151                     done = true;
152                 }
153                 if (front) {
154                     footsteps.put(footstep, Integer.valueOf(d));
155                 }
156                 while (!done && x < this.sourceLength && y < this.targetLength && source.charAt(this.sourceLength - x - 1) == target.charAt(this.targetLength - y - 1)) {
157                     x++;
158                     y++;
159                     footstep = (this.sourceLength - x) + "," + (this.targetLength - y);
160                     if (!front && (footsteps.containsKey(footstep))) {
161                         done = true;
162                     }
163                     if (front) {
164                         footsteps.put(footstep, Integer.valueOf(d));
165                     }
166                 }
167 
168                 v2.put(Integer.valueOf(k), Integer.valueOf(x));
169                 Set<String> s = vMap2.get(d);
170                 s.add(x + "," + y);
171                 if (done) {
172                     // Reverse path ran over front path.
173                     Integer footstepValue = footsteps.get(footstep);
174                     vMap1 = vMap1.subList(0, footstepValue.intValue() + 1);
175                     List<Difference> a = path1(vMap1, source.substring(0, this.sourceLength - x), target.substring(0, this.targetLength - y));
176                     a.addAll(path2(vMap2, source.substring(this.sourceLength - x), target.substring(this.targetLength - y)));
177                     return a;
178                 }
179             }
180         }
181 
182         // Number of diffs equals number of characters, no commonality at all.
183         return null;
184     }
185 
186     /**
187      * Work from the middle back to the start to determine the path.
188      * 
189      * @param vMap
190      *            List of path sets.
191      * @param newSource
192      *            Old string fragment to be diffed
193      * @param newTarget
194      *            New string fragment to be diffed
195      * @return List of Difference objects
196      */
197     protected List<Difference> path1(final List<Set<String>> vMap, final String newSource, final String newTarget) {
198         List<Difference> path = new ArrayList<Difference>();
199         int x = newSource.length();
200         int y = newTarget.length();
201         EditType lastEditType = null;
202         for (int d = vMap.size() - 2; d >= 0; d--) {
203             while (true) {
204                 Set<String> set = vMap.get(d);
205                 if (set.contains((x - 1) + "," + y)) {
206                     x--;
207                     if (EditType.DELETE.equals(lastEditType)) {
208                         Difference firstDiff = path.get(0);
209                         firstDiff.prependText(newSource.charAt(x));
210                     } else {
211                         path.add(0, new Difference(EditType.DELETE, newSource.substring(x, x + 1)));
212                     }
213                     lastEditType = EditType.DELETE;
214                     break;
215                 } else if (set.contains(x + "," + (y - 1))) {
216                     y--;
217                     if (EditType.INSERT.equals(lastEditType)) {
218                         Difference firstDiff = path.get(0);
219                         firstDiff.prependText(newTarget.charAt(y));
220                     } else {
221                         path.add(0, new Difference(EditType.INSERT, newTarget.substring(y, y + 1)));
222                     }
223                     lastEditType = EditType.INSERT;
224                     break;
225                 } else {
226                     x--;
227                     y--;
228                     assert newSource.charAt(x) == newTarget.charAt(y) : "No diagonal.  Can't happen. (path1)";
229                     if (EditType.EQUAL.equals(lastEditType)) {
230                         Difference firstDiff = path.get(0);
231                         firstDiff.prependText(newSource.charAt(x));
232                     } else {
233                         path.add(0, new Difference(EditType.EQUAL, newSource.substring(x, x + 1)));
234                     }
235                     lastEditType = EditType.EQUAL;
236                 }
237             }
238         }
239         return path;
240     }
241 
242     /**
243      * Work from the middle back to the end to determine the path.
244      * 
245      * @param vMap
246      *            List of path sets.
247      * @param newSource
248      *            Old string fragment to be diffed
249      * @param newTarget
250      *            New string fragment to be diffed
251      * @return List of Difference objects
252      */
253     protected List<Difference> path2(final List<Set<String>> vMap, final String newSource, final String newTarget) {
254         List<Difference> path = new ArrayList<Difference>();
255 
256         //cached versions of length from immutable strings
257         final int cachedNewSourceLength = newSource.length();
258         final int cachedNewTargetLength = newTarget.length();
259 
260         int x = cachedNewSourceLength;
261         int y = cachedNewTargetLength;
262         EditType lastEditType = null;
263         for (int d = vMap.size() - 2; d >= 0; d--) {
264             while (true) {
265                 Set<String> set = vMap.get(d);
266                 if (set.contains((x - 1) + "," + y)) {
267                     x--;
268                     if (EditType.DELETE.equals(lastEditType)) {
269                         Difference lastDiff = path.get(path.size() - 1);
270                         lastDiff.appendText(newSource.charAt(cachedNewSourceLength - x - 1));
271                     } else {
272                         path.add(new Difference(EditType.DELETE, newSource.substring(cachedNewSourceLength - x - 1, cachedNewSourceLength - x)));
273                     }
274                     lastEditType = EditType.DELETE;
275                     break;
276                 } else if (set.contains(x + "," + (y - 1))) {
277                     y--;
278                     if (EditType.INSERT.equals(lastEditType)) {
279                         Difference lastDiff = path.get(path.size() - 1);
280                         lastDiff.appendText(newTarget.charAt(cachedNewTargetLength - y - 1));
281                     } else {
282                         path.add(new Difference(EditType.INSERT, newTarget.substring(cachedNewTargetLength - y - 1, cachedNewTargetLength - y)));
283                     }
284                     lastEditType = EditType.INSERT;
285                     break;
286                 } else {
287                     x--;
288                     y--;
289                     assert newSource.charAt(cachedNewSourceLength - x - 1) == newTarget.charAt(cachedNewTargetLength - y - 1) : "No diagonal.  Can't happen. (path2)";
290 
291                     if (EditType.EQUAL.equals(lastEditType)) {
292                         Difference lastDiff = path.get(path.size() - 1);
293                         lastDiff.appendText(newSource.charAt(cachedNewSourceLength - x - 1));
294                     } else {
295                         path.add(new Difference(EditType.EQUAL, newSource.substring(cachedNewSourceLength - x - 1, cachedNewSourceLength - x)));
296                     }
297                     lastEditType = EditType.EQUAL;
298                 }
299             }
300         }
301         return path;
302     }
303 
304     /**
305      * Set the timeout for the diff operation. The default is 1 second. Use 0
306      * for infinity.
307      * 
308      * @param newTimeout the new timeout
309      */
310     public static void setTimeout(float newTimeout) {
311         timeout = newTimeout;
312     }
313 
314     /**
315      * Number of seconds to map a diff before giving up. Use 0 for infinity.
316      */
317     private static final float TIMEOUT = 1.0f;
318     private static float timeout = TIMEOUT;
319     /**
320      * Made final because we now rely on caching the string lengths (they could be cached
321      * at method level if require non-final in future)
322      * The baseline text.
323      */
324     private final String source;
325 
326     /**
327      * The changed text.
328      * 
329      */
330     private final String target;
331 
332     // cached versions of source and target length, 
333     // as for roughly 1 call to generate(), was causing 117'000 calls to length()
334     private final int targetLength;
335     private final int sourceLength;
336 
337 }
338