1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 as published by
5    * the Free Software Foundation. This program is distributed in the hope
6    * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7    * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *       http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * Copyright: 2007
18   *     The copyright to this program is held by it's authors.
19   *
20   * ID: $Id: DifferenceEngine.java 2149 2011-04-09 15:51:45Z dmsmith $
21   */
22  package org.crosswire.common.diff;
23  
24  import java.util.ArrayList;
25  import java.util.HashMap;
26  import java.util.HashSet;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.Set;
30  
31  /**
32   * Builds a map of a baseline/source text and a changed/target text, navigating
33   * it to determine differences.
34   * 
35   * Based on the LGPL Diff_Match_Patch v1.5 javascript of Neil Fraser, Copyright (C) 2006<br>
36   * <a href="http://neil.fraser.name/software/diff_match_patch/">http://neil.fraser.name/software/diff_match_patch/</a>
37   * 
38   * @see gnu.lgpl.License for license details.<br>
39   *      The copyright to this program is held by it's authors.
40   * @author DM Smith [dmsmith555 at yahoo dot com]
41   */
42  public class DifferenceEngine {
43      /**
44       * Empty Difference Engine, which won't find anything.
45       */
46      public DifferenceEngine() {
47          this("", "");
48      }
49  
50      /**
51       * Find the differences between two texts. Simplifies the problem by
52       * stripping any common prefix or suffix off the texts before diffing.
53       * 
54       * @param source
55       *            Old string to be diffed
56       * @param target
57       *            New string to be diffed
58       */
59      public DifferenceEngine(final String source, final String target) {
60          this.source = source;
61          this.target = target;
62      }
63  
64      /**
65       * @param newSource
66       *            the source to set
67       */
68      public void setSource(String newSource) {
69          this.source = newSource;
70      }
71  
72      /**
73       * @param newTarget
74       *            the target to set
75       */
76      public void setTarget(String newTarget) {
77          this.target = newTarget;
78      }
79  
80      /**
81       * Explore the intersection points between the two texts.
82       * 
83       * @return List of Difference objects or null if no diff available
84       */
85      public List<Difference> generate() {
86          long msEnd = System.currentTimeMillis() + (long) (timeout * 1000);
87          int maxD = (source.length() + target.length()) / 2;
88          List<Set<String>> vMap1 = new ArrayList<Set<String>>();
89          List<Set<String>> vMap2 = new ArrayList<Set<String>>();
90          Map<Integer, Integer> v1 = new HashMap<Integer, Integer>();
91          Map<Integer, Integer> v2 = new HashMap<Integer, Integer>();
92          v1.put(Integer.valueOf(1), Integer.valueOf(0));
93          v2.put(Integer.valueOf(1), Integer.valueOf(0));
94          int x;
95          int y;
96          String footstep; // Used to track overlapping paths.
97          Map<String, Integer> footsteps = new HashMap<String, Integer>();
98          boolean done = false;
99          // If the total number of characters is odd, then the front path will
100         // collide with the reverse path.
101         boolean front = (source.length() + target.length()) % 2 != 0;
102         for (int d = 0; d < maxD; d++) {
103             // Bail out if timeout reached.
104             if (timeout > 0 && System.currentTimeMillis() > msEnd) {
105                 return null;
106             }
107 
108             // Walk the front path one step.
109             vMap1.add(new HashSet<String>()); // Adds at index 'd'.
110             for (int k = -d; k <= d; k += 2) {
111                 Integer kPlus1Key = Integer.valueOf(k + 1);
112                 Integer kPlus1Value = v1.get(kPlus1Key);
113                 Integer kMinus1Key = Integer.valueOf(k - 1);
114                 Integer kMinus1Value = v1.get(kMinus1Key);
115                 if (k == -d || k != d && kMinus1Value.intValue() < kPlus1Value.intValue()) {
116                     x = kPlus1Value.intValue();
117                 } else {
118                     x = kMinus1Value.intValue() + 1;
119                 }
120                 y = x - k;
121                 footstep = x + "," + y;
122                 if (front && (footsteps.containsKey(footstep))) {
123                     done = true;
124                 }
125                 if (!front) {
126                     footsteps.put(footstep, Integer.valueOf(d));
127                 }
128                 while (!done && x < source.length() && y < target.length() && source.charAt(x) == target.charAt(y)) {
129                     x++;
130                     y++;
131                     footstep = x + "," + y;
132                     if (front && footsteps.containsKey(footstep)) {
133                         done = true;
134                     }
135                     if (!front) {
136                         footsteps.put(footstep, Integer.valueOf(d));
137                     }
138                 }
139                 v1.put(Integer.valueOf(k), Integer.valueOf(x));
140                 Set<String> s = vMap1.get(d);
141                 s.add(x + "," + y);
142                 if (done) {
143                     // Front path ran over reverse path.
144                     Integer footstepValue = footsteps.get(footstep);
145                     vMap2 = vMap2.subList(0, footstepValue.intValue() + 1);
146                     List<Difference> a = path1(vMap1, source.substring(0, x), target.substring(0, y));
147                     a.addAll(path2(vMap2, source.substring(x), target.substring(y)));
148                     return a;
149                 }
150             }
151 
152             // Walk the reverse path one step.
153             vMap2.add(new HashSet<String>()); // Adds at index 'd'.
154             for (int k = -d; k <= d; k += 2) {
155                 Integer kPlus1Key = Integer.valueOf(k + 1);
156                 Integer kPlus1Value = v2.get(kPlus1Key);
157                 Integer kMinus1Key = Integer.valueOf(k - 1);
158                 Integer kMinus1Value = v2.get(kMinus1Key);
159                 if (k == -d || k != d && kMinus1Value.intValue() < kPlus1Value.intValue()) {
160                     x = kPlus1Value.intValue();
161                 } else {
162                     x = kMinus1Value.intValue() + 1;
163                 }
164                 y = x - k;
165                 footstep = (source.length() - x) + "," + (target.length() - y);
166                 if (!front && (footsteps.containsKey(footstep))) {
167                     done = true;
168                 }
169                 if (front) {
170                     footsteps.put(footstep, Integer.valueOf(d));
171                 }
172                 while (!done && x < source.length() && y < target.length() && source.charAt(source.length() - x - 1) == target.charAt(target.length() - y - 1)) {
173                     x++;
174                     y++;
175                     footstep = (source.length() - x) + "," + (target.length() - y);
176                     if (!front && (footsteps.containsKey(footstep))) {
177                         done = true;
178                     }
179                     if (front) {
180                         footsteps.put(footstep, Integer.valueOf(d));
181                     }
182                 }
183 
184                 v2.put(Integer.valueOf(k), Integer.valueOf(x));
185                 Set<String> s = vMap2.get(d);
186                 s.add(x + "," + y);
187                 if (done) {
188                     // Reverse path ran over front path.
189                     Integer footstepValue = footsteps.get(footstep);
190                     vMap1 = vMap1.subList(0, footstepValue.intValue() + 1);
191                     List<Difference> a = path1(vMap1, source.substring(0, source.length() - x), target.substring(0, target.length() - y));
192                     a.addAll(path2(vMap2, source.substring(source.length() - x), target.substring(target.length() - y)));
193                     return a;
194                 }
195             }
196         }
197 
198         // Number of diffs equals number of characters, no commonality at all.
199         return null;
200     }
201 
202     /**
203      * Work from the middle back to the start to determine the path.
204      * 
205      * @param vMap
206      *            List of path sets.
207      * @param newSource
208      *            Old string fragment to be diffed
209      * @param newTarget
210      *            New string fragment to be diffed
211      * @return List of Difference objects
212      */
213     protected List<Difference> path1(final List<Set<String>> vMap, final String newSource, final String newTarget) {
214         List<Difference> path = new ArrayList<Difference>();
215         int x = newSource.length();
216         int y = newTarget.length();
217         EditType lastEditType = null;
218         for (int d = vMap.size() - 2; d >= 0; d--) {
219             while (true) {
220                 Set<String> set = vMap.get(d);
221                 if (set.contains((x - 1) + "," + y)) {
222                     x--;
223                     if (EditType.DELETE.equals(lastEditType)) {
224                         Difference firstDiff = path.get(0);
225                         firstDiff.prependText(newSource.charAt(x));
226                     } else {
227                         path.add(0, new Difference(EditType.DELETE, newSource.substring(x, x + 1)));
228                     }
229                     lastEditType = EditType.DELETE;
230                     break;
231                 } else if (set.contains(x + "," + (y - 1))) {
232                     y--;
233                     if (EditType.INSERT.equals(lastEditType)) {
234                         Difference firstDiff = path.get(0);
235                         firstDiff.prependText(newTarget.charAt(y));
236                     } else {
237                         path.add(0, new Difference(EditType.INSERT, newTarget.substring(y, y + 1)));
238                     }
239                     lastEditType = EditType.INSERT;
240                     break;
241                 } else {
242                     x--;
243                     y--;
244                     assert newSource.charAt(x) == newTarget.charAt(y) : "No diagonal.  Can't happen. (path1)";
245                     if (EditType.EQUAL.equals(lastEditType)) {
246                         Difference firstDiff = path.get(0);
247                         firstDiff.prependText(newSource.charAt(x));
248                     } else {
249                         path.add(0, new Difference(EditType.EQUAL, newSource.substring(x, x + 1)));
250                     }
251                     lastEditType = EditType.EQUAL;
252                 }
253             }
254         }
255         return path;
256     }
257 
258     /**
259      * Work from the middle back to the end to determine the path.
260      * 
261      * @param vMap
262      *            List of path sets.
263      * @param newSource
264      *            Old string fragment to be diffed
265      * @param newTarget
266      *            New string fragment to be diffed
267      * @return List of Difference objects
268      */
269     protected List<Difference> path2(final List<Set<String>> vMap, final String newSource, final String newTarget) {
270         List<Difference> path = new ArrayList<Difference>();
271         int x = newSource.length();
272         int y = newTarget.length();
273         EditType lastEditType = null;
274         for (int d = vMap.size() - 2; d >= 0; d--) {
275             while (true) {
276                 Set<String> set = vMap.get(d);
277                 if (set.contains((x - 1) + "," + y)) {
278                     x--;
279                     if (EditType.DELETE.equals(lastEditType)) {
280                         Difference lastDiff = path.get(path.size() - 1);
281                         lastDiff.appendText(newSource.charAt(newSource.length() - x - 1));
282                     } else {
283                         path.add(new Difference(EditType.DELETE, newSource.substring(newSource.length() - x - 1, newSource.length() - x)));
284                     }
285                     lastEditType = EditType.DELETE;
286                     break;
287                 } else if (set.contains(x + "," + (y - 1))) {
288                     y--;
289                     if (EditType.INSERT.equals(lastEditType)) {
290                         Difference lastDiff = path.get(path.size() - 1);
291                         lastDiff.appendText(newTarget.charAt(newTarget.length() - y - 1));
292                     } else {
293                         path.add(new Difference(EditType.INSERT, newTarget.substring(newTarget.length() - y - 1, newTarget.length() - y)));
294                     }
295                     lastEditType = EditType.INSERT;
296                     break;
297                 } else {
298                     x--;
299                     y--;
300                     assert newSource.charAt(newSource.length() - x - 1) == newTarget.charAt(newTarget.length() - y - 1) : "No diagonal.  Can't happen. (path2)";
301 
302                     if (EditType.EQUAL.equals(lastEditType)) {
303                         Difference lastDiff = path.get(path.size() - 1);
304                         lastDiff.appendText(newSource.charAt(newSource.length() - x - 1));
305                     } else {
306                         path.add(new Difference(EditType.EQUAL, newSource.substring(newSource.length() - x - 1, newSource.length() - x)));
307                     }
308                     lastEditType = EditType.EQUAL;
309                 }
310             }
311         }
312         return path;
313     }
314 
315     /**
316      * Set the timeout for the diff operation. The default is 1 second. Use 0
317      * for infinity.
318      * 
319      * @param newTimeout
320      */
321     public static void setTimeout(float newTimeout) {
322         timeout = newTimeout;
323     }
324 
325     /**
326      * Number of seconds to map a diff before giving up. Use 0 for infinity.
327      */
328     private static final float TIMEOUT = 1.0f;
329     private static float timeout = TIMEOUT;
330     /**
331      * The baseline text.
332      */
333     private String source;
334 
335     /**
336      * The changed text.
337      */
338     private String target;
339 }
340