1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 or later
5    * as published by the Free Software Foundation. This program is distributed
6    * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
7    * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *      http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * © CrossWire Bible Society, 2007 - 2016
18   *
19   */
20  package org.crosswire.common.diff;
21  
22  import java.util.ArrayList;
23  import java.util.Iterator;
24  import java.util.List;
25  import java.util.regex.Matcher;
26  import java.util.regex.Pattern;
27  
28  /**
29   * A PatchEntry is a single "instruction" in a Patch, consisting of a interval
30   * over which differences are applied and the differences that should be
31   * applied.
32   * 
33   * Based on the LGPL Diff_Match_Patch v1.5 javascript of Neil Fraser, Copyright (C) 2006<br>
34   * <a href="http://neil.fraser.name/software/diff_match_patch/">http://neil.fraser.name/software/diff_match_patch/</a>
35   * 
36   * @see gnu.lgpl.License The GNU Lesser General Public License for details.
37   * @author DM Smith
38   */
39  public class PatchEntry implements Iterable<Difference> {
40      // Constructor for a patch object.
41      public PatchEntry() {
42          this.diffs = new ArrayList<Difference>();
43          this.sourceStart = 0;
44          this.targetStart = 0;
45          this.sourceLength = 0;
46          this.targetLength = 0;
47      }
48  
49      // Constructor for a patch object.
50      public PatchEntry(String patchText) {
51          this();
52          fromText(patchText);
53      }
54  
55      /**
56       * @return the sourceStart
57       */
58      public int getSourceStart() {
59          return sourceStart;
60      }
61  
62      /**
63       * @param start
64       *            the sourceStart to set
65       */
66      public void setSourceStart(int start) {
67          this.sourceStart = start;
68      }
69  
70      /**
71       * @param adjustment
72       *            the adjustment to sourceStart
73       */
74      public void adjustSourceStart(int adjustment) {
75          this.sourceStart += adjustment;
76      }
77  
78      /**
79       * @return the targetStart
80       */
81      public int getTargetStart() {
82          return targetStart;
83      }
84  
85      /**
86       * @param start
87       *            the targetStart to set
88       */
89      public void setTargetStart(int start) {
90          this.targetStart = start;
91      }
92  
93      /**
94       * @param adjustment
95       *            the adjustment to targetStart
96       */
97      public void adjustTargetStart(int adjustment) {
98          this.targetStart += adjustment;
99      }
100 
101     /**
102      * @return the sourceLength
103      */
104     public int getSourceLength() {
105         return sourceLength;
106     }
107 
108     /**
109      * @param length
110      *            the sourceLength to set
111      */
112     public void setSourceLength(int length) {
113         this.sourceLength = length;
114     }
115 
116     /**
117      * @param adjustment
118      *            the adjustment to sourceLength
119      */
120     public void adjustSourceLength(int adjustment) {
121         this.sourceLength += adjustment;
122     }
123 
124     /**
125      * @return the targetLength
126      */
127     public int getTargetLength() {
128         return targetLength;
129     }
130 
131     /**
132      * @param length
133      *            the targetLength to set
134      */
135     public void setTargetLength(int length) {
136         this.targetLength = length;
137     }
138 
139     /**
140      * @param adjustment
141      *            the adjustment to targetLength
142      */
143     public void adjustTargetLength(int adjustment) {
144         this.targetLength += adjustment;
145     }
146 
147     // Emulate GNU diff's format.
148     // Header: @@ -382,8 +481,9 @@
149     // Indices are printed as 1-based, not 0-based.
150     @Override
151     public String toString() {
152         StringBuilder txt = new StringBuilder();
153         txt.append("@@ -");
154         txt.append(getCoordinates(sourceStart, sourceLength));
155         txt.append(" +");
156         txt.append(getCoordinates(targetStart, targetLength));
157         txt.append(" @@\n");
158 
159         for (Difference diff : diffs) {
160             txt.append(diff.getEditType().getSymbol());
161             txt.append(encode(diff.getText()));
162             txt.append('\n');
163         }
164         // String result = txt.toString();
165         // // Replicate the JavaScript encodeURI() function (not including %20)
166         // result = result.replace("%3D", "=").replace("%3B",
167         // ";").replace("%27", "'")
168         // .replace("%2C", ",").replace("%2F", "/").replace("%7E", "~")
169         // .replace("%21", "!").replace("%40", "@").replace("%23", "#")
170         // .replace("%24", "$").replace("%26", "&").replace("%28", "(")
171         // .replace("%29", ")").replace("%2B", "+").replace("%3A", ":")
172         // .replace("%3F", "?");
173         return txt.toString();
174     }
175 
176     /**
177      * Parse a textual representation of a patch entry and populate this patch
178      * entry.
179      * 
180      * @param input
181      *            Text representation of this patch entry
182      * @return this patch entry
183      */
184     public PatchEntry fromText(String input) {
185         diffs.clear();
186         String[] text = newlinePattern.split(input);
187         char sign = '\0';
188         String line = "";
189 
190         Matcher matcher = patchPattern.matcher(text[0]);
191         matcher.matches();
192         assert matcher.groupCount() == 4 : "Invalid patch string:\n" + text[0];
193         // m = text[0].match(/^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$/);
194 
195         sourceStart = Integer.parseInt(matcher.group(1));
196 
197         if (matcher.group(2).length() == 0) {
198             sourceStart--;
199             sourceLength = 1;
200         } else if (matcher.group(2).charAt(0) == '0') {
201             setSourceLength(0);
202         } else {
203             sourceStart--;
204             sourceLength = Integer.parseInt(matcher.group(2));
205         }
206 
207         targetStart = Integer.parseInt(matcher.group(3));
208         if (matcher.group(4).length() == 0) {
209             targetStart--;
210             targetLength = 1;
211         } else if (matcher.group(4).charAt(0) == '0') {
212             targetLength = 0;
213         } else {
214             targetStart--;
215             targetLength = Integer.parseInt(matcher.group(4));
216         }
217 
218         for (int lineCount = 1; lineCount < text.length; lineCount++) {
219             line = text[lineCount];
220             if (line.length() > 0) {
221                 sign = line.charAt(0);
222                 line = decode(line.substring(1));
223                 diffs.add(new Difference(EditType.fromSymbol(sign), line));
224             }
225         }
226         return this;
227     }
228 
229     // Compute and return the source text (all equalities and deletions).
230     public String getSourceText() {
231         StringBuilder txt = new StringBuilder();
232         for (Difference diff : diffs) {
233             if (!EditType.INSERT.equals(diff.getEditType())) {
234                 txt.append(diff.getText());
235             }
236         }
237         return txt.toString();
238     }
239 
240     // Compute and return the destination text (all equalities and insertions).
241     public String getTargetText() {
242         StringBuilder txt = new StringBuilder();
243         for (Difference diff : diffs) {
244             if (!EditType.DELETE.equals(diff.getEditType())) {
245                 txt.append(diff.getText());
246             }
247         }
248         return txt.toString();
249     }
250 
251     public void addContext(String text) {
252         int maxPatternLength = new Match().maxPatternLength();
253         int padding = 0;
254         String pattern = text.substring(targetStart, targetStart + sourceLength);
255         int textLength = text.length();
256 
257         // Increase the context until we're unique
258         // (but don't let the pattern expand beyond the maximum length our
259         // Locator can handle).
260         int end = maxPatternLength - PatchEntry.margin - PatchEntry.margin;
261         while (text.indexOf(pattern) != text.lastIndexOf(pattern) && pattern.length() < end) {
262             padding += PatchEntry.margin;
263             pattern = text.substring(Math.max(0, targetStart - padding), Math.min(textLength, targetStart + sourceLength + padding));
264         }
265 
266         // Add one chunk for good luck.
267         padding += PatchEntry.margin;
268 
269         // Add the prefix.
270         String prefix = text.substring(Math.max(0, targetStart - padding), targetStart);
271         int prefixLength = prefix.length();
272         if (prefixLength > 0) {
273             diffs.add(0, new Difference(EditType.EQUAL, prefix));
274         }
275 
276         // Add the suffix
277         String suffix = text.substring(targetStart + sourceLength, Math.min(textLength, targetStart + sourceLength + padding));
278         int suffixLength = suffix.length();
279         if (suffixLength > 0) {
280             diffs.add(new Difference(EditType.EQUAL, suffix));
281         }
282 
283         // Roll back the start points.
284         sourceStart -= prefixLength;
285         targetStart -= prefixLength;
286 
287         // Extend the lengths.
288         sourceLength += prefixLength + suffixLength;
289         targetLength += prefixLength + suffixLength;
290     }
291 
292     public void addDifference(Difference diff) {
293         diffs.add(diff);
294     }
295 
296     public int getDifferenceCount() {
297         return diffs.size();
298     }
299 
300     public boolean hasDifferences() {
301         return !diffs.isEmpty();
302     }
303 
304     public Iterator<Difference> iterator() {
305         return diffs.iterator();
306     }
307 
308     public Difference getFirstDifference() {
309         if (diffs.isEmpty()) {
310             return null;
311         }
312         return diffs.get(0);
313     }
314 
315     public Difference removeFirstDifference() {
316         if (diffs.isEmpty()) {
317             return null;
318         }
319         return diffs.remove(0);
320     }
321 
322     public Difference getLastDifference() {
323         if (diffs.isEmpty()) {
324             return null;
325         }
326         return diffs.get(diffs.size() - 1);
327     }
328 
329     protected void setDifferences(List<Difference> newDiffs) {
330         diffs = newDiffs;
331     }
332 
333     /**
334      * @param newMargin
335      *            the margin to set
336      */
337     public static void setMargin(int newMargin) {
338         PatchEntry.margin = newMargin;
339     }
340 
341     /**
342      * @return the margin
343      */
344     public static int getMargin() {
345         return margin;
346     }
347 
348     private String getCoordinates(int start, int length) {
349         StringBuilder buf = new StringBuilder();
350 
351         if (length == 0) {
352             buf.append(start);
353             buf.append(",0");
354         } else if (length == 1) {
355             buf.append(sourceStart + 1);
356         } else {
357             buf.append(start + 1);
358             buf.append(',');
359             buf.append(length);
360         }
361 
362         return buf.toString();
363     }
364 
365     /**
366      * This algorithm allows for \n to be included in a difference. Thus it
367      * needs to be escaped. We will use URL encoding of \n. But this makes % a
368      * meta-character, thus it needs to be encoded too.
369      * 
370      * @param str
371      *            the unencoded string
372      * @return the encoded string
373      */
374     private String encode(String str) {
375         int strlen = str.length();
376         StringBuilder buf = new StringBuilder(2 * strlen);
377         for (int i = 0; i < strlen; i++) {
378             char c = str.charAt(i);
379             switch (c) {
380             case '%':
381                 buf.append("%25");
382                 break;
383             case '\n':
384                 buf.append("%0A");
385                 break;
386             default:
387                 buf.append(c);
388             }
389         }
390         return buf.toString();
391     }
392 
393     /**
394      * Undo encoding
395      * 
396      * @param str
397      *            the encoded string
398      * @return the unencoded string
399      */
400     private String decode(String str) {
401         int strlen = str.length();
402         StringBuilder buf = new StringBuilder(2 * strlen);
403         int i = 0;
404         for (i = 0; i < strlen; i++) {
405             char c = str.charAt(i);
406             if (c == '%') {
407                 if ("%0A".equals(str.substring(i, i + 3))) {
408                     buf.append('\n');
409                 } else { // if ("%25".equals(str.substring(i, i + 3))
410                     buf.append('%');
411                 }
412                 i += 2;
413             } else {
414                 buf.append(c);
415             }
416         }
417         return buf.toString();
418     }
419 
420     /**
421      * Chunk size for context length.
422      */
423     private static final int MARGIN = 4;
424     private static int margin = MARGIN;
425     private static Pattern newlinePattern = Pattern.compile("\n");
426     private static Pattern patchPattern = Pattern.compile("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$");
427 
428     private List<Difference> diffs;
429     private int sourceStart;
430     private int targetStart;
431     private int sourceLength;
432     private int targetLength;
433 }
434