1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 or later
5    * as published by the Free Software Foundation. This program is distributed
6    * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
7    * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *      http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * © CrossWire Bible Society, 2007 - 2016
18   *
19   */
20  package org.crosswire.common.diff;
21  
22  /**
23   * A Commonality is shared text at the beginning, middle or end of two strings.
24   * 
25   * Based on the LGPL Diff_Match_Patch v1.5 javascript of Neil Fraser, Copyright (C) 2006<br>
26   * <a href="http://neil.fraser.name/software/diff_match_patch/">http://neil.fraser.name/software/diff_match_patch/</a>
27   * 
28   * @see gnu.lgpl.License The GNU Lesser General Public License for details.
29   * @author DM Smith
30   */
31  public final class Commonality {
32      /**
33       * This is a utility class, therefore the constructor is private.
34       */
35      private Commonality() {
36      }
37  
38      /**
39       * Find the length of a common prefix.
40       * 
41       * @param source
42       *            First string
43       * @param target
44       *            Second string
45       * @return The number of characters common to the start of each string.
46       */
47      public static int prefix(final String source, final String target) {
48          int pointermin = 0;
49          int pointermax = Math.min(source.length(), target.length());
50          int pointermid = pointermax;
51          while (pointermin < pointermid) {
52              if (source.regionMatches(0, target, 0, pointermid)) {
53                  pointermin = pointermid;
54              } else {
55                  pointermax = pointermid;
56              }
57              pointermid = (pointermax - pointermin) / 2 + pointermin;
58          }
59  
60          return pointermid;
61      }
62  
63      /**
64       * Find the length of a common suffix.
65       * 
66       * @param source
67       *            First string
68       * @param target
69       *            Second string
70       * @return The number of characters common to the end of each string.
71       */
72      public static int suffix(final String source, final String target) {
73          int pointermin = 0;
74          int pointermax = Math.min(source.length(), target.length());
75          int pointermid = pointermax;
76          while (pointermin < pointermid) {
77              if (source.regionMatches(source.length() - pointermid, target, target.length() - pointermid, pointermid)) {
78                  pointermin = pointermid;
79              } else {
80                  pointermax = pointermid;
81              }
82              pointermid = (pointermax - pointermin) / 2 + pointermin;
83          }
84  
85          return pointermid;
86      }
87  
88      /**
89       * Do the two texts share a substring which is at least half the length of
90       * the longer text?
91       * 
92       * @param source
93       *            Baseline string
94       * @param target
95       *            Changed string
96       * @return a CommonMiddle Or null if there was no match.
97       */
98      public static CommonMiddle halfMatch(final String source, final String target) {
99          int sourceLength = source.length();
100         int targetLength = target.length();
101         String longText = sourceLength > targetLength ? source : target;
102         String shortText = sourceLength > targetLength ? target : source;
103         int longTextLength = Math.max(sourceLength, targetLength);
104         if (longTextLength < 10 || shortText.length() < 1) {
105             return null; // Pointless.
106         }
107 
108         // First check if the second quarter is the seed for a half-match.
109         CommonMiddle hm1 = halfMatch(longText, shortText, ceil(longTextLength, 4));
110         // Check again based on the third quarter.
111         CommonMiddle hm2 = halfMatch(longText, shortText, ceil(longTextLength, 2));
112         CommonMiddle hm = null;
113         if (hm1 == null && hm2 == null) {
114             return null;
115         } else if (hm2 == null) {
116             hm = hm1;
117         } else if (hm1 == null) {
118             hm = hm2;
119         } else {
120             // Both matched. Select the longest.
121             hm = hm1.getCommonality().length() > hm2.getCommonality().length() ? hm1 : hm2;
122         }
123 
124         // A half-match was found, sort out the return data.
125         if (sourceLength > targetLength) {
126             return hm;
127         }
128 
129         // While hm cannot be null our QA checker can't figure that out.
130         if (hm != null) {
131             return new CommonMiddle(hm.getTargetPrefix(), hm.getTargetSuffix(), hm.getSourcePrefix(), hm.getSourceSuffix(), hm.getCommonality());
132         }
133         return null;
134     }
135 
136     /**
137      * Does a substring of shortText exist within longText such that the
138      * substring is at least half the length of longText?
139      * 
140      * @param longText
141      *            Longer string
142      * @param shortText
143      *            Shorter string
144      * @param startIndex
145      *            Start index of quarter length substring within longText
146      * @return Five element String array, containing the prefix of longText, the
147      *         suffix of longText, the prefix of shortText, the suffix of
148      *         shortText and the common middle. Or null if there was no match.
149      */
150     private static CommonMiddle halfMatch(final String longText, final String shortText, final int startIndex) {
151         // Start with a 1/4 length substring at position i as a seed.
152         String seed = longText.substring(startIndex, startIndex + (longText.length() / 4));
153         int j = -1;
154         String common = "";
155         String longTextPrefix = "";
156         String longTextSuffix = "";
157         String shortTextPrefix = "";
158         String shortTextSuffix = "";
159         while ((j = shortText.indexOf(seed, j + 1)) != -1) {
160             int prefixLength = Commonality.prefix(longText.substring(startIndex), shortText.substring(j));
161             int suffixLength = Commonality.suffix(longText.substring(0, startIndex), shortText.substring(0, j));
162             if (common.length() < (prefixLength + suffixLength)) {
163                 common = shortText.substring(j - suffixLength, j) + shortText.substring(j, j + prefixLength);
164                 longTextPrefix = longText.substring(0, startIndex - suffixLength);
165                 longTextSuffix = longText.substring(startIndex + prefixLength);
166                 shortTextPrefix = shortText.substring(0, j - suffixLength);
167                 shortTextSuffix = shortText.substring(j + prefixLength);
168             }
169         }
170 
171         if (common.length() >= longText.length() / 2) {
172             return new CommonMiddle(longTextPrefix, longTextSuffix, shortTextPrefix, shortTextSuffix, common);
173         }
174 
175         return null;
176     }
177 
178     private static int ceil(int number, int divisor) {
179         assert divisor > 0;
180         int result = number / divisor + ((number % divisor) > 0 ? 1 : 0);
181         // assert result == (int) Math.ceil(((double) number) / ((double) divisor));
182         return result;
183     }
184 
185 }
186