| Commonality.java |
1 /**
2 * Distribution License:
3 * JSword is free software; you can redistribute it and/or modify it under
4 * the terms of the GNU Lesser General Public License, version 2.1 or later
5 * as published by the Free Software Foundation. This program is distributed
6 * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
7 * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8 * See the GNU Lesser General Public License for more details.
9 *
10 * The License is available on the internet at:
11 * http://www.gnu.org/copyleft/lgpl.html
12 * or by writing to:
13 * Free Software Foundation, Inc.
14 * 59 Temple Place - Suite 330
15 * Boston, MA 02111-1307, USA
16 *
17 * © CrossWire Bible Society, 2007 - 2016
18 *
19 */
20 package org.crosswire.common.diff;
21
22 /**
23 * A Commonality is shared text at the beginning, middle or end of two strings.
24 *
25 * Based on the LGPL Diff_Match_Patch v1.5 javascript of Neil Fraser, Copyright (C) 2006<br>
26 * <a href="http://neil.fraser.name/software/diff_match_patch/">http://neil.fraser.name/software/diff_match_patch/</a>
27 *
28 * @see gnu.lgpl.License The GNU Lesser General Public License for details.
29 * @author DM Smith
30 */
31 public final class Commonality {
32 /**
33 * This is a utility class, therefore the constructor is private.
34 */
35 private Commonality() {
36 }
37
38 /**
39 * Find the length of a common prefix.
40 *
41 * @param source
42 * First string
43 * @param target
44 * Second string
45 * @return The number of characters common to the start of each string.
46 */
47 public static int prefix(final String source, final String target) {
48 int pointermin = 0;
49 int pointermax = Math.min(source.length(), target.length());
50 int pointermid = pointermax;
51 while (pointermin < pointermid) {
52 if (source.regionMatches(0, target, 0, pointermid)) {
53 pointermin = pointermid;
54 } else {
55 pointermax = pointermid;
56 }
57 pointermid = (pointermax - pointermin) / 2 + pointermin;
58 }
59
60 return pointermid;
61 }
62
63 /**
64 * Find the length of a common suffix.
65 *
66 * @param source
67 * First string
68 * @param target
69 * Second string
70 * @return The number of characters common to the end of each string.
71 */
72 public static int suffix(final String source, final String target) {
73 int pointermin = 0;
74 int pointermax = Math.min(source.length(), target.length());
75 int pointermid = pointermax;
76 while (pointermin < pointermid) {
77 if (source.regionMatches(source.length() - pointermid, target, target.length() - pointermid, pointermid)) {
78 pointermin = pointermid;
79 } else {
80 pointermax = pointermid;
81 }
82 pointermid = (pointermax - pointermin) / 2 + pointermin;
83 }
84
85 return pointermid;
86 }
87
88 /**
89 * Do the two texts share a substring which is at least half the length of
90 * the longer text?
91 *
92 * @param source
93 * Baseline string
94 * @param target
95 * Changed string
96 * @return a CommonMiddle Or null if there was no match.
97 */
98 public static CommonMiddle halfMatch(final String source, final String target) {
99 int sourceLength = source.length();
100 int targetLength = target.length();
101 String longText = sourceLength > targetLength ? source : target;
102 String shortText = sourceLength > targetLength ? target : source;
103 int longTextLength = Math.max(sourceLength, targetLength);
104 if (longTextLength < 10 || shortText.length() < 1) {
105 return null; // Pointless.
106 }
107
108 // First check if the second quarter is the seed for a half-match.
109 CommonMiddle hm1 = halfMatch(longText, shortText, ceil(longTextLength, 4));
110 // Check again based on the third quarter.
111 CommonMiddle hm2 = halfMatch(longText, shortText, ceil(longTextLength, 2));
112 CommonMiddle hm = null;
113 if (hm1 == null && hm2 == null) {
114 return null;
115 } else if (hm2 == null) {
116 hm = hm1;
117 } else if (hm1 == null) {
118 hm = hm2;
119 } else {
120 // Both matched. Select the longest.
121 hm = hm1.getCommonality().length() > hm2.getCommonality().length() ? hm1 : hm2;
122 }
123
124 // A half-match was found, sort out the return data.
125 if (sourceLength > targetLength) {
126 return hm;
127 }
128
129 // While hm cannot be null our QA checker can't figure that out.
130 if (hm != null) {
131 return new CommonMiddle(hm.getTargetPrefix(), hm.getTargetSuffix(), hm.getSourcePrefix(), hm.getSourceSuffix(), hm.getCommonality());
132 }
133 return null;
134 }
135
136 /**
137 * Does a substring of shortText exist within longText such that the
138 * substring is at least half the length of longText?
139 *
140 * @param longText
141 * Longer string
142 * @param shortText
143 * Shorter string
144 * @param startIndex
145 * Start index of quarter length substring within longText
146 * @return Five element String array, containing the prefix of longText, the
147 * suffix of longText, the prefix of shortText, the suffix of
148 * shortText and the common middle. Or null if there was no match.
149 */
150 private static CommonMiddle halfMatch(final String longText, final String shortText, final int startIndex) {
151 // Start with a 1/4 length substring at position i as a seed.
152 String seed = longText.substring(startIndex, startIndex + (longText.length() / 4));
153 int j = -1;
154 String common = "";
155 String longTextPrefix = "";
156 String longTextSuffix = "";
157 String shortTextPrefix = "";
158 String shortTextSuffix = "";
159 while ((j = shortText.indexOf(seed, j + 1)) != -1) {
160 int prefixLength = Commonality.prefix(longText.substring(startIndex), shortText.substring(j));
161 int suffixLength = Commonality.suffix(longText.substring(0, startIndex), shortText.substring(0, j));
162 if (common.length() < (prefixLength + suffixLength)) {
163 common = shortText.substring(j - suffixLength, j) + shortText.substring(j, j + prefixLength);
164 longTextPrefix = longText.substring(0, startIndex - suffixLength);
165 longTextSuffix = longText.substring(startIndex + prefixLength);
166 shortTextPrefix = shortText.substring(0, j - suffixLength);
167 shortTextSuffix = shortText.substring(j + prefixLength);
168 }
169 }
170
171 if (common.length() >= longText.length() / 2) {
172 return new CommonMiddle(longTextPrefix, longTextSuffix, shortTextPrefix, shortTextSuffix, common);
173 }
174
175 return null;
176 }
177
178 private static int ceil(int number, int divisor) {
179 assert divisor > 0;
180 int result = number / divisor + ((number % divisor) > 0 ? 1 : 0);
181 // assert result == (int) Math.ceil(((double) number) / ((double) divisor));
182 return result;
183 }
184
185 }
186