| Commonality.java |
1 /**
2 * Distribution License:
3 * JSword is free software; you can redistribute it and/or modify it under
4 * the terms of the GNU Lesser General Public License, version 2.1 as published by
5 * the Free Software Foundation. This program is distributed in the hope
6 * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8 * See the GNU Lesser General Public License for more details.
9 *
10 * The License is available on the internet at:
11 * http://www.gnu.org/copyleft/lgpl.html
12 * or by writing to:
13 * Free Software Foundation, Inc.
14 * 59 Temple Place - Suite 330
15 * Boston, MA 02111-1307, USA
16 *
17 * Copyright: 2007
18 * The copyright to this program is held by it's authors.
19 *
20 * ID: $Id: Commonality.java 2196 2012-01-03 13:53:37Z dmsmith $
21 */
22 package org.crosswire.common.diff;
23
24 /**
25 * A Commonality is shared text at the beginning, middle or end of two strings.
26 *
27 * Based on the LGPL Diff_Match_Patch v1.5 javascript of Neil Fraser, Copyright (C) 2006<br>
28 * <a href="http://neil.fraser.name/software/diff_match_patch/">http://neil.fraser.name/software/diff_match_patch/</a>
29 *
30 * @see gnu.lgpl.License for license details.<br>
31 * The copyright to this program is held by it's authors.
32 * @author DM Smith [dmsmith555 at yahoo dot com]
33 */
34 public class Commonality {
35 /**
36 * This is a utility class, therefore the constructor is private.
37 */
38 private Commonality() {
39 }
40
41 /**
42 * Find the length of a common prefix.
43 *
44 * @param source
45 * First string
46 * @param target
47 * Second string
48 * @return The number of characters common to the start of each string.
49 */
50 public static int prefix(final String source, final String target) {
51 int pointermin = 0;
52 int pointermax = Math.min(source.length(), target.length());
53 int pointermid = pointermax;
54 while (pointermin < pointermid) {
55 if (source.regionMatches(0, target, 0, pointermid)) {
56 pointermin = pointermid;
57 } else {
58 pointermax = pointermid;
59 }
60 pointermid = (pointermax - pointermin) / 2 + pointermin;
61 }
62
63 return pointermid;
64 }
65
66 /**
67 * Find the length of a common suffix.
68 *
69 * @param source
70 * First string
71 * @param target
72 * Second string
73 * @return The number of characters common to the end of each string.
74 */
75 public static int suffix(final String source, final String target) {
76 int pointermin = 0;
77 int pointermax = Math.min(source.length(), target.length());
78 int pointermid = pointermax;
79 while (pointermin < pointermid) {
80 if (source.regionMatches(source.length() - pointermid, target, target.length() - pointermid, pointermid)) {
81 pointermin = pointermid;
82 } else {
83 pointermax = pointermid;
84 }
85 pointermid = (pointermax - pointermin) / 2 + pointermin;
86 }
87
88 return pointermid;
89 }
90
91 /**
92 * Do the two texts share a substring which is at least half the length of
93 * the longer text?
94 *
95 * @param source
96 * Baseline string
97 * @param target
98 * Changed string
99 * @return a CommonMiddle Or null if there was no match.
100 */
101 public static CommonMiddle halfMatch(final String source, final String target) {
102 int sourceLength = source.length();
103 int targetLength = target.length();
104 String longText = sourceLength > targetLength ? source : target;
105 String shortText = sourceLength > targetLength ? target : source;
106 int longTextLength = Math.max(sourceLength, targetLength);
107 if (longTextLength < 10 || shortText.length() < 1) {
108 return null; // Pointless.
109 }
110
111 // First check if the second quarter is the seed for a half-match.
112 CommonMiddle hm1 = halfMatch(longText, shortText, ceil(longTextLength, 4));
113 // Check again based on the third quarter.
114 CommonMiddle hm2 = halfMatch(longText, shortText, ceil(longTextLength, 2));
115 CommonMiddle hm = null;
116 if (hm1 == null && hm2 == null) {
117 return null;
118 } else if (hm2 == null) {
119 hm = hm1;
120 } else if (hm1 == null) {
121 hm = hm2;
122 } else {
123 // Both matched. Select the longest.
124 hm = hm1.getCommonality().length() > hm2.getCommonality().length() ? hm1 : hm2;
125 }
126
127 // A half-match was found, sort out the return data.
128 if (sourceLength > targetLength) {
129 return hm;
130 }
131
132 // While hm cannot be null our QA checker can't figure that out.
133 if (hm != null) {
134 return new CommonMiddle(hm.getTargetPrefix(), hm.getTargetSuffix(), hm.getSourcePrefix(), hm.getSourceSuffix(), hm.getCommonality());
135 }
136 return null;
137 }
138
139 private static int ceil(int number, int divisor) {
140 assert divisor > 0;
141 int result = number / divisor + ((number % divisor) > 0 ? 1 : 0);
142 // assert result == (int) Math.ceil(((double) number) / ((double)
143 // divisor));
144 return result;
145 }
146
147 /**
148 * Does a substring of shortText exist within longText such that the
149 * substring is at least half the length of longText?
150 *
151 * @param longText
152 * Longer string
153 * @param shortText
154 * Shorter string
155 * @param startIndex
156 * Start index of quarter length substring within longText
157 * @return Five element String array, containing the prefix of longText, the
158 * suffix of longText, the prefix of shortText, the suffix of
159 * shortText and the common middle. Or null if there was no match.
160 */
161 private static CommonMiddle halfMatch(final String longText, final String shortText, final int startIndex) {
162 // Start with a 1/4 length substring at position i as a seed.
163 String seed = longText.substring(startIndex, startIndex + (longText.length() / 4));
164 int j = -1;
165 String common = "";
166 String longTextPrefix = "";
167 String longTextSuffix = "";
168 String shortTextPrefix = "";
169 String shortTextSuffix = "";
170 while ((j = shortText.indexOf(seed, j + 1)) != -1) {
171 int prefixLength = Commonality.prefix(longText.substring(startIndex), shortText.substring(j));
172 int suffixLength = Commonality.suffix(longText.substring(0, startIndex), shortText.substring(0, j));
173 if (common.length() < (prefixLength + suffixLength)) {
174 common = shortText.substring(j - suffixLength, j) + shortText.substring(j, j + prefixLength);
175 longTextPrefix = longText.substring(0, startIndex - suffixLength);
176 longTextSuffix = longText.substring(startIndex + prefixLength);
177 shortTextPrefix = shortText.substring(0, j - suffixLength);
178 shortTextSuffix = shortText.substring(j + prefixLength);
179 }
180 }
181
182 if (common.length() >= longText.length() / 2) {
183 return new CommonMiddle(longTextPrefix, longTextSuffix, shortTextPrefix, shortTextSuffix, common);
184 }
185
186 return null;
187 }
188
189 }
190