Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
Commonality |
|
| 5.0;5 |
1 | /** | |
2 | * Distribution License: | |
3 | * JSword is free software; you can redistribute it and/or modify it under | |
4 | * the terms of the GNU Lesser General Public License, version 2.1 or later | |
5 | * as published by the Free Software Foundation. This program is distributed | |
6 | * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even | |
7 | * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
8 | * See the GNU Lesser General Public License for more details. | |
9 | * | |
10 | * The License is available on the internet at: | |
11 | * http://www.gnu.org/copyleft/lgpl.html | |
12 | * or by writing to: | |
13 | * Free Software Foundation, Inc. | |
14 | * 59 Temple Place - Suite 330 | |
15 | * Boston, MA 02111-1307, USA | |
16 | * | |
17 | * © CrossWire Bible Society, 2007 - 2016 | |
18 | * | |
19 | */ | |
20 | package org.crosswire.common.diff; | |
21 | ||
22 | /** | |
23 | * A Commonality is shared text at the beginning, middle or end of two strings. | |
24 | * | |
25 | * Based on the LGPL Diff_Match_Patch v1.5 javascript of Neil Fraser, Copyright (C) 2006<br> | |
26 | * <a href="http://neil.fraser.name/software/diff_match_patch/">http://neil.fraser.name/software/diff_match_patch/</a> | |
27 | * | |
28 | * @see gnu.lgpl.License The GNU Lesser General Public License for details. | |
29 | * @author DM Smith | |
30 | */ | |
31 | 0 | public final class Commonality { |
32 | /** | |
33 | * This is a utility class, therefore the constructor is private. | |
34 | */ | |
35 | 0 | private Commonality() { |
36 | 0 | } |
37 | ||
38 | /** | |
39 | * Find the length of a common prefix. | |
40 | * | |
41 | * @param source | |
42 | * First string | |
43 | * @param target | |
44 | * Second string | |
45 | * @return The number of characters common to the start of each string. | |
46 | */ | |
47 | public static int prefix(final String source, final String target) { | |
48 | 0 | int pointermin = 0; |
49 | 0 | int pointermax = Math.min(source.length(), target.length()); |
50 | 0 | int pointermid = pointermax; |
51 | 0 | while (pointermin < pointermid) { |
52 | 0 | if (source.regionMatches(0, target, 0, pointermid)) { |
53 | 0 | pointermin = pointermid; |
54 | } else { | |
55 | 0 | pointermax = pointermid; |
56 | } | |
57 | 0 | pointermid = (pointermax - pointermin) / 2 + pointermin; |
58 | } | |
59 | ||
60 | 0 | return pointermid; |
61 | } | |
62 | ||
63 | /** | |
64 | * Find the length of a common suffix. | |
65 | * | |
66 | * @param source | |
67 | * First string | |
68 | * @param target | |
69 | * Second string | |
70 | * @return The number of characters common to the end of each string. | |
71 | */ | |
72 | public static int suffix(final String source, final String target) { | |
73 | 0 | int pointermin = 0; |
74 | 0 | int pointermax = Math.min(source.length(), target.length()); |
75 | 0 | int pointermid = pointermax; |
76 | 0 | while (pointermin < pointermid) { |
77 | 0 | if (source.regionMatches(source.length() - pointermid, target, target.length() - pointermid, pointermid)) { |
78 | 0 | pointermin = pointermid; |
79 | } else { | |
80 | 0 | pointermax = pointermid; |
81 | } | |
82 | 0 | pointermid = (pointermax - pointermin) / 2 + pointermin; |
83 | } | |
84 | ||
85 | 0 | return pointermid; |
86 | } | |
87 | ||
88 | /** | |
89 | * Do the two texts share a substring which is at least half the length of | |
90 | * the longer text? | |
91 | * | |
92 | * @param source | |
93 | * Baseline string | |
94 | * @param target | |
95 | * Changed string | |
96 | * @return a CommonMiddle Or null if there was no match. | |
97 | */ | |
98 | public static CommonMiddle halfMatch(final String source, final String target) { | |
99 | 0 | int sourceLength = source.length(); |
100 | 0 | int targetLength = target.length(); |
101 | 0 | String longText = sourceLength > targetLength ? source : target; |
102 | 0 | String shortText = sourceLength > targetLength ? target : source; |
103 | 0 | int longTextLength = Math.max(sourceLength, targetLength); |
104 | 0 | if (longTextLength < 10 || shortText.length() < 1) { |
105 | 0 | return null; // Pointless. |
106 | } | |
107 | ||
108 | // First check if the second quarter is the seed for a half-match. | |
109 | 0 | CommonMiddle hm1 = halfMatch(longText, shortText, ceil(longTextLength, 4)); |
110 | // Check again based on the third quarter. | |
111 | 0 | CommonMiddle hm2 = halfMatch(longText, shortText, ceil(longTextLength, 2)); |
112 | 0 | CommonMiddle hm = null; |
113 | 0 | if (hm1 == null && hm2 == null) { |
114 | 0 | return null; |
115 | 0 | } else if (hm2 == null) { |
116 | 0 | hm = hm1; |
117 | 0 | } else if (hm1 == null) { |
118 | 0 | hm = hm2; |
119 | } else { | |
120 | // Both matched. Select the longest. | |
121 | 0 | hm = hm1.getCommonality().length() > hm2.getCommonality().length() ? hm1 : hm2; |
122 | } | |
123 | ||
124 | // A half-match was found, sort out the return data. | |
125 | 0 | if (sourceLength > targetLength) { |
126 | 0 | return hm; |
127 | } | |
128 | ||
129 | // While hm cannot be null our QA checker can't figure that out. | |
130 | 0 | if (hm != null) { |
131 | 0 | return new CommonMiddle(hm.getTargetPrefix(), hm.getTargetSuffix(), hm.getSourcePrefix(), hm.getSourceSuffix(), hm.getCommonality()); |
132 | } | |
133 | 0 | return null; |
134 | } | |
135 | ||
136 | /** | |
137 | * Does a substring of shortText exist within longText such that the | |
138 | * substring is at least half the length of longText? | |
139 | * | |
140 | * @param longText | |
141 | * Longer string | |
142 | * @param shortText | |
143 | * Shorter string | |
144 | * @param startIndex | |
145 | * Start index of quarter length substring within longText | |
146 | * @return Five element String array, containing the prefix of longText, the | |
147 | * suffix of longText, the prefix of shortText, the suffix of | |
148 | * shortText and the common middle. Or null if there was no match. | |
149 | */ | |
150 | private static CommonMiddle halfMatch(final String longText, final String shortText, final int startIndex) { | |
151 | // Start with a 1/4 length substring at position i as a seed. | |
152 | 0 | String seed = longText.substring(startIndex, startIndex + (longText.length() / 4)); |
153 | 0 | int j = -1; |
154 | 0 | String common = ""; |
155 | 0 | String longTextPrefix = ""; |
156 | 0 | String longTextSuffix = ""; |
157 | 0 | String shortTextPrefix = ""; |
158 | 0 | String shortTextSuffix = ""; |
159 | 0 | while ((j = shortText.indexOf(seed, j + 1)) != -1) { |
160 | 0 | int prefixLength = Commonality.prefix(longText.substring(startIndex), shortText.substring(j)); |
161 | 0 | int suffixLength = Commonality.suffix(longText.substring(0, startIndex), shortText.substring(0, j)); |
162 | 0 | if (common.length() < (prefixLength + suffixLength)) { |
163 | 0 | common = shortText.substring(j - suffixLength, j) + shortText.substring(j, j + prefixLength); |
164 | 0 | longTextPrefix = longText.substring(0, startIndex - suffixLength); |
165 | 0 | longTextSuffix = longText.substring(startIndex + prefixLength); |
166 | 0 | shortTextPrefix = shortText.substring(0, j - suffixLength); |
167 | 0 | shortTextSuffix = shortText.substring(j + prefixLength); |
168 | } | |
169 | 0 | } |
170 | ||
171 | 0 | if (common.length() >= longText.length() / 2) { |
172 | 0 | return new CommonMiddle(longTextPrefix, longTextSuffix, shortTextPrefix, shortTextSuffix, common); |
173 | } | |
174 | ||
175 | 0 | return null; |
176 | } | |
177 | ||
178 | private static int ceil(int number, int divisor) { | |
179 | 0 | assert divisor > 0; |
180 | 0 | int result = number / divisor + ((number % divisor) > 0 ? 1 : 0); |
181 | // assert result == (int) Math.ceil(((double) number) / ((double) divisor)); | |
182 | 0 | return result; |
183 | } | |
184 | ||
185 | } |