1
20 package org.crosswire.common.diff;
21
22 import java.util.HashMap;
23 import java.util.Map;
24
25
35 public class Bitap implements Locator {
36
46 public Bitap(String text, String pattern, int loc) {
47 this.text = text;
48 this.pattern = pattern;
49 this.loc = loc;
50 alphabet = new HashMap<Character, Integer>();
51 }
52
53
58 public int maxPatternLength() {
59 return MAXBITS;
60 }
61
62
67 public int locate() {
68 alphabet();
70
71 scoreTextLength = Math.max(text.length(), Bitap.minLength);
73 scoreTextLength = Math.min(scoreTextLength, Bitap.maxLength);
74
75 double scoreThreshold = Bitap.threshold;
77
78 int bestLoc = text.indexOf(pattern, loc);
80 if (bestLoc != -1) {
81 scoreThreshold = Math.min(bitapScore(0, bestLoc), scoreThreshold);
82 }
83
84 bestLoc = text.lastIndexOf(pattern, loc + pattern.length());
86 if (bestLoc != -1) {
87 scoreThreshold = Math.min(bitapScore(0, bestLoc), scoreThreshold);
88 }
89
90 int matchmask = (int) Math.pow(2, pattern.length() - 1);
92
93 bestLoc = -1;
94
95 int binMin;
96 int binMid;
97 int binMax = Math.max(loc + loc, text.length());
98 int[] lastrd = new int[0];
99 for (int d = 0; d < pattern.length(); d++) {
100 int[] rd = new int[text.length()];
103
104 binMin = loc;
107 binMid = binMax;
108 while (binMin < binMid) {
109 if (bitapScore(d, binMid) < scoreThreshold) {
110 binMin = binMid;
111 } else {
112 binMax = binMid;
113 }
114 binMid = (binMax - binMin) / 2 + binMin;
115 }
116
117 binMax = binMid; int start = Math.max(0, loc - (binMid - loc) - 1);
120 int finish = Math.min(text.length() - 1, pattern.length() + binMid);
121
122 if (text.charAt(finish) == pattern.charAt(pattern.length() - 1)) {
123 rd[finish] = (int) Math.pow(2, d + 1) - 1;
124 } else {
125 rd[finish] = (int) Math.pow(2, d) - 1;
126 }
127
128 for (int j = finish - 1; j >= start; j--) {
129 Character curChar = Character.valueOf(text.charAt(j));
130 int mask = alphabet.containsKey(curChar) ? alphabet.get(curChar).intValue() : 0;
131 if (d == 0) { rd[j] = ((rd[j + 1] << 1) | 1) & mask;
133 } else { rd[j] = ((rd[j + 1] << 1) | 1) & mask | ((lastrd[j + 1] << 1) | 1) | ((lastrd[j] << 1) | 1) | lastrd[j + 1];
135 }
136
137 if ((rd[j] & matchmask) != 0) {
138 double score = bitapScore(d, j);
139 if (score <= scoreThreshold) {
142 scoreThreshold = score;
144 bestLoc = j;
145 if (j > loc) {
146 start = Math.max(0, loc - (j - loc));
149 } else {
150 break;
152 }
153 }
154 }
155 }
156
157 if (bitapScore(d + 1, loc) > scoreThreshold) {
162 break;
164 }
165
166 lastrd = rd;
167 }
168
169 return bestLoc;
170 }
171
172 protected Map<Character, Integer> getAlphabet() {
173 return alphabet;
174 }
175
176
185 private double bitapScore(int e, int x) {
186 int d = Math.abs(loc - x);
189 return (e / (float) pattern.length() / Bitap.balance) + (d / (float) scoreTextLength / (1.0 - Bitap.balance));
190 }
191
192 protected void alphabet() {
194 int len = pattern.length();
195
196 assert len <= Bitap.MAXBITS : "Pattern too long for this application.";
197
198 for (int i = 0; i < len; i++) {
199 Character c = Character.valueOf(pattern.charAt(i));
200 Integer value = alphabet.get(c);
201 int mask = value == null ? 0 : value.intValue();
202 mask |= (int) Math.pow(2, len - i - 1);
203 alphabet.put(c, Integer.valueOf(mask));
204 }
205 }
206
207 public static void setBalance(float newBalance) {
208 balance = newBalance;
209 }
210
211 public static void setThreshold(float newThreshold) {
212 threshold = newThreshold;
213 }
214
215 public static void setMinLength(int newMinLength) {
216 minLength = newMinLength;
217 }
218
219 public static void setMaxLength(int newMaxLength) {
220 maxLength = newMaxLength;
221 }
222
223
227 private static final int MAXBITS = 32;
228
229
232 private static final float BALANCE = 0.5f;
233 private static float balance = BALANCE;
234
235
238 private static final float THRESHOLD = 0.5f;
239 private static float threshold = THRESHOLD;
240
241
244 private static final int MINLENGTH = 100;
245 private static int minLength = MINLENGTH;
246 private static final int MAXLENGTH = 1000;
247 private static int maxLength = MAXLENGTH;
248
249
252 private String text;
253
254
257 private String pattern;
258
259
262 private int loc;
263
264
267 private int scoreTextLength;
268
269
272 private Map<Character, Integer> alphabet;
273 }
274