| Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
| DiffCleanup |
|
| 11.8;11.8 |
| 1 | /** | |
| 2 | * Distribution License: | |
| 3 | * JSword is free software; you can redistribute it and/or modify it under | |
| 4 | * the terms of the GNU Lesser General Public License, version 2.1 or later | |
| 5 | * as published by the Free Software Foundation. This program is distributed | |
| 6 | * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even | |
| 7 | * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
| 8 | * See the GNU Lesser General Public License for more details. | |
| 9 | * | |
| 10 | * The License is available on the internet at: | |
| 11 | * http://www.gnu.org/copyleft/lgpl.html | |
| 12 | * or by writing to: | |
| 13 | * Free Software Foundation, Inc. | |
| 14 | * 59 Temple Place - Suite 330 | |
| 15 | * Boston, MA 02111-1307, USA | |
| 16 | * | |
| 17 | * © CrossWire Bible Society, 2005 - 2016 | |
| 18 | * | |
| 19 | */ | |
| 20 | package org.crosswire.common.diff; | |
| 21 | ||
| 22 | import java.util.List; | |
| 23 | import java.util.ListIterator; | |
| 24 | import java.util.Stack; | |
| 25 | ||
| 26 | /** | |
| 27 | * Various Strategies to cleanup a diff list. | |
| 28 | * | |
| 29 | * Based on the LGPL Diff_Match_Patch v1.5 javascript of Neil Fraser, Copyright (C) 2006<br> | |
| 30 | * <a href="http://neil.fraser.name/software/diff_match_patch/">http://neil.fraser.name/software/diff_match_patch/</a> | |
| 31 | * | |
| 32 | * @see gnu.lgpl.License The GNU Lesser General Public License for details. | |
| 33 | * @author DM Smith | |
| 34 | */ | |
| 35 | 0 | public final class DiffCleanup { |
| 36 | /** | |
| 37 | * Utility class constructor. | |
| 38 | */ | |
| 39 | 0 | private DiffCleanup() { |
| 40 | 0 | } |
| 41 | ||
| 42 | /** | |
| 43 | * Reduce the number of edits by eliminating semantically trivial | |
| 44 | * equalities. | |
| 45 | * | |
| 46 | * @param diffs | |
| 47 | * List of Difference objects | |
| 48 | */ | |
| 49 | public static void cleanupSemantic(final List<Difference> diffs) { | |
| 50 | 0 | boolean changes = false; |
| 51 | 0 | Stack<Difference> equalities = new Stack<Difference>(); // Stack of indices where equalities are |
| 52 | // found. | |
| 53 | 0 | String lastEquality = null; // Always equal to |
| 54 | // equalities.lastElement().getText() | |
| 55 | 0 | int lengthChangesPre = 0; // Number of characters that changed prior to |
| 56 | // the equality. | |
| 57 | 0 | int lengthChangesPost = 0; // Number of characters that changed after |
| 58 | // the equality. | |
| 59 | 0 | ListIterator<Difference> pointer = diffs.listIterator(); |
| 60 | 0 | Difference curDiff = pointer.hasNext() ? (Difference) pointer.next() : null; |
| 61 | 0 | while (curDiff != null) { |
| 62 | 0 | EditType editType = curDiff.getEditType(); |
| 63 | 0 | if (EditType.EQUAL.equals(editType)) { |
| 64 | // equality found | |
| 65 | 0 | equalities.push(curDiff); |
| 66 | 0 | lengthChangesPre = lengthChangesPost; |
| 67 | 0 | lengthChangesPost = 0; |
| 68 | 0 | lastEquality = curDiff.getText(); |
| 69 | } else { | |
| 70 | // an insertion or deletion | |
| 71 | 0 | lengthChangesPost += curDiff.getText().length(); |
| 72 | 0 | int lastLen = lastEquality != null ? lastEquality.length() : 0; |
| 73 | 0 | if (lastEquality != null && lastLen <= lengthChangesPre && lastLen <= lengthChangesPost) { |
| 74 | // position pointer to the element after the one at the end | |
| 75 | // of the stack | |
| 76 | 0 | while (curDiff != equalities.lastElement()) { |
| 77 | 0 | curDiff = pointer.previous(); |
| 78 | } | |
| 79 | 0 | pointer.next(); |
| 80 | ||
| 81 | // Replace equality with a delete. | |
| 82 | 0 | pointer.set(new Difference(EditType.DELETE, lastEquality)); |
| 83 | // Insert a corresponding an insert. | |
| 84 | 0 | pointer.add(new Difference(EditType.INSERT, lastEquality)); |
| 85 | 0 | equalities.pop(); // Throw away the equality we just |
| 86 | // deleted; | |
| 87 | 0 | if (!equalities.empty()) { |
| 88 | // Throw away the previous equality (it needs to be | |
| 89 | // reevaluated). | |
| 90 | 0 | equalities.pop(); |
| 91 | } | |
| 92 | 0 | if (equalities.empty()) { |
| 93 | // There are no previous equalities, walk back to the | |
| 94 | // start. | |
| 95 | 0 | while (pointer.hasPrevious()) { |
| 96 | 0 | pointer.previous(); |
| 97 | } | |
| 98 | } else { | |
| 99 | // There is a safe equality we can fall back to. | |
| 100 | 0 | curDiff = equalities.lastElement(); |
| 101 | 0 | while (curDiff != pointer.previous()) { |
| 102 | // Intentionally empty loop. | |
| 103 | } | |
| 104 | } | |
| 105 | ||
| 106 | 0 | lengthChangesPre = 0; // Reset the counters. |
| 107 | 0 | lengthChangesPost = 0; |
| 108 | 0 | lastEquality = null; |
| 109 | 0 | changes = true; |
| 110 | } | |
| 111 | } | |
| 112 | 0 | curDiff = pointer.hasNext() ? (Difference) pointer.next() : null; |
| 113 | 0 | } |
| 114 | ||
| 115 | 0 | if (changes) { |
| 116 | 0 | cleanupMerge(diffs); |
| 117 | } | |
| 118 | 0 | } |
| 119 | ||
| 120 | /** | |
| 121 | * Reduce the number of edits by eliminating operationally trivial | |
| 122 | * equalities. | |
| 123 | * | |
| 124 | * @param diffs | |
| 125 | * List of Difference objects | |
| 126 | */ | |
| 127 | public static void cleanupEfficiency(final List<Difference> diffs) { | |
| 128 | 0 | if (diffs.isEmpty()) { |
| 129 | 0 | return; |
| 130 | } | |
| 131 | ||
| 132 | 0 | boolean changes = false; |
| 133 | 0 | Stack<Difference> equalities = new Stack<Difference>(); // Stack of indices where equalities are |
| 134 | // found. | |
| 135 | 0 | String lastEquality = null; // Always equal to |
| 136 | // equalities.lastElement().getText(); | |
| 137 | 0 | int preInsert = 0; // Is there an insertion operation before the last |
| 138 | // equality. | |
| 139 | 0 | int preDelete = 0; // Is there an deletion operation before the last |
| 140 | // equality. | |
| 141 | 0 | int postInsert = 0; // Is there an insertion operation after the last |
| 142 | // equality. | |
| 143 | 0 | int postDelete = 0; // Is there an deletion operation after the last |
| 144 | // equality. | |
| 145 | ||
| 146 | 0 | ListIterator<Difference> pointer = diffs.listIterator(); |
| 147 | 0 | Difference curDiff = pointer.hasNext() ? (Difference) pointer.next() : null; |
| 148 | 0 | Difference safeDiff = curDiff; // The last Difference that is known to |
| 149 | // be unsplitable. | |
| 150 | ||
| 151 | 0 | while (curDiff != null) { |
| 152 | 0 | EditType editType = curDiff.getEditType(); |
| 153 | 0 | if (EditType.EQUAL.equals(editType)) { |
| 154 | // equality found | |
| 155 | 0 | if (curDiff.getText().length() < editCost && (postInsert + postDelete) > 0) { |
| 156 | // Candidate found. | |
| 157 | 0 | equalities.push(curDiff); |
| 158 | 0 | preInsert = postInsert; |
| 159 | 0 | preDelete = postDelete; |
| 160 | 0 | lastEquality = curDiff.getText(); |
| 161 | } else { | |
| 162 | // Not a candidate, and can never become one. | |
| 163 | 0 | equalities.clear(); |
| 164 | 0 | lastEquality = null; |
| 165 | 0 | safeDiff = curDiff; |
| 166 | } | |
| 167 | 0 | postInsert = 0; |
| 168 | 0 | postDelete = 0; |
| 169 | } else { | |
| 170 | // an insertion or deletion | |
| 171 | 0 | if (EditType.DELETE.equals(editType)) { |
| 172 | 0 | postDelete = 1; |
| 173 | } else { | |
| 174 | 0 | postInsert = 1; |
| 175 | } | |
| 176 | ||
| 177 | // Five types to be split: | |
| 178 | // <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del> | |
| 179 | // <ins>A</ins>X<ins>C</ins><del>D</del> | |
| 180 | // <ins>A</ins><del>B</del>X<ins>C</ins> | |
| 181 | // <ins>A</del>X<ins>C</ins><del>D</del> | |
| 182 | // <ins>A</ins><del>B</del>X<del>C</del> | |
| 183 | 0 | if (lastEquality != null |
| 184 | && (((preInsert + preDelete + postInsert + postDelete) > 0) | |
| 185 | || ((lastEquality.length() < editCost / 2) | |
| 186 | && (preInsert + preDelete + postInsert + postDelete) == 3))) | |
| 187 | { | |
| 188 | // position pointer to the element after the one at the end | |
| 189 | // of the stack | |
| 190 | 0 | while (curDiff != equalities.lastElement()) { |
| 191 | 0 | curDiff = pointer.previous(); |
| 192 | } | |
| 193 | 0 | pointer.next(); |
| 194 | ||
| 195 | // Replace equality with a delete. | |
| 196 | 0 | pointer.set(new Difference(EditType.DELETE, lastEquality)); |
| 197 | // Insert a corresponding an insert. | |
| 198 | 0 | curDiff = new Difference(EditType.INSERT, lastEquality); |
| 199 | 0 | pointer.add(curDiff); |
| 200 | ||
| 201 | 0 | equalities.pop(); // Throw away the equality we just |
| 202 | // deleted; | |
| 203 | 0 | lastEquality = null; |
| 204 | 0 | if (preInsert == 1 && preDelete == 1) { |
| 205 | // No changes made which could affect previous entry, | |
| 206 | // keep going. | |
| 207 | 0 | postInsert = 1; |
| 208 | 0 | postDelete = 1; |
| 209 | 0 | equalities.clear(); |
| 210 | 0 | safeDiff = curDiff; |
| 211 | } else { | |
| 212 | 0 | if (!equalities.empty()) { |
| 213 | // Throw away the previous equality; | |
| 214 | 0 | equalities.pop(); |
| 215 | } | |
| 216 | 0 | if (equalities.empty()) { |
| 217 | // There are no previous questionable equalities, | |
| 218 | // walk back to the last known safe diff. | |
| 219 | 0 | curDiff = safeDiff; |
| 220 | } else { | |
| 221 | // There is an equality we can fall back to. | |
| 222 | 0 | curDiff = equalities.lastElement(); |
| 223 | } | |
| 224 | 0 | while (curDiff != pointer.previous()) { |
| 225 | // Intentionally empty loop. | |
| 226 | } | |
| 227 | ||
| 228 | 0 | postInsert = 0; |
| 229 | 0 | postDelete = 0; |
| 230 | } | |
| 231 | 0 | changes = true; |
| 232 | } | |
| 233 | } | |
| 234 | 0 | curDiff = pointer.hasNext() ? (Difference) pointer.next() : null; |
| 235 | 0 | } |
| 236 | ||
| 237 | 0 | if (changes) { |
| 238 | 0 | cleanupMerge(diffs); |
| 239 | } | |
| 240 | 0 | } |
| 241 | ||
| 242 | /** | |
| 243 | * Reorder and merge like edit sections. Merge equalities. Any edit section | |
| 244 | * can move as long as it doesn't cross an equality. | |
| 245 | * | |
| 246 | * @param diffs | |
| 247 | * List of Difference objects | |
| 248 | */ | |
| 249 | public static void cleanupMerge(final List<Difference> diffs) { | |
| 250 | // Add a dummy entry at the end. | |
| 251 | 0 | diffs.add(new Difference(EditType.EQUAL, "")); |
| 252 | ||
| 253 | 0 | int countDelete = 0; |
| 254 | 0 | int countInsert = 0; |
| 255 | 0 | StringBuilder textDelete = new StringBuilder(); |
| 256 | 0 | StringBuilder textInsert = new StringBuilder(); |
| 257 | ||
| 258 | 0 | int commonLength = 0; |
| 259 | ||
| 260 | 0 | ListIterator<Difference> pointer = diffs.listIterator(); |
| 261 | 0 | Difference curDiff = pointer.hasNext() ? (Difference) pointer.next() : null; |
| 262 | 0 | Difference prevEqual = null; |
| 263 | 0 | while (curDiff != null) { |
| 264 | 0 | EditType editType = curDiff.getEditType(); |
| 265 | 0 | if (EditType.INSERT.equals(editType)) { |
| 266 | 0 | countInsert++; |
| 267 | 0 | textInsert.append(curDiff.getText()); |
| 268 | 0 | prevEqual = null; |
| 269 | 0 | } else if (EditType.DELETE.equals(editType)) { |
| 270 | 0 | countDelete++; |
| 271 | 0 | textDelete.append(curDiff.getText()); |
| 272 | 0 | prevEqual = null; |
| 273 | 0 | } else if (EditType.EQUAL.equals(editType)) { |
| 274 | // Upon reaching an equality, check for prior redundancies. | |
| 275 | 0 | if (countDelete != 0 || countInsert != 0) { |
| 276 | // Delete the offending records. | |
| 277 | 0 | pointer.previous(); // Reverse direction. |
| 278 | 0 | while (countDelete-- > 0) { |
| 279 | 0 | pointer.previous(); |
| 280 | 0 | pointer.remove(); |
| 281 | } | |
| 282 | 0 | while (countInsert-- > 0) { |
| 283 | 0 | pointer.previous(); |
| 284 | 0 | pointer.remove(); |
| 285 | } | |
| 286 | ||
| 287 | 0 | if (countDelete != 0 && countInsert != 0) { |
| 288 | // Factor out any common prefixes. | |
| 289 | 0 | commonLength = Commonality.prefix(textInsert.toString(), textDelete.toString()); |
| 290 | 0 | if (commonLength > 0) { |
| 291 | 0 | if (pointer.hasPrevious()) { |
| 292 | 0 | curDiff = pointer.previous(); |
| 293 | 0 | assert EditType.EQUAL.equals(curDiff.getEditType()) : "Previous diff should have been an equality."; |
| 294 | 0 | curDiff.appendText(textInsert.substring(0, commonLength)); |
| 295 | 0 | pointer.next(); |
| 296 | } else { | |
| 297 | 0 | pointer.add(new Difference(EditType.EQUAL, textInsert.substring(0, commonLength))); |
| 298 | } | |
| 299 | 0 | textInsert.replace(0, textInsert.length(), textInsert.substring(commonLength)); |
| 300 | 0 | textDelete.replace(0, textDelete.length(), textDelete.substring(commonLength)); |
| 301 | } | |
| 302 | ||
| 303 | // Factor out any common suffixes. | |
| 304 | 0 | commonLength = Commonality.suffix(textInsert.toString(), textDelete.toString()); |
| 305 | 0 | if (commonLength > 0) { |
| 306 | 0 | curDiff = pointer.next(); |
| 307 | 0 | curDiff.prependText(textInsert.substring(textInsert.length() - commonLength)); |
| 308 | 0 | textInsert.replace(0, textInsert.length(), textInsert.substring(0, textInsert.length() - commonLength)); |
| 309 | 0 | textDelete.replace(0, textDelete.length(), textDelete.substring(0, textDelete.length() - commonLength)); |
| 310 | 0 | pointer.previous(); |
| 311 | } | |
| 312 | } | |
| 313 | ||
| 314 | // Insert the merged records. | |
| 315 | 0 | if (textDelete.length() != 0) { |
| 316 | 0 | pointer.add(new Difference(EditType.DELETE, textDelete.toString())); |
| 317 | } | |
| 318 | ||
| 319 | 0 | if (textInsert.length() != 0) { |
| 320 | 0 | pointer.add(new Difference(EditType.INSERT, textInsert.toString())); |
| 321 | } | |
| 322 | ||
| 323 | // Step forward to the equality. | |
| 324 | 0 | curDiff = pointer.hasNext() ? (Difference) pointer.next() : null; |
| 325 | 0 | } else if (prevEqual != null) { |
| 326 | // Merge this equality with the previous one. | |
| 327 | 0 | prevEqual.appendText(curDiff.getText()); |
| 328 | 0 | pointer.remove(); |
| 329 | 0 | curDiff = pointer.previous(); |
| 330 | 0 | pointer.next(); // Forward direction |
| 331 | } | |
| 332 | ||
| 333 | 0 | countInsert = 0; |
| 334 | 0 | countDelete = 0; |
| 335 | 0 | textDelete.delete(0, textDelete.length()); |
| 336 | 0 | textInsert.delete(0, textInsert.length()); |
| 337 | 0 | prevEqual = curDiff; |
| 338 | } | |
| 339 | 0 | curDiff = pointer.hasNext() ? (Difference) pointer.next() : null; |
| 340 | 0 | } |
| 341 | ||
| 342 | 0 | Difference lastDiff = diffs.get(diffs.size() - 1); |
| 343 | 0 | if (lastDiff.getText().length() == 0) { |
| 344 | 0 | diffs.remove(diffs.size() - 1); // Remove the dummy entry at the |
| 345 | // end. | |
| 346 | } | |
| 347 | 0 | } |
| 348 | ||
| 349 | /** | |
| 350 | * Set the edit cost for efficiency | |
| 351 | * | |
| 352 | * @param newEditCost the edit cost | |
| 353 | */ | |
| 354 | public static void setEditCost(int newEditCost) { | |
| 355 | 0 | editCost = newEditCost; |
| 356 | 0 | } |
| 357 | ||
| 358 | /** | |
| 359 | * Cost of an empty edit operation in terms of edit characters. | |
| 360 | */ | |
| 361 | private static final int EDIT_COST = 4; | |
| 362 | 0 | private static int editCost = EDIT_COST; |
| 363 | } |