Coverage Report - org.crosswire.common.diff.Distance
 
Classes in this File Line Coverage Branch Coverage Complexity
Distance
0%
0/24
0%
0/16
6.5
 
 1  
 /**
 2  
  * Distribution License:
 3  
  * JSword is free software; you can redistribute it and/or modify it under
 4  
  * the terms of the GNU Lesser General Public License, version 2.1 or later
 5  
  * as published by the Free Software Foundation. This program is distributed
 6  
  * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 7  
  * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 8  
  * See the GNU Lesser General Public License for more details.
 9  
  *
 10  
  * The License is available on the internet at:
 11  
  *      http://www.gnu.org/copyleft/lgpl.html
 12  
  * or by writing to:
 13  
  *      Free Software Foundation, Inc.
 14  
  *      59 Temple Place - Suite 330
 15  
  *      Boston, MA 02111-1307, USA
 16  
  *
 17  
  * © CrossWire Bible Society, 2007 - 2016
 18  
  *
 19  
  */
 20  
 package org.crosswire.common.diff;
 21  
 
 22  
 /**
 23  
  * Compute the distance between 2 strings. The larger the number the greater the
 24  
  * distance.
 25  
  * 
 26  
  * @see gnu.lgpl.License The GNU Lesser General Public License for details.
 27  
  * @author DM Smith
 28  
  */
 29  
 public final class Distance {
 30  
     /**
 31  
      * Prevent instantiation.
 32  
      */
 33  0
     private Distance() {
 34  0
     }
 35  
 
 36  
     /**
 37  
      * Compute the LevenshteinDistance between two strings. See <a
 38  
      * href="http://www.merriampark.com/ldjava.htm"
 39  
      * >www.merriampark.com/ldjava.htm</a> for original implementation.
 40  
      * 
 41  
      * @param source
 42  
      *            the baseline text
 43  
      * @param target
 44  
      *            the changed text
 45  
      * @return the distance
 46  
      */
 47  
     public static int getLevenshteinDistance(String source, String target) {
 48  0
         if (source == null || target == null) {
 49  0
             throw new IllegalArgumentException("Strings must not be null");
 50  
         }
 51  
 
 52  
         /*
 53  
          * The difference between this impl. and the previous is that, rather
 54  
          * than creating and retaining a matrix of size s.length()+1 by
 55  
          * t.length()+1, we maintain two single-dimensional arrays of length
 56  
          * s.length()+1. The first, d, is the 'current working' distance array
 57  
          * that maintains the newest distance cost counts as we iterate through
 58  
          * the characters of String s. Each time we increment the index of
 59  
          * String t we are comparing, d is copied to p, the second int[]. Doing
 60  
          * so allows us to retain the previous cost counts as required by the
 61  
          * algorithm (taking the minimum of the cost count to the left, up one,
 62  
          * and diagonally up and to the left of the current cost count being
 63  
          * calculated). (Note that the arrays aren't really copied anymore, just
 64  
          * switched...this is clearly much better than cloning an array or doing
 65  
          * a System.arraycopy() each time through the outer loop.)
 66  
          * 
 67  
          * Effectively, the difference between the two implementations is this
 68  
          * one does not cause an out of memory condition when calculating the LD
 69  
          * over two very large strings.
 70  
          */
 71  
 
 72  0
         int sourceLength = source.length(); // length of source
 73  0
         int targetLength = target.length(); // length of target
 74  
 
 75  0
         if (sourceLength == 0) {
 76  0
             return targetLength;
 77  0
         } else if (targetLength == 0) {
 78  0
             return sourceLength;
 79  
         }
 80  
 
 81  0
         int[] prevDist = new int[sourceLength + 1]; // 'previous' cost array,
 82  
         // horizontally
 83  0
         int[] dist = new int[sourceLength + 1]; // cost array, horizontally
 84  
         int[] swap; // placeholder to assist in swapping prevDist and dist
 85  
 
 86  
         // indexes into strings source and target
 87  
         int i; // iterates through source
 88  
         int j; // iterates through target
 89  
 
 90  
         char targetJ; // jth character of t
 91  
 
 92  
         int cost;
 93  
 
 94  0
         for (i = 0; i <= sourceLength; i++) {
 95  0
             prevDist[i] = i;
 96  
         }
 97  
 
 98  0
         for (j = 1; j <= targetLength; j++) {
 99  0
             targetJ = target.charAt(j - 1);
 100  0
             dist[0] = j;
 101  
 
 102  0
             for (i = 1; i <= sourceLength; i++) {
 103  0
                 cost = source.charAt(i - 1) == targetJ ? 0 : 1;
 104  
                 // minimum of cell to the left + 1, to the top + 1, diagonally
 105  
                 // left and up +cost
 106  0
                 dist[i] = Math.min(Math.min(dist[i - 1] + 1, prevDist[i] + 1), prevDist[i - 1] + cost);
 107  
             }
 108  
 
 109  
             // copy current distance counts to 'previous row' distance counts
 110  0
             swap = prevDist;
 111  0
             prevDist = dist;
 112  0
             dist = swap;
 113  
         }
 114  
 
 115  
         // our last action in the above loop was to switch d and p, so p now
 116  
         // actually has the most recent cost counts
 117  0
         return prevDist[sourceLength];
 118  
     }
 119  
 }