1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 as published by
5    * the Free Software Foundation. This program is distributed in the hope
6    * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7    * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *       http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * Copyright: 2007
18   *     The copyright to this program is held by it's authors.
19   *
20   * ID: $Id: Commonality.java 2196 2012-01-03 13:53:37Z dmsmith $
21   */
22  package org.crosswire.common.diff;
23  
24  /**
25   * A Commonality is shared text at the beginning, middle or end of two strings.
26   * 
27   * Based on the LGPL Diff_Match_Patch v1.5 javascript of Neil Fraser, Copyright (C) 2006<br>
28   * <a href="http://neil.fraser.name/software/diff_match_patch/">http://neil.fraser.name/software/diff_match_patch/</a>
29   * 
30   * @see gnu.lgpl.License for license details.<br>
31   *      The copyright to this program is held by it's authors.
32   * @author DM Smith [dmsmith555 at yahoo dot com]
33   */
34  public class Commonality {
35      /**
36       * This is a utility class, therefore the constructor is private.
37       */
38      private Commonality() {
39      }
40  
41      /**
42       * Find the length of a common prefix.
43       * 
44       * @param source
45       *            First string
46       * @param target
47       *            Second string
48       * @return The number of characters common to the start of each string.
49       */
50      public static int prefix(final String source, final String target) {
51          int pointermin = 0;
52          int pointermax = Math.min(source.length(), target.length());
53          int pointermid = pointermax;
54          while (pointermin < pointermid) {
55              if (source.regionMatches(0, target, 0, pointermid)) {
56                  pointermin = pointermid;
57              } else {
58                  pointermax = pointermid;
59              }
60              pointermid = (pointermax - pointermin) / 2 + pointermin;
61          }
62  
63          return pointermid;
64      }
65  
66      /**
67       * Find the length of a common suffix.
68       * 
69       * @param source
70       *            First string
71       * @param target
72       *            Second string
73       * @return The number of characters common to the end of each string.
74       */
75      public static int suffix(final String source, final String target) {
76          int pointermin = 0;
77          int pointermax = Math.min(source.length(), target.length());
78          int pointermid = pointermax;
79          while (pointermin < pointermid) {
80              if (source.regionMatches(source.length() - pointermid, target, target.length() - pointermid, pointermid)) {
81                  pointermin = pointermid;
82              } else {
83                  pointermax = pointermid;
84              }
85              pointermid = (pointermax - pointermin) / 2 + pointermin;
86          }
87  
88          return pointermid;
89      }
90  
91      /**
92       * Do the two texts share a substring which is at least half the length of
93       * the longer text?
94       * 
95       * @param source
96       *            Baseline string
97       * @param target
98       *            Changed string
99       * @return a CommonMiddle Or null if there was no match.
100      */
101     public static CommonMiddle halfMatch(final String source, final String target) {
102         int sourceLength = source.length();
103         int targetLength = target.length();
104         String longText = sourceLength > targetLength ? source : target;
105         String shortText = sourceLength > targetLength ? target : source;
106         int longTextLength = Math.max(sourceLength, targetLength);
107         if (longTextLength < 10 || shortText.length() < 1) {
108             return null; // Pointless.
109         }
110 
111         // First check if the second quarter is the seed for a half-match.
112         CommonMiddle hm1 = halfMatch(longText, shortText, ceil(longTextLength, 4));
113         // Check again based on the third quarter.
114         CommonMiddle hm2 = halfMatch(longText, shortText, ceil(longTextLength, 2));
115         CommonMiddle hm = null;
116         if (hm1 == null && hm2 == null) {
117             return null;
118         } else if (hm2 == null) {
119             hm = hm1;
120         } else if (hm1 == null) {
121             hm = hm2;
122         } else {
123             // Both matched. Select the longest.
124             hm = hm1.getCommonality().length() > hm2.getCommonality().length() ? hm1 : hm2;
125         }
126 
127         // A half-match was found, sort out the return data.
128         if (sourceLength > targetLength) {
129             return hm;
130         }
131 
132         // While hm cannot be null our QA checker can't figure that out.
133         if (hm != null) {
134             return new CommonMiddle(hm.getTargetPrefix(), hm.getTargetSuffix(), hm.getSourcePrefix(), hm.getSourceSuffix(), hm.getCommonality());
135         }
136         return null;
137     }
138 
139     private static int ceil(int number, int divisor) {
140         assert divisor > 0;
141         int result = number / divisor + ((number % divisor) > 0 ? 1 : 0);
142         // assert result == (int) Math.ceil(((double) number) / ((double)
143         // divisor));
144         return result;
145     }
146 
147     /**
148      * Does a substring of shortText exist within longText such that the
149      * substring is at least half the length of longText?
150      * 
151      * @param longText
152      *            Longer string
153      * @param shortText
154      *            Shorter string
155      * @param startIndex
156      *            Start index of quarter length substring within longText
157      * @return Five element String array, containing the prefix of longText, the
158      *         suffix of longText, the prefix of shortText, the suffix of
159      *         shortText and the common middle. Or null if there was no match.
160      */
161     private static CommonMiddle halfMatch(final String longText, final String shortText, final int startIndex) {
162         // Start with a 1/4 length substring at position i as a seed.
163         String seed = longText.substring(startIndex, startIndex + (longText.length() / 4));
164         int j = -1;
165         String common = "";
166         String longTextPrefix = "";
167         String longTextSuffix = "";
168         String shortTextPrefix = "";
169         String shortTextSuffix = "";
170         while ((j = shortText.indexOf(seed, j + 1)) != -1) {
171             int prefixLength = Commonality.prefix(longText.substring(startIndex), shortText.substring(j));
172             int suffixLength = Commonality.suffix(longText.substring(0, startIndex), shortText.substring(0, j));
173             if (common.length() < (prefixLength + suffixLength)) {
174                 common = shortText.substring(j - suffixLength, j) + shortText.substring(j, j + prefixLength);
175                 longTextPrefix = longText.substring(0, startIndex - suffixLength);
176                 longTextSuffix = longText.substring(startIndex + prefixLength);
177                 shortTextPrefix = shortText.substring(0, j - suffixLength);
178                 shortTextSuffix = shortText.substring(j + prefixLength);
179             }
180         }
181 
182         if (common.length() >= longText.length() / 2) {
183             return new CommonMiddle(longTextPrefix, longTextSuffix, shortTextPrefix, shortTextSuffix, common);
184         }
185 
186         return null;
187     }
188 
189 }
190