1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 or later
5    * as published by the Free Software Foundation. This program is distributed
6    * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
7    * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *      http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * © CrossWire Bible Society, 2007 - 2016
18   *
19   */
20  package org.crosswire.common.diff;
21  
22  import java.util.ArrayList;
23  import java.util.HashMap;
24  import java.util.List;
25  import java.util.Map;
26  
27  /**
28   * LineMap is a heuristic algorithm that allows the differencing of a
29   * representation of lines. A Diff of the source and target maps can be
30   * reconstituted with restore.
31   * 
32   * @see gnu.lgpl.License The GNU Lesser General Public License for details.
33   * @author DM Smith
34   */
35  public class LineMap {
36      /**
37       * Split two texts into a list of strings. Reduce the texts to a string of
38       * hashes where each Unicode character represents one line. The result is
39       * that text1 is encoded into
40       * 
41       * @param source
42       *            Baseline string
43       * @param target
44       *            Changed string
45       */
46      public LineMap(final String source, final String target) {
47          // e.g. linearray[4] == "Hello\n"
48          // e.g. linehash.get("Hello\n") == 4
49  
50          // "\x00" is a valid character, but various debuggers don't like it.
51          // So we'll insert a junk entry to avoid generating a null character.
52          lines = new ArrayList<String>();
53          lines.add("");
54  
55          Map<String, Integer> linehash = new HashMap<String, Integer>();
56          sourceMap = linesToCharsMunge(source, lines, linehash);
57          targetMap = linesToCharsMunge(target, lines, linehash);
58      }
59  
60      /**
61       * Rehydrate the text in a diff from a string of line hashes to real lines
62       * of text.
63       * 
64       * @param diffs
65       *            List of Difference objects
66       */
67      public void restore(final List<?> diffs) {
68          StringBuilder text = new StringBuilder();
69          for (int x = 0; x < diffs.size(); x++) {
70              Difference diff = (Difference) diffs.get(x);
71              String chars = diff.getText();
72  
73              text.delete(0, text.length());
74              for (int y = 0; y < chars.length(); y++) {
75                  text.append(lines.get(chars.charAt(y)));
76              }
77  
78              diff.setText(text.toString());
79          }
80      }
81  
82      /**
83       * @return the sourceMap
84       */
85      public String getSourceMap() {
86          return sourceMap;
87      }
88  
89      /**
90       * @return the targetMap
91       */
92      public String getTargetMap() {
93          return targetMap;
94      }
95  
96      /**
97       * @return the lines
98       */
99      public List<String> getLines() {
100         return lines;
101     }
102 
103     /**
104      * Split a text into a list of strings. Reduce the texts to a string of
105      * hashes where each Unicode character represents one line.
106      * 
107      * @param text
108      *            String to encode
109      * @param linearray
110      *            List of unique strings
111      * @param linehash
112      *            Map of strings to indices
113      * @return Encoded string
114      */
115     private String linesToCharsMunge(final String text, List<String> linearray, Map<String, Integer> linehash) {
116         StringBuilder buf = new StringBuilder();
117         String work = text;
118         // text.split('\n') would work fine, but would temporarily double our
119         // memory footprint for minimal speed improvement.
120         while (work.length() != 0) {
121             int i = work.indexOf('\n');
122             if (i == -1) {
123                 i = work.length() - 1;
124             }
125             String line = work.substring(0, i + 1);
126             work = work.substring(i + 1);
127             if (linehash.containsKey(line)) {
128                 Integer charInt = linehash.get(line);
129                 buf.append(String.valueOf((char) charInt.intValue()));
130             } else {
131                 linearray.add(line);
132                 linehash.put(line, Integer.valueOf(linearray.size() - 1));
133                 buf.append(String.valueOf((char) (linearray.size() - 1)));
134             }
135         }
136         return buf.toString();
137     }
138 
139     /**
140      * Each character in sourceMap provides an integer representation of the
141      * line in the original.
142      */
143     private String sourceMap;
144 
145     /**
146      * Each character in sourceMap provides an integer representation of the
147      * line in the original.
148      */
149     private String targetMap;
150 
151     /**
152      * The lines from the original. Useful for reconstitution.
153      */
154     private List<String> lines;
155 }
156