| LineMap.java |
1 /**
2 * Distribution License:
3 * JSword is free software; you can redistribute it and/or modify it under
4 * the terms of the GNU Lesser General Public License, version 2.1 or later
5 * as published by the Free Software Foundation. This program is distributed
6 * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
7 * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8 * See the GNU Lesser General Public License for more details.
9 *
10 * The License is available on the internet at:
11 * http://www.gnu.org/copyleft/lgpl.html
12 * or by writing to:
13 * Free Software Foundation, Inc.
14 * 59 Temple Place - Suite 330
15 * Boston, MA 02111-1307, USA
16 *
17 * © CrossWire Bible Society, 2007 - 2016
18 *
19 */
20 package org.crosswire.common.diff;
21
22 import java.util.ArrayList;
23 import java.util.HashMap;
24 import java.util.List;
25 import java.util.Map;
26
27 /**
28 * LineMap is a heuristic algorithm that allows the differencing of a
29 * representation of lines. A Diff of the source and target maps can be
30 * reconstituted with restore.
31 *
32 * @see gnu.lgpl.License The GNU Lesser General Public License for details.
33 * @author DM Smith
34 */
35 public class LineMap {
36 /**
37 * Split two texts into a list of strings. Reduce the texts to a string of
38 * hashes where each Unicode character represents one line. The result is
39 * that text1 is encoded into
40 *
41 * @param source
42 * Baseline string
43 * @param target
44 * Changed string
45 */
46 public LineMap(final String source, final String target) {
47 // e.g. linearray[4] == "Hello\n"
48 // e.g. linehash.get("Hello\n") == 4
49
50 // "\x00" is a valid character, but various debuggers don't like it.
51 // So we'll insert a junk entry to avoid generating a null character.
52 lines = new ArrayList<String>();
53 lines.add("");
54
55 Map<String, Integer> linehash = new HashMap<String, Integer>();
56 sourceMap = linesToCharsMunge(source, lines, linehash);
57 targetMap = linesToCharsMunge(target, lines, linehash);
58 }
59
60 /**
61 * Rehydrate the text in a diff from a string of line hashes to real lines
62 * of text.
63 *
64 * @param diffs
65 * List of Difference objects
66 */
67 public void restore(final List<?> diffs) {
68 StringBuilder text = new StringBuilder();
69 for (int x = 0; x < diffs.size(); x++) {
70 Difference diff = (Difference) diffs.get(x);
71 String chars = diff.getText();
72
73 text.delete(0, text.length());
74 for (int y = 0; y < chars.length(); y++) {
75 text.append(lines.get(chars.charAt(y)));
76 }
77
78 diff.setText(text.toString());
79 }
80 }
81
82 /**
83 * @return the sourceMap
84 */
85 public String getSourceMap() {
86 return sourceMap;
87 }
88
89 /**
90 * @return the targetMap
91 */
92 public String getTargetMap() {
93 return targetMap;
94 }
95
96 /**
97 * @return the lines
98 */
99 public List<String> getLines() {
100 return lines;
101 }
102
103 /**
104 * Split a text into a list of strings. Reduce the texts to a string of
105 * hashes where each Unicode character represents one line.
106 *
107 * @param text
108 * String to encode
109 * @param linearray
110 * List of unique strings
111 * @param linehash
112 * Map of strings to indices
113 * @return Encoded string
114 */
115 private String linesToCharsMunge(final String text, List<String> linearray, Map<String, Integer> linehash) {
116 StringBuilder buf = new StringBuilder();
117 String work = text;
118 // text.split('\n') would work fine, but would temporarily double our
119 // memory footprint for minimal speed improvement.
120 while (work.length() != 0) {
121 int i = work.indexOf('\n');
122 if (i == -1) {
123 i = work.length() - 1;
124 }
125 String line = work.substring(0, i + 1);
126 work = work.substring(i + 1);
127 if (linehash.containsKey(line)) {
128 Integer charInt = linehash.get(line);
129 buf.append(String.valueOf((char) charInt.intValue()));
130 } else {
131 linearray.add(line);
132 linehash.put(line, Integer.valueOf(linearray.size() - 1));
133 buf.append(String.valueOf((char) (linearray.size() - 1)));
134 }
135 }
136 return buf.toString();
137 }
138
139 /**
140 * Each character in sourceMap provides an integer representation of the
141 * line in the original.
142 */
143 private String sourceMap;
144
145 /**
146 * Each character in sourceMap provides an integer representation of the
147 * line in the original.
148 */
149 private String targetMap;
150
151 /**
152 * The lines from the original. Useful for reconstitution.
153 */
154 private List<String> lines;
155 }
156