LineMap.java |
1 /** 2 * Distribution License: 3 * JSword is free software; you can redistribute it and/or modify it under 4 * the terms of the GNU Lesser General Public License, version 2.1 or later 5 * as published by the Free Software Foundation. This program is distributed 6 * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even 7 * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 8 * See the GNU Lesser General Public License for more details. 9 * 10 * The License is available on the internet at: 11 * http://www.gnu.org/copyleft/lgpl.html 12 * or by writing to: 13 * Free Software Foundation, Inc. 14 * 59 Temple Place - Suite 330 15 * Boston, MA 02111-1307, USA 16 * 17 * © CrossWire Bible Society, 2007 - 2016 18 * 19 */ 20 package org.crosswire.common.diff; 21 22 import java.util.ArrayList; 23 import java.util.HashMap; 24 import java.util.List; 25 import java.util.Map; 26 27 /** 28 * LineMap is a heuristic algorithm that allows the differencing of a 29 * representation of lines. A Diff of the source and target maps can be 30 * reconstituted with restore. 31 * 32 * @see gnu.lgpl.License The GNU Lesser General Public License for details. 33 * @author DM Smith 34 */ 35 public class LineMap { 36 /** 37 * Split two texts into a list of strings. Reduce the texts to a string of 38 * hashes where each Unicode character represents one line. The result is 39 * that text1 is encoded into 40 * 41 * @param source 42 * Baseline string 43 * @param target 44 * Changed string 45 */ 46 public LineMap(final String source, final String target) { 47 // e.g. linearray[4] == "Hello\n" 48 // e.g. linehash.get("Hello\n") == 4 49 50 // "\x00" is a valid character, but various debuggers don't like it. 51 // So we'll insert a junk entry to avoid generating a null character. 52 lines = new ArrayList<String>(); 53 lines.add(""); 54 55 Map<String, Integer> linehash = new HashMap<String, Integer>(); 56 sourceMap = linesToCharsMunge(source, lines, linehash); 57 targetMap = linesToCharsMunge(target, lines, linehash); 58 } 59 60 /** 61 * Rehydrate the text in a diff from a string of line hashes to real lines 62 * of text. 63 * 64 * @param diffs 65 * List of Difference objects 66 */ 67 public void restore(final List<?> diffs) { 68 StringBuilder text = new StringBuilder(); 69 for (int x = 0; x < diffs.size(); x++) { 70 Difference diff = (Difference) diffs.get(x); 71 String chars = diff.getText(); 72 73 text.delete(0, text.length()); 74 for (int y = 0; y < chars.length(); y++) { 75 text.append(lines.get(chars.charAt(y))); 76 } 77 78 diff.setText(text.toString()); 79 } 80 } 81 82 /** 83 * @return the sourceMap 84 */ 85 public String getSourceMap() { 86 return sourceMap; 87 } 88 89 /** 90 * @return the targetMap 91 */ 92 public String getTargetMap() { 93 return targetMap; 94 } 95 96 /** 97 * @return the lines 98 */ 99 public List<String> getLines() { 100 return lines; 101 } 102 103 /** 104 * Split a text into a list of strings. Reduce the texts to a string of 105 * hashes where each Unicode character represents one line. 106 * 107 * @param text 108 * String to encode 109 * @param linearray 110 * List of unique strings 111 * @param linehash 112 * Map of strings to indices 113 * @return Encoded string 114 */ 115 private String linesToCharsMunge(final String text, List<String> linearray, Map<String, Integer> linehash) { 116 StringBuilder buf = new StringBuilder(); 117 String work = text; 118 // text.split('\n') would work fine, but would temporarily double our 119 // memory footprint for minimal speed improvement. 120 while (work.length() != 0) { 121 int i = work.indexOf('\n'); 122 if (i == -1) { 123 i = work.length() - 1; 124 } 125 String line = work.substring(0, i + 1); 126 work = work.substring(i + 1); 127 if (linehash.containsKey(line)) { 128 Integer charInt = linehash.get(line); 129 buf.append(String.valueOf((char) charInt.intValue())); 130 } else { 131 linearray.add(line); 132 linehash.put(line, Integer.valueOf(linearray.size() - 1)); 133 buf.append(String.valueOf((char) (linearray.size() - 1))); 134 } 135 } 136 return buf.toString(); 137 } 138 139 /** 140 * Each character in sourceMap provides an integer representation of the 141 * line in the original. 142 */ 143 private String sourceMap; 144 145 /** 146 * Each character in sourceMap provides an integer representation of the 147 * line in the original. 148 */ 149 private String targetMap; 150 151 /** 152 * The lines from the original. Useful for reconstitution. 153 */ 154 private List<String> lines; 155 } 156