NumberShaper.java |
1 /** 2 * Distribution License: 3 * JSword is free software; you can redistribute it and/or modify it under 4 * the terms of the GNU Lesser General Public License, version 2.1 or later 5 * as published by the Free Software Foundation. This program is distributed 6 * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even 7 * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 8 * See the GNU Lesser General Public License for more details. 9 * 10 * The License is available on the internet at: 11 * http://www.gnu.org/copyleft/lgpl.html 12 * or by writing to: 13 * Free Software Foundation, Inc. 14 * 59 Temple Place - Suite 330 15 * Boston, MA 02111-1307, USA 16 * 17 * © CrossWire Bible Society, 2007 - 2016 18 * 19 */ 20 package org.crosswire.common.icu; 21 22 import java.io.Serializable; 23 import java.util.Locale; 24 25 import org.crosswire.jsword.internationalisation.LocaleProviderManager; 26 27 /** 28 * NumberShaper changes numbers from one number system to another. That is, the 29 * numbers 0-9 have different representations in some locales. This means that 30 * they have different code points. For example, Eastern Arabic numbers are from 31 * ۰ - ۹. 32 * <p> 33 * Internally, numbers will be represented with 0-9, but externally they should 34 * show as a user wishes. Further user input may, optionally, use the external 35 * form. 36 * </p> 37 * <p> 38 * This shaper has special behavior for Arabic numbers that are in the form 39 * "12:34" as this is taken as chapter:verse. Normally, a ':' is treated as a 40 * numeric separator, this results in "12:34", but for verses it should be 41 * "34:12". That is, Arabic, numbers are left-to-right (even though the rest of 42 * the script is right-to-left) and the ':' as a numeric separator does not 43 * change that. So to get around this we mark the ':' as a right-to-left 44 * character. 45 * </p> 46 * <p> 47 * See also: com.ibm.icu.text.ArabicShaping 48 * </p> 49 * 50 * @see java.awt.font.NumericShaper 51 * @see gnu.lgpl.License The GNU Lesser General Public License for details. 52 * @author DM Smith 53 */ 54 public class NumberShaper implements Serializable { 55 /** 56 * Create a shaper that is appropriate for the user's locale. 57 */ 58 public NumberShaper() { 59 this.nineShape = '\u0000'; 60 } 61 62 /** 63 * Determine whether shaping is possible. 64 * 65 * @return whether shaping back to 0-9 is possible. 66 */ 67 public boolean canShape() { 68 // return arabicShaper != null || numericShaper != null || getNine() != 69 // '9'; 70 return getNine() != '9'; 71 } 72 73 /** 74 * Replace 0-9 in the input with representations appropriate for the script. 75 * 76 * @param input 77 * the text to be transformed 78 * @return the transformed text 79 */ 80 public String shape(String input) { 81 if (input == null) { 82 return input; 83 } 84 85 char[] src = input.toCharArray(); 86 boolean[] transformed = new boolean[1]; 87 transformed[0] = false; 88 char[] dest = shaped(src, transformed); 89 if (transformed[0]) { 90 return new String(dest); 91 } 92 93 return input; 94 } 95 96 /** 97 * Determine whether shaping back to 0-9 is possible. 98 * 99 * @return whether shaping back to 0-9 is possible. 100 */ 101 public boolean canUnshape() { 102 return getNine() != '9'; 103 } 104 105 /** 106 * Replace script representations of numbers with 0-9. 107 * 108 * @param input 109 * the text to be transformed 110 * @return the transformed text 111 */ 112 public String unshape(String input) { 113 char[] src = input.toCharArray(); 114 boolean[] transformed = new boolean[1]; 115 transformed[0] = false; 116 char[] dest = unshaped(src, transformed); 117 if (transformed[0]) { 118 return new String(dest); 119 } 120 121 return input; 122 } 123 124 /** 125 * Perform shaping back to 0-9. 126 * @param src 127 * the text to transform 128 * @param transformed 129 * an input parameter of one boolean that can hold whether there 130 * was a transformation 131 * @return the unshaped text 132 */ 133 private char[] unshaped(char[] src, boolean[] transformed) { 134 int nine = getNine(); 135 if (nine == '9') { 136 return src; 137 } 138 139 int zero = nine - 9; 140 return transform(src, zero, nine, '9' - nine, transformed); 141 } 142 143 /** 144 * @param src 145 * the text to transform 146 * @param transformed 147 * an input parameter of one boolean that can hold whether there 148 * was a transformation 149 * @return the shaped string 150 */ 151 private char[] shaped(char[] src, boolean[] transformed) { 152 char nine = getNine(); 153 if (nine == '9') { 154 return src; 155 } 156 157 return transform(src, '0', '9', nine - '9', transformed); 158 } 159 160 /** 161 * Transform either to or from 0-9 and the script representation, returning 162 * the result and true when at least one character is transformed. 163 * 164 * @param src 165 * the text to transform 166 * @param zero 167 * zero in the source representation 168 * @param nine 169 * nine in the source representation 170 * @param offset 171 * the distance between zeros in the source and target 172 * representation 173 * @param transformed 174 * an input parameter of one boolean that can hold whether there 175 * was a transformation 176 * @return the shaped string 177 */ 178 private char[] transform(char[] src, int zero, int nine, int offset, boolean[] transformed) { 179 char[] text = src; 180 181 // offset > 0 when we are going from 0-9 182 // FIXME(DMS): C:V should be shown as V:C in Farsi. 183 /* 184 int srcLen = text.length; 185 int destLen = srcLen; 186 if (offset > 0 && srcLen > 3) { 187 // count the number of ':' flanked by '0' to '9' 188 // each one of these is going 189 // to be bracketed with RLO and PDF. 190 for (int i = 1; i < srcLen - 1; i++) { 191 char prevChar = text[i - 1]; 192 char curChar = text[i]; 193 char nextChar = text[i + 1]; 194 if (curChar == ':' && prevChar >= '0' && prevChar <= '9' && nextChar >= '0' && nextChar <= '9') { 195 destLen += 2; 196 } 197 } 198 199 // Did we actually see a ':' 200 if (destLen != srcLen) { 201 transformed[0] = true; 202 int sPos = 0; 203 int dPos = 0; 204 int stop = srcLen - 1; // ensure look-ahead 205 char[] dest = new char[destLen]; 206 dest[dPos++] = text[sPos++]; 207 while (sPos < stop) { 208 char prevChar = text[sPos - 1]; 209 char nextChar = text[sPos + 1]; 210 char curChar = text[sPos++]; 211 if (curChar == ':' && prevChar >= '0' && prevChar <= '9' && nextChar >= '0' && nextChar <= '9') { 212 dest[dPos++] = ''; // RLO 213 dest[dPos++] = curChar; 214 dest[dPos++] = ''; // PDF 215 } else if (curChar >= zero && curChar <= nine) { 216 dest[dPos++] = (char) (curChar + offset); 217 } else { 218 dest[dPos++] = curChar; 219 } 220 } 221 // copy the rest 222 while (sPos < srcLen) { 223 dest[dPos++] = text[sPos++]; 224 } 225 return dest; 226 } 227 } 228 // Are we going to '0' - '9' with embedded, specially marked ':' 229 else if (offset < 0 && srcLen > 3) { 230 for (int sPos = 0; sPos < srcLen - 2; sPos++) { 231 if (text[sPos] == '' && text[sPos + 1] == ':' && text[sPos + 2] == '') { 232 destLen -= 2; 233 sPos += 2; 234 } 235 } 236 237 // Did we actually see a ':' 238 if (destLen != srcLen) { 239 transformed[0] = true; 240 char[] dest = new char[destLen]; 241 int sPos = 0; 242 int dPos = 0; 243 int stop = srcLen - 2; // ensure look-ahead 244 while (sPos < stop) { 245 char curChar = text[sPos++]; 246 if (curChar == '' && text[sPos] == ':' && text[sPos + 1] == '') { 247 dest[dPos++] = ':'; 248 sPos += 2; // skip the whole pattern 249 } else if (curChar >= zero && curChar <= nine) { 250 dest[dPos++] = (char) (curChar + offset); 251 } else { 252 dest[dPos++] = curChar; 253 } 254 } 255 256 // copy the rest 257 while (sPos < srcLen) { 258 dest[dPos++] = text[sPos++]; 259 } 260 261 return dest; 262 } 263 } 264 */ 265 int len = src.length; 266 for (int i = 0; i < len; i++) { 267 char c = text[i]; 268 if (c >= zero && c <= nine) { 269 text[i] = (char) (c + offset); 270 transformed[0] = true; 271 } 272 } 273 274 return text; 275 } 276 277 /** 278 * Establish nine for the script. There are scripts that don't have zeroes. 279 * 280 * @return the representation for 9 in the script 281 */ 282 private char getNine() { 283 if (nineShape == '\u0000') { 284 nineShape = '9'; 285 Locale locale = LocaleProviderManager.getLocale(); 286 if ("fa".equals(locale.getLanguage())) { 287 nineShape = '\u06f9'; 288 } else if ("ar".equals(locale.getLanguage())) { 289 nineShape = '\u0669'; 290 } 291 } 292 return nineShape; 293 } 294 295 296 /** 297 * Nine for this shaper. 298 */ 299 private char nineShape; 300 301 /** 302 * Serialization ID 303 */ 304 private static final long serialVersionUID = -8408052851113601251L; 305 } 306