| NumberShaper.java |
1 /**
2 * Distribution License:
3 * JSword is free software; you can redistribute it and/or modify it under
4 * the terms of the GNU Lesser General Public License, version 2.1 or later
5 * as published by the Free Software Foundation. This program is distributed
6 * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
7 * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8 * See the GNU Lesser General Public License for more details.
9 *
10 * The License is available on the internet at:
11 * http://www.gnu.org/copyleft/lgpl.html
12 * or by writing to:
13 * Free Software Foundation, Inc.
14 * 59 Temple Place - Suite 330
15 * Boston, MA 02111-1307, USA
16 *
17 * © CrossWire Bible Society, 2007 - 2016
18 *
19 */
20 package org.crosswire.common.icu;
21
22 import java.io.Serializable;
23 import java.util.Locale;
24
25 import org.crosswire.jsword.internationalisation.LocaleProviderManager;
26
27 /**
28 * NumberShaper changes numbers from one number system to another. That is, the
29 * numbers 0-9 have different representations in some locales. This means that
30 * they have different code points. For example, Eastern Arabic numbers are from
31 * ۰ - ۹.
32 * <p>
33 * Internally, numbers will be represented with 0-9, but externally they should
34 * show as a user wishes. Further user input may, optionally, use the external
35 * form.
36 * </p>
37 * <p>
38 * This shaper has special behavior for Arabic numbers that are in the form
39 * "12:34" as this is taken as chapter:verse. Normally, a ':' is treated as a
40 * numeric separator, this results in "12:34", but for verses it should be
41 * "34:12". That is, Arabic, numbers are left-to-right (even though the rest of
42 * the script is right-to-left) and the ':' as a numeric separator does not
43 * change that. So to get around this we mark the ':' as a right-to-left
44 * character.
45 * </p>
46 * <p>
47 * See also: com.ibm.icu.text.ArabicShaping
48 * </p>
49 *
50 * @see java.awt.font.NumericShaper
51 * @see gnu.lgpl.License The GNU Lesser General Public License for details.
52 * @author DM Smith
53 */
54 public class NumberShaper implements Serializable {
55 /**
56 * Create a shaper that is appropriate for the user's locale.
57 */
58 public NumberShaper() {
59 this.nineShape = '\u0000';
60 }
61
62 /**
63 * Determine whether shaping is possible.
64 *
65 * @return whether shaping back to 0-9 is possible.
66 */
67 public boolean canShape() {
68 // return arabicShaper != null || numericShaper != null || getNine() !=
69 // '9';
70 return getNine() != '9';
71 }
72
73 /**
74 * Replace 0-9 in the input with representations appropriate for the script.
75 *
76 * @param input
77 * the text to be transformed
78 * @return the transformed text
79 */
80 public String shape(String input) {
81 if (input == null) {
82 return input;
83 }
84
85 char[] src = input.toCharArray();
86 boolean[] transformed = new boolean[1];
87 transformed[0] = false;
88 char[] dest = shaped(src, transformed);
89 if (transformed[0]) {
90 return new String(dest);
91 }
92
93 return input;
94 }
95
96 /**
97 * Determine whether shaping back to 0-9 is possible.
98 *
99 * @return whether shaping back to 0-9 is possible.
100 */
101 public boolean canUnshape() {
102 return getNine() != '9';
103 }
104
105 /**
106 * Replace script representations of numbers with 0-9.
107 *
108 * @param input
109 * the text to be transformed
110 * @return the transformed text
111 */
112 public String unshape(String input) {
113 char[] src = input.toCharArray();
114 boolean[] transformed = new boolean[1];
115 transformed[0] = false;
116 char[] dest = unshaped(src, transformed);
117 if (transformed[0]) {
118 return new String(dest);
119 }
120
121 return input;
122 }
123
124 /**
125 * Perform shaping back to 0-9.
126 * @param src
127 * the text to transform
128 * @param transformed
129 * an input parameter of one boolean that can hold whether there
130 * was a transformation
131 * @return the unshaped text
132 */
133 private char[] unshaped(char[] src, boolean[] transformed) {
134 int nine = getNine();
135 if (nine == '9') {
136 return src;
137 }
138
139 int zero = nine - 9;
140 return transform(src, zero, nine, '9' - nine, transformed);
141 }
142
143 /**
144 * @param src
145 * the text to transform
146 * @param transformed
147 * an input parameter of one boolean that can hold whether there
148 * was a transformation
149 * @return the shaped string
150 */
151 private char[] shaped(char[] src, boolean[] transformed) {
152 char nine = getNine();
153 if (nine == '9') {
154 return src;
155 }
156
157 return transform(src, '0', '9', nine - '9', transformed);
158 }
159
160 /**
161 * Transform either to or from 0-9 and the script representation, returning
162 * the result and true when at least one character is transformed.
163 *
164 * @param src
165 * the text to transform
166 * @param zero
167 * zero in the source representation
168 * @param nine
169 * nine in the source representation
170 * @param offset
171 * the distance between zeros in the source and target
172 * representation
173 * @param transformed
174 * an input parameter of one boolean that can hold whether there
175 * was a transformation
176 * @return the shaped string
177 */
178 private char[] transform(char[] src, int zero, int nine, int offset, boolean[] transformed) {
179 char[] text = src;
180
181 // offset > 0 when we are going from 0-9
182 // FIXME(DMS): C:V should be shown as V:C in Farsi.
183 /*
184 int srcLen = text.length;
185 int destLen = srcLen;
186 if (offset > 0 && srcLen > 3) {
187 // count the number of ':' flanked by '0' to '9'
188 // each one of these is going
189 // to be bracketed with RLO and PDF.
190 for (int i = 1; i < srcLen - 1; i++) {
191 char prevChar = text[i - 1];
192 char curChar = text[i];
193 char nextChar = text[i + 1];
194 if (curChar == ':' && prevChar >= '0' && prevChar <= '9' && nextChar >= '0' && nextChar <= '9') {
195 destLen += 2;
196 }
197 }
198
199 // Did we actually see a ':'
200 if (destLen != srcLen) {
201 transformed[0] = true;
202 int sPos = 0;
203 int dPos = 0;
204 int stop = srcLen - 1; // ensure look-ahead
205 char[] dest = new char[destLen];
206 dest[dPos++] = text[sPos++];
207 while (sPos < stop) {
208 char prevChar = text[sPos - 1];
209 char nextChar = text[sPos + 1];
210 char curChar = text[sPos++];
211 if (curChar == ':' && prevChar >= '0' && prevChar <= '9' && nextChar >= '0' && nextChar <= '9') {
212 dest[dPos++] = ''; // RLO
213 dest[dPos++] = curChar;
214 dest[dPos++] = ''; // PDF
215 } else if (curChar >= zero && curChar <= nine) {
216 dest[dPos++] = (char) (curChar + offset);
217 } else {
218 dest[dPos++] = curChar;
219 }
220 }
221 // copy the rest
222 while (sPos < srcLen) {
223 dest[dPos++] = text[sPos++];
224 }
225 return dest;
226 }
227 }
228 // Are we going to '0' - '9' with embedded, specially marked ':'
229 else if (offset < 0 && srcLen > 3) {
230 for (int sPos = 0; sPos < srcLen - 2; sPos++) {
231 if (text[sPos] == '' && text[sPos + 1] == ':' && text[sPos + 2] == '') {
232 destLen -= 2;
233 sPos += 2;
234 }
235 }
236
237 // Did we actually see a ':'
238 if (destLen != srcLen) {
239 transformed[0] = true;
240 char[] dest = new char[destLen];
241 int sPos = 0;
242 int dPos = 0;
243 int stop = srcLen - 2; // ensure look-ahead
244 while (sPos < stop) {
245 char curChar = text[sPos++];
246 if (curChar == '' && text[sPos] == ':' && text[sPos + 1] == '') {
247 dest[dPos++] = ':';
248 sPos += 2; // skip the whole pattern
249 } else if (curChar >= zero && curChar <= nine) {
250 dest[dPos++] = (char) (curChar + offset);
251 } else {
252 dest[dPos++] = curChar;
253 }
254 }
255
256 // copy the rest
257 while (sPos < srcLen) {
258 dest[dPos++] = text[sPos++];
259 }
260
261 return dest;
262 }
263 }
264 */
265 int len = src.length;
266 for (int i = 0; i < len; i++) {
267 char c = text[i];
268 if (c >= zero && c <= nine) {
269 text[i] = (char) (c + offset);
270 transformed[0] = true;
271 }
272 }
273
274 return text;
275 }
276
277 /**
278 * Establish nine for the script. There are scripts that don't have zeroes.
279 *
280 * @return the representation for 9 in the script
281 */
282 private char getNine() {
283 if (nineShape == '\u0000') {
284 nineShape = '9';
285 Locale locale = LocaleProviderManager.getLocale();
286 if ("fa".equals(locale.getLanguage())) {
287 nineShape = '\u06f9';
288 } else if ("ar".equals(locale.getLanguage())) {
289 nineShape = '\u0669';
290 }
291 }
292 return nineShape;
293 }
294
295
296 /**
297 * Nine for this shaper.
298 */
299 private char nineShape;
300
301 /**
302 * Serialization ID
303 */
304 private static final long serialVersionUID = -8408052851113601251L;
305 }
306