| NumberShaper.java |
1 /**
2 * Distribution License:
3 * JSword is free software; you can redistribute it and/or modify it under
4 * the terms of the GNU Lesser General Public License, version 2.1 as published by
5 * the Free Software Foundation. This program is distributed in the hope
6 * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8 * See the GNU Lesser General Public License for more details.
9 *
10 * The License is available on the Internet at:
11 * http://www.gnu.org/copyleft/lgpl.html
12 * or by writing to:
13 * Free Software Foundation, Inc.
14 * 59 Temple Place - Suite 330
15 * Boston, MA 02111-1307, USA
16 *
17 * Copyright: 2007
18 * The copyright to this program is held by its authors.
19 *
20 * ID: $Id: org.eclipse.jdt.ui.prefs 1178 2006-11-06 12:48:02Z dmsmith $
21 */
22
23 package org.crosswire.common.icu;
24
25 import java.io.Serializable;
26 import java.util.Locale;
27
/**
 * NumberShaper changes numbers from one number system to another. That is, the
 * numbers 0-9 have different representations in some locales. This means that
 * they have different code points. For example, the digits used with Arabic
 * are U+0660 - U+0669 and the digits used with Farsi are U+06F0 - U+06F9.
 * <p>
 * Internally, numbers will be represented with 0-9, but externally they should
 * show as a user wishes. Further user input may, optionally, use the external
 * form.
 * </p>
 * <p>
 * Only the languages "ar" and "fa" are shaped; for every other language the
 * text is returned unchanged.
 * </p>
 * <p>
 * Known limitation: numbers in the form "12:34" are taken as chapter:verse.
 * Normally, a ':' is treated as a numeric separator, which results in "12:34",
 * but for verses it should be "34:12". That is, Arabic numbers are
 * left-to-right (even though the rest of the script is right-to-left) and the
 * ':' as a numeric separator does not change that. The intended work-around is
 * to mark the ':' as a right-to-left character, but that is not implemented
 * yet; see the FIXME in {@code transform}.
 * </p>
 *
 * @see java.awt.font.NumericShaper
 * @see com.ibm.icu.text.ArabicShaping
 * @see gnu.lgpl.License for license details.<br>
 *      The copyright to this program is held by its authors.
 * @author DM Smith [dmsmith555 at yahoo dot com]
 */
public class NumberShaper implements Serializable {
    /**
     * Create a shaper that is appropriate for the user's locale.
     */
    public NumberShaper() {
        this(Locale.getDefault());
    }

    /**
     * Create a shaper that is appropriate for the given locale.
     *
     * @param locale
     *            the requested Locale
     */
    public NumberShaper(Locale locale) {
        this.locale = locale;
        // U+0000 marks "not yet determined"; getNine() fills it in lazily.
        this.nineShape = NOT_DETERMINED;
    }

    /**
     * Determine whether shaping from 0-9 to the script's digits is possible.
     *
     * @return whether shaping from 0-9 is possible.
     */
    public boolean canShape() {
        return getNine() != '9';
    }

    /**
     * Replace 0-9 in the input with representations appropriate for the script.
     *
     * @param input
     *            the text to be transformed, may be null
     * @return the transformed text, or the input itself (same instance) when
     *         it is null or nothing needed to be transformed
     */
    public String shape(String input) {
        if (input == null) {
            return input;
        }

        // toCharArray() hands back a private copy, so in-place edits are safe.
        char[] src = input.toCharArray();
        boolean[] transformed = {
            false
        };
        char[] dest = shaped(src, transformed);
        if (transformed[0]) {
            return new String(dest);
        }

        return input;
    }

    /**
     * Determine whether shaping back to 0-9 is possible.
     *
     * @return whether shaping back to 0-9 is possible.
     */
    public boolean canUnshape() {
        return getNine() != '9';
    }

    /**
     * Replace script representations of numbers with 0-9.
     *
     * @param input
     *            the text to be transformed, may be null
     * @return the transformed text, or the input itself (same instance) when
     *         it is null or nothing needed to be transformed
     */
    public String unshape(String input) {
        // Mirror shape(): a null input is passed through rather than failing.
        if (input == null) {
            return input;
        }

        // toCharArray() hands back a private copy, so in-place edits are safe.
        char[] src = input.toCharArray();
        boolean[] transformed = {
            false
        };
        char[] dest = unshaped(src, transformed);
        if (transformed[0]) {
            return new String(dest);
        }

        return input;
    }

    /**
     * Perform shaping back to 0-9.
     *
     * @param src
     *            the text to transform; it is modified in place
     * @param transformed
     *            a one element array that is set to true when at least one
     *            character was changed
     * @return the unshaped text
     */
    private char[] unshaped(char[] src, boolean[] transformed) {
        int nine = getNine();
        if (nine == '9') {
            // The script already uses 0-9, so there is nothing to undo.
            return src;
        }

        // The ten digits are contiguous, so zero sits nine code points below nine.
        int zero = nine - 9;
        return transform(src, zero, nine, '9' - nine, transformed);
    }

    /**
     * Perform shaping from 0-9 to the digits of the script.
     *
     * @param src
     *            the text to transform; it is modified in place
     * @param transformed
     *            a one element array that is set to true when at least one
     *            character was changed
     * @return the shaped text
     */
    private char[] shaped(char[] src, boolean[] transformed) {
        char nine = getNine();
        if (nine == '9') {
            // The script already uses 0-9, so there is nothing to do.
            return src;
        }

        return transform(src, '0', '9', nine - '9', transformed);
    }

    /**
     * Transform either to or from 0-9 and the script representation, returning
     * the result and flagging when at least one character is transformed.
     *
     * @param src
     *            the text to transform; it is modified in place
     * @param zero
     *            zero in the source representation
     * @param nine
     *            nine in the source representation
     * @param offset
     *            the distance between zeros in the source and target
     *            representation
     * @param transformed
     *            an input parameter of one boolean that can hold whether there
     *            was a transformation; it is only ever set to true here
     * @return the transformed text, which is the same array as src
     */
    private char[] transform(char[] src, int zero, int nine, int offset, boolean[] transformed) {
        // offset > 0 when we are going from 0-9
        // FIXME(DMS): C:V should be shown as V:C in Farsi.
        // The intended approach, not yet enabled: when going from 0-9
        // (offset > 0), bracket every ':' that is flanked by digits with the
        // RLO and PDF directional controls, and when going back to 0-9
        // (offset < 0), strip that bracketing again. Both directions change
        // the length of the text, so they need a separate destination array.
        for (int i = 0; i < src.length; i++) {
            char c = src[i];
            if (c >= zero && c <= nine) {
                src[i] = (char) (c + offset);
                transformed[0] = true;
            }
        }

        return src;
    }

    /**
     * Establish nine for the script. There are scripts that don't have zeroes.
     * The value is determined on first use and then remembered.
     *
     * @return the representation for 9 in the script
     */
    private char getNine() {
        if (nineShape == NOT_DETERMINED) {
            String language = locale.getLanguage();
            if ("fa".equals(language)) {
                nineShape = '\u06f9';
            } else if ("ar".equals(language)) {
                nineShape = '\u0669';
            } else {
                nineShape = '9';
            }
        }
        return nineShape;
    }

    /**
     * Marker value (U+0000) for a nine that has not been determined yet.
     */
    private static final char NOT_DETERMINED = 0;

    /**
     * The locale for this shaper.
     */
    private final Locale locale;

    /**
     * Nine for this shaper.
     */
    private char nineShape;

    /**
     * Serialization ID
     */
    private static final long serialVersionUID = -8408052851113601251L;
}
311