1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 as published by
5    * the Free Software Foundation. This program is distributed in the hope
6    * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7    * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the Internet at:
11   *       http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * Copyright: 2007
 *     The copyright to this program is held by its authors.
19   *
20   * ID: $Id: org.eclipse.jdt.ui.prefs 1178 2006-11-06 12:48:02Z dmsmith $
21   */
22  
23  package org.crosswire.common.icu;
24  
25  import java.io.Serializable;
26  import java.util.Locale;
27  
28  /**
29   * NumberShaper changes numbers from one number system to another. That is, the
30   * numbers 0-9 have different representations in some locales. This means that
 * they have different code points. For example, Eastern Arabic numbers are from
 * U+0660 to U+0669.
33   * <p>
34   * Internally, numbers will be represented with 0-9, but externally they should
35   * show as a user wishes. Further user input may, optionally, use the external
36   * form.
37   * </p>
38   * <p>
 * This shaper has special behavior for Arabic numbers that are in the form
 * "12:34", as this is taken as chapter:verse. Normally, a ':' is treated as a
 * numeric separator, which results in "12:34", but for verses it should be
 * "34:12". That is, Arabic numbers are left-to-right (even though the rest of
 * the script is right-to-left) and the ':' as a numeric separator does not
 * change that. So to get around this we mark the ':' as a right-to-left
 * character. (This handling is currently disabled; see the FIXME in
 * transform.)
46   * </p>
47   * 
48   * @see java.awt.font.NumericShaper
49   * @see com.ibm.icu.text.ArabicShaping
 * @see gnu.lgpl.License for license details.<br>
 *      The copyright to this program is held by its authors.
52   * @author DM Smith [dmsmith555 at yahoo dot com]
53   */
public class NumberShaper implements Serializable {
    /**
     * Create a shaper that is appropriate for the user's locale.
     */
    public NumberShaper() {
        this(Locale.getDefault());
    }

    /**
     * Create a shaper that is appropriate for the given locale.
     * 
     * @param locale
     *            the requested Locale
     */
    public NumberShaper(Locale locale) {
        this.locale = locale;
        // The script's nine is worked out lazily on first use; see getNine().
        this.nineShape = UNSET;
    }

    /**
     * Determine whether shaping is possible.
     * 
     * @return whether shaping from 0-9 to the script's digits is possible.
     */
    public boolean canShape() {
        return getNine() != '9';
    }

    /**
     * Replace 0-9 in the input with representations appropriate for the script.
     * 
     * @param input
     *            the text to be transformed, may be null
     * @return the transformed text, or the input itself when it is null or
     *         when nothing needed to be replaced
     */
    public String shape(String input) {
        if (input == null) {
            return null;
        }

        char nine = getNine();
        if (nine == '9') {
            // The locale uses 0-9 as is, so there is nothing to do.
            return input;
        }

        return transform(input, '0', '9', nine - '9');
    }

    /**
     * Determine whether shaping back to 0-9 is possible.
     * 
     * @return whether shaping back to 0-9 is possible.
     */
    public boolean canUnshape() {
        return getNine() != '9';
    }

    /**
     * Replace script representations of numbers with 0-9.
     * 
     * @param input
     *            the text to be transformed, may be null
     * @return the transformed text, or the input itself when it is null or
     *         when nothing needed to be replaced
     */
    public String unshape(String input) {
        if (input == null) {
            return null;
        }

        int nine = getNine();
        if (nine == '9') {
            // The locale uses 0-9 as is, so there is nothing to do.
            return input;
        }

        // The script's digits are contiguous, so zero is nine places below nine.
        return transform(input, nine - 9, nine, '9' - nine);
    }

    /**
     * Transform either to or from 0-9 and the script representation. Every
     * character in the range zero to nine (inclusive) is moved by offset; all
     * other characters are left alone.
     * 
     * @param input
     *            the text to transform
     * @param zero
     *            zero in the source representation
     * @param nine
     *            nine in the source representation
     * @param offset
     *            the distance between zeros in the source and target
     *            representation; positive when going from 0-9 to the script
     * @return the transformed text, or input itself when no character was in
     *         the source range
     */
    private static String transform(String input, int zero, int nine, int offset) {
        // FIXME(DMS): C:V should be shown as V:C in Farsi.
        // An earlier, disabled attempt bracketed each ':' flanked by digits
        // with RLO (U+202E) and PDF (U+202C) when shaping, and stripped that
        // bracketing again when unshaping.

        // Only copy the text once we know that something has to change.
        char[] text = null;
        int len = input.length();
        for (int i = 0; i < len; i++) {
            char c = input.charAt(i);
            if (c >= zero && c <= nine) {
                if (text == null) {
                    text = input.toCharArray();
                }
                text[i] = (char) (c + offset);
            }
        }

        if (text == null) {
            return input;
        }

        return new String(text);
    }

    /**
     * Establish nine for the script. There are scripts that don't have zeroes.
     * The value is computed on first use and remembered.
     * 
     * @return the representation for 9 in the script; '9' when the locale's
     *         language is neither "fa" nor "ar"
     */
    private char getNine() {
        if (nineShape == UNSET) {
            String language = locale.getLanguage();
            if ("fa".equals(language)) {
                nineShape = '\u06f9';
            } else if ("ar".equals(language)) {
                nineShape = '\u0669';
            } else {
                nineShape = '9';
            }
        }
        return nineShape;
    }

    /**
     * Marker for a nine that has not been determined yet.
     */
    private static final char UNSET = '\u0000';

    /**
     * The locale for this shaper.
     */
    private final Locale locale;

    /**
     * Nine for this shaper, or UNSET until it is first needed.
     */
    private char nineShape;

    /**
     * Serialization ID
     */
    private static final long serialVersionUID = -8408052851113601251L;
}
311