1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 or later
5    * as published by the Free Software Foundation. This program is distributed
6    * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
7    * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *      http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * © CrossWire Bible Society, 2007 - 2016
18   *
19   */
20  package org.crosswire.common.icu;
21  
22  import java.io.Serializable;
23  import java.util.Locale;
24  
25  import org.crosswire.jsword.internationalisation.LocaleProviderManager;
26  
27  /**
28   * NumberShaper changes numbers from one number system to another. That is, the
29   * numbers 0-9 have different representations in some locales. This means that
30   * they have different code points. For example, Eastern Arabic numbers are from
31   * ۰ - ۹.
32   * <p>
33   * Internally, numbers will be represented with 0-9, but externally they should
34   * show as a user wishes. Further user input may, optionally, use the external
35   * form.
36   * </p>
37   * <p>
38   * This shaper has special behavior for Arabic numbers that are in the form
39   * "12:34" as this is taken as chapter:verse. Normally, a ':' is treated as a
40   * numeric separator, this results in "12:34", but for verses it should be
41   * "34:12". That is, Arabic, numbers are left-to-right (even though the rest of
42   * the script is right-to-left) and the ':' as a numeric separator does not
43   * change that. So to get around this we mark the ':' as a right-to-left
44   * character.
45   * </p>
46   * <p>
47   * See also: com.ibm.icu.text.ArabicShaping
48   * </p>
49   * 
50   * @see java.awt.font.NumericShaper
51   * @see gnu.lgpl.License The GNU Lesser General Public License for details.
52   * @author DM Smith
53   */
54  public class NumberShaper implements Serializable {
55      /**
56       * Create a shaper that is appropriate for the user's locale.
57       */
58      public NumberShaper() {
59          this.nineShape = '\u0000';
60      }
61  
62      /**
63       * Determine whether shaping is possible.
64       * 
65       * @return whether shaping back to 0-9 is possible.
66       */
67      public boolean canShape() {
68          // return arabicShaper != null || numericShaper != null || getNine() !=
69          // '9';
70          return getNine() != '9';
71      }
72  
73      /**
74       * Replace 0-9 in the input with representations appropriate for the script.
75       * 
76       * @param input
77       *            the text to be transformed
78       * @return the transformed text
79       */
80      public String shape(String input) {
81          if (input == null) {
82              return input;
83          }
84  
85          char[] src = input.toCharArray();
86          boolean[] transformed = new boolean[1];
87          transformed[0] = false;
88          char[] dest = shaped(src, transformed);
89          if (transformed[0]) {
90              return new String(dest);
91          }
92  
93          return input;
94      }
95  
96      /**
97       * Determine whether shaping back to 0-9 is possible.
98       * 
99       * @return whether shaping back to 0-9 is possible.
100      */
101     public boolean canUnshape() {
102         return getNine() != '9';
103     }
104 
105     /**
106      * Replace script representations of numbers with 0-9.
107      * 
108      * @param input
109      *            the text to be transformed
110      * @return the transformed text
111      */
112     public String unshape(String input) {
113         char[] src = input.toCharArray();
114         boolean[] transformed = new boolean[1];
115         transformed[0] = false;
116         char[] dest = unshaped(src, transformed);
117         if (transformed[0]) {
118             return new String(dest);
119         }
120 
121         return input;
122     }
123 
124     /**
125      * Perform shaping back to 0-9.
126      * @param src
127      *            the text to transform
128      * @param transformed
129      *            an input parameter of one boolean that can hold whether there
130      *            was a transformation
131      * @return the unshaped text
132      */
133     private char[] unshaped(char[] src, boolean[] transformed) {
134         int nine = getNine();
135         if (nine == '9') {
136             return src;
137         }
138 
139         int zero = nine - 9;
140         return transform(src, zero, nine, '9' - nine, transformed);
141     }
142 
143     /**
144      * @param src
145      *            the text to transform
146      * @param transformed
147      *            an input parameter of one boolean that can hold whether there
148      *            was a transformation
149      * @return the shaped string
150      */
151     private char[] shaped(char[] src, boolean[] transformed) {
152         char nine = getNine();
153         if (nine == '9') {
154             return src;
155         }
156 
157         return transform(src, '0', '9', nine - '9', transformed);
158     }
159 
160     /**
161      * Transform either to or from 0-9 and the script representation, returning
162      * the result and true when at least one character is transformed.
163      * 
164      * @param src
165      *            the text to transform
166      * @param zero
167      *            zero in the source representation
168      * @param nine
169      *            nine in the source representation
170      * @param offset
171      *            the distance between zeros in the source and target
172      *            representation
173      * @param transformed
174      *            an input parameter of one boolean that can hold whether there
175      *            was a transformation
176      * @return the shaped string
177      */
178     private char[] transform(char[] src, int zero, int nine, int offset, boolean[] transformed) {
179         char[] text = src;
180 
181         // offset > 0 when we are going from 0-9
182         // FIXME(DMS): C:V should be shown as V:C in Farsi.
183 /*
184         int srcLen = text.length;
185         int destLen = srcLen;
186         if (offset > 0 && srcLen > 3) {
187             // count the number of ':' flanked by '0' to '9'
188             // each one of these is going
189             // to be bracketed with RLO and PDF.
190             for (int i = 1; i < srcLen - 1; i++) {
191                 char prevChar = text[i - 1];
192                 char curChar = text[i];
193                 char nextChar = text[i + 1];
194                 if (curChar == ':' && prevChar >= '0' && prevChar <= '9' && nextChar >= '0' && nextChar <= '9') {
195                     destLen += 2;
196                 }
197             }
198 
199             // Did we actually see a ':'
200             if (destLen != srcLen) {
201                 transformed[0] = true;
202                 int sPos = 0;
203                 int dPos = 0;
204                 int stop = srcLen - 1; // ensure look-ahead
205                 char[] dest = new char[destLen];
206                 dest[dPos++] = text[sPos++];
207                 while (sPos < stop) {
208                     char prevChar = text[sPos - 1];
209                     char nextChar = text[sPos + 1];
210                     char curChar = text[sPos++];
211                     if (curChar == ':' && prevChar >= '0' && prevChar <= '9' && nextChar >= '0' && nextChar <= '9') {
212                         dest[dPos++] = '‮'; // RLO
213                         dest[dPos++] = curChar;
214                         dest[dPos++] = '‬'; // PDF
215                     } else if (curChar >= zero && curChar <= nine) {
216                         dest[dPos++] = (char) (curChar + offset);
217                     } else {
218                         dest[dPos++] = curChar;
219                     }
220                 }
221                 // copy the rest
222                 while (sPos < srcLen) {
223                     dest[dPos++] = text[sPos++];
224                 }
225                 return dest;
226             }
227         }
228         // Are we going to '0' - '9' with embedded, specially marked ':'
229         else if (offset < 0 && srcLen > 3) {
230             for (int sPos = 0; sPos < srcLen - 2; sPos++) {
231                 if (text[sPos] == '‮' && text[sPos + 1] == ':' && text[sPos + 2] == '‬') {
232                     destLen -= 2;
233                     sPos += 2;
234                 }
235             }
236 
237             // Did we actually see a '‮:‬'
238             if (destLen != srcLen) {
239                 transformed[0] = true;
240                 char[] dest = new char[destLen];
241                 int sPos = 0;
242                 int dPos = 0;
243                 int stop = srcLen - 2; // ensure look-ahead
244                 while (sPos < stop) {
245                     char curChar = text[sPos++];
246                     if (curChar == '‮' && text[sPos] == ':' && text[sPos + 1] == '‬') {
247                         dest[dPos++] = ':';
248                         sPos += 2; // skip the whole pattern
249                     } else if (curChar >= zero && curChar <= nine) {
250                         dest[dPos++] = (char) (curChar + offset);
251                     } else {
252                         dest[dPos++] = curChar;
253                     }
254                 }
255 
256                 // copy the rest
257                 while (sPos < srcLen) {
258                     dest[dPos++] = text[sPos++];
259                 }
260 
261                 return dest;
262             }
263         }
264 */
265         int len = src.length;
266         for (int i = 0; i < len; i++) {
267             char c = text[i];
268             if (c >= zero && c <= nine) {
269                 text[i] = (char) (c + offset);
270                 transformed[0] = true;
271             }
272         }
273 
274         return text;
275     }
276 
277     /**
278      * Establish nine for the script. There are scripts that don't have zeroes.
279      * 
280      * @return the representation for 9 in the script
281      */
282     private char getNine() {
283         if (nineShape == '\u0000') {
284             nineShape = '9';
285             Locale locale = LocaleProviderManager.getLocale();
286             if ("fa".equals(locale.getLanguage())) {
287                 nineShape = '\u06f9';
288             } else if ("ar".equals(locale.getLanguage())) {
289                 nineShape = '\u0669';
290             }
291         }
292         return nineShape;
293     }
294 
295 
296     /**
297      * Nine for this shaper.
298      */
299     private char nineShape;
300 
301     /**
302      * Serialization ID
303      */
304     private static final long serialVersionUID = -8408052851113601251L;
305 }
306