Coverage Report

Coverage Report - org.crosswire.common.util.Language

Classes in this File

Line Coverage

Branch Coverage

Complexity

Language

0/122

0/88

4.174

Language$CanonicalUtils

0/42

0/52

4.174

 /**
  * Distribution License:
  * JSword is free software; you can redistribute it and/or modify it under
  * the terms of the GNU Lesser General Public License, version 2.1 or later
  * as published by the Free Software Foundation. This program is distributed
  * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
  * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  * See the GNU Lesser General Public License for more details.
  *
  * The License is available on the internet at:
  *      http://www.gnu.org/copyleft/lgpl.html
  * or by writing to:
  *      Free Software Foundation, Inc.
  *      59 Temple Place - Suite 330
  *      Boston, MA 02111-1307, USA
  *
  * © CrossWire Bible Society, 2007 - 2016
  *
  */
 package org.crosswire.common.util;
 
 import java.util.Locale;
 
 /**
  * An immutable Language by specification. The specifier consists of up to three parts:
  * <ul>
  * <li>LL - An iso639-2 or iso639-3 language code</li>
  * <li>SSSS - A 4-letter iso15924 script code</li>
  * <li>CC - A 2-letter iso3166 country code</li>
  * </ul>
  * Note: This is a subset of the BCP-47 standard.
  * 
  * @see gnu.lgpl.License The GNU Lesser General Public License for details.
  * @author DM Smith
  */
 public class Language implements Comparable<Language> {
     /**
      * The default language code is en for English.
      */
     public static final String DEFAULT_LANG_CODE = "en";
 
     /**
      * The language code for invalid language specifications is und for Undetermined.
      */
     public static final String UNKNOWN_LANG_CODE = "und";
 
     /**
      * The default language is English.
      */
     public static final Language DEFAULT_LANG = new Language(DEFAULT_LANG_CODE);
 
 
     /**
      * A single language defined by an ISO-639 code. If the code is null or
      * empty then it is considered to be DEFAULT_LANG (that is, English).
      * 
      * @param specification
      *            the specifier for the particular language
      */
     public Language(String specification) {
         given = specification;
         parse(given);
     }
 
     /**
      * The specification that was given might not be be the one that
      * ultimately gets the name.
      * 
      * @return the specification that was originally given.
      */
     public String getGivenSpecification() {
         return given;
     }
 
     /**
      * The specification that was given might not be be the one that
      * ultimately gets the name.
      * 
      * @return the specification that was used to find the name.
      */
     public String getFoundSpecification() {
         getName();
         return found;
     }
 
     /**
      * Determine whether this language is valid.
      * <ul>
      * <li>LL - An iso639-2 or iso639-3 language code</li>
      * <li>SSSS - A 4-letter iso15924 script code</li>
      * <li>CC - A 2-letter iso3166 country code</li>
      * </ul>
      * 
      * @return true if the language is valid.
      */
     public boolean isValidLanguage() {
         getName();
         return valid;
     }
 
     /**
      * Get the iso639 language code.
      * 
      * @return the code for the language in lower case.
      */
     public String getCode() {
         return code;
     }
 
     /**
      * Get the iso15924 script for the language. May be null.
      * 
      * @return the code for the script in Title case.
      */
     public String getScript() {
         return script;
     }
 
     /**
      * Get the iso3166 script for the language. May be null.
      * 
      * @return the code for the country in UPPER case.
      */
     public String getCountry() {
         return country;
     }
 
     /**
      * Get the localized language name.
      * 
      * @return the name of the language
      */
     public String getName() {
         // Note: This is not quite thread safe. Unless name is volatile.
         // But it will just do the work multiple times.
         if (name == null) {
             boolean more = true;
             // Code is the ultimate fallback
             String result = code;
             String lookup = code;
 
             StringBuilder sb = new StringBuilder();
             // The lookup is as follows.
             // There is always a code
             // If all parts are specified then use that
             if (script != null && country != null) {
                 sb.append(code);
                 sb.append('-');
                 sb.append(script);
                 sb.append('-');
                 sb.append(country);
                 lookup = sb.toString();
                 result = Languages.getName(lookup);
                 more = lookup.equals(result);
             }
 
             // If script is specified it has precedence over country
             if (more && script != null) {
                 sb.setLength(0);
                 sb.append(code);
                 sb.append('-');
                 sb.append(script);
                 lookup = sb.toString();
                 result = Languages.getName(lookup);
                 more = lookup.equals(result);
             }
 
             // If country was specified, check for that now.
             if (more && country != null) {
                 sb.setLength(0);
                 sb.append(code);
                 sb.append('-');
                 sb.append(country);
                 lookup = sb.toString();
                 result = Languages.getName(lookup);
                 more = lookup.equals(result);
             }
 
             // Now check just the code.
             if (more) {
                 lookup = code;
                 result = Languages.getName(lookup);
                 more = lookup.equals(result);
             }
 
             // Oops, the localized lookup failed.
             // See if Java has one.
             if (more) {
                 lookup = code;
                 result = new Locale(lookup).getDisplayLanguage();
                 more = lookup.equals(result);
             }
 
             // Oops, Java doesn't have a clue
             // Look into our heavy handed listing
             if (more) {
                 lookup = code;
                 result = Languages.AllLanguages.getName(lookup);
                 more = lookup.equals(result);
             }
 
             // Oops, didn't find it anywhere. Mark it as invalid.
             if (more) {
                 valid = false;
             }
             // now that we are here go with what we last used and got
             found = lookup;
             // Assign name last to help with synchronization issues
             name = result;
         }
         return name;
     }
 
     /**
      * Determine whether this language is a Left-to-Right or a Right-to-Left
      * language. If the language has a script, it is used for the determination.
      * Otherwise, check the language.
      * <p>
      * Note: This is problematic. Languages do not have direction.
      * Scripts do. Further, there are over 7000 living languages, many of which
      * are written in Right-to-Left scripts and are not listed here.
      * </p>
      * 
      * @return true if the language is Left-to-Right.
      */
     public boolean isLeftToRight() {
         if (!knowsDirection) {
             ltor = !Languages.RtoL.isRtoL(script, code);
             knowsDirection = true;
         }
         return ltor;
     }
 
     /* (non-Javadoc)
      * @see java.lang.Object#hashCode()
      */
     @Override
     public int hashCode() {
         if (found == null) {
             getName();
         }
         return found.hashCode();
     }
 
     /* (non-Javadoc)
      * @see java.lang.Object#equals(java.lang.Object)
      */
     @Override
     public boolean equals(Object obj) {
         if (this == obj) {
             return true;
         }
 
         if (obj == null || getClass() != obj.getClass()) {
             return false;
         }
 
         final Language other = (Language) obj;
 
         return code.equals(other.code)  && compareStrings(script, other.script) && compareStrings(country, other.country);
     }
 
     /* (non-Javadoc)
      * @see java.lang.Object#toString()
      */
     @Override
     public String toString() {
         return getName();
     }
 
     /* (non-Javadoc)
      * @see java.lang.Comparable#compareTo(java.lang.Object)
      */
     public int compareTo(Language o) {
         return getName().compareTo(o.getName());
     }
 
     /**
      * Split the specification on '-' into 1 to 3 parts.
      * 
      * @param spec the specification to parse
      */
     private void parse(String spec) {
         String specification = spec;
         if (specification == null) {
             specification = DEFAULT_LANG_CODE;
         }
 
         int len = specification.length();
 
         // It used to be that SWORD modules used x- and X- as a language prefix
         // for minority languages. Now that we have a complete iso639 spec,
         // SWORD does not use it.
         if (len < 2 || specification.charAt(0) == '-' || specification.charAt(1) == '-') {
             valid = false;
             code = UNKNOWN_LANG_CODE;
             return;
         }
 
         // Obvious optimization of the most common case: only the language code is given
         if (len <= 3) {
             code = CanonicalUtils.getLanguage(specification, 0, len);
         }
 
         int partLen = 0;
         int start = 0;
         int split;
         for (split = 2; split < len; ++split) {
             char c = specification.charAt(split);
             if (c == '-') {
                 break;
             }
         }
         code = CanonicalUtils.getLanguage(specification, start, split);
         partLen = split - start;
         valid = partLen == 2 || partLen == 3;
         start = split + 1;
 
         // Get the second part. It is either a script or a country code
         if (split < len) {
             for (split = start; split < len; ++split) {
                 char c = specification.charAt(split);
                 if (c == '-') {
                     break;
                 }
             }
             partLen = split - start;
             if (partLen == 4) {
                 script = CanonicalUtils.getScript(specification, start, split);
             } else if (partLen == 2) {
                 country = CanonicalUtils.getCountry(specification, start, split);
             } else {
                 valid = false;
             }
             start = split + 1;
         }
 
         // Get the third part, if any. It can only be a country code.
         if (country == null && split < len) {
             for (split = start; split < len; ++split) {
                 char c = specification.charAt(split);
                 if (c == '-') {
                     break;
                 }
             }
             partLen = split - start;
             if (partLen == 2) {
                 country = CanonicalUtils.getCountry(specification, start, split);
             } else {
                 valid = false;
             }
             start = split + 1;
         }
 
         if (start <= len) {
             valid = false;
         }
     }
 
     /**
      * Equal if both a and b are the same.
      * 
      * @param a a string to compare
      * @param b a string to compare
      * @return true if both are the same.
      */
     private boolean compareStrings(String a, String b) {
         return (a == null && b == null) || (a != null && a.equals(b));
     }
 
     /**
      * Converts substrings to the canonical representation for language code, script and country.
      */
     private static final class CanonicalUtils {
         /**
          * Utility class. Private constructor.
          */
         private CanonicalUtils() {
         }
 
         /**
          * The iso639 language code's canonical form is lower case.
          * 
          * @param specification
          *            the bcp47 specification of the language
          * @param start
          *            the start of the code
          * @param end
          *            the position of the character following the code
          * @return the canonical representation for the code
          */
         public static String getLanguage(String specification, int start, int end) {
 
             // An empty string means no work
             if (start == end) {
                 return null;
             }
 
             // Avoid construction by analyzing the string
             // to see if it is already LanguageCase.
             // Find the first character that is not LanguageCase
             int first;
             for (first = start; first < end && isLowerASCII(specification.charAt(first)); ++first) {
                 continue; // keep going
             }
 
             // If we get to the end of the string then it is CountryCase
             if (first == end) {
                 return specification.substring(start, end);
             }
 
             // Bummer, we need to do work
             int len = end - start;
             char[] buf = new char[len];
             int i = 0;
             for (int j = start; j < end; ++j) {
                 buf[i++] = j < first ? specification.charAt(j) : toLowerASCII(specification.charAt(j));
             }
             return new String(buf);
         }
 
         /**
          * The iso3166 country code's canonical form is upper case.
          * 
          * @param specification
          *            the bcp47 specification of the language
          * @param start
          *            the start of the code
          * @param end
          *            the position of the character following the code
          * @return the canonical representation for the code
          */
         public static String getCountry(String specification, int start, int end) {
 
             // An empty string means no work
             if (start == end) {
                 return null;
             }
 
             // Avoid construction by analyzing the string
             // to see if it is already CountryCase.
             // Find the first character that is not CountryCase
             int first;
             for (first = start; first < end && isUpperASCII(specification.charAt(first)); ++first) {
                 continue; // keep going
             }
 
             // If we get to the end of the string then it is CountryCase
             if (first == end) {
                 return specification.substring(start, end);
             }
 
             // Bummer, we need to do work
             int len = end - start;
             char[] buf = new char[len];
             int i = 0;
             for (int j = start; j < end; ++j) {
                 buf[i++] = j < first ? specification.charAt(j) : toUpperASCII(specification.charAt(j));
             }
             return new String(buf);
         }
 
         /**
          * The iso15924 script code's canonical form is title case.
          * 
          * @param specification
          *            the bcp47 specification of the language
          * @param start
          *            the start of the code
          * @param end
          *            the position of the character following the code
          * @return the canonical representation for the code
          */
         public static String getScript(String specification, int start, int end) {
 
             // An empty string means no work
             if (start == end) {
                 return null;
             }
 
             // Avoid construction by analyzing the string
             // to see if it is already ScriptCase.
             // Find the first character that is not ScriptCase
             int first = start;
             if (isUpperASCII(specification.charAt(start))) {
                 for (first = start + 1; first < end && isLowerASCII(specification.charAt(first)); ++first) {
                     continue; // keep going
                 }
 
                 // If we get to the end of the string then it is ScriptCase
                 if (first == end) {
                     return specification.substring(start, end);
                 }
             }
 
             // Bummer, we need to do work.
             int len = end - start;
             char[] buf = new char[len];
             buf[0] = first == start ? toUpperASCII(specification.charAt(first)) : specification.charAt(first);
             int i = 1;
             for (int j = start + 1; j < end; ++j) {
                 buf[i++] = j < first ? specification.charAt(j) : toLowerASCII(specification.charAt(j));
             }
             return new String(buf);
         }
 
         /**
          * Determine whether the character is one of A-Z.
          * 
          * @param c the character to examine
          * @return true if it is in A-Z
          */
         private static boolean isUpperASCII(char c) {
             return c >= 'A' && c <= 'Z';
         }
 
         /**
          * Determine whether the character is one of a-z.
          * 
          * @param c the character to examine
          * @return true if it is in a-z
          */
         private static boolean isLowerASCII(char c) {
             return c >= 'a' && c <= 'z';
         }
 
         /**
          * Convert a character, in in a-z to its upper case value, otherwise leave it alone.
          * 
          * @param c the character to convert, if in a-z
          * @return the upper case ASCII representation of the character or the character itself.
          */
         private static char toUpperASCII(char c) {
             return isLowerASCII(c) ? (char) (c - 32) : c;
         }
 
         /**
          * Convert a character, in in A-Z to its lower case value, otherwise leave it alone.
          * 
          * @param c the character to convert, if in A-Z
          * @return the lower case ASCII representation of the character or the character itself.
          */
         private static char toLowerASCII(char c) {
             return isUpperASCII(c) ? (char) (c + 32) : c;
         }
     }
 
     /**
      * The original specification provided by the user.
      */
     private String given;
     /**
      * The effective specification.
      */
     private String found;
     /**
      * The lower case iso639 language code. 
      */
     private String code;
     /**
      * The Title case iso15924 script code.
      */
     private String script;
     /**
      * The UPPER case iso3166 country code. 
      */
     private String country;
     /**
      * The name as defined by Languages. 
      */
     private String name;
     /**
      * Flag to store whether the code is valid.
      */
     private boolean valid;
     private boolean knowsDirection;
     private boolean ltor;
 }

1		/**
2		* Distribution License:
3		* JSword is free software; you can redistribute it and/or modify it under
4		* the terms of the GNU Lesser General Public License, version 2.1 or later
5		* as published by the Free Software Foundation. This program is distributed
6		* in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
7		* the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8		* See the GNU Lesser General Public License for more details.
9		*
10		* The License is available on the internet at:
11		* http://www.gnu.org/copyleft/lgpl.html
12		* or by writing to:
13		* Free Software Foundation, Inc.
14		* 59 Temple Place - Suite 330
15		* Boston, MA 02111-1307, USA
16		*
17		* © CrossWire Bible Society, 2007 - 2016
18		*
19		*/
20		package org.crosswire.common.util;
21
22		import java.util.Locale;
23
24		/**
25		* An immutable Language by specification. The specifier consists of up to three parts:
26		* <ul>
27		* <li>LL - An iso639-2 or iso639-3 language code</li>
28		* <li>SSSS - A 4-letter iso15924 script code</li>
29		* <li>CC - A 2-letter iso3166 country code</li>
30		* </ul>
31		* Note: This is a subset of the BCP-47 standard.
32		*
33		* @see gnu.lgpl.License The GNU Lesser General Public License for details.
34		* @author DM Smith
35		*/
36	0	public class Language implements Comparable<Language> {
37		/**
38		* The default language code is en for English.
39		*/
40		public static final String DEFAULT_LANG_CODE = "en";
41
42		/**
43		* The language code for invalid language specifications is und for Undetermined.
44		*/
45		public static final String UNKNOWN_LANG_CODE = "und";
46
47		/**
48		* The default language is English.
49		*/
50	0	public static final Language DEFAULT_LANG = new Language(DEFAULT_LANG_CODE);
51
52
53		/**
54		* A single language defined by an ISO-639 code. If the code is null or
55		* empty then it is considered to be DEFAULT_LANG (that is, English).
56		*
57		* @param specification
58		* the specifier for the particular language
59		*/
60	0	public Language(String specification) {
61	0	given = specification;
62	0	parse(given);
63	0	}
64
65		/**
66		* The specification that was given might not be the one that
67		* ultimately gets the name.
68		*
69		* @return the specification that was originally given.
70		*/
71		public String getGivenSpecification() {
72	0	return given;
73		}
74
75		/**
76		* The specification that was given might not be the one that
77		* ultimately gets the name.
78		*
79		* @return the specification that was used to find the name.
80		*/
81		public String getFoundSpecification() {
82	0	getName();
83	0	return found;
84		}
85
86		/**
87		* Determine whether this language is valid.
88		* <ul>
89		* <li>LL - An iso639-2 or iso639-3 language code</li>
90		* <li>SSSS - A 4-letter iso15924 script code</li>
91		* <li>CC - A 2-letter iso3166 country code</li>
92		* </ul>
93		*
94		* @return true if the language is valid.
95		*/
96		public boolean isValidLanguage() {
97	0	getName();
98	0	return valid;
99		}
100
101		/**
102		* Get the iso639 language code.
103		*
104		* @return the code for the language in lower case.
105		*/
106		public String getCode() {
107	0	return code;
108		}
109
110		/**
111		* Get the iso15924 script for the language. May be null.
112		*
113		* @return the code for the script in Title case.
114		*/
115		public String getScript() {
116	0	return script;
117		}
118
119		/**
120		* Get the iso3166 country for the language. May be null.
121		*
122		* @return the code for the country in UPPER case.
123		*/
124		public String getCountry() {
125	0	return country;
126		}
127
128		/**
129		* Get the localized language name.
130		*
131		* @return the name of the language
132		*/
133		public String getName() {
134		// Note: This is not quite thread safe. Unless name is volatile.
135		// But it will just do the work multiple times.
136	0	if (name == null) {
137	0	boolean more = true;
138		// Code is the ultimate fallback
139	0	String result = code;
140	0	String lookup = code;
141
142	0	StringBuilder sb = new StringBuilder();
143		// The lookup is as follows.
144		// There is always a code
145		// If all parts are specified then use that
146	0	if (script != null && country != null) {
147	0	sb.append(code);
148	0	sb.append('-');
149	0	sb.append(script);
150	0	sb.append('-');
151	0	sb.append(country);
152	0	lookup = sb.toString();
153	0	result = Languages.getName(lookup);
154	0	more = lookup.equals(result);
155		}
156
157		// If script is specified it has precedence over country
158	0	if (more && script != null) {
159	0	sb.setLength(0);
160	0	sb.append(code);
161	0	sb.append('-');
162	0	sb.append(script);
163	0	lookup = sb.toString();
164	0	result = Languages.getName(lookup);
165	0	more = lookup.equals(result);
166		}
167
168		// If country was specified, check for that now.
169	0	if (more && country != null) {
170	0	sb.setLength(0);
171	0	sb.append(code);
172	0	sb.append('-');
173	0	sb.append(country);
174	0	lookup = sb.toString();
175	0	result = Languages.getName(lookup);
176	0	more = lookup.equals(result);
177		}
178
179		// Now check just the code.
180	0	if (more) {
181	0	lookup = code;
182	0	result = Languages.getName(lookup);
183	0	more = lookup.equals(result);
184		}
185
186		// Oops, the localized lookup failed.
187		// See if Java has one.
188	0	if (more) {
189	0	lookup = code;
190	0	result = new Locale(lookup).getDisplayLanguage();
191	0	more = lookup.equals(result);
192		}
193
194		// Oops, Java doesn't have a clue
195		// Look into our heavy handed listing
196	0	if (more) {
197	0	lookup = code;
198	0	result = Languages.AllLanguages.getName(lookup);
199	0	more = lookup.equals(result);
200		}
201
202		// Oops, didn't find it anywhere. Mark it as invalid.
203	0	if (more) {
204	0	valid = false;
205		}
206		// now that we are here go with what we last used and got
207	0	found = lookup;
208		// Assign name last to help with synchronization issues
209	0	name = result;
210		}
211	0	return name;
212		}
213
214		/**
215		* Determine whether this language is a Left-to-Right or a Right-to-Left
216		* language. If the language has a script, it is used for the determination.
217		* Otherwise, check the language.
218		* <p>
219		* Note: This is problematic. Languages do not have direction.
220		* Scripts do. Further, there are over 7000 living languages, many of which
221		* are written in Right-to-Left scripts and are not listed here.
222		* </p>
223		*
224		* @return true if the language is Left-to-Right.
225		*/
226		public boolean isLeftToRight() {
227	0	if (!knowsDirection) {
228	0	ltor = !Languages.RtoL.isRtoL(script, code);
229	0	knowsDirection = true;
230		}
231	0	return ltor;
232		}
233
234		/* (non-Javadoc)
235		* @see java.lang.Object#hashCode()
236		*/
237		@Override
238		public int hashCode() {
239	0	if (found == null) {
240	0	getName();
241		}
242	0	return found.hashCode();
243		}
244
245		/* (non-Javadoc)
246		* @see java.lang.Object#equals(java.lang.Object)
247		*/
248		@Override
249		public boolean equals(Object obj) {
250	0	if (this == obj) {
251	0	return true;
252		}
253
254	0	if (obj == null || getClass() != obj.getClass()) {
255	0	return false;
256		}
257
258	0	final Language other = (Language) obj;
259
260	0	return code.equals(other.code) && compareStrings(script, other.script) && compareStrings(country, other.country);
261		}
262
263		/* (non-Javadoc)
264		* @see java.lang.Object#toString()
265		*/
266		@Override
267		public String toString() {
268	0	return getName();
269		}
270
271		/* (non-Javadoc)
272		* @see java.lang.Comparable#compareTo(java.lang.Object)
273		*/
274		public int compareTo(Language o) {
275	0	return getName().compareTo(o.getName());
276		}
277
278		/**
279		* Split the specification on '-' into 1 to 3 parts.
280		*
281		* @param spec the specification to parse
282		*/
283		private void parse(String spec) {
284	0	String specification = spec;
285	0	if (specification == null) {
286	0	specification = DEFAULT_LANG_CODE;
287		}
288
289	0	int len = specification.length();
290
291		// It used to be that SWORD modules used x- and X- as a language prefix
292		// for minority languages. Now that we have a complete iso639 spec,
293		// SWORD does not use it.
294	0	if (len < 2 || specification.charAt(0) == '-' || specification.charAt(1) == '-') {
295	0	valid = false;
296	0	code = UNKNOWN_LANG_CODE;
297	0	return;
298		}
299
300		// Obvious optimization of the most common case: only the language code is given
301	0	if (len <= 3) {
302	0	code = CanonicalUtils.getLanguage(specification, 0, len);
303		}
304
305	0	int partLen = 0;
306	0	int start = 0;
307		int split;
308	0	for (split = 2; split < len; ++split) {
309	0	char c = specification.charAt(split);
310	0	if (c == '-') {
311	0	break;
312		}
313		}
314	0	code = CanonicalUtils.getLanguage(specification, start, split);
315	0	partLen = split - start;
316	0	valid = partLen == 2 || partLen == 3;
317	0	start = split + 1;
318
319		// Get the second part. It is either a script or a country code
320	0	if (split < len) {
321	0	for (split = start; split < len; ++split) {
322	0	char c = specification.charAt(split);
323	0	if (c == '-') {
324	0	break;
325		}
326		}
327	0	partLen = split - start;
328	0	if (partLen == 4) {
329	0	script = CanonicalUtils.getScript(specification, start, split);
330	0	} else if (partLen == 2) {
331	0	country = CanonicalUtils.getCountry(specification, start, split);
332		} else {
333	0	valid = false;
334		}
335	0	start = split + 1;
336		}
337
338		// Get the third part, if any. It can only be a country code.
339	0	if (country == null && split < len) {
340	0	for (split = start; split < len; ++split) {
341	0	char c = specification.charAt(split);
342	0	if (c == '-') {
343	0	break;
344		}
345		}
346	0	partLen = split - start;
347	0	if (partLen == 2) {
348	0	country = CanonicalUtils.getCountry(specification, start, split);
349		} else {
350	0	valid = false;
351		}
352	0	start = split + 1;
353		}
354
355	0	if (start <= len) {
356	0	valid = false;
357		}
358	0	}
359
360		/**
361		* Equal if both a and b are the same.
362		*
363		* @param a a string to compare
364		* @param b a string to compare
365		* @return true if both are the same.
366		*/
367		private boolean compareStrings(String a, String b) {
368	0	return (a == null && b == null) || (a != null && a.equals(b));
369		}
370
371		/**
372		* Converts substrings to the canonical representation for language code, script and country.
373		*/
374		private static final class CanonicalUtils {
375		/**
376		* Utility class. Private constructor.
377		*/
378	0	private CanonicalUtils() {
379	0	}
380
381		/**
382		* The iso639 language code's canonical form is lower case.
383		*
384		* @param specification
385		* the bcp47 specification of the language
386		* @param start
387		* the start of the code
388		* @param end
389		* the position of the character following the code
390		* @return the canonical representation for the code
391		*/
392		public static String getLanguage(String specification, int start, int end) {
393
394		// An empty string means no work
395	0	if (start == end) {
396	0	return null;
397		}
398
399		// Avoid construction by analyzing the string
400		// to see if it is already LanguageCase.
401		// Find the first character that is not LanguageCase
402		int first;
403	0	for (first = start; first < end && isLowerASCII(specification.charAt(first)); ++first) {
404		continue; // keep going
405		}
406
407		// If we get to the end of the string then it is LanguageCase
408	0	if (first == end) {
409	0	return specification.substring(start, end);
410		}
411
412		// Bummer, we need to do work
413	0	int len = end - start;
414	0	char[] buf = new char[len];
415	0	int i = 0;
416	0	for (int j = start; j < end; ++j) {
417	0	buf[i++] = j < first ? specification.charAt(j) : toLowerASCII(specification.charAt(j));
418		}
419	0	return new String(buf);
420		}
421
422		/**
423		* The iso3166 country code's canonical form is upper case.
424		*
425		* @param specification
426		* the bcp47 specification of the language
427		* @param start
428		* the start of the code
429		* @param end
430		* the position of the character following the code
431		* @return the canonical representation for the code
432		*/
433		public static String getCountry(String specification, int start, int end) {
434
435		// An empty string means no work
436	0	if (start == end) {
437	0	return null;
438		}
439
440		// Avoid construction by analyzing the string
441		// to see if it is already CountryCase.
442		// Find the first character that is not CountryCase
443		int first;
444	0	for (first = start; first < end && isUpperASCII(specification.charAt(first)); ++first) {
445		continue; // keep going
446		}
447
448		// If we get to the end of the string then it is CountryCase
449	0	if (first == end) {
450	0	return specification.substring(start, end);
451		}
452
453		// Bummer, we need to do work
454	0	int len = end - start;
455	0	char[] buf = new char[len];
456	0	int i = 0;
457	0	for (int j = start; j < end; ++j) {
458	0	buf[i++] = j < first ? specification.charAt(j) : toUpperASCII(specification.charAt(j));
459		}
460	0	return new String(buf);
461		}
462
463		/**
464		* The iso15924 script code's canonical form is title case.
465		*
466		* @param specification
467		* the bcp47 specification of the language
468		* @param start
469		* the start of the code
470		* @param end
471		* the position of the character following the code
472		* @return the canonical representation for the code
473		*/
474		public static String getScript(String specification, int start, int end) {
475
476		// An empty string means no work
477	0	if (start == end) {
478	0	return null;
479		}
480
481		// Avoid construction by analyzing the string
482		// to see if it is already ScriptCase.
483		// Find the first character that is not ScriptCase
484	0	int first = start;
485	0	if (isUpperASCII(specification.charAt(start))) {
486	0	for (first = start + 1; first < end && isLowerASCII(specification.charAt(first)); ++first) {
487		continue; // keep going
488		}
489
490		// If we get to the end of the string then it is ScriptCase
491	0	if (first == end) {
492	0	return specification.substring(start, end);
493		}
494		}
495
496		// Bummer, we need to do work.
497	0	int len = end - start;
498	0	char[] buf = new char[len];
499	0	buf[0] = first == start ? toUpperASCII(specification.charAt(first)) : specification.charAt(first);
500	0	int i = 1;
501	0	for (int j = start + 1; j < end; ++j) {
502	0	buf[i++] = j < first ? specification.charAt(j) : toLowerASCII(specification.charAt(j));
503		}
504	0	return new String(buf);
505		}
506
507		/**
508		* Determine whether the character is one of A-Z.
509		*
510		* @param c the character to examine
511		* @return true if it is in A-Z
512		*/
513		private static boolean isUpperASCII(char c) {
514	0	return c >= 'A' && c <= 'Z';
515		}
516
517		/**
518		* Determine whether the character is one of a-z.
519		*
520		* @param c the character to examine
521		* @return true if it is in a-z
522		*/
523		private static boolean isLowerASCII(char c) {
524	0	return c >= 'a' && c <= 'z';
525		}
526
527		/**
528		* Convert a character in a-z to its upper case value, otherwise leave it alone.
529		*
530		* @param c the character to convert, if in a-z
531		* @return the upper case ASCII representation of the character or the character itself.
532		*/
533		private static char toUpperASCII(char c) {
534	0	return isLowerASCII(c) ? (char) (c - 32) : c;
535		}
536
537		/**
538		* Convert a character in A-Z to its lower case value, otherwise leave it alone.
539		*
540		* @param c the character to convert, if in A-Z
541		* @return the lower case ASCII representation of the character or the character itself.
542		*/
543		private static char toLowerASCII(char c) {
544	0	return isUpperASCII(c) ? (char) (c + 32) : c;
545		}
546		}
547
548		/**
549		* The original specification provided by the user.
550		*/
551		private String given;
552		/**
553		* The effective specification.
554		*/
555		private String found;
556		/**
557		* The lower case iso639 language code.
558		*/
559		private String code;
560		/**
561		* The Title case iso15924 script code.
562		*/
563		private String script;
564		/**
565		* The UPPER case iso3166 country code.
566		*/
567		private String country;
568		/**
569		* The name as defined by Languages.
570		*/
571		private String name;
572		/**
573		* Flag to store whether the code is valid.
574		*/
575		private boolean valid;
576		private boolean knowsDirection;
577		private boolean ltor;
578		}