1   /**
2    * Distribution License:
3    * JSword is free software; you can redistribute it and/or modify it under
4    * the terms of the GNU Lesser General Public License, version 2.1 as published by
5    * the Free Software Foundation. This program is distributed in the hope
6    * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7    * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8    * See the GNU Lesser General Public License for more details.
9    *
10   * The License is available on the internet at:
11   *       http://www.gnu.org/copyleft/lgpl.html
12   * or by writing to:
13   *      Free Software Foundation, Inc.
14   *      59 Temple Place - Suite 330
15   *      Boston, MA 02111-1307, USA
16   *
17   * Copyright: 2005
18   *     The copyright to this program is held by it's authors.
19   *
20   * ID: $Id: StringUtil.java 2090 2011-03-07 04:13:05Z dmsmith $
21   */
22  package org.crosswire.common.util;
23  
24  import java.io.BufferedReader;
25  import java.io.IOException;
26  import java.io.Reader;
27  import java.util.ArrayList;
28  import java.util.List;
29  
/**
 * A generic class of String utils. It would be good if we could put this stuff
 * in java.lang ...
 * 
 * <p>
 * All methods are static; the class cannot be instantiated.
 * </p>
 * 
 * @see gnu.lgpl.License for license details.<br>
 *      The copyright to this program is held by its authors.
 * @author Joe Walker [joe at eireneh dot com]
 */
public final class StringUtil {
    /**
     * The platform line separator, falling back to "\r\n" when the
     * <code>line.separator</code> system property is not set.
     */
    public static final String NEWLINE = System.getProperty("line.separator", "\r\n");

    /**
     * An empty immutable <code>String</code> array.
     */
    public static final String[] EMPTY_STRING_ARRAY = new String[0];

    /**
     * Upper bound for the initial capacity guessed by
     * {@link #join(Object[], String)}. The builder still grows on demand; the
     * bound only keeps a guess from overflowing or over-allocating.
     */
    private static final int MAX_INITIAL_JOIN_CAPACITY = 1 << 16;

    /**
     * Prevent instantiation
     */
    private StringUtil() {
    }

    /**
     * Reads a Reader <b>in its entirety</b> and passes the text back as a
     * string. If you are reading from a source that can block then be prepared
     * for a long wait for this to return.
     * 
     * <p>
     * The input is consumed line by line: every line terminator found in the
     * input is replaced by {@link #NEWLINE}, and a {@link #NEWLINE} is appended
     * after the last line even if the input did not end with one. The reader
     * is not closed by this method.
     * </p>
     * 
     * @param in
     *            The Reader to read from.
     * @return A string containing all the text from the Reader.
     * @throws IOException
     *             if reading from <code>in</code> fails
     */
    public static String read(Reader in) throws IOException {
        StringBuilder text = new StringBuilder();
        // Quiet Android from complaining about using the default BufferReader buffer size.
        // The actual buffer size is undocumented. So this is a good idea any way.
        BufferedReader din = new BufferedReader(in, 8192);

        for (String line = din.readLine(); line != null; line = din.readLine()) {
            text.append(line);
            text.append(NEWLINE);
        }

        return text.toString();
    }

    /**
     * This function creates a readable title from a variable name type input.
     * For example calling: StringUtil.createTitle("one_two") = "One Two"
     * StringUtil.createTitle("oneTwo") = "One Two"
     * 
     * <p>
     * Underscores become spaces, runs of underscores collapse into a single
     * space, the first character after a space (and the very first character)
     * is upper-cased, and a space is inserted before an upper case character
     * that follows a non upper case one.
     * </p>
     * 
     * @param variable
     *            the variable style name to convert, must not be
     *            <code>null</code>
     * @return the title form of the name
     * @throws NullPointerException
     *             if <code>variable</code> is <code>null</code>
     */
    public static String createTitle(String variable) {
        StringBuilder title = new StringBuilder();
        // Anything that is not upper case (digits, underscores, ...) counts as "lower" here.
        boolean lastlower = false;
        // Start as if a space had just been seen so the first character is capitalised.
        boolean lastspace = true;

        for (int i = 0; i < variable.length(); i++) {
            char c = variable.charAt(i);

            // camelCase boundary: aB -> "a B"
            if (lastlower && Character.isUpperCase(c) && !lastspace) {
                title.append(' ');
            }

            lastlower = !Character.isUpperCase(c);

            if (lastspace) {
                c = Character.toUpperCase(c);
            }

            if (c == '_') {
                c = ' ';
            }

            // Never emit two spaces in a row, nor a leading space.
            if (!lastspace || c != ' ') {
                title.append(c);
            }

            lastspace = c == ' ';
        }

        return title.toString();
    }

    /**
     * For example getInitials("Java DataBase Connectivity") = "JDC" and
     * getInitials("Church of England") = "CoE".
     * 
     * <p>
     * The sentence is split on whitespace and the first letter (as defined by
     * {@link Character#isLetter(char)}) of each word is taken, with its case
     * unchanged. Words that contain no letter contribute nothing.
     * </p>
     * 
     * @param sentence
     *            The phrase from which to get the initial letters.
     * @return The initial letters in the given words.
     */
    public static String getInitials(String sentence) {
        StringBuilder initials = new StringBuilder();

        for (String word : split(sentence)) {
            for (int j = 0; j < word.length(); j++) {
                char c = word.charAt(j);
                if (Character.isLetter(c)) {
                    initials.append(c);
                    break;
                }
            }
        }

        return initials.toString();
    }

    /**
     * <p>
     * Splits the provided text into an array, using whitespace as the
     * separator. Whitespace is defined by {@link Character#isWhitespace(char)}.
     * </p>
     * 
     * <p>
     * The separator is not included in the returned String array. Adjacent
     * separators are treated as one separator.
     * </p>
     * 
     * <p>
     * A <code>null</code> input String returns an empty array.
     * </p>
     * 
     * <pre>
     * StringUtil.split(null)       = []
     * StringUtil.split("")         = []
     * StringUtil.split("abc def")  = ["abc", "def"]
     * StringUtil.split("abc  def") = ["abc", "def"]
     * StringUtil.split(" abc ")    = ["abc"]
     * </pre>
     * 
     * @param str
     *            the String to parse, may be null
     * @return an array of parsed Strings, empty if the input is
     *         <code>null</code> or empty
     */
    public static String[] split(String str) {
        return splitWorker(str, null, -1, false);
    }

    /**
     * <p>
     * Splits the provided text into an array, separator specified. This is an
     * alternative to using StringTokenizer.
     * </p>
     * 
     * <p>
     * The separator is not included in the returned String array. Adjacent
     * separators are treated as one separator.
     * </p>
     * 
     * <p>
     * A <code>null</code> input String returns an empty array.
     * </p>
     * 
     * <pre>
     * StringUtil.split(null, *)         = []
     * StringUtil.split("", *)           = []
     * StringUtil.split("a.b.c", '.')    = ["a", "b", "c"]
     * StringUtil.split("a..b.c", '.')   = ["a", "b", "c"]
     * StringUtil.split("a:b:c", '.')    = ["a:b:c"]
     * StringUtil.split("a b c", ' ')    = ["a", "b", "c"]
     * </pre>
     * 
     * @param str
     *            the String to parse, may be null
     * @param separatorChar
     *            the character used as the delimiter
     * @return an array of parsed Strings
     * @since 2.0
     */
    public static String[] split(String str, char separatorChar) {
        return splitWorker(str, String.valueOf(separatorChar), -1, false);
    }

    /**
     * <p>
     * Splits the provided text into an array, separator specified. This is an
     * alternative to using StringTokenizer.
     * </p>
     * 
     * <p>
     * The separator is not included in the returned String array. Adjacent
     * separators are treated individually, so empty tokens between separators
     * (and before a leading separator) are kept. No empty token is produced
     * after a trailing separator.
     * </p>
     * 
     * <p>
     * A <code>null</code> input String returns an empty array.
     * </p>
     * 
     * <pre>
     * StringUtil.splitAll(null, *)         = []
     * StringUtil.splitAll("", *)           = []
     * StringUtil.splitAll("a.b.c", '.')    = ["a", "b", "c"]
     * StringUtil.splitAll("a..b.c", '.')   = ["a", "", "b", "c"]
     * StringUtil.splitAll(".a", '.')       = ["", "a"]
     * StringUtil.splitAll("a.b.", '.')     = ["a", "b"]
     * StringUtil.splitAll("a:b:c", '.')    = ["a:b:c"]
     * StringUtil.splitAll("a b c", ' ')    = ["a", "b", "c"]
     * </pre>
     * 
     * @param str
     *            the String to parse, may be null
     * @param separatorChar
     *            the character used as the delimiter
     * @return an array of parsed Strings
     * @since 2.0
     */
    public static String[] splitAll(String str, char separatorChar) {
        return splitWorker(str, String.valueOf(separatorChar), -1, true);
    }

    /**
     * <p>
     * Splits the provided text into an array, separator specified. This is an
     * alternative to using StringTokenizer.
     * </p>
     * 
     * <p>
     * The separator is not included in the returned String array. Adjacent
     * separators are treated individually, so empty tokens between separators
     * are kept. Once <code>max</code> elements would be reached, the rest of
     * the input (separators included) becomes the last element.
     * </p>
     * 
     * <p>
     * A <code>null</code> input String returns an empty array.
     * </p>
     * 
     * <pre>
     * StringUtil.splitAll(null, *, *)         = []
     * StringUtil.splitAll("", *, *)           = []
     * StringUtil.splitAll("a.b.c", '.', 0)    = ["a", "b", "c"]
     * StringUtil.splitAll("a..b.c", '.', 0)   = ["a", "", "b", "c"]
     * StringUtil.splitAll("a:b:c", '.', 0)    = ["a:b:c"]
     * StringUtil.splitAll("a.b.c", '.', 2)    = ["a", "b.c"]
     * </pre>
     * 
     * @param str
     *            the String to parse, may be null
     * @param separatorChar
     *            the character used as the delimiter
     * @param max
     *            the maximum number of elements to include in the array. A zero
     *            or negative value implies no limit
     * @return an array of parsed Strings
     * @since 2.0
     */
    public static String[] splitAll(String str, char separatorChar, int max) {
        return splitWorker(str, String.valueOf(separatorChar), max, true);
    }

    /**
     * <p>
     * Splits the provided text into an array, separators specified. This is an
     * alternative to using StringTokenizer.
     * </p>
     * 
     * <p>
     * The separator is not included in the returned String array. Adjacent
     * separators are treated as one separator.
     * </p>
     * 
     * <p>
     * A <code>null</code> input String returns an empty array. A
     * <code>null</code> separatorChars splits on whitespace.
     * </p>
     * 
     * <pre>
     * StringUtil.split(null, *)           = []
     * StringUtil.split("", *)             = []
     * StringUtil.split("abc def", null)   = ["abc", "def"]
     * StringUtil.split("abc def", " ")    = ["abc", "def"]
     * StringUtil.split("abc  def", " ")   = ["abc", "def"]
     * StringUtil.split("ab:cd:ef", ":")   = ["ab", "cd", "ef"]
     * StringUtil.split("a\tb\nc", null)   = ["a", "b", "c"]
     * </pre>
     * 
     * @param str
     *            the String to parse, may be null
     * @param separatorChars
     *            the characters used as the delimiters, <code>null</code>
     *            splits on whitespace
     * @return an array of parsed Strings, empty if the input is
     *         <code>null</code> or empty
     */
    public static String[] split(String str, String separatorChars) {
        return splitWorker(str, separatorChars, -1, false);
    }

    /**
     * <p>
     * Splits the provided text into an array, separators specified. This is an
     * alternative to using StringTokenizer.
     * </p>
     * 
     * <p>
     * The separator is not included in the returned String array. Adjacent
     * separators are treated as one separator. Once <code>max</code> elements
     * would be reached, the rest of the input (separators included) becomes
     * the last element.
     * </p>
     * 
     * <p>
     * A <code>null</code> input String returns an empty array. A
     * <code>null</code> separatorChars splits on whitespace.
     * </p>
     * 
     * <pre>
     * StringUtil.split(null, *, *)            = []
     * StringUtil.split("", *, *)              = []
     * StringUtil.split("ab de fg", null, 0)   = ["ab", "de", "fg"]
     * StringUtil.split("ab   de fg", null, 0) = ["ab", "de", "fg"]
     * StringUtil.split("ab:cd:ef", ":", 0)    = ["ab", "cd", "ef"]
     * StringUtil.split("ab:cd:ef", ":", 2)    = ["ab", "cd:ef"]
     * </pre>
     * 
     * @param str
     *            the String to parse, may be null
     * @param separatorChars
     *            the characters used as the delimiters, <code>null</code>
     *            splits on whitespace
     * @param max
     *            the maximum number of elements to include in the array. A zero
     *            or negative value implies no limit
     * @return an array of parsed Strings
     */
    public static String[] split(String str, String separatorChars, int max) {
        return splitWorker(str, separatorChars, max, false);
    }

    /**
     * <p>
     * Joins the elements of the provided array into a single String containing
     * the provided list of elements.
     * </p>
     * 
     * <p>
     * No delimiter is added before or after the list. A <code>null</code>
     * separator is the same as an empty String (""). Null objects or empty
     * strings within the array are represented by empty strings.
     * </p>
     * 
     * <pre>
     * StringUtil.join(null, *)                = null
     * StringUtil.join([], *)                  = ""
     * StringUtil.join([null], *)              = ""
     * StringUtil.join(["a", "b", "c"], "--")  = "a--b--c"
     * StringUtil.join(["a", "b", "c"], null)  = "abc"
     * StringUtil.join(["a", "b", "c"], "")    = "abc"
     * StringUtil.join([null, "", "a"], ",")   = ",,a"
     * </pre>
     * 
     * @param array
     *            the array of values to join together, may be null
     * @param aSeparator
     *            the separator string to use, null treated as ""
     * @return the joined String, <code>null</code> if null array input
     */
    public static String join(Object[] array, String aSeparator) {
        if (array == null) {
            return null;
        }
        String separator = aSeparator == null ? "" : aSeparator;
        int arraySize = array.length;

        // Rough size guess: 16 characters per element plus its separator.
        // Computed as a long and clamped so that a large array can neither
        // overflow into a negative capacity nor trigger a huge up-front
        // allocation; the builder grows as needed beyond this.
        long estimate = (long) arraySize * (16L + separator.length());
        int capacity = (int) Math.min(estimate, (long) MAX_INITIAL_JOIN_CAPACITY);

        StringBuilder buf = new StringBuilder(capacity);

        for (int i = 0; i < arraySize; i++) {
            if (i > 0) {
                buf.append(separator);
            }
            if (array[i] != null) {
                buf.append(array[i]);
            }
        }
        return buf.toString();
    }

    /**
     * Shared tokenizer behind every public <code>split</code> and
     * <code>splitAll</code> overload.
     * 
     * @param str
     *            the String to parse, may be null
     * @param separatorChars
     *            the delimiter characters, <code>null</code> means whitespace
     * @param max
     *            the maximum number of elements; zero or negative means no
     *            limit
     * @param keepEmpty
     *            <code>true</code> to emit a (possibly empty) token at every
     *            separator, <code>false</code> to collapse adjacent separators
     * @return the tokens, never <code>null</code>
     */
    private static String[] splitWorker(String str, String separatorChars, int max, boolean keepEmpty) {
        if (str == null) {
            return EMPTY_STRING_ARRAY.clone();
        }
        int len = str.length();
        if (len == 0) {
            return EMPTY_STRING_ARRAY.clone();
        }

        List<String> list = new ArrayList<String>();
        // Number of the token about to be added (1-based); compared against max.
        int sizePlus1 = 1;
        int i = 0;
        int start = 0;
        // True while a non-separator character has been seen since the last token.
        boolean match = false;

        while (i < len) {
            if (isSeparator(str.charAt(i), separatorChars)) {
                if (match || keepEmpty) {
                    if (sizePlus1++ == max) {
                        // Limit reached: the remainder of the input is the last token.
                        i = len;
                    }
                    list.add(str.substring(start, i));
                    match = false;
                }
                start = ++i;
                continue;
            }
            match = true;
            i++;
        }

        // Trailing token; only present when the input did not end on a separator.
        if (match) {
            list.add(str.substring(start, i));
        }
        return list.toArray(new String[list.size()]);
    }

    /**
     * Tells whether a character is a delimiter for the given separator set.
     * 
     * @param c
     *            the character to test
     * @param separatorChars
     *            the delimiter characters, <code>null</code> means whitespace
     *            as defined by {@link Character#isWhitespace(char)}
     * @return <code>true</code> if <code>c</code> is a delimiter
     */
    private static boolean isSeparator(char c, String separatorChars) {
        if (separatorChars == null) {
            return Character.isWhitespace(c);
        }
        return separatorChars.indexOf(c) >= 0;
    }

}
585