| StringUtil.java |
1 /**
2 * Distribution License:
3 * JSword is free software; you can redistribute it and/or modify it under
4 * the terms of the GNU Lesser General Public License, version 2.1 as published by
5 * the Free Software Foundation. This program is distributed in the hope
6 * that it will be useful, but WITHOUT ANY WARRANTY; without even the
7 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8 * See the GNU Lesser General Public License for more details.
9 *
10 * The License is available on the internet at:
11 * http://www.gnu.org/copyleft/lgpl.html
12 * or by writing to:
13 * Free Software Foundation, Inc.
14 * 59 Temple Place - Suite 330
15 * Boston, MA 02111-1307, USA
16 *
17 * Copyright: 2005
18 * The copyright to this program is held by its authors.
19 *
20 * ID: $Id: StringUtil.java 2090 2011-03-07 04:13:05Z dmsmith $
21 */
22 package org.crosswire.common.util;
23
24 import java.io.BufferedReader;
25 import java.io.IOException;
26 import java.io.Reader;
27 import java.util.ArrayList;
28 import java.util.List;
29
/**
 * A generic class of String utils. It would be good if we could put this stuff
 * in java.lang ...
 *
 * <p>
 * All methods are static; the class cannot be instantiated. The tokenizing
 * methods never return <code>null</code>: a <code>null</code> or empty input
 * yields an empty array.
 * </p>
 *
 * @see gnu.lgpl.License for license details.<br>
 *      The copyright to this program is held by its authors.
 * @author Joe Walker [joe at eireneh dot com]
 */
public final class StringUtil {
    /**
     * Prevent instantiation
     */
    private StringUtil() {
    }

    /**
     * The platform line separator, falling back to "\r\n" when the
     * <code>line.separator</code> system property is not set.
     */
    public static final String NEWLINE = System.getProperty("line.separator", "\r\n");

    /**
     * An empty <code>String</code> array. The tokenizing methods return a
     * clone of it for <code>null</code> or empty input.
     */
    public static final String[] EMPTY_STRING_ARRAY = new String[0];

    /**
     * Size of the read buffer used by {@link #read(Reader)}. It is given
     * explicitly to quiet Android's complaint about relying on the
     * undocumented default size.
     */
    private static final int READ_BUFFER_SIZE = 8192;

    /**
     * Upper bound, in characters, on the buffer that
     * {@link #join(Object[], String)} allocates up front. The size estimate is
     * only a guess, so it must never be allowed to demand an absurd (or, after
     * integer overflow, negative) capacity.
     */
    private static final int MAX_JOIN_PREALLOCATION = 1 << 20;

    /**
     * This method reads a Reader <b>in its entirety</b>, and passes the text
     * back as a string. If you are reading from a source that can block then
     * be prepared for a long wait for this to return.
     *
     * <p>
     * The text is read line by line and every line, including the last one, is
     * terminated with {@link #NEWLINE} in the result, whatever line terminator
     * (if any) the source used. The reader is not closed by this method.
     * </p>
     *
     * @param in
     *            The Reader to read from.
     * @return A string containing all the text from the Reader.
     * @throws IOException
     *             if reading from <code>in</code> fails
     */
    public static String read(Reader in) throws IOException {
        StringBuilder text = new StringBuilder();

        // Do not stack a second buffer on top of a reader that already buffers.
        BufferedReader lines = in instanceof BufferedReader
                ? (BufferedReader) in
                : new BufferedReader(in, READ_BUFFER_SIZE);

        for (String line = lines.readLine(); line != null; line = lines.readLine()) {
            text.append(line).append(NEWLINE);
        }

        return text.toString();
    }

    /**
     * This function creates a readable title from a variable name type input.
     * For example calling: StringUtil.createTitle("one_two") = "One Two"
     * StringUtil.createTitle("oneTwo") = "One Two"
     *
     * <p>
     * Underscores become spaces, runs of spaces are collapsed, leading spaces
     * are dropped, the first character of each word is upper-cased and a space
     * is inserted wherever an upper case character follows a character that
     * is not upper case.
     * </p>
     *
     * @param variable
     *            the variable-style name to convert, must not be null
     * @return the title form of <code>variable</code>
     * @throws NullPointerException
     *             if <code>variable</code> is null
     */
    public static String createTitle(String variable) {
        int length = variable.length();
        StringBuilder title = new StringBuilder(length + 4);
        boolean previousNotUpper = false;
        // Starting "after a space" capitalizes the first character and drops leading spaces.
        boolean previousSpace = true;

        for (int i = 0; i < length; i++) {
            char c = variable.charAt(i);
            boolean upper = Character.isUpperCase(c);

            // A camelCase hump starts a new word.
            if (previousNotUpper && upper && !previousSpace) {
                title.append(' ');
            }
            previousNotUpper = !upper;

            if (previousSpace) {
                c = Character.toUpperCase(c);
            }
            if (c == '_') {
                c = ' ';
            }

            // Collapse consecutive spaces into one.
            if (!previousSpace || c != ' ') {
                title.append(c);
            }
            previousSpace = c == ' ';
        }

        return title.toString();
    }

    /**
     * For example getInitials("Java DataBase Connectivity") = "JDC" and
     * getInitials("Church of England") = "CoE".
     *
     * <p>
     * The sentence is split on whitespace and the first letter (as defined by
     * {@link Character#isLetter(char)}) of each word is taken. Words that
     * contain no letter contribute nothing. A <code>null</code> or empty
     * sentence gives an empty string.
     * </p>
     *
     * @param sentence
     *            The phrase from which to get the initial letters.
     * @return The initial letters in the given words.
     */
    public static String getInitials(String sentence) {
        String[] words = split(sentence);
        StringBuilder initials = new StringBuilder(words.length);

        for (int w = 0; w < words.length; w++) {
            String word = words[w];
            for (int j = 0; j < word.length(); j++) {
                char c = word.charAt(j);
                if (Character.isLetter(c)) {
                    initials.append(c);
                    break;
                }
            }
        }

        return initials.toString();
    }

    /**
     * <p>
     * Splits the provided text into an array, using whitespace as the
     * separator. Whitespace is defined by {@link Character#isWhitespace(char)}.
     * </p>
     *
     * <p>
     * The separator is not included in the returned String array. Adjacent
     * separators are treated as one separator.
     * </p>
     *
     * <p>
     * A <code>null</code> input String returns an empty array.
     * </p>
     *
     * <pre>
     * StringUtil.split(null)       = []
     * StringUtil.split("")         = []
     * StringUtil.split("abc def")  = ["abc", "def"]
     * StringUtil.split("abc  def") = ["abc", "def"]
     * StringUtil.split(" abc ")    = ["abc"]
     * </pre>
     *
     * @param str
     *            the String to parse, may be null
     * @return an array of parsed Strings, empty if the input is null or empty
     */
    public static String[] split(String str) {
        return split(str, null, -1);
    }

    /**
     * <p>
     * Splits the provided text into an array, separator specified. This is an
     * alternative to using StringTokenizer.
     * </p>
     *
     * <p>
     * The separator is not included in the returned String array. Adjacent
     * separators are treated as one separator.
     * </p>
     *
     * <p>
     * A <code>null</code> input String returns an empty array.
     * </p>
     *
     * <pre>
     * StringUtil.split(null, *)         = []
     * StringUtil.split("", *)           = []
     * StringUtil.split("a.b.c", '.')    = ["a", "b", "c"]
     * StringUtil.split("a..b.c", '.')   = ["a", "b", "c"]
     * StringUtil.split("a:b:c", '.')    = ["a:b:c"]
     * StringUtil.split("a b c", ' ')    = ["a", "b", "c"]
     * </pre>
     *
     * @param str
     *            the String to parse, may be null
     * @param separatorChar
     *            the character used as the delimiter
     * @return an array of parsed Strings, empty if the input is null or empty
     * @since 2.0
     */
    public static String[] split(String str, char separatorChar) {
        return splitOnChar(str, separatorChar, -1, false);
    }

    /**
     * <p>
     * Splits the provided text into an array, separator specified. This is an
     * alternative to using StringTokenizer.
     * </p>
     *
     * <p>
     * The separator is not included in the returned String array. Adjacent
     * separators are treated individually, so leading and interior empty
     * tokens are returned. An empty token after a trailing separator is not
     * returned.
     * </p>
     *
     * <p>
     * A <code>null</code> input String returns an empty array.
     * </p>
     *
     * <pre>
     * StringUtil.splitAll(null, *)         = []
     * StringUtil.splitAll("", *)           = []
     * StringUtil.splitAll("a.b.c", '.')    = ["a", "b", "c"]
     * StringUtil.splitAll("a..b.c", '.')   = ["a", "", "b", "c"]
     * StringUtil.splitAll(".a", '.')       = ["", "a"]
     * StringUtil.splitAll("a.", '.')       = ["a"]
     * StringUtil.splitAll("a:b:c", '.')    = ["a:b:c"]
     * StringUtil.splitAll("a b c", ' ')    = ["a", "b", "c"]
     * </pre>
     *
     * @param str
     *            the String to parse, may be null
     * @param separatorChar
     *            the character used as the delimiter
     * @return an array of parsed Strings, empty if the input is null or empty
     * @since 2.0
     */
    public static String[] splitAll(String str, char separatorChar) {
        return splitOnChar(str, separatorChar, -1, true);
    }

    /**
     * <p>
     * Splits the provided text into an array, separator specified, returning
     * at most <code>max</code> elements. This is an alternative to using
     * StringTokenizer.
     * </p>
     *
     * <p>
     * The separator is not included in the returned String array. Adjacent
     * separators are treated individually, so leading and interior empty
     * tokens are returned. An empty token after a trailing separator is not
     * returned. When the limit is reached the last element holds the
     * unsplit remainder of the input.
     * </p>
     *
     * <p>
     * A <code>null</code> input String returns an empty array.
     * </p>
     *
     * <pre>
     * StringUtil.splitAll(null, *, *)         = []
     * StringUtil.splitAll("", *, *)           = []
     * StringUtil.splitAll("a.b.c", '.', 0)    = ["a", "b", "c"]
     * StringUtil.splitAll("a..b.c", '.', 0)   = ["a", "", "b", "c"]
     * StringUtil.splitAll("a.b.c", '.', 2)    = ["a", "b.c"]
     * StringUtil.splitAll("a:b:c", '.', 2)    = ["a:b:c"]
     * </pre>
     *
     * @param str
     *            the String to parse, may be null
     * @param separatorChar
     *            the character used as the delimiter
     * @param max
     *            the maximum number of elements to include in the array. A zero
     *            or negative value implies no limit
     * @return an array of parsed Strings, empty if the input is null or empty
     * @since 2.0
     */
    public static String[] splitAll(String str, char separatorChar, int max) {
        return splitOnChar(str, separatorChar, max, true);
    }

    /**
     * <p>
     * Splits the provided text into an array, separators specified. This is an
     * alternative to using StringTokenizer.
     * </p>
     *
     * <p>
     * The separator is not included in the returned String array. Adjacent
     * separators are treated as one separator.
     * </p>
     *
     * <p>
     * A <code>null</code> input String returns an empty array. A
     * <code>null</code> separatorChars splits on whitespace.
     * </p>
     *
     * <pre>
     * StringUtil.split(null, *)            = []
     * StringUtil.split("", *)              = []
     * StringUtil.split("abc def", null)    = ["abc", "def"]
     * StringUtil.split("abc def", " ")     = ["abc", "def"]
     * StringUtil.split("abc  def", " ")    = ["abc", "def"]
     * StringUtil.split("ab:cd:ef", ":")    = ["ab", "cd", "ef"]
     * StringUtil.split("a,b;c", ",;")      = ["a", "b", "c"]
     * </pre>
     *
     * @param str
     *            the String to parse, may be null
     * @param separatorChars
     *            the characters used as the delimiters, <code>null</code>
     *            splits on whitespace
     * @return an array of parsed Strings, empty if the input is null or empty
     */
    public static String[] split(String str, String separatorChars) {
        return split(str, separatorChars, -1);
    }

    /**
     * <p>
     * Splits the provided text into an array, separators specified, returning
     * at most <code>max</code> elements. This is an alternative to using
     * StringTokenizer.
     * </p>
     *
     * <p>
     * The separator is not included in the returned String array. Adjacent
     * separators are treated as one separator. When the limit is reached the
     * last element holds the unsplit remainder of the input.
     * </p>
     *
     * <p>
     * A <code>null</code> input String returns an empty array. A
     * <code>null</code> separatorChars splits on whitespace.
     * </p>
     *
     * <pre>
     * StringUtil.split(null, *, *)            = []
     * StringUtil.split("", *, *)              = []
     * StringUtil.split("ab de fg", null, 0)   = ["ab", "de", "fg"]
     * StringUtil.split("ab   de fg", null, 0) = ["ab", "de", "fg"]
     * StringUtil.split("ab:cd:ef", ":", 0)    = ["ab", "cd", "ef"]
     * StringUtil.split("ab:cd:ef", ":", 2)    = ["ab", "cd:ef"]
     * </pre>
     *
     * @param str
     *            the String to parse, may be null
     * @param separatorChars
     *            the characters used as the delimiters, <code>null</code>
     *            splits on whitespace
     * @param max
     *            the maximum number of elements to include in the array. A zero
     *            or negative value implies no limit
     * @return an array of parsed Strings, empty if the input is null or empty
     */
    public static String[] split(String str, String separatorChars, int max) {
        // Direct code is quicker than StringTokenizer.
        // Also, StringTokenizer uses isSpace() not isWhitespace()

        // Optimize the 1 character case: a plain char comparison per position.
        if (separatorChars != null && separatorChars.length() == 1) {
            return splitOnChar(str, separatorChars.charAt(0), max, false);
        }

        if (str == null || str.length() == 0) {
            return EMPTY_STRING_ARRAY.clone();
        }

        int len = str.length();
        List<String> tokens = new ArrayList<String>();
        // One more than the number of tokens added so far.
        int sizePlus1 = 1;
        int i = 0;
        int start = 0;
        boolean match = false;
        while (i < len) {
            if (isSeparator(str.charAt(i), separatorChars)) {
                if (match) {
                    if (sizePlus1++ == max) {
                        // Limit reached: the final token swallows the rest of the input.
                        i = len;
                    }
                    tokens.add(str.substring(start, i));
                    match = false;
                }
                start = ++i;
                continue;
            }
            match = true;
            i++;
        }
        if (match) {
            tokens.add(str.substring(start, i));
        }
        return tokens.toArray(new String[tokens.size()]);
    }

    /**
     * <p>
     * Joins the elements of the provided array into a single String containing
     * the provided list of elements.
     * </p>
     *
     * <p>
     * No delimiter is added before or after the list. A <code>null</code>
     * separator is the same as an empty String (""). Null objects or empty
     * strings within the array are represented by empty strings.
     * </p>
     *
     * <pre>
     * StringUtil.join(null, *)                = null
     * StringUtil.join([], *)                  = ""
     * StringUtil.join([null], *)              = ""
     * StringUtil.join(["a", "b", "c"], "--")  = "a--b--c"
     * StringUtil.join(["a", "b", "c"], null)  = "abc"
     * StringUtil.join(["a", "b", "c"], "")    = "abc"
     * StringUtil.join([null, "", "a"], ",")   = ",,a"
     * </pre>
     *
     * @param array
     *            the array of values to join together, may be null
     * @param aSeparator
     *            the separator to use, null treated as ""
     * @return the joined String, <code>null</code> if null array input
     */
    public static String join(Object[] array, String aSeparator) {
        if (array == null) {
            return null;
        }
        String separator = aSeparator == null ? "" : aSeparator;

        StringBuilder buf = new StringBuilder(estimateJoinedLength(array, separator));
        for (int i = 0; i < array.length; i++) {
            if (i > 0) {
                buf.append(separator);
            }
            if (array[i] != null) {
                buf.append(array[i]);
            }
        }
        return buf.toString();
    }

    /**
     * Guesses the length of the joined string so that the buffer can be sized
     * up front, assuming that all elements are roughly as long as the first.
     * The arithmetic is done in <code>long</code> and the result is capped, so
     * a big array with a long first element cannot overflow into a negative
     * capacity.
     *
     * @param array
     *            the elements to be joined, not null
     * @param separator
     *            the separator that will be used, not null
     * @return a capacity between 0 and {@link #MAX_JOIN_PREALLOCATION}
     */
    private static int estimateJoinedLength(Object[] array, String separator) {
        if (array.length == 0) {
            return 0;
        }
        // 16 is an arbitrary stand-in when the first element says nothing about size.
        long firstLength = array[0] == null ? 16 : array[0].toString().length();
        long estimate = array.length * (firstLength + separator.length());
        return (int) Math.min(estimate, MAX_JOIN_PREALLOCATION);
    }

    /**
     * Tokenizes on a single separator character; the shared implementation of
     * the <code>char</code> based split methods.
     *
     * @param str
     *            the String to parse, may be null
     * @param separator
     *            the character used as the delimiter
     * @param max
     *            the maximum number of elements to return, zero or negative
     *            for no limit
     * @param keepEmpty
     *            <code>true</code> to return the empty tokens found before or
     *            between separators, <code>false</code> to treat adjacent
     *            separators as one
     * @return an array of parsed Strings, empty if the input is null or empty
     */
    private static String[] splitOnChar(String str, char separator, int max, boolean keepEmpty) {
        if (str == null || str.length() == 0) {
            return EMPTY_STRING_ARRAY.clone();
        }

        int len = str.length();
        List<String> tokens = new ArrayList<String>();
        // One more than the number of tokens added so far.
        int sizePlus1 = 1;
        int i = 0;
        int start = 0;
        boolean match = false;
        while (i < len) {
            if (str.charAt(i) == separator) {
                if (match || keepEmpty) {
                    if (sizePlus1++ == max) {
                        // Limit reached: the final token swallows the rest of the input.
                        i = len;
                    }
                    tokens.add(str.substring(start, i));
                    match = false;
                }
                start = ++i;
                continue;
            }
            match = true;
            i++;
        }
        // Only a non-empty tail is a token; an empty one after a trailing separator is dropped.
        if (match) {
            tokens.add(str.substring(start, i));
        }
        return tokens.toArray(new String[tokens.size()]);
    }

    /**
     * Tells whether a character is a delimiter for the given separator set.
     *
     * @param c
     *            the character to test
     * @param separatorChars
     *            the delimiter characters, <code>null</code> meaning any
     *            whitespace as defined by {@link Character#isWhitespace(char)}
     * @return <code>true</code> if <code>c</code> separates tokens
     */
    private static boolean isSeparator(char c, String separatorChars) {
        if (separatorChars == null) {
            return Character.isWhitespace(c);
        }
        return separatorChars.indexOf(c) >= 0;
    }

}
585