View Javadoc

1   //
2   //Copyright (c) 2003, Caltha - Gajda, Krzewski, Mach, Potempski Sp.J.
3   //All rights reserved.
4   //
5   //Redistribution and use in source and binary forms, with or without modification, 
6   //are permitted provided that the following conditions are met:
7   //
8   //* Redistributions of source code must retain the above copyright notice, 
9   //this list of conditions and the following disclaimer.
10  //* Redistributions in binary form must reproduce the above copyright notice, 
11  //this list of conditions and the following disclaimer in the documentation 
12  //and/or other materials provided with the distribution.
13  //* Neither the name of the Caltha - Gajda, Krzewski, Mach, Potempski Sp.J. 
14  //nor the names of its contributors may be used to endorse or promote products 
15  //derived from this software without specific prior written permission.
16  //
17  //THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
18  //AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
19  //WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  //IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
21  //INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
22  //BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
23  //OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
24  //WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
25  //ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
26  //POSSIBILITY OF SUCH DAMAGE.
27  //
28  
29  package org.objectledge.utils;
30  
31  import java.io.IOException;
32  import java.io.OutputStream;
33  import java.io.OutputStreamWriter;
34  import java.io.UnsupportedEncodingException;
35  import java.text.DecimalFormat;
36  import java.text.NumberFormat;
37  import java.util.ArrayList;
38  import java.util.Arrays;
39  import java.util.Iterator;
40  import java.util.LinkedHashSet;
41  import java.util.List;
42  import java.util.Locale;
43  import java.util.Map;
44  import java.util.Set;
45  import java.util.StringTokenizer;
46  
47  /**
48   * This class contains various functions for manipulating Java Strings.
49   *
50   * @author <a href="mailto:damian@caltha.pl">Damian Gajda</a>
51   * @author <a href="mailto:rafal@caltha.pl">Rafal Krzewski</a>
52   * @author <a href="mailto:pablo@caltha.pl">Pawel Potempski</a>
53   *
54   * @version $Id: StringUtils.java,v 1.42 2006/06/30 12:07:29 zwierzem Exp $
55   */
56  public class StringUtils
57  {
    /**
     * Private constructor that prevents instantiation of this class, which
     * only exposes static methods.
     */
    private StringUtils()
    {
        // static access only
    }
65      
66      /***
67       *  Prepares a given String to be used as a HTTP cookie name.
68     	 * 
69    	 *
70    	 * @see StringUtils#cookieNameSafeString(String, char)
71     	 * @param input Cookie name unsafe string.
72     	 * @return a modified string.
73     	 */
74      public static String cookieNameSafeString(String input)
75      {
76          return cookieNameSafeString(input, '.');
77      }
78  
79      /*** 
80       * Prepares a given String to be used as a HTTP cookie name.
81       *
82       * <p>It replaces characters used in cookies (exactly <code>;</code>
83       * semicolon, <code>,</code> comma, <code>=</code> equals, <code>$</code>
84       * and whitespace) with a given character.  If this character is equal to
85       * any of the unsafe characters <code>.</code> dot is used.</p>
86       *
87       * @param input Cookie name unsafe string.
88       * @param replaceChar Character to be used as a replacement for unsafe
89       *        characters.
90       * @return a modified string.
91       */
92      public static String cookieNameSafeString(String input, char replaceChar)
93      {
94          // check for unsafe replacement character
95          if (replaceChar == '=' || replaceChar == ',' || replaceChar == ';' || 
96              replaceChar == '$' || Character.isWhitespace(replaceChar))
97          {
98              replaceChar = '.';
99          }
100 
101         if (input != null)
102         {
103             StringBuilder sb = new StringBuilder(input);
104             int length = sb.length();
105             for (int i = 0; i < length; i++)
106             {
107                 char c = sb.charAt(i);
108                 // replace unwanted chars
109                 if (Character.isWhitespace(c) || c == '=' || c == ';' || c == ',' || c == '$' 
110                     || !(( c >= 'a' && c <= 'z') || ( c >= 'A' && c <= 'Z') || ( c >= '0' && c <= '9')))
111                 {
112                     sb.setCharAt(i, replaceChar);
113                 }
114             }
115             input = sb.toString();
116         }
117         return input;
118     }
119     
120 	/***
121 	 * Backslash escape reserved characters in a string.
122 	 *
123 	 * @param in the string to process.
124 	 * @param reserved the reserved characters.
125 	 * @return the string reserved characters escaped.
126 	 */
127 	public static String backslashEscape(String in, String reserved)
128 	{
129         if(in == null)
130         {
131             return null;
132         }
133 		StringBuilder out = new StringBuilder();
134 		StringTokenizer st = new StringTokenizer(in,reserved,true);
135 		while(st.hasMoreTokens())
136 		{
137 			String t = st.nextToken();
138 			if(t.length() == 1 && reserved.indexOf(t) >= 0)
139 			{
140 				out.append('//');
141 			}
142 			out.append(t);
143 		}               
144 		return out.toString();
145 	}
146 
147 	/***
148 	 * Escapes characters outside the US-ASCII range as Java unicode scapes
149 	 * (&#2F;uxxxx where x is a hexadecimal digit).
150 	 *
151 	 * @param s the string to process.
152 	 * @return processed string.
153 	 */
154 	public static String escapeNonASCIICharacters(String s)
155 	{
156 		StringBuilder buff = new StringBuilder();
157 		char[] chars = s.toCharArray();
158 		for(int i=0; i<chars.length; i++)
159 		{
160 			if(chars[i] < 128)
161 			{
162 				buff.append(chars[i]);
163 			}
164 			else
165 			{
166 				buff.append("//u");
167 				String ucode = Integer.toString(chars[i], 16);
168 				for(int j=4-ucode.length(); j>0; j--)
169 				{
170 					buff.append('0');
171 				}
172 				buff.append(ucode);
173 			}
174 		}
175 		return buff.toString();
176 	}
177 
178     /***
179      * Expand unicode escapes.
180      * 
181      * @param s the string to process.
182      * @return processed string.
183      * @throws IllegalArgumentException if the string contains invalid unicode
184      *   escapes.
185      */
186     public static String expandUnicodeEscapes(String s) throws IllegalArgumentException
187     {
188         StringBuilder buffer = new StringBuilder();
189         int last = 0;
190         int cur = s.indexOf("//u", last);
191         if (cur >= 0)
192         {
193             if (cur <= s.length() - 6)
194             {
195                 buffer.setLength(0);
196                 while (cur >= 0)
197                 {
198                     buffer.append(s.substring(last, cur));
199                     String ucodeStr = s.substring(cur + 2, cur + 6);
200                     try
201                     {
202                         int ucode = Integer.parseInt(ucodeStr, 16);
203                         if (Character.isDefined((char)ucode))
204                         {
205                             buffer.append((char)ucode);
206                         }
207                         else
208                         {
209                             throw new IllegalArgumentException("invalid unicode character code " +
210                                     "in an unicode escape");
211                         }
212                     }
213                     catch (NumberFormatException e)
214                     {
215                         throw new IllegalArgumentException("invalid hexadecimal number in an " +
216                                 "unicode escape");
217                     }
218                     last = cur + 6;
219                     cur = s.indexOf("//u", last);
220                 }
221             }
222             else
223             {
224                 throw new IllegalArgumentException("truncated unicode escape");
225             }
226         }
227         buffer.append(s.substring(last));
228         return buffer.toString();
229     }
230 
231     /***
232      * Performs variable substitution on a template string.
233      *
234      * <p>The variables are formed of the dollar sign followed by a digit, ranging
235      * from 1 to 9. Special variable formed of the dollar singn followed by an
236      * asterisk is substitued with the remaining values separated by
237      * commas. Dollar sign followed by another dollar sign expands to single
238      * dollar sign. If there are not enough values provided, undefined variables
239      * will be substitutes with empty strings. Examples:
240      * <table>
241      * <tr><td>template</td><td>values</td><td>result</td></tr>
242      * <tr><td>$1 rules</td><td>{"ziu"}</td><td>ziu rules</td></tr>
243      * <tr><td>$1 said "$2"</td><td>{"Fred","Foo!"}</td><td>Fred said
244      * "Foo!"</td></tr>
245      * <tr><td>"$2" said $1</td><td>{"Fred","Foo!"}</td><td>"Foo!" said
246      * Fred</td></tr>
247      * <tr><td>$1 likes the following colors:
248      * $*</td><td>{"Mike","blue","grey","cyan"}</td>
249      * <td>Mike likes the following colors: blue, grey, cyan</td></tr>
250      * <tr><td>$1 likes the following $2:
251      * $*</td><td>{"Mike","food","pizza","french fries"}</td><td>Mike likes
252      * the following food: pizza, french fries</td></tr>
253      * </table></p>
254      *
255      * @param template the template string.
256      * @param values the values of the variables.
257      * @return the output string.
258      */
259     public static String substitute(String template, String[] values)
260     {
261         StringBuilder buff = new StringBuilder();
262         int maxUsed = 0;
263         char[] t = template.toCharArray();
264         for (int i = 0; i < t.length; i++)
265         {
266             if (t[i] == '$' && i < t.length - 1)
267             {
268                 if (t[i + 1] == '$')
269                 {
270                     buff.append('$');
271                     i++;
272                 }
273                 else if (t[i + 1] > '0' && t[i + 1] < '9')
274                 {
275                     int v = t[i + 1] - '0';
276                     if (v - 1 < values.length)
277                     {
278                         buff.append(values[v - 1]);
279                     }
280                     if (v > maxUsed)
281                     {
282                         maxUsed = v;
283                     }
284                     i++;
285                 }
286                 else if (t[i + 1] == '*')
287                 {
288                     if (maxUsed < values.length)
289                     {
290                         for (int v = maxUsed; v < values.length; v++)
291                         {
292                             buff.append(values[v]);
293                             buff.append(", ");
294                         }
295                         buff.setLength(buff.length() - 2);
296                     }
297                     i++;
298                 }
299                 else
300                 {
301                     buff.append('$');
302                 }
303             }
304             else
305             {
306                 buff.append(t[i]);
307             }
308         }
309         return buff.toString();
310     }
311     
312 	/***
313 	 * Build a locale from string.
314 	 *
315 	 * @param name a string representation of a locale
316 	 * @return a <code>Locale</code> object
317 	 */
318 	public static Locale getLocale(String name)
319 	{
320         String[] t = name.split("_");
321         switch(t.length)
322         {
323         case 1:
324             return new Locale(t[0]);
325         case 2:
326             return new Locale(t[0], t[1]);
327         case 3:
328             return new Locale(t[0], t[1], t[2]);
329         default:
330             StringBuilder v = new StringBuilder();
331             for(int i = 2 ; i < t.length; i++)
332             {
333                 v.append(t[i]);
334                 if(i < t.length - 1)
335                 {
336                     v.append('_');
337                 }
338             }
339             return new Locale(t[0], t[1], v.toString());
340         }
341 	}
342 	
343 	/***
344 	 * Determines the number of bytes the string will ocuppy in a specifc 
345 	 * character encoding.
346 	 * 
347 	 * @param string the string.
348 	 * @param encoding the requested encoding.
349 	 * @return the size of the string.
350 	 * @throws IOException if happens.
351 	 */
352 	public static int getByteCount(String string, String encoding) 
353 		throws IOException
354 	{
355 		if(encoding.startsWith("ISO-8859"))
356 		{
357 			return string.length();
358 		}
359 		if(encoding.equals("UTF-16"))
360 		{
361 			return string.length() * 2;
362 		}
363 		if(string.length() < 65536)
364 		{
365 			byte[] bytes = string.getBytes(encoding);
366 			return bytes.length;
367 		}
368 		else
369 		{
370 			CountOutputStream counter = new CountOutputStream();
371             OutputStreamWriter writer = null; 
372             try
373             {
374                 writer = new OutputStreamWriter(counter, encoding);
375                 writer.write(string);
376             }
377             finally
378             {
379                 ///CLOVER:OFF
380                 if(writer != null)
381                 {
382                     writer.close();
383                 }
384                 ///CLOVER:ON
385             }
386 			return counter.getCount();
387 		}
388 	}
389 	
390 	/***
391 	 * Helper class to count the string length. 
392 	 */
393 	private static class CountOutputStream
394 		extends OutputStream
395 	{
396 		private int count;
397 
398         ///CLOVER:OFF
399 		/* overriden */
400 		public void write(int b) throws IOException
401 		{
402 			count++;
403 		}
404         
405 		/* overiden */
406 		public void write(byte[] b) throws IOException
407 		{
408 			count += b.length;
409 		}
410         ///CLOVER:ON
411 
412 		/* overiden */
413 		public void write(byte[] b, int offset, int length) throws IOException
414 		{
415 			count += length;
416 		}
417         
418 		public int getCount()
419 		{
420 			return count;
421 		}
422 	}  
423     
424     /***
425      * Appends the specified number of space charcter to a string buffer and returns it.
426      * 
427      * @param buffer the buffer.
428      * @param d number of spaces.
429      * @return the buffer.
430      */
431     public static StringBuilder indent(StringBuilder buffer, int d)
432     {
433         for(int i=0; i<d; i++)
434         {
435             buffer.append(' ');
436         }
437         return buffer;
438     }
439 
440     /***
441      * Fill the string to the expected length with the specified filling character.
442      * Filling charaters will be added at the beginning of this input string.
443      * 
444      * @param input the input string.
445      * @param total the expected length of result string.
446      * @param filling the filling character.
447      * @return the result string.
448      */
449     public static String fillString(String input,int total,char filling)
450     {
451         StringBuilder sb = new StringBuilder();
452         int missing = total-input.length();
453         for(int i=0; i<missing; i++)
454         {
455             sb.append(filling);
456         }
457         sb.append(input);
458         return sb.toString();
459     }
460     
461     /*** 
462      * Creates ascii based unicode representation of the string.
463      * Each unicode character of the input will be tranformed to 8 ascii 
464      * characters in the following format:
465      * "\"&lt;octal lower byte&gt;"\"&lt;octal higher byte&gt;
466      * i.e. the output string looks as follows:
467      * "\124\000\102\001\165\000\155\000".
468      *
469      * @param input the input string.
470      * @return the output.
471      */
472     public static String toOctalUnicode(String input)
473     {
474         StringBuilder sb = new StringBuilder();
475         for(int i = 0; i < input.length(); i++)
476         {
477             int value = input.charAt(i);
478             String upper = fillString(Integer.toOctalString(value/256), 3, '0');
479             String lower = fillString(Integer.toOctalString(value%256), 3, '0');            
480             sb.append("//");
481             sb.append(lower);
482             sb.append("//");
483             sb.append(upper);
484         }
485         return sb.toString();
486     }    
487 
488     /***
489      * Wrap the text to the specified number of columns.
490      *
491      * <p>The input string is expected to be a series of lines of text
492      * delimeted by \n characters. The output string contains the text
493      * reformatted in such way that each line is at most <code>width</code>
494      * characters wide. For each line of input text that is longer than the
495      * limit, last whitespace character before the limit is searched, and is
496      * replaced by a newline. Any whitespace characters immediately following
497      * that character are discarded. If the input text contains a sequence of 
498      * non-whitespace characters longer than the specified limit, the sequence
499      * will be broken by newlines to fit in the limit.</p>
500      * 
501      * @param in the text to format.
502      * @param width the width of the output text.
503      * @return wrapped text.
504      */
505     public static String wrap(String in, int width)
506     {
507         if(in.length() <= width)
508         {
509             return in;
510         }
511         StringTokenizer st = new StringTokenizer(in, "\n");
512         StringBuilder out = new StringBuilder();
513         StringBuilder lineOut = new StringBuilder();
514         String line;
515         int a,b;
516         while(st.hasMoreTokens())
517         {
518             line = st.nextToken();
519             if(line.length() <= width)
520             {
521                 out.append(line).append('\n');
522                 continue;
523             }
524             lineOut.setLength(0);
525             a = 0;
526             b = width;
527             while(b < line.length())
528             {
529                 while(b > a && !Character.isWhitespace(line.charAt(b)))
530                 {
531                     b--;
532                 }
533                 if(b == a)
534                 {
535                     b = a + width;
536                 }
537                 lineOut.append(line.substring(a,b)).append('\n') ;
538                 a = b;
539                 while(a < line.length() && Character.isWhitespace(line.charAt(a)))
540                 {
541                     a++;
542                 }
543                 b = a + width;
544             }
545             lineOut.append(line.substring(a));
546             out.append(lineOut);
547         }
548         return out.toString();
549     }
550     
551     /***
552      * Justifies a list of strings.
553      * 
554      * @param strings the strings.
555      * @param w the width of the text column.
556      */
557     public static String justify(List<String> strings, int w)
558     {
559         StringBuilder buff = new StringBuilder(w);
560         int t = 0;
561         for(int i=0; i<strings.size(); i++)
562         {
563             t += strings.get(i).length();
564         }
565         int s = w - t;
566         int g = strings.size() - 1;
567         if(g < 0)
568         {
569             // do nothing
570         }
571         else if(g == 0)
572         {
573             buff.append(strings.get(0));
574         }
575         else if(s <= g)
576         {
577             for(int i=0; i<strings.size(); i++)
578             {
579                 buff.append(strings.get(i).trim());
580                 buff.append(' ');
581             }
582         }
583         else
584         {
585             int gw = s / g;
586             int d = s % g > 0 ? g / (s % g) : 0; 
587             for(int i=0; i<strings.size(); i++)
588             {
589                 buff.append(strings.get(i).trim());
590                 for(int j=0; j<gw; j++)
591                 {
592                     buff.append(' ');
593                 }
594                 if(d > 0 && i % d == 0)
595                 {
596                     buff.append(' ');
597                 }
598             }
599         }
600         return buff.toString();
601     }
602 
603     /***
604      * Shorten the string to the specifed lenght.
605      * 
606      * <p>If the string is shorter than the maxLength limit it is returned intact. If the string is 
607      * longer,it will be truncated and the suffix will be added. If truncation is necessary, the 
608      * preferred truncation point is the last punctuation character before the limit, unless it
609      * occurs later than minLength in the string. In this case the prefferred truncation point
610      * is the last whitespace before the maxLength limit unless it occurs later than minLength.
611      * At last resort, the string is truncated at maxLenght limit.</p>
612      * 
613      * @param source the string to process.
614      * @param minLength minimum length of the shortened string.
615      * @param maxLength maximum length limit.
616      * @param suffix suffix to add if the string is actually shortened. Should be " ..." or
617      * " \u2026" (using Unicode horizontal ellipsis glyph).
618      * @return shortened string.
619      */
620     public static String shortenString(String source, int minLength, int maxLength, String suffix)
621     {
622         if(source == null || maxLength >= source.length())
623         {
624             return source;
625         }
626         // try to find a punctuation character before length limit
627         int i;
628         for(i = maxLength - 1; i >=0 ; i--)
629         {
630             int type = Character.getType(source.charAt(i));
631             if(type >= Character.DASH_PUNCTUATION && type <= Character.OTHER_PUNCTUATION)
632             {
633                 break;
634             }
635         }
636         if(i < minLength)
637         {
638             // try to find a whitespace chracter before length limit
639             for(i = maxLength - 1; i >=0 ; i--)
640             {
641                 if(Character.isWhitespace(source.charAt(i)))
642                 {
643                     break;
644                 }
645             }
646         }
647         if(i < minLength)
648         {
649             i = maxLength;
650         }
651         return source.substring(0, i) + suffix;
652     }
653     
654     // pathnames ////////////////////////////////////////////////////////////
655     
656     /***
657      * Normalizes a pathname.
658      *
659      * <p>This method removes redundant / characters, removes . and .. path elements,
660      * taking care that the paths dont reach outside filesystem root, removes trailing /
661      * from directories and adding leading / as neccessary.</p>
662      * 
663      * @param path the path.
664      * @return normalized path.
665      * @throws IllegalArgumentException if the path reaches outside the filesystem root.
666      */
667     public static String normalizedPath(String path)
668         throws IllegalArgumentException
669     {
670         if(path.length()==0 || path.equals("/"))
671         {
672             return "/";
673         }
674         StringTokenizer st = new StringTokenizer(path, "/");
675         ArrayList<String> temp = new ArrayList<String>(st.countTokens());
676         while(st.hasMoreTokens())
677         {
678             String t = st.nextToken();
679             if(t.equals("."))
680             {
681                 continue;
682             }
683             else if(t.equals(".."))
684             {
685                 if(temp.isEmpty())
686                 {
687                     throw new IllegalArgumentException("path outside filesystem root: "+path);  
688                 }
689                 else
690                 {
691                     temp.remove(temp.size()-1);
692                 }
693             }
694             else
695             {
696                 temp.add(t);
697             }
698         }
699         StringBuilder sb = new StringBuilder();
700         for(int i=0; i<temp.size(); i++)
701         {
702             sb.append('/').append(temp.get(i));
703         }
704         return sb.toString();
705     }
706     
707     /***
708      * Returns the base name of a file.
709      * 
710      * <p>This method returns the contents of the pathname after the last '/' 
711      * character. </p>
712      *
713      * @param path the pathname of the file.
714      * @return the basename of the file.     
715      */
716     public static String basePath(String path)
717     {
718         int pos = path.lastIndexOf('/');
719         if(pos < 0)
720         {
721             return path;
722         }
723         else
724         {
725             return path.substring(pos+1);
726         }
727     }
728     
729     /***
730      * Returns hte directory name of a file.
731      * 
732      * <p>This method returns the normalized path before the last '/' character
733      * in the path.</p>
734      * 
735      * @param path the pathname of the file.
736      * @return the directory name of the file.   
737      */
738     public static String directoryPath(String path)
739     {
740         path = normalizedPath(path);
741         return path.substring(0, path.lastIndexOf('/'));        
742     }
743     
744     /***
745      * Returns the relative pathname of a file with respect to given
746      * base directory.
747      *
748      * @param path the pathname of a file.
749      * @param base the base pathname.
750      * @return the relative pathname.
751      * @throws IllegalArgumentException if the file is contained
752      *         outside of base.
753      */
754     public static String relativePath(String path, String base)
755         throws IllegalArgumentException
756     {
757         base = normalizedPath(base);
758         path = normalizedPath(path);
759         if(!path.startsWith(base))
760         {
761             throw new IllegalArgumentException(path+" is not contained in "+base);
762         }
763         return path.substring(base.length());
764     }    
765     
766     /***
767      * Expand macros in a string.
768      *  
769      * @param s the String to process.
770      * @param t the macros (token -&gt; value)
771      * @return an expanded String
772      */
773     public static String expand(String s, Map t)
774     {
775         if(t==null || t.size()==0)
776         {
777             return s;
778         }
779         StringBuilder buff = new StringBuilder(s.length());
780         Iterator keys = t.keySet().iterator();
781         int pos, lastpos;
782         String k, v;
783         while(keys.hasNext())
784         {
785             k = (String)keys.next();
786             pos = s.indexOf(k);
787             if(pos < 0)
788             {
789                 continue;
790             }
791             lastpos = 0;
792             v = (String)t.get(k);
793             buff.setLength(0);
794             while(pos >= 0)
795             {
796                 buff.append(s.substring(lastpos, pos));
797                 buff.append(v);
798                 lastpos = pos+k.length();
799                 pos = s.indexOf(k, lastpos);
800             }
801             buff.append(s.substring(lastpos));
802             s = buff.toString();
803         }
804         return s;
805     }
806     
807     /***
808      * Escapes xml characters.
809      *
810      * @param string a string to escape
811      * @return the processed string.
812      */
813     public static String escapeXMLCharacters(String string)
814     {
815         StringBuilder sb = new StringBuilder();
816         for (int i = 0; i < string.length(); i++)
817         {
818             char c = string.charAt(i);
819             switch (c)
820             {
821                 case '<' :
822                     sb.append("&lt;");
823                     break;
824                 case '>':
825                     sb.append("&gt;");
826                     break;
827                 case '&':
828                     sb.append("&amp;");
829                     break;
830                 default:
831                     sb.append(c);
832             }
833         }
834         return sb.toString();
835     }
836     
837     /***
838      * Convert newlines in the string into &lt;br/&gt; tags.
839      *
840      * @param s the string to process.
841      * @return processed string.
842      */
843     public static String htmlLineBreaks(String s)
844     {
845         if(s == null)
846         {
847             return "";
848         }
849         StringBuilder out = new StringBuilder();
850         char[] chars = s.toCharArray();
851         for(int i=0; i<chars.length; i++)
852         {
853             if(chars[i] == '\n')
854             {
855                 out.append("<br />");
856             }
857             else if(chars[i] == '\r')
858             {
859                 if(i<chars.length-1 && chars[i+1] == '\n')
860                 {
861                     i++;
862                 }
863                 out.append("<br />");
864             }
865             else
866             {
867                 out.append(chars[i]);
868             }
869         }
870         return out.toString();
871     }
872     
873     /***
874      * Returns human readable representation of interval value in days, hours etc.
875      * 
876      * @param interval in seconds.
877      * @return human readable interval specification.
878      */
879     public static String formatInterval(long interval)
880     {
881         long days = interval / (24 * 60 * 60);
882         interval -= days * 24 * 60 * 60;
883         long hours = interval / (60 * 60);
884         interval -= hours * 60 * 60;
885         long minutes = interval / 60;
886         interval -= minutes * 60;
887         long seconds = interval;
888         StringBuffer buff = new StringBuffer();
889         if(days > 0)
890         {
891             buff.append(days).append(" days, ");
892         }
893         if(days > 0 || hours > 0)
894         {
895             buff.append(hours).append(" hours, ");
896         }
897         if(days > 0 || hours > 0 || minutes > 0)
898         {
899             buff.append(minutes).append(" minutes, ");
900         }
901         buff.append(seconds).append(" seconds");
902         return buff.toString();
903     }
904 
905     /***
906      * Format a millisecond interval as number of seconds (with fracitonal part).
907      * 
908      * @param interval interval in milliseconds. 
909      * @return interval as number of seconds (with fracitonal part).
910      */
911     public static String formatMilliIntervalAsSeconds(long interval)
912     {
913         long seconds = interval / 1000;
914         long millis = interval - seconds * 1000;
915         
916         StringBuilder buff = new StringBuilder();
917         buff.append(seconds).append(".");
918         buff.append(millis).append("s");
919         return buff.toString();
920     }
921     
922     /***
923      * Renders a human readable event rate esitmation.
924      * 
925      * @param events number of events.
926      * @param time timespan in seconds.
927      * @param event event name.
928      * @return a human readable event rate esitmation.
929      */
930     public static String formatRate(double events, double time, String event)
931     {
932         StringBuffer buff = new StringBuffer();
933         NumberFormat format = new DecimalFormat("#.##");
934         if(events > time)
935         {
936             buff.append(format.format(events/time)+" "+event+"s / 1s on average");
937         }
938         else
939         {
940             double interval = time/events;
941             int d = (int)(interval / (24 * 3600));
942             interval -= d * 24 * 3600;
943             int h = (int)(interval / 3600);
944             interval -= h * 3600;
945             int m = (int)(interval / 60);
946             interval -= m * 60;
947             buff.append("1 "+event+" / ");
948             if(d > 0)
949             {
950                 buff.append(d+"d ");
951             }
952             if(h > 0 || d > 0)
953             {
954                 buff.append(h+"h ");
955             }
956             if(m > 0 || h > 0 || d > 0)
957             {
958                 buff.append(m+"m ");
959             }
960             buff.append(format.format(interval)+" s on average");
961         }                
962         return buff.toString();
963     }    
964     
965     /***
966      * Checks if a given string is <code>null</code> or empty.
967      * 
968      * @param str string to be checked.
969      * @return true if the string is <code>null</code> or empty.
970      */
971     public static boolean isEmpty(String str)
972     {
973         if( str == null)
974         { 
975             return true;
976         }
977         if( str.length()==0)
978         {
979             return true;
980         }
981         return false;
982     }
983 
984     /***
985      * Capitalize given string by transforming the first character to upper case.
986      * 
987      * @param str the string.
988      * @return the capitalized string.
989      */
990     public static String capitalize(String str)
991     {
992         StringBuilder buff = new StringBuilder(str.length());
993         buff.append(Character.toUpperCase(str.charAt(0)));
994         buff.append(str.substring(1));
995         return buff.toString();
996     }
997     
    /*** Name of the UTF-8 character encoding, used when converting between Strings and bytes. */
    public static final String UTF_8_ENCODING = "UTF-8";
999 
1000     /***
1001      * Convert from UTF-8 bytes to a String.
1002      * 
1003      * @param bytes UTF-8 bytes.
1004      * @return a String.
1005      */
1006     public static String fromUTF8(byte[] bytes)
1007     {
1008         if(bytes == null)
1009         {
1010             return null;
1011         }
1012         try
1013         {
1014             return new String(bytes, UTF_8_ENCODING);
1015         }
1016         catch(UnsupportedEncodingException e)
1017         {
1018             throw new IllegalStateException("UTF-8 not supported?", e);
1019         }
1020     }
1021 
1022     /***
1023      * Convert from a String to UTF-8 bytes.
1024      * 
1025      * @param string String.
1026      * @return UTF-8 bytes.
1027      */
1028     public static byte[] toUTF8(String string)
1029     {
1030         if(string == null)
1031         {
1032             return null;
1033         }        
1034         try
1035         {
1036             return string.getBytes(UTF_8_ENCODING);
1037         }
1038         catch(UnsupportedEncodingException e)
1039         {
1040             throw new IllegalStateException("UTF-8 not supported?", e);
1041         }
1042     }    
1043     
1044     /***
1045      * Append strings to the end of a string array.
1046      * 
1047      * @param values original array.
1048      * @param additional strings to be added.
1049      * @return new array containg combined values.
1050      */
1051     public static String[] push(String[] values, String ... additional)
1052     {
1053         String[] result = new String[values.length + additional.length];
1054         System.arraycopy(values, 0, result, 0, values.length);
1055         System.arraycopy(additional, 0, result, values.length, additional.length);
1056         return result;
1057     }
1058     
1059     /***
1060      * Remove strings from the end of a string array.
1061      * 
1062      * @param values original array.
1063      * @param n the number of strings to be removed.
1064      * @return new array containt smallr number of values.
1065      */
1066     public static String[] pop(String[] values, int n)
1067     {
1068         String[] result = new String[values.length - n];
1069         System.arraycopy(values, 0, result, 0, values.length - n);
1070         return result;
1071     }
1072 
1073     /***
1074      * Splits the string by the given separator and returns results as a set of strings.
1075      * The set is ordered (LinkedHashSet). 
1076      * @param in the input string
1077      * @param separator the delimiter
1078      * @return the set of strings.
1079      */
1080     public static Set<String> split(String in, String separator)
1081     {
1082         if(in == null || in.equals(""))
1083         {
1084             return new LinkedHashSet<String>();
1085         }
1086         Set<String> set = new LinkedHashSet<String>(in.length() / 2);
1087         set.addAll(Arrays.asList(in.split(separator)));
1088         return set;
1089     }
1090     
1091     /***
1092      * Joins a set of strings comobinig them usign a given separator.
1093      *  
1094      * @param strings the input set of strings.
1095      * @param separator the joining string.
1096      * @return the resulting string.
1097      */
1098     public static String join(Set<String> strings, String separator)
1099     {
1100         StringBuilder b = new StringBuilder(256);
1101         int i = 0;
1102         for (String str : strings)
1103         {
1104             if(str != null && str.length() > 0)
1105             {
1106                 if(i > 0)
1107                 {
1108                     b.append(separator);
1109                 }
1110                 b.append(str);
1111                 i++;
1112             }
1113         }
1114         return b.toString();
1115     }
1116 
1117     
1118     /***
1119      * Format size value in <code>B</code>, <code>kB</code>, <code>MB</code>,
1120      * for example <code>15kB</code> or <code>23.5MB</code>.
1121      *
1122      * @param value the size in bytes.
1123      * @param precision number of digits in decimal fraction.
1124      * @return the size as string with a proper unit suffix.
1125      */
1126     public static String bytesSize(long value, int precision)
1127     {        
1128         StringBuilder b = new StringBuilder();
1129         if(value < 1024L)
1130         {
1131             return b.append(value).append("B").toString();
1132         }
1133         double floatValue = value;
1134         if(value < 1048576L)
1135         {
1136             b.append(floatValue/1024.0);
1137             cutDigits(precision, b);
1138             return b.append("kB").toString();
1139         }
1140         b.append(floatValue/1048576.0);
1141         cutDigits(precision, b);
1142         return b.append("MB").toString();        
1143     }
1144 
1145     private static void cutDigits(int precision, StringBuilder b)
1146     {
1147         int index = b.indexOf(".");
1148         if(index != -1 && b.length() > index+precision)
1149         {
1150             b.setLength(index+precision);
1151         }
1152     }
1153 
1154     private static enum ByteSizeState
1155     {
1156         START, BYTE, PREFRACTION, FRACTION, NUMBER, ERROR
1157     }
1158     
    /***
     * Parse a size value given as a number with an optional <code>B</code>, <code>kB</code>, 
     * <code>MB</code> or <code>GB</code> suffix, for example <code>15kB</code> or 
     * <code>23.5MB</code>.
     * 
     * <p>The value is trimmed and lower-cased before parsing, so the suffix is case 
     * insensitive. Spaces or tabs may separate the number from the suffix. Both 
     * <code>.</code> and <code>,</code> are accepted as the decimal separator. The prefixes
     * are powers of 1024. The fractional part is rounded to a whole number of bytes.</p>
     * 
     * <p>The input is scanned from the last character towards the first one by a small 
     * state machine. There is no protection against arithmetic overflow, so very big sizes 
     * yield wrong results.</p>
     *
     * @param value the size as string with a proper unit suffix.
     * @return the size in bytes, or <code>-1</code> if the value is <code>null</code>, empty
     *         or malformed.
     */
    public static long parseBytesSize(String value)
    {
        if(isEmpty(value))
        {
            return -1L; // error
        }
        
        value = value.toLowerCase().trim();
        // value of one unit in bytes, stays 1 when there is no k/m/g prefix
        long multiplier = 1L;
        // decimal weight of the next digit to be read (digits arrive right to left) 
        long order = 1L;
        // digits accumulated so far in the current run
        long size = 0L;
        // the fractional part converted to bytes, set when the separator is reached
        long sizeFraction = 0L;
        ByteSizeState state = ByteSizeState.START;
        // scan right to left: unit suffix first, then the digits; stop on the first error
        for (int i = value.length() - 1; i >= 0 && state != ByteSizeState.ERROR; i--)
        {
            char c = value.charAt(i);
            switch(state)
            {
            case START:
                // the last character is either the 'b' of the unit or the last digit
                if(c == 'b')
                {
                    state = ByteSizeState.BYTE;
                }
                else if(c >= '0' && c <= '9')
                {
                    state = ByteSizeState.FRACTION;
                    size = (c - '0');
                    order = 10L;
                }
                else
                {
                    state = ByteSizeState.ERROR;
                }
                break;
            case BYTE:
                // the character before 'b': a k/m/g prefix, whitespace or already a digit
                state = ByteSizeState.PREFRACTION;
                if(c == 'k') multiplier = 1024L;
                else if(c == 'm') multiplier = 1048576L;
                else if(c == 'g') multiplier = 1073741824L;
                else if(c == ' ' || c == '\t')
                {
                    // PREFRACTION
                }
                else if(c >= '0' && c <= '9')
                {
                    size = (c - '0');
                    order = 10L;
                    state = ByteSizeState.FRACTION;
                }
                else
                {
                    state = ByteSizeState.ERROR;
                }
                break;
            case PREFRACTION:
                // unit is complete: skip whitespace until the last digit of the number
                if(c >= '0' && c <= '9')
                {
                    state = ByteSizeState.FRACTION;
                    size = (c - '0');
                    order = 10L;
                }
                else if(c == ' ' || c == '\t')
                {
                    // keep the state
                }
                else
                {
                    state = ByteSizeState.ERROR;
                }
                break;
            case FRACTION:
                // rightmost run of digits: it is the fractional part if a separator 
                // follows, otherwise it turns out to be the whole number
                if(c >= '0' && c <= '9')
                {
                    size += (c - '0') * order;
                    order *= 10L;
                }
                else if(c == ',' || c =='.')
                {
                    state = ByteSizeState.NUMBER;
                    // digits / order is the fraction of one unit, scale it to bytes
                    sizeFraction = Math.round((double)(size * multiplier) / (double)order); 
                                    // yes, very big sizes will be wrong
                    // start accumulating the integral part from scratch
                    size = 0L;
                    order = 1L;
                }
                else
                {
                    state = ByteSizeState.ERROR;
                }
                break;
            case NUMBER:
                // integral part, to the left of the separator: digits only
                if(c >= '0' && c <= '9')
                {
                    size += (c - '0') * order;
                    order *= 10;
                }
                else
                {
                    state = ByteSizeState.ERROR;
                }
                break;
            default:
                state = ByteSizeState.ERROR;
                
            }
        }
        
        // the input is valid only if the scan ended while reading digits
        if(state == ByteSizeState.FRACTION || state == ByteSizeState.NUMBER)
        {
            return size * multiplier + sizeFraction;
        }
        else
        {
            return -1L; // error
        }
    }
1281     
1282 }
1283