View Javadoc

1   // 
2   //Copyright (c) 2003, 2004 Caltha - Gajda, Krzewski, Mach, Potempski Sp.J. 
3   //All rights reserved. 
4   //   
5   //Redistribution and use in source and binary forms, with or without modification,  
6   //are permitted provided that the following conditions are met: 
7   //   
8   //* Redistributions of source code must retain the above copyright notice,  
9   //this list of conditions and the following disclaimer. 
10  //* Redistributions in binary form must reproduce the above copyright notice,  
11  //this list of conditions and the following disclaimer in the documentation  
12  //and/or other materials provided with the distribution. 
13  //* Neither the name of the Caltha - Gajda, Krzewski, Mach, Potempski Sp.J.  
14  //nor the names of its contributors may be used to endorse or promote products  
15  //derived from this software without specific prior written permission. 
16  // 
17  //THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"  
18  //AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED  
19  //WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
20  //IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,  
21  //INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,  
22  //BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 
23  //OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,  
24  //WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)  
25  //ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE  
26  //POSSIBILITY OF SUCH DAMAGE. 
27  //
28  
29  package org.objectledge.encodings;
30  
31  import java.io.ByteArrayOutputStream;
32  import java.io.IOException;
33  import java.io.OutputStreamWriter;
34  import java.io.UnsupportedEncodingException;
35  import java.io.Writer;
36  
/**
 * Tool for encoding URLs in a less restrictive manner, i.e. allowing the <code>/</code> (slash)
 * character in encoded values, which is useful for putting path values in query string fields.
 *
 * <p>ASCII letters, digits and the characters <code>- _ . * /</code> are copied to the output
 * unchanged. A space becomes <code>+</code> in a query string value and <code>%20</code> in a
 * content path. Every other character is converted to bytes using the requested encoding and
 * each byte is written as <code>%XX</code> with upper case hexadecimal digits.</p>
 *
 * @author <a href="mailto:dgajda@caltha.pl">Damian Gajda</a>
 * @version $Id: URLEncoder.java,v 1.7 2006/02/08 18:21:20 zwierzem Exp $
 */
public class URLEncoder
{
    /** Flags, indexed by character code, marking the characters copied to the output as is. */
    private static final boolean[] PASS_THROUGH = new boolean[256];

    /** Upper case hexadecimal digits, indexed by the value of a half-byte. */
    private static final String HEX_CHARS = "0123456789ABCDEF";

    static
    {
        // array elements start out as false, so only the allowed characters need to be marked
        for (int i = 'a'; i <= 'z'; i++)
        {
            PASS_THROUGH[i] = true;
        }
        for (int i = 'A'; i <= 'Z'; i++)
        {
            PASS_THROUGH[i] = true;
        }
        for (int i = '0'; i <= '9'; i++)
        {
            PASS_THROUGH[i] = true;
        }
        PASS_THROUGH['-'] = true;
        PASS_THROUGH['_'] = true;
        PASS_THROUGH['.'] = true;
        PASS_THROUGH['*'] = true;
        PASS_THROUGH['/'] = true;
    }

    /**
     * Encodes a given text as a query string value, using the chosen character encoding.
     * Spaces are written as <code>+</code>.
     *
     * @param text Text to be encoded, may be <code>null</code>
     * @param encodingName name of a chosen encoding.
     * @return encoded text, <code>null</code> if the text was <code>null</code>
     * @throws UnsupportedEncodingException if the requested encoding is not supported.
     */
    public String encodeQueryStringValue(String text, String encodingName)
        throws UnsupportedEncodingException
    {
        return encode(text, encodingName, true);
    }

    /**
     * Encodes a given text as a content path, using the chosen character encoding.
     * Spaces are written as <code>%20</code>.
     *
     * @param text Text to be encoded, may be <code>null</code>
     * @param encodingName name of a chosen encoding.
     * @return encoded text, <code>null</code> if the text was <code>null</code>
     * @throws UnsupportedEncodingException if the requested encoding is not supported.
     */
    public String encodeContentPath(String text, String encodingName)
        throws UnsupportedEncodingException
    {
        return encode(text, encodingName, false);
    }

    /**
     * Encodes a given text as a query string value or content path, using the chosen character
     * encoding.
     *
     * <p>A surrogate pair is encoded as a single character. A high surrogate that is not followed
     * by a low surrogate cannot be encoded and is written as an escaped question mark at the
     * position where it occurs.</p>
     *
     * @param text Text to be encoded
     * @param encodingName name of a chosen encoding.
     * @param isQSValue <code>true</code> for encoding a query string value
     * @return encoded text, <code>null</code> for <code>null</code> text, the text itself when
     *         it is empty
     * @throws UnsupportedEncodingException if the requested encoding is not supported.
     */
    private String encode(String text, String encodingName, boolean isQSValue)
        throws UnsupportedEncodingException
    {
        if(text == null)
        {
            return null;
        }
        if(text.length() == 0)
        {
            return text;
        }

        int length = text.length();
        StringBuilder outputBuf = new StringBuilder(length*2);

        // one writer serves the whole text, the byte buffer behind it is emptied after every
        // encoded character
        ByteArrayOutputStream os = new ByteArrayOutputStream(10);
        Writer encodingWriter = new OutputStreamWriter(os, encodingName);

        for (int i = 0; i < length; i++)
        {
            char c = text.charAt(i);

            if(c < 256 && PASS_THROUGH[c])
            {
                outputBuf.append(c);
            }
            else if(c == ' ')
            {
                outputBuf.append(isQSValue ? "+" : "%20");
            }
            else
            {
                try
                {
                    if(Character.isHighSurrogate(c))
                    {
                        if(i+1 < length && Character.isLowSurrogate(text.charAt(i+1)))
                        {
                            // both halves of the pair have to reach the encoder together
                            encodingWriter.write(text, i, 2);
                            // avoid reading the low surrogate again
                            i++;
                        }
                        else
                        {
                            // The writer would hold back an unpaired high surrogate until the
                            // next write, so its bytes would show up after characters that
                            // follow it, or never. A question mark, the substitute encoders
                            // commonly use for malformed input, is written in its place.
                            encodingWriter.write('?');
                        }
                    }
                    else
                    {
                        encodingWriter.write(c);
                    }

                    // push the encoded bytes into the buffer and dump them as hex
                    encodingWriter.flush();
                    appendHex(outputBuf, os.toByteArray());
                }
                catch(IOException ignored)
                {
                    // The writer targets an in-memory buffer that this method never closes,
                    // so a failure is not expected; as before, the character is skipped
                    // should one occur.
                }
                finally
                {
                    // clean accumulated bytes
                    os.reset();
                }
            }
        }
        return outputBuf.toString();
    }

    /**
     * Appends every byte as a percent sign followed by two upper case hexadecimal digits.
     *
     * @param outputBuf buffer receiving the escaped bytes
     * @param bytes bytes to be escaped
     */
    private static void appendHex(StringBuilder outputBuf, byte[] bytes)
    {
        for (int j = 0; j < bytes.length; j++)
        {
            outputBuf.append('%');
            outputBuf.append(HEX_CHARS.charAt((bytes[j] >> 4) & 0xf));
            outputBuf.append(HEX_CHARS.charAt(bytes[j] & 0xf));
        }
    }
}