1 //
2 //Copyright (c) 2003, 2004 Caltha - Gajda, Krzewski, Mach, Potempski Sp.J.
3 //All rights reserved.
4 //
5 //Redistribution and use in source and binary forms, with or without modification,
6 //are permitted provided that the following conditions are met:
7 //
8 //* Redistributions of source code must retain the above copyright notice,
9 //this list of conditions and the following disclaimer.
10 //* Redistributions in binary form must reproduce the above copyright notice,
11 //this list of conditions and the following disclaimer in the documentation
12 //and/or other materials provided with the distribution.
13 //* Neither the name of the Caltha - Gajda, Krzewski, Mach, Potempski Sp.J.
14 //nor the names of its contributors may be used to endorse or promote products
15 //derived from this software without specific prior written permission.
16 //
17 //THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 //AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 //WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 //IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
21 //INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22 //BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
23 //OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
24 //WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 //ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 //POSSIBILITY OF SUCH DAMAGE.
27 //
28
29 package org.objectledge.encodings;
30
31 import java.io.ByteArrayOutputStream;
32 import java.io.IOException;
33 import java.io.OutputStreamWriter;
34 import java.io.UnsupportedEncodingException;
35 import java.io.Writer;
36
/**
 * Tool for encoding URLs in a less restrictive manner, i.e. allowing the / (slash) character in
 * encoded values, which is useful for putting path values in query string fields.
 *
 * <p>ASCII letters, ASCII digits and the characters <code>- _ . * /</code> are copied to the
 * output unchanged. A space becomes either <code>+</code> or <code>%20</code>, depending on the
 * method used. Every other character is converted to bytes in the requested encoding and each
 * byte is written as an upper case <code>%XX</code> escape.</p>
 *
 * @author <a href="mailto:dgajda@caltha.pl">Damian Gajda</a>
 * @version $Id: URLEncoder.java,v 1.7 2006/02/08 18:21:20 zwierzem Exp $
 */
public class URLEncoder
{
    /** Indexed by character code (0-255): <code>true</code> for characters copied unescaped. */
    private static final boolean[] PASS_THROUGH = new boolean[256];

    /** Upper case hexadecimal digits, indexed by nibble value. */
    private static final String HEX_CHARS = "0123456789ABCDEF";

    /** Character encoded in place of a surrogate that is not part of a valid pair. */
    private static final char UNPAIRED_SURROGATE_REPLACEMENT = '?';

    static
    {
        // array elements default to false, only the pass-through characters need marking
        for (int i = 'a'; i <= 'z'; i++)
        {
            PASS_THROUGH[i] = true;
        }
        for (int i = 'A'; i <= 'Z'; i++)
        {
            PASS_THROUGH[i] = true;
        }
        for (int i = '0'; i <= '9'; i++)
        {
            PASS_THROUGH[i] = true;
        }
        PASS_THROUGH['-'] = true;
        PASS_THROUGH['_'] = true;
        PASS_THROUGH['.'] = true;
        PASS_THROUGH['*'] = true;
        PASS_THROUGH['/'] = true;
    }

    /**
     * Encodes a given text as a query string value using the chosen encoding. Spaces are
     * encoded as <code>+</code>.
     *
     * @param text Text to be encoded, may be <code>null</code>.
     * @param encodingName name of a chosen encoding.
     * @return encoded text, <code>null</code> if the text was <code>null</code>.
     * @throws UnsupportedEncodingException if the requested encoding is not supported
     *         (not checked when the text is <code>null</code> or empty).
     */
    public String encodeQueryStringValue(String text, String encodingName)
        throws UnsupportedEncodingException
    {
        return encode(text, encodingName, true);
    }

    /**
     * Encodes a given text as a content path using the chosen encoding. Spaces are encoded
     * as <code>%20</code>.
     *
     * @param text Text to be encoded, may be <code>null</code>.
     * @param encodingName name of a chosen encoding.
     * @return encoded text, <code>null</code> if the text was <code>null</code>.
     * @throws UnsupportedEncodingException if the requested encoding is not supported
     *         (not checked when the text is <code>null</code> or empty).
     */
    public String encodeContentPath(String text, String encodingName)
        throws UnsupportedEncodingException
    {
        return encode(text, encodingName, false);
    }

    /**
     * Encodes a given text as a query string value or content path using the chosen encoding.
     *
     * <p>A high surrogate followed by a low surrogate is encoded as a single character. A
     * surrogate which is not part of such a pair cannot be encoded, so the escape sequence of
     * <code>?</code> is emitted in its place.</p>
     *
     * @param text Text to be encoded, may be <code>null</code>.
     * @param encodingName name of a chosen encoding.
     * @param isQSValue <code>true</code> for encoding a query string value (space becomes
     *        <code>+</code>), <code>false</code> for a content path (space becomes
     *        <code>%20</code>).
     * @return encoded text, <code>null</code> if the text was <code>null</code>.
     * @throws UnsupportedEncodingException if the requested encoding is not supported
     *         (not checked when the text is <code>null</code> or empty).
     */
    private String encode(String text, String encodingName, boolean isQSValue)
        throws UnsupportedEncodingException
    {
        if(text == null || text.length() == 0)
        {
            return text;
        }

        int length = text.length();
        StringBuilder outputBuf = new StringBuilder(length*2);

        // a single writer is reused for all escaped characters, the byte buffer behind it is
        // emptied after each of them
        ByteArrayOutputStream os = new ByteArrayOutputStream(10);
        Writer encodingWriter = new OutputStreamWriter(os, encodingName);

        for (int i = 0; i < length; i++)
        {
            char c = text.charAt(i);

            if(c < 256 && PASS_THROUGH[c])
            {
                outputBuf.append(c);
            }
            else if(c == ' ')
            {
                outputBuf.append(isQSValue ? "+" : "%20");
            }
            else
            {
                try
                {
                    if(Character.isHighSurrogate(c) && i+1 < length
                        && Character.isLowSurrogate(text.charAt(i+1)))
                    {
                        // supplementary character - both halves must reach the encoder together
                        encodingWriter.write(text, i, 2);
                        // avoid reading the low surrogate again
                        i++;
                    }
                    else if(Character.isHighSurrogate(c) || Character.isLowSurrogate(c))
                    {
                        // An unpaired high surrogate written as is would stay buffered inside
                        // the writer and corrupt the next escaped character, so unpaired
                        // surrogates are substituted before they reach the writer.
                        encodingWriter.write(UNPAIRED_SURROGATE_REPLACEMENT);
                    }
                    else
                    {
                        encodingWriter.write(c);
                    }

                    // dump encoded bytes and store them as hex escapes
                    encodingWriter.flush();
                    appendEscaped(outputBuf, os.toByteArray());
                }
                catch(IOException e)
                {
                    // not expected for an in-memory stream, but losing characters silently
                    // would produce a wrong URL
                    throw new IllegalStateException("Failed to encode character at index " + i
                        + " using encoding " + encodingName, e);
                }
                finally
                {
                    // clean accumulated bytes
                    os.reset();
                }
            }
        }
        return outputBuf.toString();
    }

    /**
     * Appends each of the given bytes to the buffer as an upper case <code>%XX</code> escape.
     *
     * @param outputBuf buffer receiving the escapes.
     * @param bytes bytes to be escaped.
     */
    private static void appendEscaped(StringBuilder outputBuf, byte[] bytes)
    {
        for (int j = 0; j < bytes.length; j++)
        {
            outputBuf.append('%');
            outputBuf.append(HEX_CHARS.charAt((bytes[j] >> 4) & 0xf));
            outputBuf.append(HEX_CHARS.charAt(bytes[j] & 0xf));
        }
    }
}