/*
 * Copyright 2000-2001,2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jetspeed.util;
181920/***21Take a URI and encode it so that it can be stored on all filesystems and HTTP22values 2324@author <a href="mailto:burton@apache.org">Kevin A. Burton</a>25@author <a href="mailto:sgala@hisitech.com">Santiago Gala</a>26@version $Id: URIEncoder.java,v 1.11 2004/02/23 03:23:42 jford Exp $27*/28publicclassURIEncoder {
293031/***32 A list of invalid characters that can't exist within filenames. If they 33 appear then the DiskCache will escape them. The current list is in part34 based on Microsoft Knowledge Base article Q177506 (because DOS filesystems35 are more generally limited than UNIX filesystems).3637 SGP: Windows NT refuses to take "?", so I add it to the list.38 Additionally, if we encode "?", the jdk runtime logic decodes it twice 39 for "file:" urls, giving a filename with a space in it. I have fixed 40 it in JetspeedDiskCacheEntry.java, avoiding the creation of a new URL when41 getFile() is not null.42 */43publicstaticfinal String[] INVALID_CHARACTERS = { "//",
44"/",
45":",
46"*",
47"\"",
48"<",
49">",
50"|",
51"+",
52"?" };
53publicstaticfinal String[] CODED_CHARACTERS = { "#" + (int)'//' + ";",
54"#" + (int)'/' + ";",
55"#" + (int)':' + ";",
56"#" + (int)'*' + ";",
57"#" + (int)'"' + ";",
58"#" + (int)'<' + ";",
59"#" + (int)'>' + ";",
60"#" + (int)'|' + ";",
61"#" + (int)'+' + ";",
62"#" + (int)'?' + ";"63 };
6465/***66 Encode the given URI67 */68publicstatic String encode( String uri ) {
6970if ( uri == null ) {
71thrownew IllegalArgumentException( "URI may not be null. " );
72 }
7374/*7576 Very basic encoding... should work for most circumstances.7778 files like http://www.apache.org:80/index.html7980 will be changed to:8182 http_www.apache.org___80.index.html8384 - a "_" -> "__"85 - a "://" -> "_" 86 - a "/" -> "_" 87 - a ":" -> "___"88 */8990 StringBuffer buffer = new StringBuffer( uri );
91 StringUtils.replaceAll( buffer, "_", "__" );
92 StringUtils.replaceAll( buffer, "://", "_" );
93 StringUtils.replaceAll( buffer, "/", "_" );
94 StringUtils.replaceAll( buffer, ":", "___" );
959697//if there are any characters that can't be stored in a filesystem encode98//them now99 encodeQueryData( buffer );
100101102return buffer.toString();
103 }
104105106/***107 Decode the given URI.108 */109publicstatic String decode( String uri ) {
110111if ( uri == null ) {
112thrownew IllegalArgumentException( "URI may not be null. " );
113 }
114115 String newURI = "";
116117int start = uri.indexOf("_");
118119 String protocol = null;
120121//SGP: needed if uri does not contain protocol but contains "_"122if( uri.charAt( start + 1 ) == '_' ) {
123 start = -1;
124 }
125126if ( start > -1 ) {
127 protocol = uri.substring( 0, start );
128 }
129130 newURI = uri.substring( start + 1, uri.length() );
131 StringBuffer buffer = new StringBuffer( newURI );
132133 StringUtils.replaceAll( buffer, "___", ":" );
134135 StringUtils.replaceAll( buffer, "_", "/" );
136 StringUtils.replaceAll( buffer, "_", "/" );
137138//now the original "__" should be slashes so replace them with a single "_"139 StringUtils.replaceAll( buffer, "//", "_" );
140141if ( protocol != null ) {
142 buffer.replace( 0, 0, "://" ); //prepend string
143 buffer.replace( 0, 0, protocol ); //prepend protocol144 }
145146 decodeQueryData( buffer );
147148return buffer.toString();
149 }
150151/***152 <p>153 If this data contains any INVALID_CHARACTERS encode the data into a target154 String.155 </p>156157 <p>158 NOTE: the algorithm between encode and decode is shared, if you modify one159 you should modify the other.160 </p>161 @see decode(String data)162 */163privatestatic StringBuffer encodeQueryData( StringBuffer data ) {
164165for (int i = 0; i < INVALID_CHARACTERS.length; ++i ) {
166167 String source = INVALID_CHARACTERS[i];
168169 String coded = CODED_CHARACTERS[i];
170171 data = StringUtils.replaceAll( data, source, coded );
172173 }
174175return data;
176 }
177178/***179 <p>180 If this data contains any encoded INVALID_CHARACTERS, decode the data back 181 into the source string182 </p>183184 <p>185 NOTE: the algorithm between encode and decode is shared, if you modify one186 you should modify the other.187 </p>188 @see encode(String data)189 */190privatestatic StringBuffer decodeQueryData( StringBuffer data ) {
191192for (int i = 0; i < INVALID_CHARACTERS.length; ++i ) {
193194 String source = INVALID_CHARACTERS[i];
195196 String coded = CODED_CHARACTERS[i];
197198 data = StringUtils.replaceAll( data, coded, source );
199200 }
201202return data;
203 }
204205206 }