View Javadoc

1   /*
2    * Copyright 2000-2001,2004 The Apache Software Foundation.
3    * 
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * 
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    * 
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package org.apache.jetspeed.util;
18  
19  
20  /***
21  Take a URI and encode it so that it can be stored on all filesystems and HTTP
22  values 
23  
24  @author <a href="mailto:burton@apache.org">Kevin A. Burton</a>
25  @author <a href="mailto:sgala@hisitech.com">Santiago Gala</a>
26  @version $Id: URIEncoder.java,v 1.11 2004/02/23 03:23:42 jford Exp $
27  */
28  public class URIEncoder {
29  
30  
31      /***
32      A list of invalid characters that can't exist within filenames.  If they 
33      appear then the DiskCache will escape them.  The current list is in part
34      based on Microsoft Knowledge Base article Q177506 (because DOS filesystems
35      are more generally limited than UNIX filesystems).
36  
37      SGP: Windows NT refuses to take "?", so I add it to the list.
38      Additionally, if we encode "?", the jdk runtime logic decodes it twice 
39      for "file:" urls, giving a filename with a space in it. I have fixed 
40      it in JetspeedDiskCacheEntry.java, avoiding the creation of a new URL when
41      getFile() is not null.
42      */
43      public static final String[] INVALID_CHARACTERS = { "//", 
44                                                          "/", 
45                                                          ":", 
46                                                          "*", 
47                                                          "\"", 
48                                                          "<", 
49                                                          ">", 
50                                                          "|", 
51                                                          "+", 
52                                                          "?" };
53      public static final String[] CODED_CHARACTERS = { "#" + (int)'//' + ";", 
54                                                        "#" + (int)'/' + ";",
55                                                        "#" + (int)':' + ";",
56                                                        "#" + (int)'*' + ";",
57                                                        "#" + (int)'"' + ";",
58                                                        "#" + (int)'<' + ";",
59                                                        "#" + (int)'>' + ";",
60                                                        "#" + (int)'|' + ";",
61                                                        "#" + (int)'+' + ";",
62                                                        "#" + (int)'?' + ";"
63      };
64      
65      /***
66      Encode the given URI
67      */
68      public static String encode( String uri ) {
69  
70          if ( uri == null ) {
71              throw new IllegalArgumentException( "URI may not be null. " );
72          }
73          
74          /*
75          
76          Very basic encoding... should work for most circumstances.
77          
78          files like http://www.apache.org:80/index.html
79          
80          will be changed to:
81          
82          http_www.apache.org___80.index.html
83          
84          - a "_"         ->  "__"
85          - a "://"       ->  "_"    
86          - a "/"       ->  "_"    
87          - a ":"       ->  "___"
88          */
89          
90          StringBuffer buffer = new StringBuffer( uri );
91          StringUtils.replaceAll( buffer, "_", "__" );
92          StringUtils.replaceAll( buffer, "://", "_" );
93          StringUtils.replaceAll( buffer, "/", "_" );
94          StringUtils.replaceAll( buffer, ":", "___" );
95  
96          
97          //if there are any characters that can't be stored in a filesystem encode
98          //them now
99          encodeQueryData( buffer );
100         
101         
102         return buffer.toString();
103     }
104 
105 
106     /***
107     Decode the given URI.
108     */
109     public static String decode( String uri ) {
110 
111         if ( uri == null ) {
112             throw new IllegalArgumentException( "URI may not be null. " );
113         }
114         
115         String newURI = "";
116 
117         int start = uri.indexOf("_");
118 
119         String protocol = null;
120         
121         //SGP: needed if uri does not contain protocol but contains "_"
122         if( uri.charAt( start + 1 ) == '_' ) {
123             start = -1;
124         }
125 
126         if ( start > -1 ) {
127             protocol = uri.substring( 0, start );
128         }
129         
130         newURI = uri.substring( start + 1, uri.length() );
131         StringBuffer buffer = new StringBuffer( newURI );
132 
133         StringUtils.replaceAll( buffer, "___", ":" );
134 
135         StringUtils.replaceAll( buffer, "_", "/" );
136         StringUtils.replaceAll( buffer, "_", "/" );
137         
138         //now the original "__" should be slashes so replace them with a single "_"
139         StringUtils.replaceAll( buffer, "//", "_" );
140       
141         if ( protocol != null ) {
142             buffer.replace( 0, 0, "://" ); //prepend string
143             buffer.replace( 0, 0, protocol ); //prepend protocol
144         }
145         
146         decodeQueryData( buffer );
147         
148         return buffer.toString();
149     }
150     
151     /***
152     <p>
153     If this data contains any INVALID_CHARACTERS encode the data into a target
154     String.
155     </p>
156     
157     <p>
158     NOTE: the algorithm between encode and decode is shared, if you modify one
159     you should modify the other.
160     </p>
161     @see decode(String data)
162     */
163     private static StringBuffer encodeQueryData( StringBuffer data ) {
164         
165         for (int i = 0; i < INVALID_CHARACTERS.length; ++i ) {
166             
167             String source = INVALID_CHARACTERS[i];
168             
169             String coded = CODED_CHARACTERS[i]; 
170             
171             data = StringUtils.replaceAll( data, source, coded );            
172             
173         }
174         
175         return data;
176     }
177     
178     /***
179     <p>
180     If this data contains any encoded INVALID_CHARACTERS, decode the data back 
181     into the source string
182     </p>
183     
184     <p>
185     NOTE: the algorithm between encode and decode is shared, if you modify one
186     you should modify the other.
187     </p>
188     @see encode(String data)
189     */
190     private static StringBuffer decodeQueryData( StringBuffer data ) {
191         
192         for (int i = 0; i < INVALID_CHARACTERS.length; ++i ) {
193             
194             String source = INVALID_CHARACTERS[i];
195             
196             String coded = CODED_CHARACTERS[i]; 
197             
198             data = StringUtils.replaceAll( data, coded, source );            
199             
200         }
201         
202         return data;
203     }
204 
205     
206 }