View Javadoc

1   /*
2    * Copyright 2000-2004 The Apache Software Foundation.
3    * 
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * 
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    * 
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package org.apache.jetspeed.util.rewriter;
18  
19  // java.io
20  import java.io.IOException;
21  import java.io.CharArrayWriter;
22  import java.io.Reader;
23  
24  // java.net
25  import java.net.MalformedURLException;
26  import java.net.URL;
27  import java.net.URLEncoder;                 
28  
29  // this makes it dependent on Swing...need an abstraction WTP
30  import javax.swing.text.html.HTML;
31  import javax.swing.text.MutableAttributeSet;
32   
/**
 * Proxy Rewriter for rewriting HTML content with links back to a proxy host.
 * Links are rewritten either as proxied URLs or as full web-application URLs,
 * never as relative URLs.
 * Given a relative URL, such as "/content/images/my.gif", it can be rewritten as either
 * a proxied URL, for example:
 *
 *   "http://proxyserver/proxy?js_path=/content/images/my.gif"
 *
 * or a full path to the URL on the web server:
 *
 *   "http://www.webserver.com/content/images/my.gif"
 *
 * @author <a href="mailto:taylor@apache.org">David Sean Taylor</a>
 * @version $Id: ProxyRewriter.java,v 1.3 2004/02/23 03:18:59 jford Exp $
 */
50  
51  public class ProxyRewriter implements Rewriter
52  {
53      /*
54       * Construct a basic HTML Rewriter
55       *
56       */
57      public ProxyRewriter()
58      {
59      }
60  
61      /*
62       * Entry point into rewriting HTML content.
63       *
64       * Reads stream from proxied host, runs configured HTML parser against that stream,
65       * rewriting relevant links, and writes the parsed stream back to the client.
66       *
67       * @param input the HTML input stream.
68       * @param proxyRoot the base URL of the proxy server.
69       * @param baseURL the base URL of the target host.
70       * @return the rewritten HTML output stream.
71       *
72       * @exception MalformedURLException a servlet exception.
73       */
74      public String rewrite(Reader input, 
75                            String proxyRoot, 
76                            String baseURL)
77                                 throws MalformedURLException
78      {
79          String rewrittenHTML = "";
80          
81          this.proxyRoot = proxyRoot;
82          this.baseURL = baseURL;
83          HTMLParserAdaptor parser = new SwingParserAdaptor(this);
84          rewrittenHTML = parser.run(input);
85  
86          return rewrittenHTML;
87      }
88  
89      private String proxyRoot;
90      private String baseURL;
91  
92      public final static String DEFAULT_PROXY_PARAM = "js_path";
93  
94      /*    
95       * This callback is called by the HTMLParserAdaptor implementation to write
96       * back all rewritten URLs to point to the proxy server.
97       * Given the targetURL, rewrites the link as a link back to the proxy server.
98       *
99       * @param targetURL the URL to be rewritten back to the proxy server.
100      * @param baseURL the base URL of the target host.
101      * @param proxyURL the base URL of the proxy server.
102      * @return the rewritten URL to the proxy server.
103      *
104      * @exception MalformedURLException a servlet exception.
105      */
106     public String generateNewUrl( String targetURL, HTML.Tag tag, HTML.Attribute attribute)
107                                 //  String  targetURL, 
108                                 // String     baseURL,
109                                 // String     proxyURL,
110                                 // boolean proxied)
111     {
112         try {                
113 
114             URL full = new URL(new URL(proxyRoot), targetURL);
115             String fullPath = full.toString();
116 
117             StringBuffer buffer = new StringBuffer(proxyRoot.toString());
118             buffer.append("?");
119              buffer.append(DEFAULT_PROXY_PARAM);
120             buffer.append("=");
121             buffer.append(URLEncoder.encode(fullPath));
122             String proxiedPath = buffer.toString().replace('&', '@');
123             return proxiedPath;
124 
125         } 
126         catch (Throwable t)
127         {
128             //FIXME: transient print to debug...
129             System.err.println( "HTMLRewriter: BASE=" + proxyRoot);
130             System.err.println( "target=" + targetURL);
131             return URLEncoder.encode(targetURL);    
132         }
133 
134     }
135 
136     
137     /*
138      * Returns true if all rewritten URLs should be sent back to the proxy server.
139      *
140      * @return true if all URLs are rewritten back to proxy server.
141      */
142     public boolean proxyAllTags()
143     {
144         return true; //false;
145     }
146 
147     public String rewrite(Reader input, String baseURL)
148                                throws MalformedURLException
149     {
150         String rewrittenHTML = "";
151         this.baseURL = baseURL;
152 
153         HTMLParserAdaptor parser = new SwingParserAdaptor(this);
154         rewrittenHTML = parser.run(input);
155 
156         return rewrittenHTML;
157     }
158 
159 
160     public static byte[] rewriteScript(String script, 
161                                        String url,
162                                        String proxyHost,
163                                        String base)
164                   throws IOException 
165     {
166         int baseLength = base.length();
167 
168         int totalScriptLength = script.length();
169         CharArrayWriter writer = new CharArrayWriter(totalScriptLength + 100);
170         char chars[] = script.toCharArray();
171         boolean translating = false;        
172 
173         // now rewrite the script stream
174         for (int ix=0; ix < chars.length; ix++)         
175         {
176             if (chars[ix] == '"')
177             {
178                 //int endpos= ix + len + 1;
179                 if (translating) 
180                     translating = false;
181                 else if (false == translating ) //&& 
182                   //       endpos < totalScriptLength)
183                 {
184                         translating = true;
185                         writer.write(chars[ix]);
186 
187                         if (!findImage(chars, ix + 1))
188                             continue;
189 
190                         String trans = translate(proxyHost, base);
191                         writer.write(trans);
192                         if (chars[ix+1] != PATH_SEPARATOR && base.charAt(baseLength - 1) != PATH_SEPARATOR) 
193                             writer.write(PATH_SEPARATOR);
194                         if (chars[ix+1] == PATH_SEPARATOR && base.charAt(baseLength - 1) == PATH_SEPARATOR)
195                             ix++;
196 
197                         continue;
198                 }
199             }
200             if (translating && chars[ix] == '&') 
201                 writer.write('@');
202             else
203                 writer.write(chars[ix]);
204         }
205 
206         return writer.toString().getBytes();
207     }
208 
209 
210     public static String translate(String proxyURL, String targetURL)
211      {
212          StringBuffer buffer = new StringBuffer(proxyURL);
213          buffer.append("?");
214          buffer.append(DEFAULT_PROXY_PARAM);
215          buffer.append("=");
216          buffer.append(targetURL.replace('&', '@'));
217          String proxiedPath = buffer.toString();                
218 
219          return proxiedPath;
220      }
221 
222     /*
223      * Finds an image hyperlink in a quoted string.
224      * The image hyperlink is found by searching through the script text, searching
225      * for references ending in typical image extensions (GIF, PNG, JPG).
226      *
227      * NOTE: this function is just the start of script-parsing.
228      * A much more robust implementation will be necessary.
229      *
230      * @param chars The character array to search.
231      * @param ix The starting index to search from in the character array.
232      * @return If the image string is found, returns true otherwise false.
233      *
234      */
235     protected static boolean findImage(char[] chars, int ix)
236     {
237         for (int iy=ix; iy < chars.length  ; iy++)
238         {
239             if (chars[iy] == '"')
240                 return false;
241             if (chars[iy] == '.')                
242             {
243                 int iw = 0;
244                 for (int iz = iy+1; iz < chars.length && iw < 3; iz++, iw++ )
245                 {
246                     if (chars[iz] == GIF[iw] || chars[iz] == PNG[iw] || chars[iz] == JPG[iw])
247                     {
248                         continue;
249                     }
250                     else
251                         return false;
252                 }
253                 if (iw == 3)
254                     return true;
255 
256                 return false;
257             }
258         }
259         return false;
260     }
261     
262     private static final char[] GIF = {'g', 'i', 'f'};
263     private static final char[] PNG = {'p', 'n', 'g'};
264     private static final char[] JPG = {'j', 'p', 'g'};
265     protected static final char PATH_SEPARATOR = '/';
266 
267     /*
268      * Simple Tag Events
269      */
270     public boolean enterSimpleTagEvent(HTML.Tag tag, MutableAttributeSet attrs)
271     {
272         return true;
273     }
274 
275     public String exitSimpleTagEvent(HTML.Tag tag, MutableAttributeSet attrs)
276     {
277         return null;
278     }
279 
280     /*
281      * Start Tag Events
282      */
283     public boolean enterStartTagEvent(HTML.Tag tag, MutableAttributeSet attrs)
284     {
285         return true;
286     }
287 
288     public String exitStartTagEvent(HTML.Tag tag, MutableAttributeSet attrs)
289     {
290         return null;
291     }
292 
293     /*
294      * Exit Tag Events
295      */
296     public boolean enterEndTagEvent(HTML.Tag tag)
297     {
298         return true;
299     }
300 
301     public String exitEndTagEvent(HTML.Tag tag)
302     {
303         return null;
304     }
305 
306 
307     /*
308      * Convert Tag Events
309      */
310     public void convertTagEvent(HTML.Tag tag, MutableAttributeSet attrs)
311     {
312     }
313 
314 	public boolean enterText(char[] values, int param)
315 	{
316 		return false;
317 	}
318 }
319 
320