View Javadoc

1   /*
2    * Copyright 2000-2004 The Apache Software Foundation.
3    * 
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * 
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    * 
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package org.apache.jetspeed.util.rewriter;
17  
18  // javax.swing.text
19  import javax.swing.text.*;
20  import javax.swing.text.html.*;
21  import javax.swing.text.html.HTMLEditorKit;
22  
23  // java.io
24  import java.io.*;
25  
26  // java.util
27  import java.util.*;
28  
29  // java.net
30  import java.net.*;
31  import org.apache.turbine.util.Log;//AAAtogli!
32  
33  
34  /*
35   * HTML Parser Adaptor for the Swing 'HotJava' parser.
36   *
37   * @author <a href="mailto:taylor@apache.org">David Sean Taylor</a>
38   * @version $Id: SwingParserAdaptor.java,v 1.6 2004/02/23 03:18:59 jford Exp $
39   */
40  
41  public class SwingParserAdaptor implements HTMLParserAdaptor
42  {
43  
44      private SwingParserAdaptor.Callback cb = new SwingParserAdaptor.Callback();
45      private String lineSeparator;
46      private boolean skippingImplied = false;
47      private Rewriter rewriter;
48      /*
49       * Construct a swing (hot java) parser adaptor
50       * Receives a Rewriter parameter, which is used as a callback when rewriting URLs.
51       * The rewriter object executes the implementation specific URL rewriting.
52       *
53       * @param rewriter The rewriter object that is called back during URL rewriting
54       */
55      public SwingParserAdaptor(Rewriter rewriter)
56      {
57          this.rewriter = rewriter;
58          lineSeparator = System.getProperty("line.separator", "\r\n");         
59      }
60  
61      /*
62       * Parses and an HTML document, rewriting all URLs as determined by the Rewriter callback
63       *
64       *
65       * @param reader The input stream reader 
66       *
67       * @throws MalformedURLException 
68       *
69       * @return An HTML-String with rewritten URLs.
70       */    
71      public String run(Reader reader)
72      throws MalformedURLException
73      {
74          HTMLEditorKit.Parser parser = new SwingParserAdaptor.ParserGetter().getParser();        
75  
76          String res ="";
77          try
78          {
79              parser.parse(reader, cb, true);
80              res = cb.getResult(); 
81          } catch (Exception e)
82          {
83              e.printStackTrace();
84        //Log.info("Exception occurred:" + e.toString());AAAtogli!!!
85        //Log.info("Exception occurred:" + e.printStackTrace());
86              throw new MalformedURLException(e.toString());
87          }
88          return res;
89      }
90  
91  
92      /*
93       * This Class is needed, because getParser is protected and therefore 
94       *  only accessibly by a subclass
95       */
96      class ParserGetter extends HTMLEditorKit
97      {
98  
99          public HTMLEditorKit.Parser getParser(){
100             return super.getParser();
101         }
102     } 
103 
104 
105     /*
106      *  Swing Parser Callback from the HTMLEditorKit.
107      * This class handles all SAX-like events during parsing.
108      *
109      */
110     class Callback extends HTMLEditorKit.ParserCallback
111     {
112 
113 
114         // either handling of <FORM> is buggy, or I made some weird mistake ... 
115         // ... JDK 1.3 sends double "</form>"-tags on closing <form>
116         private boolean inForm = false; 
117         private boolean inScript = false; 
118         private boolean emit = true;
119         private boolean simpleTag = false;
120 
121         private StringWriter result = new StringWriter();
122 
123         private Callback () 
124         {
125         }
126 
127         //
128         // -------------- Hot Java event callbacks... --------------------
129         //
130 
131         /*
132          *  Hot Java event callback for text (all data in between tags)
133          * 
134          * @param values The array of characters containing the text.
135          */
136         public void handleText(char[] values,int param) 
137         {
138              if (false == emit)                               
139                  return;                                      
140              if (values[0] == '>')                            
141                  return;     
142              if (false == rewriter.enterText(values, param))
143                 return;                    
144 
145             addToResult(values);
146         }
147 
148         /*
149          * Hot Java event callback for handling a simple tag (without begin/end)
150          *
151          * @param tag The HTML tag being handled.
152          * @param attrs The mutable HTML attribute set for the current HTML element.         
153          * @param position the position of the tag.         
154          *
155          */
156         public void handleSimpleTag(HTML.Tag tag,MutableAttributeSet attrs,int param) 
157         {
158             simpleTag = true;
159             if (false == rewriter.enterSimpleTagEvent(tag, attrs))
160                 return;
161 
162             if (false == isValidFragmentTag(tag))
163                 return;
164 
165             appendTagToResult(tag,attrs);        
166             if (tag.toString().equalsIgnoreCase("param") ||
167                 tag.toString().equalsIgnoreCase("object") ||
168                 tag.toString().equalsIgnoreCase("embed"))
169             {
170                 result.write(lineSeparator);
171             }
172             simpleTag = false;
173             String appended = rewriter.exitSimpleTagEvent(tag, attrs);
174             if (null != appended)
175                 result.write(appended);
176         }
177 
178         /*
179          * Hot Java event callback for handling a start tag.
180          *
181          * @param tag The HTML tag being handled.
182          * @param attrs The mutable HTML attribute set for the current HTML element.         
183          * @param position the position of the tag.         
184          *
185          */
186         public void handleStartTag(HTML.Tag tag,  MutableAttributeSet attrs, int position) 
187         {
188             if (false == rewriter.enterStartTagEvent(tag, attrs))
189                 return;
190 
191             if (tag == HTML.Tag.HEAD)
192             {
193                 emit = false;
194                 return;
195             }
196 
197            if (false == isValidFragmentTag(tag))
198                 return;
199 
200             appendTagToResult(tag,attrs);
201             formatLine(tag);
202             String appended = rewriter.exitStartTagEvent(tag, attrs);
203             if (null != appended)
204                 result.write(appended);
205         }
206 
207 
208         boolean isValidFragmentTag(HTML.Tag tag)
209         {                    
210             /*
211             if (false == emit)
212                 return false;
213 
214             if (tag == HTML.Tag.HTML)  // always strip out HTML tag for fragments
215                 return false;
216 
217             if (tag == HTML.Tag.BODY)
218                 return false;
219 
220             if (tag == HTML.Tag.FRAMESET)  // always strip out FRAMESET tag for fragments
221                 return false;
222 
223             if (tag == HTML.Tag.FRAME)  
224                 return false;
225 
226             if (tag == HTML.Tag.NOFRAMES)  
227                 return false;
228               */
229             return true;
230         }
231 
232 
233         /*
234          * Hot Java event callback for handling an end tag.
235          *
236          * @param tag The HTML tag being handled.
237          * @param position the position of the tag.
238          *
239          */
240         public void handleEndTag(HTML.Tag tag, int position) 
241         {
242             if (false == rewriter.enterEndTagEvent(tag))
243                 return;
244 
245             if (tag == HTML.Tag.HEAD)
246             {
247                 emit = true;
248                 return;
249             }
250 
251            if (false == isValidFragmentTag(tag))
252                 return;
253 
254            addToResult("</").addToResult(tag).addToResult(">");
255 
256             formatLine(tag);
257             String appended = rewriter.exitEndTagEvent(tag);
258             if (null != appended)
259                 result.write(appended);
260 
261         }
262 
263 
264         /*
265          * Hot Java event callback for handling errors.
266          *
267          * @param str The error message from Swing.
268          * @param param A parameter passed to handler.
269          *
270          */
271         public void handleError(java.lang.String str,int param) 
272         {
273             // ignored
274         }
275 
276         /*
277          * Hot Java event callback for HTML comments.
278          *
279          * @param values The character array of text comments.
280          * @param param A parameter passed to handler.
281          *
282          */
283         public void handleComment(char[] values,int param) 
284         {
285             // STRIP COMMENTS: addToResult(values);
286             // this is questionable, we may need to turn this on for scripts inside comments
287         }
288 
289         /*
290          * Hot Java event callback for end of line strings.
291          *
292          * @param str The end-of-line string.
293          *
294          */
295         public void handleEndOfLineString(java.lang.String str) 
296         {
297             addToResult(str);
298         }
299 
300 
301         /*
302          * Prints new lines to make the output a little easier to read when debugging.
303          *
304          * @param tag The HTML tag being handled.         
305          *
306          */
307         private void formatLine(HTML.Tag tag)
308         {
309             if (tag.isBlock() || 
310                 tag.breaksFlow() || 
311                 tag == HTML.Tag.FRAME ||
312                 tag == HTML.Tag.FRAMESET ||
313                 tag == HTML.Tag.SCRIPT)
314             {
315                 result.write(lineSeparator);
316             }
317         }
318 
319 
320         /*
321          * Used to write tag and attribute objects to the output stream.
322          * Returns a reference to itself so that these calls can be chained.
323          *
324          * @param txt Any text to be written out to stream with toString method.
325          *            The object being written should implement its toString method.
326          * @return A handle to the this, the callback, for chaining results.
327          *
328          */
329         private Callback addToResult(Object txt)
330         {
331             // to allow for implementation using Stringbuffer or StringWriter
332             // I don't know yet, which one is better in this case
333             //if (ignoreLevel > 0 ) return this;
334 
335             try
336             {
337                 result.write(txt.toString());
338             } catch (Exception e)
339             {
340                 System.err.println("Error parsing:" + e);
341             }
342             return this;
343         }
344 
345 
346         /*
347          * Used to write all character content to the output stream.
348          * Returns a reference to itself so that these calls can be chained.
349          *
350          * @param txt Any character text to be written out directly to stream.
351          * @return A handle to the this, the callback, for chaining results.
352          *
353          */
354         private Callback addToResult(char[] txt)
355         {
356             //if (ignoreLevel > 0) return this;
357 
358             try
359             {
360 
361                 result.write(txt);
362 
363             } catch (Exception e)
364             { /* ignore */
365             }
366             return this;
367         }
368 
369         /* 
370          * Accessor to the Callback's content-String
371          *
372          * @return Cleaned and rewritten HTML-Content
373          */        
374         public String getResult() 
375         {
376             try
377             {
378                 result.flush();
379             } catch (Exception e)
380             { /* ignore */
381             }
382 
383             // WARNING: doesn't work, if you remove " " + ... but don't know why
384             String res = " " + result.toString(); 
385 
386             return res;
387         }
388 
389         /*
390          * Flushes the output stream. NOT IMPLEMENTED
391          *
392          */
393         public void flush() throws javax.swing.text.BadLocationException 
394         {
395             // nothing to do here ...
396         }
397 
398         /*
399          * Writes output to the final stream for all attributes of a given tag.
400          *
401          * @param tag The HTML tag being output.
402          * @param attrs The mutable HTML attribute set for the current HTML tag.
403          *
404          */
405         private void appendTagToResult(HTML.Tag tag, MutableAttributeSet attrs) 
406         {
407             convertURLS(tag,attrs);
408             Enumeration e = attrs.getAttributeNames();
409             addToResult("<").addToResult(tag);
410             while (e.hasMoreElements())
411             {
412                 Object attr = e.nextElement();
413                 String value = attrs.getAttribute(attr).toString();
414                 addToResult(" ").addToResult(attr).addToResult("=\"").
415                 addToResult(value).addToResult("\"");
416             }        
417             if (simpleTag)
418                 addToResult("/>");
419             else             
420                 addToResult(">");
421         }
422 
423 
424         /*
425          * Determines which HTML Tag/Element is being inspected, and calls the 
426          * appropriate converter for that context.  This method contains all the
427          * logic for determining how tags are rewritten. 
428          *
429          * TODO: it would be better to drive this logic off a state table that is not
430          * tied to the Hot Java parser.
431          *
432          * @param tag TAG from the Callback-Interface.
433          * @param attrs The mutable HTML attribute set for the current HTML element.
434          */
435 
436         private void convertURLS( HTML.Tag tag, MutableAttributeSet attrs ) 
437         {
438             rewriter.convertTagEvent(tag, attrs);
439             if ((tag == HTML.Tag.A) && 
440                 (attrs.getAttribute(HTML.Attribute.HREF) != null))
441             {
442 
443                 // ---- CHECKING <A HREF
444                 addProxiedConvertedAttribute( tag, HTML.Attribute.HREF, attrs);
445 
446             } 
447             else if (((tag == HTML.Tag.IMG || 
448                          tag == HTML.Tag.INPUT
449                         ) && 
450                         (attrs.getAttribute(HTML.Attribute.SRC) != null)
451                        ))
452             {
453 
454                 // ---- CHECKING <IMG SRC & <INPUT SRC
455                 addConvertedAttribute( tag,
456                                        HTML.Attribute.SRC, 
457                                        attrs, 
458                                        rewriter.proxyAllTags());    
459 
460             } else if (((tag == HTML.Tag.OPTION) ) && 
461                        (attrs.getAttribute(HTML.Attribute.VALUE) != null))
462             {
463                 // ---- CHECKING <OPTION 
464                 addProxiedConvertedAttribute( tag, HTML.Attribute.VALUE, attrs );
465 
466             } else if (((tag == HTML.Tag.LINK) ) && 
467                        (attrs.getAttribute(HTML.Attribute.HREF) != null))
468             {
469 
470                 // ---- CHECKING <LINK
471                 addConvertedAttribute( tag,
472                                        HTML.Attribute.HREF,
473                                        attrs,
474                                        rewriter.proxyAllTags());
475 
476             } else if ( tag == HTML.Tag.APPLET )
477             {
478 
479                 // ---- CHECKING <APPLET CODEBASE=
480                 addConvertedAttribute( tag,
481                                        HTML.Attribute.CODEBASE,
482                                        attrs,
483                                        rewriter.proxyAllTags());
484 
485             } else if ( tag == HTML.Tag.FRAME )
486             {
487 
488                 // ---- CHECKING <FRAME SRC=
489                 addProxiedConvertedAttribute( tag, HTML.Attribute.SRC, attrs);
490 
491             } else if ( tag == HTML.Tag.SCRIPT )
492             {
493                 // ---- CHECKING <SCRIPT SRC=
494                 if (attrs.getAttribute(HTML.Attribute.SRC) != null)
495                 {
496 
497                     // script is external
498                     String s = attrs.getAttribute(HTML.Attribute.SRC).toString();
499                     if (s.indexOf("%3E") == -1)
500                     {
501                         addConvertedAttribute( tag,
502                                                HTML.Attribute.SRC, 
503                                                attrs,
504                                                rewriter.proxyAllTags());
505                     }
506 
507                 } else
508                 {
509                     // script is inline
510                     //parserOff = true;
511                 }
512 
513             } else if (tag == HTML.Tag.FORM)
514             {
515 
516                 // ---- CHECKING <FORM ACTION=
517                 inForm = true; // buggy <form> handling in jdk 1.3 
518 
519                 if (attrs.getAttribute(HTML.Attribute.ACTION) == null)
520                 {
521                     // always post
522                     attrs.addAttribute(HTML.Attribute.METHOD, "POST");                      
523                     //self referencing <FORM>
524                     
525                     // attrs.addAttribute(HTML.Attribute.ACTION,
526                     //                   baseURL);
527 
528                 } else
529                 {
530                     // always post
531                     attrs.addAttribute(HTML.Attribute.METHOD, "POST");                      
532                     addProxiedConvertedAttribute( tag, HTML.Attribute.ACTION, attrs);
533 
534                 }
535 
536             } else if (((tag == HTML.Tag.AREA) ) && 
537                        (attrs.getAttribute(HTML.Attribute.HREF) != null))
538             {
539 
540                 // ---- CHECKING <AREA
541                 addProxiedConvertedAttribute( tag, HTML.Attribute.HREF,
542                                               attrs );
543 
544             } else if (((tag == HTML.Tag.BODY) ) && 
545                        (attrs.getAttribute(HTML.Attribute.BACKGROUND) != null))
546             {
547 
548                 // ---- CHECKING <BODY
549                 addConvertedAttribute( tag,
550                                        HTML.Attribute.BACKGROUND,
551                                        attrs,
552                                        rewriter.proxyAllTags());
553 
554             } else if (tag == HTML.Tag.TD)
555             {
556                 // ---- CHECKING <TD BACKGROUND=
557                 if (! (attrs.getAttribute(HTML.Attribute.BACKGROUND) == null))
558                 {
559                     addConvertedAttribute( tag,
560                                            HTML.Attribute.BACKGROUND,
561                                            attrs,
562                                            rewriter.proxyAllTags());
563                 }
564             }
565 
566             /*
567               if ( removeScript && (tag == HTML.Tag.SCRIPT)) {
568                 ignoreLevel ++;
569               */
570         }
571 
572         /*
573          * Converts the given attribute's URL compatible element to a proxied URL.
574          * Uses the proxy parameter to determine if the URL should be written back as a
575          * proxied URL, or as a fullpath to the original host.
576          *
577          * @param attr The HTML attribute to be proxied.
578          * @param attrs The mutable HTML attribute set for the current HTML element.
579          * @param proxy If set true, the URL is written back as a proxied URL, otherwise
580          * it is written back as a fullpath back to the original host.
581          *
582          */
583         private void addConvertedAttribute( HTML.Tag tag,
584                                             HTML.Attribute attr,
585                                             MutableAttributeSet attrs,
586                                             boolean proxy ) 
587         {
588             if (proxy)
589             {
590                 addProxiedConvertedAttribute(tag, attr,attrs);
591             } else
592             {
593                 if ( attrs.getAttribute( attr ) != null )
594                 {
595                     attrs.addAttribute( attr,
596                                         generateNewUrl( tag, attrs, attr, false ) );
597                 }
598             }
599         }
600 
601 
602         /***
603          *
604          * Converts the given attribute's URL compatible element to a proxied URL.
605          * This method will always add the proxy host prefix to the rewritten URL.
606          *
607          * @param attr The HTML attribute to be proxied.
608          * @param attrs The mutable HTML attribute set for the current HTML element.
609          *
610          */
611         private void addProxiedConvertedAttribute( HTML.Tag tag,
612                                                    HTML.Attribute attr,
613                                                    MutableAttributeSet attrs ) {
614 
615 
616 
617             if ( attrs.getAttribute( attr ) != null )
618             {
619                 String attrSource =  attrs.getAttribute( attr ).toString();
620 
621                 // special case: mailto should not be sent to the proxy server
622                 if (attrSource.startsWith("mailto:"))
623                 {
624                     attrs.addAttribute( attr,
625                                         generateNewUrl( tag, attrs, attr, true ) );
626                 } else if (attrSource.startsWith("javascript:"))
627                 {
628                     attrs.addAttribute( attr,
629                                         attrSource);
630                 } else
631                 {
632                     attrs.addAttribute( attr,
633                                         generateNewUrl( tag, attrs, attr, true ) );
634                 }
635             }
636         }
637 
638         /*
639          * Calls the rewriter's URL generator callback, which will translate the old url
640          * into a new fullpath URL, either relative to the proxy server, or a fullpath
641          * to the original web server, depending on the 'proxied' parameter.
642          * 
643          * @param oldURL The original URL, before it is tranlated.
644          * @param proxied Boolean indicator denotes if the URL should be written back
645          *        as a proxied URL (true), or as a fully addressable address to the 
646          *       original web server.
647          * @return The translated new URL.
648          *         
649          */
650         private String generateNewUrl(HTML.Tag tag,
651                                       MutableAttributeSet attrs,
652                                       HTML.Attribute attr,
653                                       boolean proxied)
654         {
655             String oldURL =  attrs.getAttribute( attr ).toString();
656             // System.out.println("Generating new url: " + oldURL);
657             return rewriter.generateNewUrl(oldURL, tag, attr);
658         }
659 
660 
661     }
662 
663 }
664 
665