1/*2 * Copyright 2000-2004 The Apache Software Foundation.3 * 4 * Licensed under the Apache License, Version 2.0 (the "License");5 * you may not use this file except in compliance with the License.6 * You may obtain a copy of the License at7 * 8 * http://www.apache.org/licenses/LICENSE-2.09 * 10 * Unless required by applicable law or agreed to in writing, software11 * distributed under the License is distributed on an "AS IS" BASIS,12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.13 * See the License for the specific language governing permissions and14 * limitations under the License.15 */16packageorg.apache.jetspeed.util.rewriter;
1718// javax.swing.text19import javax.swing.text.*;
20import javax.swing.text.html.*;
21import javax.swing.text.html.HTMLEditorKit;
2223// java.io24import java.io.*;
2526// java.util27import java.util.*;
2829// java.net30import java.net.*;
31import org.apache.turbine.util.Log;//AAAtogli!323334/*35 * HTML Parser Adaptor for the Swing 'HotJava' parser.36 *37 * @author <a href="mailto:taylor@apache.org">David Sean Taylor</a>38 * @version $Id: SwingParserAdaptor.java,v 1.6 2004/02/23 03:18:59 jford Exp $39 */4041publicclassSwingParserAdaptor implements HTMLParserAdaptor42 {
4344private SwingParserAdaptor.Callback cb = new SwingParserAdaptor.Callback();
45private String lineSeparator;
46privateboolean skippingImplied = false;
47privateRewriter rewriter;
/**
 * Constructs a Swing ("Hot Java") parser adaptor.
 *
 * The given rewriter is called back for every tag, text and URL event
 * raised while parsing and carries out the implementation specific
 * URL rewriting.
 *
 * @param rewriter The rewriter object that is called back during URL rewriting
 */
public SwingParserAdaptor(Rewriter rewriter)
{
    // Fall back to CR/LF when the system property is not defined.
    this.lineSeparator = System.getProperty("line.separator", "\r\n");
    this.rewriter = rewriter;
}
6061/*62 * Parses and an HTML document, rewriting all URLs as determined by the Rewriter callback63 *64 *65 * @param reader The input stream reader 66 *67 * @throws MalformedURLException 68 *69 * @return An HTML-String with rewritten URLs.70 */71public String run(Reader reader)
72 throws MalformedURLException
73 {
74 HTMLEditorKit.Parser parser = new SwingParserAdaptor.ParserGetter().getParser();
7576 String res ="";
77try78 {
79 parser.parse(reader, cb, true);
80 res = cb.getResult();
81 } catch (Exception e)
82 {
83 e.printStackTrace();
84//Log.info("Exception occurred:" + e.toString());AAAtogli!!!85//Log.info("Exception occurred:" + e.printStackTrace());86thrownew MalformedURLException(e.toString());
87 }
88return res;
89 }
/**
 * Exposes the parser of the HTMLEditorKit.
 *
 * HTMLEditorKit.getParser() is protected and therefore only reachable from
 * a subclass; this subclass widens it to public.
 */
class ParserGetter extends HTMLEditorKit
{
    /**
     * @return the parser supplied by the HTMLEditorKit super class.
     */
    public HTMLEditorKit.Parser getParser()
    {
        HTMLEditorKit.Parser kitParser = super.getParser();
        return kitParser;
    }
}
103104105/*106 * Swing Parser Callback from the HTMLEditorKit.107 * This class handles all SAX-like events during parsing.108 *109 */110class Callback extends HTMLEditorKit.ParserCallback
111 {
112113114// either handling of <FORM> is buggy, or I made some weird mistake ... 115// ... JDK 1.3 sends double "</form>"-tags on closing <form>116privateboolean inForm = false;
117privateboolean inScript = false;
118privateboolean emit = true;
119privateboolean simpleTag = false;
120121private StringWriter result = new StringWriter();
/**
 * Creates an empty callback. Private: instances are only created by the
 * enclosing adaptor.
 */
private Callback()
{
    // nothing to initialise beyond the field defaults
}
126127//128// -------------- Hot Java event callbacks... --------------------129//130131/*132 * Hot Java event callback for text (all data in between tags)133 * 134 * @param values The array of characters containing the text.135 */136publicvoid handleText(char[] values,int param)
137 {
138if (false == emit)
139return;
140if (values[0] == '>')
141return;
142if (false == rewriter.enterText(values, param))
143return;
144145 addToResult(values);
146 }
147148/*149 * Hot Java event callback for handling a simple tag (without begin/end)150 *151 * @param tag The HTML tag being handled.152 * @param attrs The mutable HTML attribute set for the current HTML element. 153 * @param position the position of the tag. 154 *155 */156publicvoid handleSimpleTag(HTML.Tag tag,MutableAttributeSet attrs,int param)
157 {
158 simpleTag = true;
159if (false == rewriter.enterSimpleTagEvent(tag, attrs))
160return;
161162if (false == isValidFragmentTag(tag))
163return;
164165 appendTagToResult(tag,attrs);
166if (tag.toString().equalsIgnoreCase("param") ||
167 tag.toString().equalsIgnoreCase("object") ||
168 tag.toString().equalsIgnoreCase("embed"))
169 {
170 result.write(lineSeparator);
171 }
172 simpleTag = false;
173 String appended = rewriter.exitSimpleTagEvent(tag, attrs);
174if (null != appended)
175 result.write(appended);
176 }
177178/*179 * Hot Java event callback for handling a start tag.180 *181 * @param tag The HTML tag being handled.182 * @param attrs The mutable HTML attribute set for the current HTML element. 183 * @param position the position of the tag. 184 *185 */186publicvoid handleStartTag(HTML.Tag tag, MutableAttributeSet attrs, int position)
187 {
188if (false == rewriter.enterStartTagEvent(tag, attrs))
189return;
190191if (tag == HTML.Tag.HEAD)
192 {
193 emit = false;
194return;
195 }
196197if (false == isValidFragmentTag(tag))
198return;
199200 appendTagToResult(tag,attrs);
201 formatLine(tag);
202 String appended = rewriter.exitStartTagEvent(tag, attrs);
203if (null != appended)
204 result.write(appended);
205 }
206207208boolean isValidFragmentTag(HTML.Tag tag)
209 {
210/*211 if (false == emit)212 return false;213214 if (tag == HTML.Tag.HTML) // always strip out HTML tag for fragments215 return false;216217 if (tag == HTML.Tag.BODY)218 return false;219220 if (tag == HTML.Tag.FRAMESET) // always strip out FRAMESET tag for fragments221 return false;222223 if (tag == HTML.Tag.FRAME) 224 return false;225226 if (tag == HTML.Tag.NOFRAMES) 227 return false;228 */229returntrue;
230 }
231232233/*234 * Hot Java event callback for handling an end tag.235 *236 * @param tag The HTML tag being handled.237 * @param position the position of the tag.238 *239 */240publicvoid handleEndTag(HTML.Tag tag, int position)
241 {
242if (false == rewriter.enterEndTagEvent(tag))
243return;
244245if (tag == HTML.Tag.HEAD)
246 {
247 emit = true;
248return;
249 }
250251if (false == isValidFragmentTag(tag))
252return;
253254 addToResult("</").addToResult(tag).addToResult(">");
255256 formatLine(tag);
257 String appended = rewriter.exitEndTagEvent(tag);
258if (null != appended)
259 result.write(appended);
260261 }
262263264/*265 * Hot Java event callback for handling errors.266 *267 * @param str The error message from Swing.268 * @param param A parameter passed to handler.269 *270 */271publicvoid handleError(java.lang.String str,int param)
272 {
273// ignored274 }
275276/*277 * Hot Java event callback for HTML comments.278 *279 * @param values The character array of text comments.280 * @param param A parameter passed to handler.281 *282 */283publicvoid handleComment(char[] values,int param)
284 {
285// STRIP COMMENTS: addToResult(values);286// this is questionable, we may need to turn this on for scripts inside comments287 }
288289/*290 * Hot Java event callback for end of line strings.291 *292 * @param str The end-of-line string.293 *294 */295publicvoid handleEndOfLineString(java.lang.String str)
296 {
297 addToResult(str);
298 }
299300301/*302 * Prints new lines to make the output a little easier to read when debugging.303 *304 * @param tag The HTML tag being handled. 305 *306 */307privatevoid formatLine(HTML.Tag tag)
308 {
309if (tag.isBlock() ||
310 tag.breaksFlow() ||
311 tag == HTML.Tag.FRAME ||
312 tag == HTML.Tag.FRAMESET ||
313 tag == HTML.Tag.SCRIPT)
314 {
315 result.write(lineSeparator);
316 }
317 }
318319320/*321 * Used to write tag and attribute objects to the output stream.322 * Returns a reference to itself so that these calls can be chained.323 *324 * @param txt Any text to be written out to stream with toString method.325 * The object being written should implement its toString method.326 * @return A handle to the this, the callback, for chaining results.327 *328 */329private Callback addToResult(Object txt)
330 {
331// to allow for implementation using Stringbuffer or StringWriter332// I don't know yet, which one is better in this case333//if (ignoreLevel > 0 ) return this;334335try336 {
337 result.write(txt.toString());
338 } catch (Exception e)
339 {
340 System.err.println("Error parsing:" + e);
341 }
342returnthis;
343 }
344345346/*347 * Used to write all character content to the output stream.348 * Returns a reference to itself so that these calls can be chained.349 *350 * @param txt Any character text to be written out directly to stream.351 * @return A handle to the this, the callback, for chaining results.352 *353 */354private Callback addToResult(char[] txt)
355 {
356//if (ignoreLevel > 0) return this;357358try359 {
360361 result.write(txt);
362363 } catch (Exception e)
364 { /* ignore */365 }
366returnthis;
367 }
368369/*370 * Accessor to the Callback's content-String371 *372 * @return Cleaned and rewritten HTML-Content373 */374public String getResult()
375 {
376try377 {
378 result.flush();
379 } catch (Exception e)
380 { /* ignore */381 }
382383// WARNING: doesn't work, if you remove " " + ... but don't know why384 String res = " " + result.toString();
385386return res;
387 }
388389/*390 * Flushes the output stream. NOT IMPLEMENTED391 *392 */393publicvoid flush() throws javax.swing.text.BadLocationException
394 {
395// nothing to do here ...396 }
397398/*399 * Writes output to the final stream for all attributes of a given tag.400 *401 * @param tag The HTML tag being output.402 * @param attrs The mutable HTML attribute set for the current HTML tag.403 *404 */405privatevoid appendTagToResult(HTML.Tag tag, MutableAttributeSet attrs)
406 {
407 convertURLS(tag,attrs);
408 Enumeration e = attrs.getAttributeNames();
409 addToResult("<").addToResult(tag);
410while (e.hasMoreElements())
411 {
412 Object attr = e.nextElement();
413 String value = attrs.getAttribute(attr).toString();
414 addToResult(" ").addToResult(attr).addToResult("=\"").
415 addToResult(value).addToResult("\"");
416 }
417if (simpleTag)
418 addToResult("/>");
419else420 addToResult(">");
421 }
422423424/*425 * Determines which HTML Tag/Element is being inspected, and calls the 426 * appropriate converter for that context. This method contains all the427 * logic for determining how tags are rewritten. 428 *429 * TODO: it would be better to drive this logic off a state table that is not430 * tied to the Hot Java parser.431 *432 * @param tag TAG from the Callback-Interface.433 * @param attrs The mutable HTML attribute set for the current HTML element.434 */435436privatevoid convertURLS( HTML.Tag tag, MutableAttributeSet attrs )
437 {
438 rewriter.convertTagEvent(tag, attrs);
439if ((tag == HTML.Tag.A) &&
440 (attrs.getAttribute(HTML.Attribute.HREF) != null))
441 {
442443// ---- CHECKING <A HREF444 addProxiedConvertedAttribute( tag, HTML.Attribute.HREF, attrs);
445446 }
447elseif (((tag == HTML.Tag.IMG ||
448 tag == HTML.Tag.INPUT
449 ) &&
450 (attrs.getAttribute(HTML.Attribute.SRC) != null)
451 ))
452 {
453454// ---- CHECKING <IMG SRC & <INPUT SRC455 addConvertedAttribute( tag,
456 HTML.Attribute.SRC,
457 attrs,
458 rewriter.proxyAllTags());
459460 } elseif (((tag == HTML.Tag.OPTION) ) &&
461 (attrs.getAttribute(HTML.Attribute.VALUE) != null))
462 {
463// ---- CHECKING <OPTION 464 addProxiedConvertedAttribute( tag, HTML.Attribute.VALUE, attrs );
465466 } elseif (((tag == HTML.Tag.LINK) ) &&
467 (attrs.getAttribute(HTML.Attribute.HREF) != null))
468 {
469470// ---- CHECKING <LINK471 addConvertedAttribute( tag,
472 HTML.Attribute.HREF,
473 attrs,
474 rewriter.proxyAllTags());
475476 } elseif ( tag == HTML.Tag.APPLET )
477 {
478479// ---- CHECKING <APPLET CODEBASE=480 addConvertedAttribute( tag,
481 HTML.Attribute.CODEBASE,
482 attrs,
483 rewriter.proxyAllTags());
484485 } elseif ( tag == HTML.Tag.FRAME )
486 {
487488// ---- CHECKING <FRAME SRC=489 addProxiedConvertedAttribute( tag, HTML.Attribute.SRC, attrs);
490491 } elseif ( tag == HTML.Tag.SCRIPT )
492 {
493// ---- CHECKING <SCRIPT SRC=494if (attrs.getAttribute(HTML.Attribute.SRC) != null)
495 {
496497// script is external498 String s = attrs.getAttribute(HTML.Attribute.SRC).toString();
499if (s.indexOf("%3E") == -1)
500 {
501 addConvertedAttribute( tag,
502 HTML.Attribute.SRC,
503 attrs,
504 rewriter.proxyAllTags());
505 }
506507 } else508 {
509// script is inline510//parserOff = true;511 }
512513 } elseif (tag == HTML.Tag.FORM)
514 {
515516// ---- CHECKING <FORM ACTION=517 inForm = true; // buggy <form> handling in jdk 1.3 518519if (attrs.getAttribute(HTML.Attribute.ACTION) == null)
520 {
521// always post522 attrs.addAttribute(HTML.Attribute.METHOD, "POST");
523//self referencing <FORM>524525// attrs.addAttribute(HTML.Attribute.ACTION,526// baseURL);527528 } else529 {
530// always post531 attrs.addAttribute(HTML.Attribute.METHOD, "POST");
532 addProxiedConvertedAttribute( tag, HTML.Attribute.ACTION, attrs);
533534 }
535536 } elseif (((tag == HTML.Tag.AREA) ) &&
537 (attrs.getAttribute(HTML.Attribute.HREF) != null))
538 {
539540// ---- CHECKING <AREA541 addProxiedConvertedAttribute( tag, HTML.Attribute.HREF,
542 attrs );
543544 } elseif (((tag == HTML.Tag.BODY) ) &&
545 (attrs.getAttribute(HTML.Attribute.BACKGROUND) != null))
546 {
547548// ---- CHECKING <BODY549 addConvertedAttribute( tag,
550 HTML.Attribute.BACKGROUND,
551 attrs,
552 rewriter.proxyAllTags());
553554 } elseif (tag == HTML.Tag.TD)
555 {
556// ---- CHECKING <TD BACKGROUND=557if (! (attrs.getAttribute(HTML.Attribute.BACKGROUND) == null))
558 {
559 addConvertedAttribute( tag,
560 HTML.Attribute.BACKGROUND,
561 attrs,
562 rewriter.proxyAllTags());
563 }
564 }
565566/*567 if ( removeScript && (tag == HTML.Tag.SCRIPT)) {568 ignoreLevel ++;569 */570 }
571572/*573 * Converts the given attribute's URL compatible element to a proxied URL.574 * Uses the proxy parameter to determine if the URL should be written back as a575 * proxied URL, or as a fullpath to the original host.576 *577 * @param attr The HTML attribute to be proxied.578 * @param attrs The mutable HTML attribute set for the current HTML element.579 * @param proxy If set true, the URL is written back as a proxied URL, otherwise580 * it is written back as a fullpath back to the original host.581 *582 */583privatevoid addConvertedAttribute( HTML.Tag tag,
584 HTML.Attribute attr,
585 MutableAttributeSet attrs,
586boolean proxy )
587 {
588if (proxy)
589 {
590 addProxiedConvertedAttribute(tag, attr,attrs);
591 } else592 {
593if ( attrs.getAttribute( attr ) != null )
594 {
595 attrs.addAttribute( attr,
596 generateNewUrl( tag, attrs, attr, false ) );
597 }
598 }
599 }
600601602/***603 *604 * Converts the given attribute's URL compatible element to a proxied URL.605 * This method will always add the proxy host prefix to the rewritten URL.606 *607 * @param attr The HTML attribute to be proxied.608 * @param attrs The mutable HTML attribute set for the current HTML element.609 *610 */611privatevoid addProxiedConvertedAttribute( HTML.Tag tag,
612 HTML.Attribute attr,
613 MutableAttributeSet attrs ) {
614615616617if ( attrs.getAttribute( attr ) != null )
618 {
619 String attrSource = attrs.getAttribute( attr ).toString();
620621// special case: mailto should not be sent to the proxy server622if (attrSource.startsWith("mailto:"))
623 {
624 attrs.addAttribute( attr,
625 generateNewUrl( tag, attrs, attr, true ) );
626 } elseif (attrSource.startsWith("javascript:"))
627 {
628 attrs.addAttribute( attr,
629 attrSource);
630 } else631 {
632 attrs.addAttribute( attr,
633 generateNewUrl( tag, attrs, attr, true ) );
634 }
635 }
636 }
637638/*639 * Calls the rewriter's URL generator callback, which will translate the old url640 * into a new fullpath URL, either relative to the proxy server, or a fullpath641 * to the original web server, depending on the 'proxied' parameter.642 * 643 * @param oldURL The original URL, before it is tranlated.644 * @param proxied Boolean indicator denotes if the URL should be written back645 * as a proxied URL (true), or as a fully addressable address to the 646 * original web server.647 * @return The translated new URL.648 * 649 */650private String generateNewUrl(HTML.Tag tag,
651 MutableAttributeSet attrs,
652 HTML.Attribute attr,
653boolean proxied)
654 {
655 String oldURL = attrs.getAttribute( attr ).toString();
656// System.out.println("Generating new url: " + oldURL);657return rewriter.generateNewUrl(oldURL, tag, attr);
658 }
659660661 }
662663 }
664665