1 /*
2 * Copyright 2000-2004 The Apache Software Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 package org.apache.jetspeed.util.rewriter;
17
18 // javax.swing.text
19 import javax.swing.text.*;
20 import javax.swing.text.html.*;
21 import javax.swing.text.html.HTMLEditorKit;
22
23 // java.io
24 import java.io.*;
25
26 // java.util
27 import java.util.*;
28
29 // java.net
30 import java.net.*;
31 import org.apache.turbine.util.Log;//AAAtogli!
32
33
34 /*
35 * HTML Parser Adaptor for the Swing 'HotJava' parser.
36 *
37 * @author <a href="mailto:taylor@apache.org">David Sean Taylor</a>
38 * @version $Id: SwingParserAdaptor.java,v 1.6 2004/02/23 03:18:59 jford Exp $
39 */
40
41 public class SwingParserAdaptor implements HTMLParserAdaptor
42 {
43
44 private SwingParserAdaptor.Callback cb = new SwingParserAdaptor.Callback();
45 private String lineSeparator;
46 private boolean skippingImplied = false;
47 private Rewriter rewriter;
48 /*
49 * Construct a swing (hot java) parser adaptor
50 * Receives a Rewriter parameter, which is used as a callback when rewriting URLs.
51 * The rewriter object executes the implementation specific URL rewriting.
52 *
53 * @param rewriter The rewriter object that is called back during URL rewriting
54 */
55 public SwingParserAdaptor(Rewriter rewriter)
56 {
57 this.rewriter = rewriter;
58 lineSeparator = System.getProperty("line.separator", "\r\n");
59 }
60
61 /*
62 * Parses and an HTML document, rewriting all URLs as determined by the Rewriter callback
63 *
64 *
65 * @param reader The input stream reader
66 *
67 * @throws MalformedURLException
68 *
69 * @return An HTML-String with rewritten URLs.
70 */
71 public String run(Reader reader)
72 throws MalformedURLException
73 {
74 HTMLEditorKit.Parser parser = new SwingParserAdaptor.ParserGetter().getParser();
75
76 String res ="";
77 try
78 {
79 parser.parse(reader, cb, true);
80 res = cb.getResult();
81 } catch (Exception e)
82 {
83 e.printStackTrace();
84 //Log.info("Exception occurred:" + e.toString());AAAtogli!!!
85 //Log.info("Exception occurred:" + e.printStackTrace());
86 throw new MalformedURLException(e.toString());
87 }
88 return res;
89 }
90
91
92 /*
93 * This Class is needed, because getParser is protected and therefore
94 * only accessibly by a subclass
95 */
96 class ParserGetter extends HTMLEditorKit
97 {
98
99 public HTMLEditorKit.Parser getParser(){
100 return super.getParser();
101 }
102 }
103
104
105 /*
106 * Swing Parser Callback from the HTMLEditorKit.
107 * This class handles all SAX-like events during parsing.
108 *
109 */
110 class Callback extends HTMLEditorKit.ParserCallback
111 {
112
113
114 // either handling of <FORM> is buggy, or I made some weird mistake ...
115 // ... JDK 1.3 sends double "</form>"-tags on closing <form>
116 private boolean inForm = false;
117 private boolean inScript = false;
118 private boolean emit = true;
119 private boolean simpleTag = false;
120
121 private StringWriter result = new StringWriter();
122
123 private Callback ()
124 {
125 }
126
127 //
128 // -------------- Hot Java event callbacks... --------------------
129 //
130
131 /*
132 * Hot Java event callback for text (all data in between tags)
133 *
134 * @param values The array of characters containing the text.
135 */
136 public void handleText(char[] values,int param)
137 {
138 if (false == emit)
139 return;
140 if (values[0] == '>')
141 return;
142 if (false == rewriter.enterText(values, param))
143 return;
144
145 addToResult(values);
146 }
147
148 /*
149 * Hot Java event callback for handling a simple tag (without begin/end)
150 *
151 * @param tag The HTML tag being handled.
152 * @param attrs The mutable HTML attribute set for the current HTML element.
153 * @param position the position of the tag.
154 *
155 */
156 public void handleSimpleTag(HTML.Tag tag,MutableAttributeSet attrs,int param)
157 {
158 simpleTag = true;
159 if (false == rewriter.enterSimpleTagEvent(tag, attrs))
160 return;
161
162 if (false == isValidFragmentTag(tag))
163 return;
164
165 appendTagToResult(tag,attrs);
166 if (tag.toString().equalsIgnoreCase("param") ||
167 tag.toString().equalsIgnoreCase("object") ||
168 tag.toString().equalsIgnoreCase("embed"))
169 {
170 result.write(lineSeparator);
171 }
172 simpleTag = false;
173 String appended = rewriter.exitSimpleTagEvent(tag, attrs);
174 if (null != appended)
175 result.write(appended);
176 }
177
178 /*
179 * Hot Java event callback for handling a start tag.
180 *
181 * @param tag The HTML tag being handled.
182 * @param attrs The mutable HTML attribute set for the current HTML element.
183 * @param position the position of the tag.
184 *
185 */
186 public void handleStartTag(HTML.Tag tag, MutableAttributeSet attrs, int position)
187 {
188 if (false == rewriter.enterStartTagEvent(tag, attrs))
189 return;
190
191 if (tag == HTML.Tag.HEAD)
192 {
193 emit = false;
194 return;
195 }
196
197 if (false == isValidFragmentTag(tag))
198 return;
199
200 appendTagToResult(tag,attrs);
201 formatLine(tag);
202 String appended = rewriter.exitStartTagEvent(tag, attrs);
203 if (null != appended)
204 result.write(appended);
205 }
206
207
208 boolean isValidFragmentTag(HTML.Tag tag)
209 {
210 /*
211 if (false == emit)
212 return false;
213
214 if (tag == HTML.Tag.HTML) // always strip out HTML tag for fragments
215 return false;
216
217 if (tag == HTML.Tag.BODY)
218 return false;
219
220 if (tag == HTML.Tag.FRAMESET) // always strip out FRAMESET tag for fragments
221 return false;
222
223 if (tag == HTML.Tag.FRAME)
224 return false;
225
226 if (tag == HTML.Tag.NOFRAMES)
227 return false;
228 */
229 return true;
230 }
231
232
233 /*
234 * Hot Java event callback for handling an end tag.
235 *
236 * @param tag The HTML tag being handled.
237 * @param position the position of the tag.
238 *
239 */
240 public void handleEndTag(HTML.Tag tag, int position)
241 {
242 if (false == rewriter.enterEndTagEvent(tag))
243 return;
244
245 if (tag == HTML.Tag.HEAD)
246 {
247 emit = true;
248 return;
249 }
250
251 if (false == isValidFragmentTag(tag))
252 return;
253
254 addToResult("</").addToResult(tag).addToResult(">");
255
256 formatLine(tag);
257 String appended = rewriter.exitEndTagEvent(tag);
258 if (null != appended)
259 result.write(appended);
260
261 }
262
263
264 /*
265 * Hot Java event callback for handling errors.
266 *
267 * @param str The error message from Swing.
268 * @param param A parameter passed to handler.
269 *
270 */
271 public void handleError(java.lang.String str,int param)
272 {
273 // ignored
274 }
275
276 /*
277 * Hot Java event callback for HTML comments.
278 *
279 * @param values The character array of text comments.
280 * @param param A parameter passed to handler.
281 *
282 */
283 public void handleComment(char[] values,int param)
284 {
285 // STRIP COMMENTS: addToResult(values);
286 // this is questionable, we may need to turn this on for scripts inside comments
287 }
288
289 /*
290 * Hot Java event callback for end of line strings.
291 *
292 * @param str The end-of-line string.
293 *
294 */
295 public void handleEndOfLineString(java.lang.String str)
296 {
297 addToResult(str);
298 }
299
300
301 /*
302 * Prints new lines to make the output a little easier to read when debugging.
303 *
304 * @param tag The HTML tag being handled.
305 *
306 */
307 private void formatLine(HTML.Tag tag)
308 {
309 if (tag.isBlock() ||
310 tag.breaksFlow() ||
311 tag == HTML.Tag.FRAME ||
312 tag == HTML.Tag.FRAMESET ||
313 tag == HTML.Tag.SCRIPT)
314 {
315 result.write(lineSeparator);
316 }
317 }
318
319
320 /*
321 * Used to write tag and attribute objects to the output stream.
322 * Returns a reference to itself so that these calls can be chained.
323 *
324 * @param txt Any text to be written out to stream with toString method.
325 * The object being written should implement its toString method.
326 * @return A handle to the this, the callback, for chaining results.
327 *
328 */
329 private Callback addToResult(Object txt)
330 {
331 // to allow for implementation using Stringbuffer or StringWriter
332 // I don't know yet, which one is better in this case
333 //if (ignoreLevel > 0 ) return this;
334
335 try
336 {
337 result.write(txt.toString());
338 } catch (Exception e)
339 {
340 System.err.println("Error parsing:" + e);
341 }
342 return this;
343 }
344
345
346 /*
347 * Used to write all character content to the output stream.
348 * Returns a reference to itself so that these calls can be chained.
349 *
350 * @param txt Any character text to be written out directly to stream.
351 * @return A handle to the this, the callback, for chaining results.
352 *
353 */
354 private Callback addToResult(char[] txt)
355 {
356 //if (ignoreLevel > 0) return this;
357
358 try
359 {
360
361 result.write(txt);
362
363 } catch (Exception e)
364 { /* ignore */
365 }
366 return this;
367 }
368
369 /*
370 * Accessor to the Callback's content-String
371 *
372 * @return Cleaned and rewritten HTML-Content
373 */
374 public String getResult()
375 {
376 try
377 {
378 result.flush();
379 } catch (Exception e)
380 { /* ignore */
381 }
382
383 // WARNING: doesn't work, if you remove " " + ... but don't know why
384 String res = " " + result.toString();
385
386 return res;
387 }
388
389 /*
390 * Flushes the output stream. NOT IMPLEMENTED
391 *
392 */
393 public void flush() throws javax.swing.text.BadLocationException
394 {
395 // nothing to do here ...
396 }
397
398 /*
399 * Writes output to the final stream for all attributes of a given tag.
400 *
401 * @param tag The HTML tag being output.
402 * @param attrs The mutable HTML attribute set for the current HTML tag.
403 *
404 */
405 private void appendTagToResult(HTML.Tag tag, MutableAttributeSet attrs)
406 {
407 convertURLS(tag,attrs);
408 Enumeration e = attrs.getAttributeNames();
409 addToResult("<").addToResult(tag);
410 while (e.hasMoreElements())
411 {
412 Object attr = e.nextElement();
413 String value = attrs.getAttribute(attr).toString();
414 addToResult(" ").addToResult(attr).addToResult("=\"").
415 addToResult(value).addToResult("\"");
416 }
417 if (simpleTag)
418 addToResult("/>");
419 else
420 addToResult(">");
421 }
422
423
424 /*
425 * Determines which HTML Tag/Element is being inspected, and calls the
426 * appropriate converter for that context. This method contains all the
427 * logic for determining how tags are rewritten.
428 *
429 * TODO: it would be better to drive this logic off a state table that is not
430 * tied to the Hot Java parser.
431 *
432 * @param tag TAG from the Callback-Interface.
433 * @param attrs The mutable HTML attribute set for the current HTML element.
434 */
435
436 private void convertURLS( HTML.Tag tag, MutableAttributeSet attrs )
437 {
438 rewriter.convertTagEvent(tag, attrs);
439 if ((tag == HTML.Tag.A) &&
440 (attrs.getAttribute(HTML.Attribute.HREF) != null))
441 {
442
443 // ---- CHECKING <A HREF
444 addProxiedConvertedAttribute( tag, HTML.Attribute.HREF, attrs);
445
446 }
447 else if (((tag == HTML.Tag.IMG ||
448 tag == HTML.Tag.INPUT
449 ) &&
450 (attrs.getAttribute(HTML.Attribute.SRC) != null)
451 ))
452 {
453
454 // ---- CHECKING <IMG SRC & <INPUT SRC
455 addConvertedAttribute( tag,
456 HTML.Attribute.SRC,
457 attrs,
458 rewriter.proxyAllTags());
459
460 } else if (((tag == HTML.Tag.OPTION) ) &&
461 (attrs.getAttribute(HTML.Attribute.VALUE) != null))
462 {
463 // ---- CHECKING <OPTION
464 addProxiedConvertedAttribute( tag, HTML.Attribute.VALUE, attrs );
465
466 } else if (((tag == HTML.Tag.LINK) ) &&
467 (attrs.getAttribute(HTML.Attribute.HREF) != null))
468 {
469
470 // ---- CHECKING <LINK
471 addConvertedAttribute( tag,
472 HTML.Attribute.HREF,
473 attrs,
474 rewriter.proxyAllTags());
475
476 } else if ( tag == HTML.Tag.APPLET )
477 {
478
479 // ---- CHECKING <APPLET CODEBASE=
480 addConvertedAttribute( tag,
481 HTML.Attribute.CODEBASE,
482 attrs,
483 rewriter.proxyAllTags());
484
485 } else if ( tag == HTML.Tag.FRAME )
486 {
487
488 // ---- CHECKING <FRAME SRC=
489 addProxiedConvertedAttribute( tag, HTML.Attribute.SRC, attrs);
490
491 } else if ( tag == HTML.Tag.SCRIPT )
492 {
493 // ---- CHECKING <SCRIPT SRC=
494 if (attrs.getAttribute(HTML.Attribute.SRC) != null)
495 {
496
497 // script is external
498 String s = attrs.getAttribute(HTML.Attribute.SRC).toString();
499 if (s.indexOf("%3E") == -1)
500 {
501 addConvertedAttribute( tag,
502 HTML.Attribute.SRC,
503 attrs,
504 rewriter.proxyAllTags());
505 }
506
507 } else
508 {
509 // script is inline
510 //parserOff = true;
511 }
512
513 } else if (tag == HTML.Tag.FORM)
514 {
515
516 // ---- CHECKING <FORM ACTION=
517 inForm = true; // buggy <form> handling in jdk 1.3
518
519 if (attrs.getAttribute(HTML.Attribute.ACTION) == null)
520 {
521 // always post
522 attrs.addAttribute(HTML.Attribute.METHOD, "POST");
523 //self referencing <FORM>
524
525 // attrs.addAttribute(HTML.Attribute.ACTION,
526 // baseURL);
527
528 } else
529 {
530 // always post
531 attrs.addAttribute(HTML.Attribute.METHOD, "POST");
532 addProxiedConvertedAttribute( tag, HTML.Attribute.ACTION, attrs);
533
534 }
535
536 } else if (((tag == HTML.Tag.AREA) ) &&
537 (attrs.getAttribute(HTML.Attribute.HREF) != null))
538 {
539
540 // ---- CHECKING <AREA
541 addProxiedConvertedAttribute( tag, HTML.Attribute.HREF,
542 attrs );
543
544 } else if (((tag == HTML.Tag.BODY) ) &&
545 (attrs.getAttribute(HTML.Attribute.BACKGROUND) != null))
546 {
547
548 // ---- CHECKING <BODY
549 addConvertedAttribute( tag,
550 HTML.Attribute.BACKGROUND,
551 attrs,
552 rewriter.proxyAllTags());
553
554 } else if (tag == HTML.Tag.TD)
555 {
556 // ---- CHECKING <TD BACKGROUND=
557 if (! (attrs.getAttribute(HTML.Attribute.BACKGROUND) == null))
558 {
559 addConvertedAttribute( tag,
560 HTML.Attribute.BACKGROUND,
561 attrs,
562 rewriter.proxyAllTags());
563 }
564 }
565
566 /*
567 if ( removeScript && (tag == HTML.Tag.SCRIPT)) {
568 ignoreLevel ++;
569 */
570 }
571
572 /*
573 * Converts the given attribute's URL compatible element to a proxied URL.
574 * Uses the proxy parameter to determine if the URL should be written back as a
575 * proxied URL, or as a fullpath to the original host.
576 *
577 * @param attr The HTML attribute to be proxied.
578 * @param attrs The mutable HTML attribute set for the current HTML element.
579 * @param proxy If set true, the URL is written back as a proxied URL, otherwise
580 * it is written back as a fullpath back to the original host.
581 *
582 */
583 private void addConvertedAttribute( HTML.Tag tag,
584 HTML.Attribute attr,
585 MutableAttributeSet attrs,
586 boolean proxy )
587 {
588 if (proxy)
589 {
590 addProxiedConvertedAttribute(tag, attr,attrs);
591 } else
592 {
593 if ( attrs.getAttribute( attr ) != null )
594 {
595 attrs.addAttribute( attr,
596 generateNewUrl( tag, attrs, attr, false ) );
597 }
598 }
599 }
600
601
602 /***
603 *
604 * Converts the given attribute's URL compatible element to a proxied URL.
605 * This method will always add the proxy host prefix to the rewritten URL.
606 *
607 * @param attr The HTML attribute to be proxied.
608 * @param attrs The mutable HTML attribute set for the current HTML element.
609 *
610 */
611 private void addProxiedConvertedAttribute( HTML.Tag tag,
612 HTML.Attribute attr,
613 MutableAttributeSet attrs ) {
614
615
616
617 if ( attrs.getAttribute( attr ) != null )
618 {
619 String attrSource = attrs.getAttribute( attr ).toString();
620
621 // special case: mailto should not be sent to the proxy server
622 if (attrSource.startsWith("mailto:"))
623 {
624 attrs.addAttribute( attr,
625 generateNewUrl( tag, attrs, attr, true ) );
626 } else if (attrSource.startsWith("javascript:"))
627 {
628 attrs.addAttribute( attr,
629 attrSource);
630 } else
631 {
632 attrs.addAttribute( attr,
633 generateNewUrl( tag, attrs, attr, true ) );
634 }
635 }
636 }
637
638 /*
639 * Calls the rewriter's URL generator callback, which will translate the old url
640 * into a new fullpath URL, either relative to the proxy server, or a fullpath
641 * to the original web server, depending on the 'proxied' parameter.
642 *
643 * @param oldURL The original URL, before it is tranlated.
644 * @param proxied Boolean indicator denotes if the URL should be written back
645 * as a proxied URL (true), or as a fully addressable address to the
646 * original web server.
647 * @return The translated new URL.
648 *
649 */
650 private String generateNewUrl(HTML.Tag tag,
651 MutableAttributeSet attrs,
652 HTML.Attribute attr,
653 boolean proxied)
654 {
655 String oldURL = attrs.getAttribute( attr ).toString();
656 // System.out.println("Generating new url: " + oldURL);
657 return rewriter.generateNewUrl(oldURL, tag, attr);
658 }
659
660
661 }
662
663 }
664
665