View Javadoc

1   /*
2    * Copyright 2000-2004 The Apache Software Foundation.
3    * 
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * 
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    * 
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package org.apache.jetspeed.util.rewriter;
18  
19  // java.io
20  import java.io.Reader;
21  import java.net.MalformedURLException;
22  import java.util.Enumeration;
23  import java.util.StringTokenizer;
24  
25  import javax.swing.text.MutableAttributeSet;
26  import javax.swing.text.html.HTML;
27  
28  import org.apache.turbine.util.Log;
29  
30  /***
31   *
32   * HTML Rewriter for transformer service
33   *
34   * @author <a href="mailto:mmari@ce.unipr.it">Marco Mari</a>
35   * @version $Id: ClipperRewriter.java,v 1.2 2004/02/23 03:18:59 jford Exp $ 
36   */
37  
38  public class ClipperRewriter extends HTMLRewriter
39  {
40  	private String startElement;
41  	private String stopElement;
42  	private boolean foundStart = false;
43  	private boolean foundStop = false;
44  	private boolean nested = false;
45  	private int nestedNumber = 0;
46  	private int startElementNumber;
47  	private int foundElementNumber = 0;
48  	private String NOT_FOUND = "<br>Element not found, returning null<br>";
49  	private String INVALID_START = "<br>Error: received null start element<br>";
50  	private String INVALID_NUMBER =
51  		"<br>Error: received tagNumber negative or null<br>";
52  
53  	/*
54  	 * Construct the Clipper Rewriter
55  	 *
56  	 */
57  	public ClipperRewriter()
58  	{
59  	}
60  
61  	/*
62  	 * Rewriting HTML content between startElement and stopElement
63  	 *
64  	 * @param input    the HTML input stream.
65  	 * @param baseURL  the base URL of the target host.
66  	 * @return         the rewritten HTML output stream.
67  	 *
68  	 * @exception      MalformedURLException a servlet exception.
69  	 */
70  
71  	public String rewrite(Reader input, String baseURL)
72  		throws MalformedURLException
73  	{
74  		this.baseURL = baseURL;
75  		String rewrittenHTML = "";
76  		foundStart = false;
77  		foundStop = false;
78  		nestedNumber = 0;
79  		foundElementNumber = 0;
80  
81  		// Null startElement is invalid
82  		if (startElement == null)
83  		{
84  			return INVALID_START;
85  		}
86  
87  		// StartElementNumber must be positive
88  		if (startElementNumber <= 0)
89  		{
90  			return INVALID_NUMBER;
91  		}
92  
93  		nested = controlCoupled(startElement, stopElement);
94  		HTMLParserAdaptor parser = new SwingParserAdaptor(this);
95  		rewrittenHTML = parser.run(input);
96  
97  		if (Log.getLogger().isDebugEnabled())
98  		{
99  			Log.debug("Clipper rewriter: start element:" + startElement);
100 			Log.debug("Clipper rewriter: stop element:" + stopElement);
101 			Log.debug("Clipper rewriter: foundStart:" + foundStart);
102 			Log.debug("Clipper rewriter: foundStop:" + foundStop);
103 			Log.debug("Clipper rewriter: nested:" + nested);
104 			Log.debug(
105 				"Clipper rewriter: foundElementNumber:" + foundElementNumber);
106 			Log.debug("Clipper rewriter: rewrittenHTML:" + rewrittenHTML);
107 		}
108 
109 		if ((foundStart == false)
110 			|| ((foundStop == false) && (stopElement != null)))
111 			return NOT_FOUND;
112 		else
113 			return rewrittenHTML;
114 	}
115 
116 	/*
117 	 * Returns true if all rewritten URLs should be sent back to the proxy server.
118 	 *
119 	 * @return true if all URLs are rewritten back to proxy server.
120 	 */
121 	public boolean proxyAllTags()
122 	{
123 		return true;
124 	}
125 
126 	/*
127 	 * Simple Tag Events
128 	 */
129 	public boolean enterSimpleTagEvent(HTML.Tag tag, MutableAttributeSet attrs)
130 	{
131 		String attributes = attrsToString(attrs);
132 		String tagString = tag.toString();
133 		return checkTag(tagString, attributes, "simple");
134 	}
135 
136 	/*
137 	 * Start Tag Events
138 	 */
139 	public boolean enterStartTagEvent(HTML.Tag tag, MutableAttributeSet attrs)
140 	{
141 		String attributes = attrsToString(attrs);
142 		String tagString = tag.toString();
143 		return checkTag(tagString, attributes, "start");
144 	}
145 
146 	/*
147 	 * Exit Tag Events
148 	 */
149 	public boolean enterEndTagEvent(HTML.Tag tag)
150 	{
151 		String tagString = tag.toString();
152 		return checkTag(tagString, null, "end");
153 	}
154 
155 	/*
156 	 * Text Event
157 	 */
158 	public boolean enterText(char[] values, int param)
159 	{
160 		if ((foundStart == true)
161 			&& (foundStop == false)
162 			&& (stopElement != null))
163 			return true;
164 		else
165 			return false;
166 	}
167 
168 	/***
169 	 * Set the start element
170 	 *
171 	 * @param startElement    the new start element
172 	 */
173 	public void setStartElement(String startElement)
174 	{
175 		this.startElement = startElement;
176 	}
177 
178 	/***
179 	 * Set the stop element
180 	 *
181 	 * @param stopElement    the new stop element
182 	 */
183 	public void setStopElement(String stopElement)
184 	{
185 		this.stopElement = stopElement;
186 	}
187 
188 	/***
189 	 * Set the start element number
190 	 *
191 	 * @param startElementNumber    the new start element number
192 	 */
193 	public void setStartElementNumber(int startElementNumber)
194 	{
195 		this.startElementNumber = startElementNumber;
196 	}
197 
198 	/*
199 	 * Control if searched tags are coupled.
200 	 * If searched tags are coupled, we must consider nested tags, example:
201 	 * Searched: <table>   and   </table>
202 	 * in the page there is:
203 	 * <table>...
204 	 *     <table>...
205 	 *     </table>
206 	 * </table>
207 	 * We are searching for the first and fourth tag!
208 	 */
209 	private boolean controlCoupled(String start, String stop)
210 	{
211 		StringTokenizer startTok = new StringTokenizer(start);
212 		boolean foundCoupled = false;
213 		String token;
214 
215 		if (stop == null)
216 			return false;
217 
218 		while (startTok.hasMoreTokens())
219 		{
220 			token = startTok.nextToken();
221 
222 			if (token.equals(stop) == true)
223 				return true;
224 		}
225 
226 		return false;
227 	}
228 
229 	/*
230 	 * Convert the attributes set to a string
231 	 */
232 	private String attrsToString(MutableAttributeSet attrs)
233 	{
234 		String element = "";
235 
236 		if (attrs != null)
237 		{
238 			Enumeration en = attrs.getAttributeNames();
239 
240 			while (en.hasMoreElements())
241 			{
242 				Object attr = en.nextElement();
243 				element =
244 					element
245 						+ " "
246 						+ attr.toString()
247 						+ "="
248 						+ attrs.getAttribute(attr).toString();
249 			}
250 
251 			return element;
252 		}
253 
254 		return null;
255 	}
256 
257 	/*
258 	 * Control to include or exclude the tag
259 	 */
260 	private boolean checkTag(String tag, String attrs, String position)
261 	{
262 		if (foundStart == false)
263 		{
264 			// Searching for start element
265 			if ((compareTag(tag, attrs, startElement) == true)
266 				&& ((position.equals("end") == false) || (stopElement == null)))
267 			{
268 				foundElementNumber = foundElementNumber + 1;
269 
270 				if (foundElementNumber == startElementNumber)
271 				{
272 					foundStart = true;
273 
274 					if (nested == true)
275 						nestedNumber = nestedNumber + 1;
276 
277 					return true;
278 				}
279 				else
280 					return false;
281 			}
282 			else
283 			{
284 				// It's not start element
285 				return false;
286 			}
287 			// Searching for stop element
288 		}
289 		else if ((foundStop == false) && (stopElement != null))
290 		{
291 			if (compareTag(tag, attrs, stopElement))
292 			{
293 				if (nested == true)
294 					if (position.equals("start"))
295 						nestedNumber = nestedNumber + 1;
296 					else if (position.equals("end"))
297 						nestedNumber = nestedNumber - 1;
298 
299 				if ((nestedNumber == 0) && (position.equals("start") == false))
300 					foundStop = true;
301 
302 				return true;
303 			}
304 			else
305 			{
306 				// It's not stop element
307 				return true;
308 			}
309 		}
310 		else
311 			// Stop already found, don't include this tag
312 			return false;
313 	}
314 
315 	/*
316 	 * Control if the current tag is the searched tag with the right attributes
317 	 */
318 	private boolean compareTag(String tag, String attrs, String base)
319 	{
320 		StringTokenizer baseTok = new StringTokenizer(base);
321 		String token;
322 		boolean foundTag = false;
323 
324 		while (baseTok.hasMoreTokens())
325 		{
326 			token = baseTok.nextToken();
327 
328 			// Exact match for the tag, for the attrs it's simpler to control the index
329 			if (token.equals(tag))
330 				foundTag = true;
331 			else if (attrs == null)
332 				return false;
333 			else if (attrs.indexOf(token) == -1)
334 				return false;
335 		}
336 
337 		if (foundTag == false)
338 			return false;
339 		else
340 		{
341 			if (Log.getLogger().isDebugEnabled())
342 				Log.debug(
343 					"Clipper rewriter: match between tag "
344 						+ tag
345 						+ ", attrs "
346 						+ attrs
347 						+ ", and searched: "
348 						+ base);
349 
350 			return true;
351 		}
352 
353 	}
354 
355 }