View Javadoc

1   /*
2    * Copyright 2000-2004 The Apache Software Foundation.
3    * 
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * 
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    * 
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package org.apache.jetspeed.portal.portlets;
18  
19  //Element Construction Set
20  import java.io.IOException;
21  import java.io.InputStreamReader;
22  import java.io.Reader;
23  import java.net.URL;
24  import java.net.URLConnection;
25  import java.util.Enumeration;
26  import java.util.Hashtable;
27  import java.util.Iterator;
28  
29  import org.apache.ecs.ConcreteElement;
30  import org.apache.jetspeed.portal.PortletConfig;
31  import org.apache.jetspeed.portal.PortletException;
32  import org.apache.jetspeed.services.Transformer;
33  import org.apache.jetspeed.util.Base64;
34  import org.apache.jetspeed.util.JetspeedClearElement;
35  import org.apache.turbine.services.servlet.TurbineServlet;
36  import org.apache.turbine.util.RunData;
37  
38  import org.apache.jetspeed.services.logging.JetspeedLogFactoryService;
39  import org.apache.jetspeed.services.logging.JetspeedLogger;
40  
41  /***
42   * A class that clips parts of one or more web pages.
43   *
44   * @author <a href="mailto:mmari@ce.unipr.it">Marco Mari</a>
45   * @version $Id: WebClippingPortlet.java,v 1.2 2004/02/23 04:03:34 jford Exp $ 
46   */
47  
48  public class WebClippingPortlet extends AbstractInstancePortlet
49  {
50  
51  	/***
52  	 * Static initialization of the logger for this class
53  	 */
54  	private static final JetspeedLogger logger =
55  		JetspeedLogFactoryService.getLogger(WebClippingPortlet.class.getName());
56  
57  	// Define parameter name for the first tag to clip
58  	public static final String START = "startTag";
59  	// Define parameter name for the last tag to clip
60  	public static final String STOP = "stopTag";
61  	// Define parameter name for a single tag to clip
62  	public static final String TAG = "Tag";
63  	// Define parameter name for the number of the tag to clip
64  	public static final String TAGNUM = "startTagNumber";
65  	// Define parameter name for the URL of the page
66  	public static final String URL = "url";
67  	// Error message for startTag without stopTag
68  	private String BAD_PARAM = "<br>Error: startTag without stopTag<br>";
69  	// Error message for wrong startTagNumber parameter
70  	private String BAD_NUMBER = "<br>Error: bad integer parameter<br>";
71  
72  	protected boolean initDone = false;
73  	protected boolean contentStale = true;
74  	protected boolean cacheContent = false;
75  	protected String username = null;
76  	protected String password = null;
77  
78  	private Hashtable patterns = null;
79  
80  	/***
81  	 * Initialize this portlet
82  	 * @throws PortletException Initialization failed
83  	 */
84  	public void init()
85  	{
86  		if (initDone)
87  			return;
88  
89  		patterns = new Hashtable();
90  
91  		try
92  		{
93  			loadParams();
94  		}
95  		catch (Exception e)
96  		{
97  			logger.info("Exception occurred:" + e.toString());
98  			e.printStackTrace();
99  		}
100 
101 		contentStale = true;
102 		initDone = true;
103 	}
104 
105 	/***
106 	 * took this from FileServerPortlet as it was private 
107 	 *
108 	*/
109 
110 	// FIXME: Currently only the expiration the HTTP Response header is honored. 
111 	//        Expiration information in <meta> tags are not honored 
112 
113 	protected Reader getReader(String url) throws IOException
114 	{
115 		URL pageUrl = new URL(url);
116 
117 		URLConnection pageConn = pageUrl.openConnection();
118 		try
119 		{
120 			// set HTTP Basic Authetication header if username and password are set
121 			if (username != null && password != null)
122 			{
123 				pageConn.setRequestProperty(
124 					"Authorization",
125 					"Basic "
126 						+ Base64.encodeAsString(username + ":" + password));
127 			}
128 
129 		}
130 		catch (Exception e)
131 		{
132 			logger.info("Exception occurred:" + e.toString());
133 			e.printStackTrace();
134 		}
135 
136 		long pageExpiration = pageConn.getExpiration();
137 		String encoding = pageConn.getContentEncoding();
138 		String tempString = null;
139 		String noCache = "no-cache";
140 
141 		if (encoding == null)
142 		{
143 			// Standard HTTP encoding
144 			encoding = "iso-8859-1";
145 		}
146 
147 		/*
148 		 * Determing if content should be cached.
149 		 */
150 		cacheContent = true; // Assume content is cached
151 		if (pageExpiration == 0)
152 		{
153 			cacheContent = false;
154 		}
155 		// Check header field CacheControl
156 		tempString = pageConn.getHeaderField("Cache-Control");
157 		if (tempString != null)
158 		{
159 			if (tempString.toLowerCase().indexOf(noCache) >= 0)
160 			{
161 				cacheContent = false;
162 			}
163 		}
164 		// Check header field Pragma
165 		tempString = pageConn.getHeaderField("Pragma");
166 		if (tempString != null)
167 		{
168 			if (tempString.toLowerCase().indexOf(noCache) >= 0)
169 			{
170 				cacheContent = false;
171 			}
172 		}
173 
174 		// Assign a reader
175 		Reader rdr = new InputStreamReader(pageConn.getInputStream(), encoding);
176 
177 		// Only set the page expiration it the page has not expired
178 		if (pageExpiration > System.currentTimeMillis()
179 			&& (cacheContent == true))
180 		{
181 			contentStale = false;
182 			logger.debug(
183 				"WebPagePortlet caching URL: "
184 					+ url
185 					+ " Expiration: "
186 					+ pageExpiration
187 					+ ", "
188 					+ (pageExpiration - System.currentTimeMillis())
189 					+ " milliseconds into the future");
190 			setExpirationMillis(pageExpiration);
191 		}
192 		else
193 		{
194 			contentStale = true;
195 		}
196 
197 		return rdr;
198 	}
199 
200 	/***
201 	This methods outputs the content of the portlet for a given 
202 	request.
203 	
204 	@param data the RunData object for the request
205 	@return the content to be displayed to the user-agent
206 	*/
207 	public ConcreteElement getContent(RunData data)
208 	{
209 		PortletConfig config = this.getPortletConfig();
210 
211 		if (contentStale == true)
212 			return getWebClippedContent(data, config);
213 
214 		if (null == getExpirationMillis())
215 			return getContent(data, null, true);
216 
217 		if (getExpirationMillis().longValue() <= System.currentTimeMillis())
218 			return getWebClippedContent(data, config);
219 
220 		return getContent(data, null, true);
221 	}
222 
223 	/*
224 	 * This method returns the clipped part of the Web page
225 	 */
226 	private ConcreteElement getWebClippedContent(
227 		RunData data,
228 		PortletConfig config)
229 	{
230 		String clippedString = ""; // HTML to visualize
231 		JetspeedClearElement element = null;
232 		int patternNumber = 1;
233 		int tagNumber = 0;
234 		Reader htmlReader;
235 		String defaultUrl = selectUrl(data, config);
236 
237 		try
238 		{
239 			// Re-load parameters to see immediately the effect of changes
240 			loadParams();
241 			Enumeration en = patterns.keys();
242 
243 			while (en.hasMoreElements())
244 			{
245 				String name = (String) en.nextElement();
246 
247 				// Search for parameters in the right order
248 				if (name.equals(START + String.valueOf(patternNumber))
249 					|| name.equals(TAG + String.valueOf(patternNumber)))
250 				{
251 					String start =
252 						(String) patterns.get(
253 							START + String.valueOf(patternNumber));
254 					String simpleTag =
255 						(String) patterns.get(
256 							TAG + String.valueOf(patternNumber));
257 					String stop =
258 						(String) patterns.get(
259 							STOP + String.valueOf(patternNumber));
260 					String tagNum =
261 						(String) patterns.get(
262 							TAGNUM + String.valueOf(patternNumber));
263 					// A group of params can have a specific url
264 					String url =
265 						(String) patterns.get(
266 							URL + String.valueOf(patternNumber));
267 					url = controlUrl(url, defaultUrl);
268 					htmlReader = getReader(url);
269 
270 					if ((start != null) && (stop == null))
271 					{
272 						element = new JetspeedClearElement(BAD_PARAM);
273 						return element;
274 					}
275 
276 					if (tagNum != null)
277 					{
278 						try
279 						{
280 							tagNumber = Integer.parseInt(tagNum);
281 						}
282 						catch (NumberFormatException e)
283 						{
284 							logger.info("Exception occurred:" + e.toString());
285 							e.printStackTrace();
286 							element = new JetspeedClearElement(BAD_NUMBER);
287 							return element;
288 						}
289 					}
290 
291 					if ((simpleTag != null) && (tagNum == null))
292 						clippedString =
293 							clippedString
294 								+ Transformer.findElement(
295 									htmlReader,
296 									url,
297 									simpleTag);
298 					else if ((simpleTag != null) && (tagNum != null))
299 						clippedString =
300 							clippedString
301 								+ Transformer.findElementNumber(
302 									htmlReader,
303 									url,
304 									simpleTag,
305 									tagNumber);
306 					else if (tagNum == null)
307 						clippedString =
308 							clippedString
309 								+ Transformer.clipElements(
310 									htmlReader,
311 									url,
312 									start,
313 									stop);
314 					else if (tagNum != null)
315 						clippedString =
316 							clippedString
317 								+ Transformer.clipElementsNumber(
318 									htmlReader,
319 									url,
320 									start,
321 									stop,
322 									tagNumber);
323 
324 					patternNumber = patternNumber + 1;
325 					//Restart Enumeration, because params could not be in the right order
326 					en = patterns.keys();
327 					htmlReader.close();
328 				}
329 			}
330 
331 			element = new JetspeedClearElement(clippedString);
332 
333 			//FIXME: We should do a clearContent() for the media type, not ALL media types
334 			this.clearContent();
335 			// doing this because setContent() is not overwriting current content.
336 			this.setContent(element);
337 
338 		}
339 		catch (Exception e)
340 		{
341 			logger.info("Exception occurred:" + e.toString());
342 			e.printStackTrace();
343 		}
344 
345 		return element;
346 	}
347 
348 	/***
349 	 * Usually called by caching system when portlet is marked as expired, but
350 	 * has not be idle longer then TimeToLive.
351 	 *
352 	 * Any cached content that is expired need to be refreshed.
353 	 */
354 	public void refresh()
355 	{
356 		if (cacheContent == true)
357 		{
358 			getWebClippedContent(null, this.getPortletConfig());
359 		}
360 	}
361 
362 	/***
363 	 * Select the URL to use for this portlet.
364 	 * @return The URL to use for this portlet
365 	 */
366 	protected String selectUrl(RunData data, PortletConfig config)
367 	{
368 		String url = config.getURL();
369 		return url;
370 	}
371 
372 	/*
373 	 * Choose between a specific url and the default url
374 	 */
375 	private String controlUrl(String url, String defaultUrl)
376 	{
377 		if (url == null)
378 		{
379 			return defaultUrl;
380 		}
381 
382 		//if the given URL doesn not include a protocol... ie http:// or ftp://
383 		//then resolve it relative to the current URL context
384 		if (url.indexOf("://") < 0)
385 		{
386 			url = TurbineServlet.getResource(url).toString();
387 		}
388 
389 		return url;
390 	}
391 
392 	/*
393 	 * Load portlet parameters
394 	 */
395 	private void loadParams() throws PortletException
396 	{
397 		Iterator en = this.getPortletConfig().getInitParameterNames();
398 
399 		try
400 		{
401 			while (en.hasNext())
402 			{
403 				String name = (String) en.next();
404 
405 				if (name.equals("username"))
406 					username =
407 						this.getPortletConfig().getInitParameter("username");
408 				else if (name.equals("password"))
409 					password =
410 						this.getPortletConfig().getInitParameter("password");
411 				else
412 					patterns.put(
413 						name,
414 						this.getPortletConfig().getInitParameter(name));
415 
416 			}
417 		}
418 		catch (Exception e)
419 		{
420 			logger.info("Exception occurred:" + e.toString());
421 			e.printStackTrace();
422 			throw new PortletException(e.toString());
423 		}
424 	}
425 
426 }