1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.jetspeed.portal.portlets;
18
19
20 import java.io.IOException;
21 import java.io.InputStreamReader;
22 import java.io.Reader;
23 import java.net.URL;
24 import java.net.URLConnection;
25 import java.util.Enumeration;
26 import java.util.Hashtable;
27 import java.util.Iterator;
28
29 import org.apache.ecs.ConcreteElement;
30 import org.apache.jetspeed.portal.PortletConfig;
31 import org.apache.jetspeed.portal.PortletException;
32 import org.apache.jetspeed.services.Transformer;
33 import org.apache.jetspeed.util.Base64;
34 import org.apache.jetspeed.util.JetspeedClearElement;
35 import org.apache.turbine.services.servlet.TurbineServlet;
36 import org.apache.turbine.util.RunData;
37
38 import org.apache.jetspeed.services.logging.JetspeedLogFactoryService;
39 import org.apache.jetspeed.services.logging.JetspeedLogger;
40
41 /***
42 * A class that clips parts of one or more web pages.
43 *
44 * @author <a href="mailto:mmari@ce.unipr.it">Marco Mari</a>
45 * @version $Id: WebClippingPortlet.java,v 1.2 2004/02/23 04:03:34 jford Exp $
46 */
47
48 public class WebClippingPortlet extends AbstractInstancePortlet
49 {
50
51 /***
52 * Static initialization of the logger for this class
53 */
54 private static final JetspeedLogger logger =
55 JetspeedLogFactoryService.getLogger(WebClippingPortlet.class.getName());
56
57
58 public static final String START = "startTag";
59
60 public static final String STOP = "stopTag";
61
62 public static final String TAG = "Tag";
63
64 public static final String TAGNUM = "startTagNumber";
65
66 public static final String URL = "url";
67
68 private String BAD_PARAM = "<br>Error: startTag without stopTag<br>";
69
70 private String BAD_NUMBER = "<br>Error: bad integer parameter<br>";
71
72 protected boolean initDone = false;
73 protected boolean contentStale = true;
74 protected boolean cacheContent = false;
75 protected String username = null;
76 protected String password = null;
77
78 private Hashtable patterns = null;
79
80 /***
81 * Initialize this portlet
82 * @throws PortletException Initialization failed
83 */
84 public void init()
85 {
86 if (initDone)
87 return;
88
89 patterns = new Hashtable();
90
91 try
92 {
93 loadParams();
94 }
95 catch (Exception e)
96 {
97 logger.info("Exception occurred:" + e.toString());
98 e.printStackTrace();
99 }
100
101 contentStale = true;
102 initDone = true;
103 }
104
105 /***
106 * took this from FileServerPortlet as it was private
107 *
108 */
109
110
111
112
113 protected Reader getReader(String url) throws IOException
114 {
115 URL pageUrl = new URL(url);
116
117 URLConnection pageConn = pageUrl.openConnection();
118 try
119 {
120
121 if (username != null && password != null)
122 {
123 pageConn.setRequestProperty(
124 "Authorization",
125 "Basic "
126 + Base64.encodeAsString(username + ":" + password));
127 }
128
129 }
130 catch (Exception e)
131 {
132 logger.info("Exception occurred:" + e.toString());
133 e.printStackTrace();
134 }
135
136 long pageExpiration = pageConn.getExpiration();
137 String encoding = pageConn.getContentEncoding();
138 String tempString = null;
139 String noCache = "no-cache";
140
141 if (encoding == null)
142 {
143
144 encoding = "iso-8859-1";
145 }
146
147
148
149
150 cacheContent = true;
151 if (pageExpiration == 0)
152 {
153 cacheContent = false;
154 }
155
156 tempString = pageConn.getHeaderField("Cache-Control");
157 if (tempString != null)
158 {
159 if (tempString.toLowerCase().indexOf(noCache) >= 0)
160 {
161 cacheContent = false;
162 }
163 }
164
165 tempString = pageConn.getHeaderField("Pragma");
166 if (tempString != null)
167 {
168 if (tempString.toLowerCase().indexOf(noCache) >= 0)
169 {
170 cacheContent = false;
171 }
172 }
173
174
175 Reader rdr = new InputStreamReader(pageConn.getInputStream(), encoding);
176
177
178 if (pageExpiration > System.currentTimeMillis()
179 && (cacheContent == true))
180 {
181 contentStale = false;
182 logger.debug(
183 "WebPagePortlet caching URL: "
184 + url
185 + " Expiration: "
186 + pageExpiration
187 + ", "
188 + (pageExpiration - System.currentTimeMillis())
189 + " milliseconds into the future");
190 setExpirationMillis(pageExpiration);
191 }
192 else
193 {
194 contentStale = true;
195 }
196
197 return rdr;
198 }
199
200 /***
201 This methods outputs the content of the portlet for a given
202 request.
203
204 @param data the RunData object for the request
205 @return the content to be displayed to the user-agent
206 */
207 public ConcreteElement getContent(RunData data)
208 {
209 PortletConfig config = this.getPortletConfig();
210
211 if (contentStale == true)
212 return getWebClippedContent(data, config);
213
214 if (null == getExpirationMillis())
215 return getContent(data, null, true);
216
217 if (getExpirationMillis().longValue() <= System.currentTimeMillis())
218 return getWebClippedContent(data, config);
219
220 return getContent(data, null, true);
221 }
222
223
224
225
226 private ConcreteElement getWebClippedContent(
227 RunData data,
228 PortletConfig config)
229 {
230 String clippedString = "";
231 JetspeedClearElement element = null;
232 int patternNumber = 1;
233 int tagNumber = 0;
234 Reader htmlReader;
235 String defaultUrl = selectUrl(data, config);
236
237 try
238 {
239
240 loadParams();
241 Enumeration en = patterns.keys();
242
243 while (en.hasMoreElements())
244 {
245 String name = (String) en.nextElement();
246
247
248 if (name.equals(START + String.valueOf(patternNumber))
249 || name.equals(TAG + String.valueOf(patternNumber)))
250 {
251 String start =
252 (String) patterns.get(
253 START + String.valueOf(patternNumber));
254 String simpleTag =
255 (String) patterns.get(
256 TAG + String.valueOf(patternNumber));
257 String stop =
258 (String) patterns.get(
259 STOP + String.valueOf(patternNumber));
260 String tagNum =
261 (String) patterns.get(
262 TAGNUM + String.valueOf(patternNumber));
263
264 String url =
265 (String) patterns.get(
266 URL + String.valueOf(patternNumber));
267 url = controlUrl(url, defaultUrl);
268 htmlReader = getReader(url);
269
270 if ((start != null) && (stop == null))
271 {
272 element = new JetspeedClearElement(BAD_PARAM);
273 return element;
274 }
275
276 if (tagNum != null)
277 {
278 try
279 {
280 tagNumber = Integer.parseInt(tagNum);
281 }
282 catch (NumberFormatException e)
283 {
284 logger.info("Exception occurred:" + e.toString());
285 e.printStackTrace();
286 element = new JetspeedClearElement(BAD_NUMBER);
287 return element;
288 }
289 }
290
291 if ((simpleTag != null) && (tagNum == null))
292 clippedString =
293 clippedString
294 + Transformer.findElement(
295 htmlReader,
296 url,
297 simpleTag);
298 else if ((simpleTag != null) && (tagNum != null))
299 clippedString =
300 clippedString
301 + Transformer.findElementNumber(
302 htmlReader,
303 url,
304 simpleTag,
305 tagNumber);
306 else if (tagNum == null)
307 clippedString =
308 clippedString
309 + Transformer.clipElements(
310 htmlReader,
311 url,
312 start,
313 stop);
314 else if (tagNum != null)
315 clippedString =
316 clippedString
317 + Transformer.clipElementsNumber(
318 htmlReader,
319 url,
320 start,
321 stop,
322 tagNumber);
323
324 patternNumber = patternNumber + 1;
325
326 en = patterns.keys();
327 htmlReader.close();
328 }
329 }
330
331 element = new JetspeedClearElement(clippedString);
332
333
334 this.clearContent();
335
336 this.setContent(element);
337
338 }
339 catch (Exception e)
340 {
341 logger.info("Exception occurred:" + e.toString());
342 e.printStackTrace();
343 }
344
345 return element;
346 }
347
348 /***
349 * Usually called by caching system when portlet is marked as expired, but
350 * has not be idle longer then TimeToLive.
351 *
352 * Any cached content that is expired need to be refreshed.
353 */
354 public void refresh()
355 {
356 if (cacheContent == true)
357 {
358 getWebClippedContent(null, this.getPortletConfig());
359 }
360 }
361
362 /***
363 * Select the URL to use for this portlet.
364 * @return The URL to use for this portlet
365 */
366 protected String selectUrl(RunData data, PortletConfig config)
367 {
368 String url = config.getURL();
369 return url;
370 }
371
372
373
374
375 private String controlUrl(String url, String defaultUrl)
376 {
377 if (url == null)
378 {
379 return defaultUrl;
380 }
381
382
383
384 if (url.indexOf("://") < 0)
385 {
386 url = TurbineServlet.getResource(url).toString();
387 }
388
389 return url;
390 }
391
392
393
394
395 private void loadParams() throws PortletException
396 {
397 Iterator en = this.getPortletConfig().getInitParameterNames();
398
399 try
400 {
401 while (en.hasNext())
402 {
403 String name = (String) en.next();
404
405 if (name.equals("username"))
406 username =
407 this.getPortletConfig().getInitParameter("username");
408 else if (name.equals("password"))
409 password =
410 this.getPortletConfig().getInitParameter("password");
411 else
412 patterns.put(
413 name,
414 this.getPortletConfig().getInitParameter(name));
415
416 }
417 }
418 catch (Exception e)
419 {
420 logger.info("Exception occurred:" + e.toString());
421 e.printStackTrace();
422 throw new PortletException(e.toString());
423 }
424 }
425
426 }