View Javadoc

1   /*
2    * Copyright 2000-2001,2004 The Apache Software Foundation.
3    * 
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * 
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    * 
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package org.apache.jetspeed.portal.portlets;
18  
19  //Element Construction Set
20  import org.apache.jetspeed.util.JetspeedClearElement;
21  import org.apache.ecs.ConcreteElement;
22  
23  //Jetspeed stuff
24  import org.apache.jetspeed.portal.PortletConfig;
25  import org.apache.jetspeed.portal.PortletException;
26  import org.apache.jetspeed.services.logging.JetspeedLogFactoryService;
27  import org.apache.jetspeed.services.logging.JetspeedLogger;
28  import org.apache.jetspeed.util.HTMLRewriter;
29  import org.apache.jetspeed.util.Base64;
30  
31  //turbine
32  import org.apache.turbine.util.RunData;
33  
34  //standard java stuff
35  import java.io.InputStreamReader;
36  import java.io.IOException;
37  import java.io.Reader;
38  import java.net.URL;
39  import java.net.URLConnection;
40  import java.util.StringTokenizer;
41  
42  /***
43   * A class that loads a web page and filters it to have certain features
44   * deleted.
45   *
46   *@author <a href="mailto:rammer@sycom.at">Ingo Rammer</a>
47   *@author <a href="mailto:sgala@apache.org">Santiago Gala</a>
48   *@author <a href="mailto:taylor@apache.org">David Sean Taylor</a>
49   *@author <a href="mailto:david@pssp.com">David G. Powers</a>
50   */
51  public class WebPagePortlet extends AbstractInstancePortlet 
52  {
53      /***
54       * Static initialization of the logger for this class
55       */    
56      private static final JetspeedLogger logger = JetspeedLogFactoryService.getLogger(WebPagePortlet.class.getName());
57      
58      protected HTMLRewriter rewriter = null;
59      protected boolean initDone = false;
60      protected boolean contentStale = true;
61      protected boolean cacheContent = false;
62      protected String  username = null;
63      protected String  password = null;
64      
65      /***
66       * Initialize this portlet by defining a HTML rewriter.
67       * @throws PortletException Initialization failed
68       */    
69      public void init() throws PortletException {
70    
71          if (initDone) // Why is init called more than once per portlet?
72              return;
73  
74          PortletConfig config = this.getPortletConfig();
75          
76          try 
77          {
78              //FIXME: HTMLRewriter should take a Reader, and work
79              rewriter = new HTMLRewriter(
80                      ! config.getInitParameter("dont_remove_script","no")
81                          .equalsIgnoreCase("yes"),
82                      ! config.getInitParameter("dont_remove_style","no")
83                          .equalsIgnoreCase("yes"),
84                      ! config.getInitParameter("dont_remove_noscript","no")
85                          .equalsIgnoreCase("yes"),
86                      ! config.getInitParameter("dont_remove_meta","no")
87                          .equalsIgnoreCase("yes"),
88                      ! config.getInitParameter("dont_remove_applet","no")
89                          .equalsIgnoreCase("yes"),
90                      ! config.getInitParameter("dont_remove_object","no")
91                          .equalsIgnoreCase("yes"),
92                      ! config.getInitParameter("dont_remove_head","no")
93                          .equalsIgnoreCase("yes"),
94                      ! config.getInitParameter("dont_remove_onsomething","no")
95                          .equalsIgnoreCase("yes"),
96                      config.getInitParameter("open_in_popup","no")
97                          .equalsIgnoreCase("yes")
98                          );
99  
100             // fetch username and password for HTTP Basic Autentication
101             username = config.getInitParameter("username");
102             password = config.getInitParameter("password");
103             
104             contentStale = true;
105             initDone = true;
106         } catch (Exception e) {
107             logger.info("Exception occurred:" + e.toString());
108             e.printStackTrace();
109             throw new PortletException( e.toString() );
110         }
111     }
112     
113     /***
114      * took this from FileServerPortlet as it was private 
115      *
116     */
117     // FIXME: Currently only the expiration the HTTP Reponse header is honored. 
118     //        Expiration information in <meta> tags are not honored 
119     protected Reader getReader(String url) throws IOException 
120     {
121         URL            pageUrl = new URL(url);
122 
123         URLConnection  pageConn = pageUrl.openConnection();
124         try
125         {
126             // set HTTP Basic Authetication header if username and password are set
127             if (username != null && password !=null)
128             {
129                 pageConn.setRequestProperty("Authorization", "Basic " +
130                                         Base64.encodeAsString(username + ":" + password));
131             }
132                 
133         }
134         catch (Exception e)
135         {
136             logger.info("Exception occurred:" + e.toString(), e);
137         }
138         
139         long           pageExpiration = pageConn.getExpiration();
140         String         encoding = "iso-8859-1";
141         String         contentType = pageConn.getContentType();
142         String         tempString = null;
143         String         noCache = "no-cache";
144         
145         if (contentType != null)
146         {
147             StringTokenizer st = new StringTokenizer(contentType, "; =");
148             while (st.hasMoreTokens())
149             {
150                 if (st.nextToken().equalsIgnoreCase("charset"))
151                 {
152                     try
153                     {
154                         encoding = st.nextToken();
155                         break;
156                     }
157                     catch (Exception e)
158                     {
159                         break;
160                     }
161                 }
162             }
163         }
164 
165         /*
166          * Determing if content should be cached.
167          */
168         cacheContent = true; // Assume content is cached
169         if (pageExpiration == 0) {
170             cacheContent = false;
171         }
172         // Check header field CacheControl
173         tempString = pageConn.getHeaderField( "Cache-Control");
174         if (tempString != null) {
175             if (tempString.toLowerCase().indexOf(noCache) >= 0) {
176                 cacheContent = false;
177             }
178         }
179         // Check header field Pragma
180         tempString = pageConn.getHeaderField( "Pragma");
181         if (tempString != null) {
182             if (tempString.toLowerCase().indexOf(noCache) >= 0) {
183                 cacheContent = false;
184             }
185         }
186             
187         // Assign a reader
188         Reader rdr = new InputStreamReader(pageConn.getInputStream(),
189                                            encoding );
190 
191         // Only set the page expiration it the page has not expired
192         if (pageExpiration > System.currentTimeMillis() && (cacheContent == true))
193         {
194             contentStale = false;
195             logger.debug( "WebPagePortlet caching URL: " + 
196                        url + 
197                        " Expiration: " + 
198                        pageExpiration +
199                        ", " +
200                        (pageExpiration - System.currentTimeMillis() ) +
201                        " milliseconds into the future" );
202             setExpirationMillis(pageExpiration);
203         } else {
204             contentStale = true;
205         }
206 
207         return rdr;
208     }
209 
210 
211     /***
212     This methods outputs the content of the portlet for a given 
213     request.
214 
215     @param data the RunData object for the request
216     @return the content to be displayed to the user-agent
217     */
218     public ConcreteElement getContent( RunData data ) 
219     {
220         PortletConfig config = this.getPortletConfig();
221         
222         if (contentStale == true)
223             return getWebPageContent(data, config);
224         
225         if (null == getExpirationMillis())
226             return getContent( data, null, true);
227         
228         if (getExpirationMillis().longValue() <= System.currentTimeMillis())
229             return getWebPageContent(data, config);
230 
231         return getContent( data, null , true );
232     }
233 
234     private ConcreteElement getWebPageContent( RunData data, PortletConfig config )
235     {    
236         
237         String convertedString = null;  // parsed and re-written HTML
238         JetspeedClearElement element = null;
239 
240         String url = selectUrl( data, config );
241 
242         try 
243         {
244             Reader htmlReader = getReader( url );
245             //FIXME: HTMLRewriter should take a Reader, and work
246             convertedString = rewriter.convertURLs(htmlReader, url);
247             element = new JetspeedClearElement(convertedString);
248 
249             //FIXME: We should do a clearContent() for the media type, not ALL media types
250             this.clearContent();  // doing this because setContent() is not overwriting current content.
251             this.setContent(element);
252 
253             htmlReader.close();
254 
255         } catch (Exception e) {
256             logger.info("Exception occurred:" + e.toString(), e);
257         }        
258 
259         return element;
260     }
261     
262     /***
263      * Usually called by caching system when portlet is marked as expired, but
264      * has not be idle longer then TimeToLive.
265      *
266      * Any cached content that is expired need to be refreshed.
267      */
268     public void refresh() {
269         if (cacheContent == true) {
270           getWebPageContent(null, this.getPortletConfig());
271         }
272     }
273 
274     /***
275     * Select the URL to use for this portlet.
276     * @return The URL to use for this portlet
277     */
278     protected String selectUrl( RunData data, PortletConfig config )
279     {
280         String url = config.getURL();
281 
282         return url;
283 
284     }   // selectUrl
285 
286 }