1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package org.apache.jetspeed.services.search.handlers;
17
18
19 import java.io.IOException;
20 import java.net.URL;
21
22
23 import org.apache.commons.httpclient.HttpClient;
24 import org.apache.commons.httpclient.HttpException;
25 import org.apache.commons.httpclient.methods.GetMethod;
26
27
28 import org.apache.jetspeed.services.logging.JetspeedLogFactoryService;
29 import org.apache.jetspeed.services.logging.JetspeedLogger;
30 import org.apache.jetspeed.services.search.AbstractObjectHandler;
31 import org.apache.jetspeed.services.search.BaseParsedObject;
32 import org.apache.jetspeed.services.search.ParsedObject;
33
34 /***
35 * This object handler deals with URLs.
36 *
37 * @author <a href="mailto:morciuch@apache.org">Mark Orciuch</a>
38 * @version $Id: URLToDocHandler.java,v 1.4 2004/02/23 03:47:46 jford Exp $
39 */
40 public class URLToDocHandler extends AbstractObjectHandler
41 {
42 /***
43 * Static initialization of the logger for this class
44 */
45 private static final JetspeedLogger logger = JetspeedLogFactoryService.getLogger(URLToDocHandler.class.getName());
46
47 /***
48 * Parses a specific object into a document suitable for index placement
49 *
50 * @param o
51 * @return
52 */
53 public ParsedObject parseObject(Object o)
54 {
55 ParsedObject result = new BaseParsedObject();
56
57 if ((o instanceof URL) == false)
58 {
59 logger.error("URLToDocHandler: invalid object type: " + o);
60 return null;
61 }
62
63 URL pageToAdd = (URL) o;
64
65 HttpClient client = new HttpClient();
66 client.startSession(pageToAdd);
67 GetMethod method = new GetMethod(pageToAdd.getPath());
68 method.setFollowRedirects(true);
69 int statusCode = -1;
70 int attempt = 0;
71
72
73 while (statusCode == -1 && attempt < 3)
74 {
75 try
76 {
77
78 client.executeMethod(method);
79 statusCode = method.getStatusCode();
80 if (logger.isDebugEnabled())
81 {
82 logger.debug("URL = " + pageToAdd.toString() + "Status code = " + statusCode);
83 }
84 }
85 catch (HttpException e)
86 {
87
88 }
89 catch (IOException e)
90 {
91 return null;
92 }
93 }
94
95 if (statusCode != -1)
96 {
97 String content = null;
98 try
99 {
100 content = method.getDataAsString();
101 }
102 catch (IOException ioe)
103 {
104 logger.error("Getting content for " + pageToAdd.toString(), ioe);
105 }
106
107 if (content != null)
108 {
109 try
110 {
111 result.setKey(java.net.URLEncoder.encode(pageToAdd.toString()));
112 result.setType(ParsedObject.OBJECT_TYPE_URL);
113
114 result.setTitle(pageToAdd.toString());
115 result.setContent(content);
116 result.setDescription("");
117 result.setLanguage("");
118 result.setURL(pageToAdd);
119 result.setClassName(o.getClass().getName());
120 logger.info("Parsed '" + pageToAdd.toString() + "'");
121 }
122 catch (Exception e)
123 {
124 e.printStackTrace();
125 logger.error("Adding document to index", e);
126 }
127 }
128 }
129 try
130 {
131 client.endSession();
132 }
133 catch (IOException ioe)
134 {
135 ioe.printStackTrace();
136 logger.error("Ending session to " + pageToAdd.toString(), ioe);
137 }
138
139 return result;
140
141 }
142 }
143