1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.jetspeed.search.handlers;
18
19
20 import java.io.IOException;
21 import java.net.URL;
22
23 import org.apache.commons.httpclient.HttpClient;
24 import org.apache.commons.httpclient.HttpException;
25 import org.apache.commons.httpclient.methods.GetMethod;
26 import org.apache.jetspeed.search.AbstractObjectHandler;
27 import org.apache.jetspeed.search.BaseParsedObject;
28
29 /***
30 * This object handler deals with URLs.
31 *
32 * @author <a href="mailto:morciuch@apache.org">Mark Orciuch</a>
33 * @version $Id: URLToDocHandler.java 516448 2007-03-09 16:25:47Z ate $
34 */
35 public class URLToDocHandler extends AbstractObjectHandler
36 {
37 /***
38 * Static initialization of the logger for this class
39 */
40
41
42 /***
43 * Parses a specific object into a document suitable for index placement
44 *
45 * @param o
46 * @return
47 */
48 public org.apache.jetspeed.search.ParsedObject parseObject(Object o)
49 {
50 org.apache.jetspeed.search.ParsedObject result = new BaseParsedObject();
51
52 if ((o instanceof URL) == false)
53 {
54
55 return null;
56 }
57
58 URL pageToAdd = (URL) o;
59
60 HttpClient client = new HttpClient();
61 GetMethod method = new GetMethod(pageToAdd.toString());
62 method.setFollowRedirects(true);
63 int statusCode = -1;
64 int attempt = 0;
65
66 try
67 {
68
69 while (statusCode == -1 && attempt < 3)
70 {
71 try
72 {
73
74 client.executeMethod(method);
75 statusCode = method.getStatusCode();
76
77 {
78
79 }
80 }
81 catch (HttpException e)
82 {
83
84 attempt++;
85 }
86 catch (IOException e)
87 {
88 return null;
89 }
90 }
91
92 if (statusCode != -1)
93 {
94 String content = null;
95 try
96 {
97 content = method.getResponseBodyAsString();
98 }
99 catch (Exception ioe)
100 {
101
102 }
103
104 if (content != null)
105 {
106 try
107 {
108 result.setKey(java.net.URLEncoder.encode(pageToAdd.toString(),"UTF-8"));
109 result.setType(org.apache.jetspeed.search.ParsedObject.OBJECT_TYPE_URL);
110
111 result.setTitle(pageToAdd.toString());
112 result.setContent(content);
113 result.setDescription("");
114 result.setLanguage("");
115 result.setURL(pageToAdd);
116 result.setClassName(o.getClass().getName());
117
118 }
119 catch (Exception e)
120 {
121 e.printStackTrace();
122
123 }
124 }
125 }
126 }
127 finally
128 {
129 method.releaseConnection();
130 }
131
132 return result;
133
134 }
135 }
136