1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.jetspeed.search.lucene;
18
19 import java.io.File;
20 import java.io.IOException;
21 import java.net.URL;
22 import java.util.ArrayList;
23 import java.util.Collection;
24 import java.util.HashMap;
25 import java.util.Iterator;
26 import java.util.Map;
27 import java.util.Set;
28
29 import org.apache.commons.collections.MultiHashMap;
30 import org.apache.commons.collections.MultiMap;
31 import org.apache.commons.logging.Log;
32 import org.apache.commons.logging.LogFactory;
33 import org.apache.jetspeed.search.BaseParsedObject;
34 import org.apache.jetspeed.search.HandlerFactory;
35 import org.apache.jetspeed.search.ObjectHandler;
36 import org.apache.jetspeed.search.ParsedObject;
37 import org.apache.jetspeed.search.SearchEngine;
38 import org.apache.jetspeed.search.SearchResults;
39 import org.apache.lucene.analysis.Analyzer;
40 import org.apache.lucene.analysis.standard.StandardAnalyzer;
41 import org.apache.lucene.document.Document;
42 import org.apache.lucene.document.Field;
43 import org.apache.lucene.index.IndexReader;
44 import org.apache.lucene.index.IndexWriter;
45 import org.apache.lucene.index.Term;
46 import org.apache.lucene.queryParser.MultiFieldQueryParser;
47 import org.apache.lucene.queryParser.ParseException;
48 import org.apache.lucene.search.Hits;
49 import org.apache.lucene.search.IndexSearcher;
50 import org.apache.lucene.search.Query;
51 import org.apache.lucene.search.Searcher;
52
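/**
 * Lucene-backed implementation of {@link SearchEngine}. Objects are turned into
 * Lucene documents through the {@link ObjectHandler} returned by the configured
 * {@link HandlerFactory} and are stored in an index below a root directory.
 *
 * @author <a href="mailto:jford@apache.org">Jeremy Ford</a>
 */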
57 public class SearchEngineImpl implements SearchEngine
58 {
59 protected final static Log log = LogFactory.getLog(SearchEngineImpl.class);
60 private File rootIndexDir = null;
61 private String analyzerClassName = null;
62 private boolean optimizeAfterUpdate = true;
63 private HandlerFactory handlerFactory;
64
65 private static final int KEYWORD = 0;
66 private static final int TEXT = 1;
67
/**
 * Creates a search engine working on the Lucene index in <code>indexRoot</code>.
 * <p>
 * The index is probed by opening and closing a searcher. If that fails, the
 * directory is re-created and a new, empty index is written into it.
 *
 * @param indexRoot path of the directory holding the index
 * @param analyzerClassName class name of the analyzer to use; may be null, in
 *        which case a StandardAnalyzer is used
 * @param optimzeAfterUpdate whether to optimize the index after adds and removes
 * @param handlerFactory factory that supplies the handlers used to parse objects
 * @throws Exception if the index can neither be opened nor re-created; the
 *         underlying failure is attached as the cause
 */
public SearchEngineImpl(String indexRoot, String analyzerClassName, boolean optimzeAfterUpdate, HandlerFactory handlerFactory)
throws Exception
{
    rootIndexDir = new File(indexRoot);
    this.analyzerClassName = analyzerClassName;
    this.optimizeAfterUpdate = optimzeAfterUpdate;
    this.handlerFactory = handlerFactory;

    try
    {
        // Probe only: opening a searcher fails when the index is missing or unreadable.
        Searcher searcher = new IndexSearcher(rootIndexDir.getPath());
        searcher.close();
    }
    catch (Exception e)
    {
        // A missing directory is the normal first-start case, so only an
        // existing but unreadable index is reported as an error.
        if (rootIndexDir.exists())
        {
            log.error("Failed to open Portal Registry indexes in " + rootIndexDir.getPath(), e);
        }

        IndexWriter indexWriter = null;
        try
        {
            rootIndexDir.delete();
            rootIndexDir.mkdirs();

            // create == true writes a new, empty index into the directory.
            indexWriter = new IndexWriter(rootIndexDir, newAnalyzer(), true);
            indexWriter.close();
            indexWriter = null;
            log.warn("Re-created Lucene Index in " + rootIndexDir.getPath());
        }
        catch (Exception e1)
        {
            String message = "Cannot RECREATE Portlet Registry indexes in " + rootIndexDir.getPath();
            log.error(message, e1);
            // Chain the root cause so callers can see why re-creation failed.
            throw new Exception(message, e1);
        }
        finally
        {
            // Only non-null when close() itself failed above; retry once so the
            // writer is not left open, and keep the original exception.
            if (indexWriter != null)
            {
                try
                {
                    indexWriter.close();
                }
                catch (Exception ignored)
                {
                    log.warn("Failed to close index writer for " + rootIndexDir.getPath(), ignored);
                }
            }
        }
    }
}
107
108
109
110
111 public boolean add(Object o)
112 {
113 Collection c = new ArrayList(1);
114 c.add(o);
115
116 return add(c);
117 }
118
119
120
121
122 public synchronized boolean add(Collection objects)
123 {
124 boolean result = false;
125
126 IndexWriter indexWriter;
127 try
128 {
129 indexWriter = new IndexWriter(rootIndexDir, newAnalyzer(), false);
130 }
131 catch (IOException e)
132 {
133
134 return result;
135 }
136
137 Iterator it = objects.iterator();
138 while (it.hasNext())
139 {
140 Object o = it.next();
141
142 ObjectHandler handler = null;
143 try
144 {
145 handler = handlerFactory.getHandler(o);
146 }
147 catch (Exception e)
148 {
149
150 continue;
151 }
152
153
154 ParsedObject parsedObject = handler.parseObject(o);
155
156
157 Document doc = new Document();
158
159
160 if (parsedObject.getKey() != null)
161 {
162 doc.add(new Field(ParsedObject.FIELDNAME_KEY, parsedObject.getKey(), Field.Store.YES, Field.Index.UN_TOKENIZED));
163 }
164 if (parsedObject.getType() != null)
165 {
166 doc.add(new Field(ParsedObject.FIELDNAME_TYPE, parsedObject.getType(), Field.Store.YES, Field.Index.TOKENIZED));
167 }
168 if (parsedObject.getTitle() != null)
169 {
170 doc.add(new Field(ParsedObject.FIELDNAME_TITLE, parsedObject.getTitle(), Field.Store.YES, Field.Index.TOKENIZED));
171 }
172 if (parsedObject.getDescription() != null)
173 {
174 doc.add(new Field(ParsedObject.FIELDNAME_DESCRIPTION, parsedObject.getDescription(), Field.Store.YES, Field.Index.TOKENIZED));
175 }
176 if (parsedObject.getContent() != null)
177 {
178 doc.add(new Field(ParsedObject.FIELDNAME_CONTENT, parsedObject.getContent(), Field.Store.YES, Field.Index.TOKENIZED));
179 }
180 if (parsedObject.getLanguage() != null)
181 {
182 doc.add(new Field(ParsedObject.FIELDNAME_LANGUAGE, parsedObject.getLanguage(), Field.Store.YES, Field.Index.TOKENIZED));
183 }
184 if (parsedObject.getURL() != null)
185 {
186 doc.add(new Field(ParsedObject.FIELDNAME_URL, parsedObject.getURL().toString(), Field.Store.YES, Field.Index.TOKENIZED));
187 }
188 if(parsedObject.getClassName() != null)
189 {
190 doc.add(new Field(ParsedObject.FIELDNAME_CLASSNAME, parsedObject.getClassName(), Field.Store.YES, Field.Index.TOKENIZED));
191 }
192
193 String[] keywordArray = parsedObject.getKeywords();
194 if(keywordArray != null)
195 {
196 for(int i=0; i<keywordArray.length; ++i)
197 {
198 String keyword = keywordArray[i];
199 doc.add(new Field(ParsedObject.FIELDNAME_KEYWORDS, keyword, Field.Store.YES, Field.Index.UN_TOKENIZED));
200 }
201 }
202
203 Map keywords = parsedObject.getKeywordsMap();
204 addFieldsToDocument(doc, keywords, KEYWORD);
205
206 Map fields = parsedObject.getFields();
207 addFieldsToDocument(doc, fields, TEXT);
208
209
210 try
211 {
212 indexWriter.addDocument(doc);
213 }
214 catch (IOException e)
215 {
216
217 }
218
219
220 result = true;
221 }
222
223 try
224 {
225 if(optimizeAfterUpdate)
226 {
227 indexWriter.optimize();
228 }
229 }
230 catch (IOException e)
231 {
232
233 }
234 finally
235 {
236 try
237 {
238 indexWriter.close();
239 }
240 catch (IOException e)
241 {
242
243 }
244 }
245
246 return result;
247 }
248
249
250
251
252 public boolean remove(Object o)
253 {
254 Collection c = new ArrayList(1);
255 c.add(o);
256
257 return remove(c);
258 }
259
260
261
262
263 public synchronized boolean remove(Collection objects)
264 {
265 boolean result = false;
266
267 try
268 {
269 IndexReader indexReader = IndexReader.open(this.rootIndexDir);
270
271 Iterator it = objects.iterator();
272 while (it.hasNext())
273 {
274 Object o = it.next();
275
276 ObjectHandler handler = handlerFactory.getHandler(o);
277
278
279 ParsedObject parsedObject = handler.parseObject(o);
280
281
282 Term term = null;
283
284 if (parsedObject.getKey() != null)
285 {
286 term = new Term(ParsedObject.FIELDNAME_KEY, parsedObject.getKey());
287
288 int rc = indexReader.deleteDocuments(term);
289
290
291 result = rc > 0;
292 }
293 }
294
295 indexReader.close();
296
297 if(optimizeAfterUpdate)
298 {
299 optimize();
300 }
301
302 }
303 catch (Exception e)
304 {
305
306 result = false;
307 }
308
309 return result;
310 }
311
312
313
314
315 public boolean update(Object o)
316 {
317 Collection c = new ArrayList(1);
318 c.add(o);
319
320 return update(c);
321 }
322
323
324
325
326 public synchronized boolean update(Collection objects)
327 {
328 boolean result = false;
329
330 try
331 {
332
333 remove(objects);
334 result = true;
335 }
336 catch (Throwable e)
337 {
338
339 }
340
341 try
342 {
343
344 if(result)
345 {
346 add(objects);
347 result = true;
348 }
349 }
350 catch (Throwable e)
351 {
352
353 }
354
355 return result;
356 }
357
358
359
360
361 public synchronized boolean optimize()
362 {
363 boolean result = false;
364
365 try
366 {
367 IndexWriter indexWriter = new IndexWriter(rootIndexDir, newAnalyzer(), false);
368 indexWriter.optimize();
369 indexWriter.close();
370 result = true;
371 }
372 catch (IOException e)
373 {
374
375 }
376 return result;
377 }
378
379
380
381
382 public SearchResults search(String queryString)
383 {
384 Searcher searcher = null;
385 Hits hits = null;
386
387 try
388 {
389 searcher = new IndexSearcher(rootIndexDir.getPath());
390 }
391 catch (IOException e)
392 {
393
394 return null;
395 }
396
397 Analyzer analyzer = newAnalyzer();
398
399 String[] searchFields = {ParsedObject.FIELDNAME_CONTENT, ParsedObject.FIELDNAME_DESCRIPTION, ParsedObject.FIELDNAME_FIELDS,
400 ParsedObject.FIELDNAME_KEY, ParsedObject.FIELDNAME_KEYWORDS, ParsedObject.FIELDNAME_LANGUAGE,
401 ParsedObject.FIELDNAME_SCORE, ParsedObject.FIELDNAME_TITLE, ParsedObject.FIELDNAME_TYPE,
402 ParsedObject.FIELDNAME_URL, ParsedObject.FIELDNAME_CLASSNAME};
403
404 Query query= null;
405 try
406 {
407 String s[] = new String[searchFields.length];
408 for(int i=0;i<s.length;i++)
409 s[i] = queryString;
410 query = MultiFieldQueryParser.parse(s, searchFields, analyzer);
411
412 }
413 catch (ParseException e)
414 {
415
416 return null;
417 }
418
419 try
420 {
421 hits = searcher.search(query);
422 }
423 catch (IOException e)
424 {
425
426 return null;
427 }
428
429 int hitNum = hits.length();
430 ArrayList resultList = new ArrayList(hitNum);
431 for(int i=0; i<hitNum; i++)
432 {
433 ParsedObject result = new BaseParsedObject();
434 try
435 {
436 Document doc = hits.doc(i);
437
438 addFieldsToParsedObject(doc, result);
439
440 result.setScore(hits.score(i));
441 Field type = doc.getField(ParsedObject.FIELDNAME_TYPE);
442 if(type != null)
443 {
444 result.setType(type.stringValue());
445 }
446
447 Field key = doc.getField(ParsedObject.FIELDNAME_KEY);
448 if(key != null)
449 {
450 result.setKey(key.stringValue());
451 }
452
453 Field description = doc.getField(ParsedObject.FIELDNAME_DESCRIPTION);
454 if(description != null)
455 {
456 result.setDescription(description.stringValue());
457 }
458
459 Field title = doc.getField(ParsedObject.FIELDNAME_TITLE);
460 if(title != null)
461 {
462 result.setTitle(title.stringValue());
463 }
464
465 Field content = doc.getField(ParsedObject.FIELDNAME_CONTENT);
466 if(content != null)
467 {
468 result.setContent(content.stringValue());
469 }
470
471 Field language = doc.getField(ParsedObject.FIELDNAME_LANGUAGE);
472 if (language != null)
473 {
474 result.setLanguage(language.stringValue());
475 }
476
477 Field classname = doc.getField(ParsedObject.FIELDNAME_CLASSNAME);
478 if (classname != null)
479 {
480 result.setClassName(classname.stringValue());
481 }
482
483 Field url = doc.getField(ParsedObject.FIELDNAME_URL);
484 if (url != null)
485 {
486 result.setURL(new URL(url.stringValue()));
487 }
488
489 Field[] keywords = doc.getFields(ParsedObject.FIELDNAME_KEYWORDS);
490 if(keywords != null)
491 {
492 String[] keywordArray = new String[keywords.length];
493
494 for(int j=0; j<keywords.length; j++)
495 {
496 Field keyword = keywords[j];
497 keywordArray[j] = keyword.stringValue();
498 }
499
500 result.setKeywords(keywordArray);
501 }
502
503 resultList.add(i, result);
504 }
505 catch(IOException e)
506 {
507
508 }
509 }
510
511 if (searcher != null)
512 {
513 try
514 {
515 searcher.close();
516 }
517 catch (IOException ioe)
518 {
519
520 }
521 }
522
523 SearchResults results = new SearchResultsImpl(resultList);
524 return results;
525 }
526
527 private Analyzer newAnalyzer() {
528 Analyzer rval = null;
529
530 if(analyzerClassName != null)
531 {
532 try {
533 Class analyzerClass = Class.forName(analyzerClassName);
534 rval = (Analyzer) analyzerClass.newInstance();
535 } catch(InstantiationException e) {
536
537 } catch(ClassNotFoundException e) {
538
539 } catch(IllegalAccessException e) {
540
541 }
542 }
543
544 if(rval == null) {
545 rval = new StandardAnalyzer();
546 }
547
548 return rval;
549 }
550
551 private void addFieldsToDocument(Document doc, Map fields, int type)
552 {
553 if(fields != null)
554 {
555 Iterator keyIter = fields.keySet().iterator();
556 while(keyIter.hasNext())
557 {
558 Object key = keyIter.next();
559 if(key != null)
560 {
561 Object values = fields.get(key);
562 if(values != null)
563 {
564 if(values instanceof Collection)
565 {
566 Iterator valueIter = ((Collection)values).iterator();
567 while(valueIter.hasNext())
568 {
569 Object value = valueIter.next();
570 if(value != null)
571 {
572 if(type == TEXT)
573 {
574 doc.add(new Field(key.toString(), value.toString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
575 }
576 else
577 {
578 doc.add(new Field(key.toString(), value.toString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
579 }
580 }
581 }
582 }
583 else
584 {
585 if(type == TEXT)
586 {
587 doc.add(new Field(key.toString(), values.toString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
588 }
589 else
590 {
591 doc.add(new Field(key.toString(), values.toString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
592 }
593 }
594 }
595 }
596 }
597 }
598 }
599
600 private void addFieldsToParsedObject(Document doc, ParsedObject o)
601 {
602 try
603 {
604 MultiMap multiKeywords = new MultiHashMap();
605 MultiMap multiFields = new MultiHashMap();
606 HashMap fieldMap = new HashMap();
607
608 Field classNameField = doc.getField(ParsedObject.FIELDNAME_CLASSNAME);
609 if(classNameField != null)
610 {
611 String className = classNameField.stringValue();
612 o.setClassName(className);
613 ObjectHandler handler = handlerFactory.getHandler(className);
614
615 Set fields = handler.getFields();
616 addFieldsToMap(doc, fields, multiFields);
617 addFieldsToMap(doc, fields, fieldMap);
618
619 Set keywords = handler.getKeywords();
620 addFieldsToMap(doc, keywords, multiKeywords);
621 }
622
623 o.setKeywordsMap(multiKeywords);
624 o.setFields(multiFields);
625 o.setFields(fieldMap);
626 }
627 catch(Exception e)
628 {
629
630 }
631 }
632
633 private void addFieldsToMap(Document doc, Set fieldNames, Map fields)
634 {
635 Iterator fieldIter = fieldNames.iterator();
636 while(fieldIter.hasNext())
637 {
638 String fieldName = (String)fieldIter.next();
639 Field[] docFields = doc.getFields(fieldName);
640 if(docFields != null)
641 {
642 for(int i=0; i<docFields.length; i++)
643 {
644 Field field = docFields[i];
645 if(field != null)
646 {
647 String value = field.stringValue();
648 fields.put(fieldName, value);
649 }
650 }
651 }
652 }
653 }
654 }