1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.jetspeed.services.search.lucene;
18
19
20 import java.io.File;
21 import java.io.IOException;
22 import java.net.URL;
23 import javax.servlet.ServletConfig;
24 import java.util.Collection;
25 import java.util.ArrayList;
26 import java.util.HashMap;
27 import java.util.Iterator;
28 import java.util.Map;
29 import java.util.Set;
30
31
32 import org.apache.commons.collections.MultiHashMap;
33 import org.apache.commons.collections.MultiMap;
34 import org.apache.jetspeed.services.logging.JetspeedLogFactoryService;
35 import org.apache.jetspeed.services.logging.JetspeedLogger;
36 import org.apache.jetspeed.services.search.HandlerFactory;
37 import org.apache.jetspeed.services.search.ObjectHandler;
38 import org.apache.jetspeed.services.search.ParsedObject;
39 import org.apache.jetspeed.services.search.BaseParsedObject;
40 import org.apache.jetspeed.services.search.SearchResults;
41 import org.apache.jetspeed.services.search.SearchService;
42
43
44 import org.apache.turbine.services.InitializationException;
45 import org.apache.turbine.services.resources.ResourceService;
46 import org.apache.turbine.services.servlet.TurbineServlet;
47 import org.apache.turbine.services.TurbineBaseService;
48 import org.apache.turbine.services.TurbineServices;
49
50
51 import org.apache.lucene.analysis.Analyzer;
52 import org.apache.lucene.analysis.standard.StandardAnalyzer;
53 import org.apache.lucene.document.Document;
54 import org.apache.lucene.document.Field;
55 import org.apache.lucene.index.Term;
56 import org.apache.lucene.index.IndexWriter;
57 import org.apache.lucene.index.IndexReader;
58 import org.apache.lucene.queryParser.MultiFieldQueryParser;
59 import org.apache.lucene.queryParser.ParseException;
60 import org.apache.lucene.queryParser.QueryParser;
61 import org.apache.lucene.search.IndexSearcher;
62 import org.apache.lucene.search.Hits;
63 import org.apache.lucene.search.Query;
64 import org.apache.lucene.search.Searcher;
65
66 /***
67 * Lucene implementation of search service.
68 *
69 * @author <a href="mailto:taylor@apache.org">David Sean taylor</a>
70 * @author <a href="mailto:caius1440@hotmail.com">Jeremy Ford</a>
71 * @author <a href="mailto:morciuch@apache.org">Mark Orciuch</a>
72 * @version $Id: LuceneSearchService.java,v 1.10 2004/03/05 03:49:15 jford Exp $
73 */
74 public class LuceneSearchService extends TurbineBaseService implements SearchService
75 {
76 /***
77 * Static initialization of the logger for this class
78 */
79 private static final JetspeedLogger logger = JetspeedLogFactoryService.getLogger(LuceneSearchService.class.getName());
80
81 private static final int KEYWORD = 0;
82 private static final int TEXT = 1;
83
84 private static final String CONFIG_DIRECTORY = "directory";
85 private File rootDir = null;
86 private String indexRoot = null;
87
88 /***
89 * This is the early initialization method called by the
90 * Turbine <code>Service</code> framework
91 * @param conf The <code>ServletConfig</code>
92 * @exception throws a <code>InitializationException</code> if the service
93 * fails to initialize
94 */
95 public synchronized void init(ServletConfig conf) throws InitializationException
96 {
97
98
99 if (getInit())
100 {
101 return;
102 }
103
104 initConfiguration(conf);
105
106
107 setInit(true);
108
109 }
110
111 /***
112 * This is the lateinitialization method called by the
113 * Turbine <code>Service</code> framework
114 *
115 * @exception throws a <code>InitializationException</code> if the service
116 * fails to initialize
117 */
118 public void init() throws InitializationException
119 {
120 logger.info("Late init for " + SearchService.SERVICE_NAME + " called");
121 while (!getInit())
122 {
123
124 try
125 {
126 Thread.sleep(100);
127 logger.info("Waiting for init of " + SearchService.SERVICE_NAME + "...");
128 }
129 catch (InterruptedException ie)
130 {
131 logger.error("Exception", ie);
132 }
133 }
134 }
135
136 /***
137 * This is the shutdown method called by the
138 * Turbine <code>Service</code> framework
139 */
140 public void shutdown()
141 {
142 }
143
144 /***
145 * Loads the configuration parameters for this service from the
146 * JetspeedResources.properties file.
147 *
148 * @exception throws a <code>InitializationException</code> if the service
149 * fails to initialize
150 */
151 private void initConfiguration(ServletConfig conf) throws InitializationException
152 {
153 if (getInit())
154 {
155 return;
156 }
157
158
159 ResourceService serviceConf = ((TurbineServices) TurbineServices.getInstance())
160 .getResources(SearchService.SERVICE_NAME);
161
162
163 indexRoot = serviceConf.getString(CONFIG_DIRECTORY);
164
165
166
167
168 rootDir = new File(indexRoot);
169
170
171 if (!rootDir.exists())
172 {
173 if (indexRoot != null)
174 {
175 String rootDirPath = TurbineServlet.getRealPath("") + indexRoot;
176 rootDir = new File(rootDirPath);
177 if (!rootDir.exists())
178 {
179 rootDir.mkdir();
180 logger.info("Created index directory '" + rootDir.getPath() + "'");
181 }
182 }
183 }
184
185 try
186 {
187 Searcher searcher = null;
188 searcher = new IndexSearcher(rootDir.getPath());
189 searcher.close();
190 }
191 catch (Exception e)
192 {
193 try
194 {
195 IndexWriter indexWriter = new IndexWriter(rootDir, new StandardAnalyzer(), true);
196 indexWriter.close();
197 indexWriter = null;
198 logger.info("Created Lucene Index in " + rootDir.getPath());
199 }
200 catch (Exception e1)
201 {
202 logger.error(this.getClass().getName() + ".initConfiguration - Getting or creating IndexSearcher", e);
203 throw new InitializationException("Getting or creating Index Searcher");
204 }
205 }
206
207
208 setInit(true);
209 }
210
211 /***
212 * Search
213 *
214 * @task Parse content into title and description fields
215 * @param searchString
216 * is the what is being searched for
217 * @return Hits, if no hits then null.
218 */
219 public SearchResults search(String searchString)
220 {
221 Searcher searcher = null;
222 Hits hits = null;
223
224 try
225 {
226 searcher = new IndexSearcher(rootDir.getPath());
227 }
228 catch (IOException e)
229 {
230 logger.error("Failed to create index search using path " + rootDir.getPath());
231 return null;
232 }
233
234 Analyzer analyzer = new StandardAnalyzer();
235
236 String[] searchFields = {ParsedObject.FIELDNAME_CONTENT, ParsedObject.FIELDNAME_DESCRIPTION, ParsedObject.FIELDNAME_FIELDS,
237 ParsedObject.FIELDNAME_KEY, ParsedObject.FIELDNAME_KEYWORDS, ParsedObject.FIELDNAME_LANGUAGE,
238 ParsedObject.FIELDNAME_SCORE, ParsedObject.FIELDNAME_TITLE, ParsedObject.FIELDNAME_TYPE,
239 ParsedObject.FIELDNAME_URL, ParsedObject.FIELDNAME_CLASSNAME};
240
241 Query query= null;
242 try
243 {
244 query = MultiFieldQueryParser.parse(searchString, searchFields, analyzer);
245
246 }
247 catch (ParseException e)
248 {
249 logger.info("Failed to parse query " + searchString);
250 return null;
251 }
252
253 try
254 {
255 hits = searcher.search(query);
256 }
257 catch (IOException e)
258 {
259 logger.error("Error while peforming search.", e);
260 return null;
261 }
262
263
264 int hitCount = hits.length();
265 Document doc = null;
266 SearchResults results = new SearchResults(hitCount);
267 for (int counter = 0; counter < hitCount; counter++)
268 {
269 ParsedObject result = new BaseParsedObject();
270 try
271 {
272 doc = hits.doc(counter);
273 addFieldsToParsedObject(doc, result);
274
275 result.setScore(hits.score(counter));
276 result.setType(doc.getField(ParsedObject.FIELDNAME_TYPE).stringValue());
277 result.setKey(doc.getField(ParsedObject.FIELDNAME_KEY).stringValue());
278 result.setDescription(doc.getField(ParsedObject.FIELDNAME_DESCRIPTION).stringValue());
279 result.setTitle(doc.getField(ParsedObject.FIELDNAME_TITLE).stringValue());
280 result.setContent(doc.getField(ParsedObject.FIELDNAME_CLASSNAME).stringValue());
281 Field language = doc.getField(ParsedObject.FIELDNAME_LANGUAGE);
282 if (language != null)
283 {
284 result.setLanguage(language.stringValue());
285 }
286 Field classname = doc.getField(ParsedObject.FIELDNAME_CLASSNAME);
287 if (classname != null)
288 {
289 result.setClassName(classname.stringValue());
290 }
291 Field url = doc.getField(ParsedObject.FIELDNAME_URL);
292 if (url != null)
293 {
294 result.setURL(new URL(url.stringValue()));
295 }
296
297 results.add(counter, result);
298 }
299 catch (Exception ioe)
300 {
301 logger.error("Exception", ioe);
302 }
303 }
304
305 if (searcher != null)
306 {
307 try
308 {
309 searcher.close();
310 }
311 catch (IOException ioe)
312 {
313 logger.error("Closing Searcher", ioe);
314 }
315 }
316 return results;
317 }
318
319 private void addFieldsToParsedObject(Document doc, ParsedObject o)
320 {
321 try
322 {
323 MultiMap multiKeywords = new MultiHashMap();
324 MultiMap multiFields = new MultiHashMap();
325 HashMap fieldMap = new HashMap();
326
327 Field classNameField = doc.getField(ParsedObject.FIELDNAME_CLASSNAME);
328 if(classNameField != null)
329 {
330 String className = classNameField.stringValue();
331 o.setClassName(className);
332 ObjectHandler handler = HandlerFactory.getHandler(className);
333
334 Set fields = handler.getFields();
335 addFieldsToMap(doc, fields, multiFields);
336 addFieldsToMap(doc, fields, fieldMap);
337
338 Set keywords = handler.getKeywords();
339 addFieldsToMap(doc, keywords, multiKeywords);
340 }
341
342 o.setMultiKeywords(multiKeywords);
343 o.setMultiFields(multiFields);
344 o.setFields(fieldMap);
345 }
346 catch(Exception e)
347 {
348 logger.error("Error trying to add fields to parsed object.", e);
349 }
350 }
351
352 private void addFieldsToMap(Document doc, Set fieldNames, Map fields)
353 {
354 Iterator fieldIter = fieldNames.iterator();
355 while(fieldIter.hasNext())
356 {
357 String fieldName = (String)fieldIter.next();
358 Field[] docFields = doc.getFields(fieldName);
359 if(fields != null)
360 {
361 for(int i=0; i<docFields.length; i++)
362 {
363 Field field = docFields[i];
364 if(field != null)
365 {
366 String value = field.stringValue();
367 fields.put(fieldName, value);
368 }
369 }
370 }
371 }
372 }
373
374 /***
375 *
376 * @return
377 */
378 public String[] getSearchSets()
379 {
380 return null;
381 }
382
383 /***
384 *
385 * @see org.apache.jetspeed.services.search.SearchService#add(java.lang.Object)
386 * @param o
387 * @return
388 */
389 public boolean add(Object o)
390 {
391 Collection c = new ArrayList(1);
392 c.add(o);
393
394 return add(c);
395 }
396
397 /***
398 *
399 * @see org.apache.jetspeed.services.search.SearchService#add(java.lang.Collection)
400 * @param c
401 * @return
402 */
403 public boolean add(Collection c)
404 {
405 boolean result = false;
406
407 IndexWriter indexWriter;
408 try
409 {
410 indexWriter = new IndexWriter(rootDir, new StandardAnalyzer(), false);
411 }
412 catch (IOException e)
413 {
414 logger.error("Error while creating index writer. Skipping add...", e);
415 return result;
416 }
417
418 Iterator it = c.iterator();
419 while (it.hasNext())
420 {
421 Object o = it.next();
422
423 ObjectHandler handler = null;
424 try
425 {
426 handler = HandlerFactory.getHandler(o);
427 }
428 catch (Exception e)
429 {
430 logger.error("Failed to create hanlder for object " + o.getClass().getName());
431 continue;
432 }
433
434
435 ParsedObject parsedObject = handler.parseObject(o);
436
437
438 Document doc = new Document();
439
440
441 if (parsedObject.getKey() != null)
442 {
443 doc.add(Field.Keyword(ParsedObject.FIELDNAME_KEY, parsedObject.getKey()));
444 }
445 if (parsedObject.getType() != null)
446 {
447 doc.add(Field.Text(ParsedObject.FIELDNAME_TYPE, parsedObject.getType()));
448 }
449 if (parsedObject.getTitle() != null)
450 {
451 doc.add(Field.Text(ParsedObject.FIELDNAME_TITLE, parsedObject.getTitle()));
452 }
453 if (parsedObject.getDescription() != null)
454 {
455 doc.add(Field.Text(ParsedObject.FIELDNAME_DESCRIPTION, parsedObject.getDescription()));
456 }
457 if (parsedObject.getContent() != null)
458 {
459 doc.add(Field.Text(ParsedObject.FIELDNAME_CONTENT, parsedObject.getContent()));
460 }
461 if (parsedObject.getLanguage() != null)
462 {
463 doc.add(Field.Text(ParsedObject.FIELDNAME_LANGUAGE, parsedObject.getLanguage()));
464 }
465 if (parsedObject.getURL() != null)
466 {
467 doc.add(Field.Text(ParsedObject.FIELDNAME_URL, parsedObject.getURL().toString()));
468 }
469 if(parsedObject.getClassName() != null)
470 {
471 doc.add(Field.Text(ParsedObject.FIELDNAME_CLASSNAME, parsedObject.getClassName()));
472 }
473
474 MultiMap multiKeywords = parsedObject.getMultiKeywords();
475 addFieldsToDocument(doc, multiKeywords, KEYWORD);
476
477 MultiMap multiFields = parsedObject.getMultiFields();
478 addFieldsToDocument(doc, multiFields, TEXT);
479
480 Map fields = parsedObject.getFields();
481 addFieldsToDocument(doc, fields, TEXT);
482
483
484 try
485 {
486 indexWriter.addDocument(doc);
487 }
488 catch (IOException e)
489 {
490 logger.error("Error adding document to index.", e);
491 }
492 logger.debug("Index Document Count = " + indexWriter.docCount());
493 logger.info("Added '" + parsedObject.getTitle() + "' to index");
494 result = true;
495 }
496
497 try
498 {
499 indexWriter.optimize();
500 }
501 catch (IOException e)
502 {
503 logger.error("Error while trying to optimize index.");
504 }
505 finally
506 {
507 try
508 {
509 indexWriter.close();
510 }
511 catch (IOException e)
512 {
513 logger.error("Error while closing index writer.", e);
514 }
515 }
516
517 return result;
518 }
519
520 private void addFieldsToDocument(Document doc, Map fields, int type)
521 {
522 if(fields != null)
523 {
524 Iterator keyIter = fields.keySet().iterator();
525 while(keyIter.hasNext())
526 {
527 Object key = keyIter.next();
528 if(key != null)
529 {
530 Object values = fields.get(key);
531 if(values != null)
532 {
533 if(values instanceof Collection)
534 {
535 Iterator valueIter = ((Collection)values).iterator();
536 while(valueIter.hasNext())
537 {
538 Object value = valueIter.next();
539 if(value != null)
540 {
541 if(type == TEXT)
542 {
543 doc.add(Field.Text(key.toString(), value.toString()));
544 }
545 else
546 {
547 doc.add(Field.Keyword(key.toString(), value.toString()));
548 }
549 }
550 }
551 }
552 else
553 {
554 if(type == TEXT)
555 {
556 doc.add(Field.Text(key.toString(), values.toString()));
557 }
558 else
559 {
560 doc.add(Field.Keyword(key.toString(), values.toString()));
561 }
562 }
563 }
564 }
565 }
566 }
567 }
568
569 /***
570 *
571 * @see org.apache.jetspeed.services.search.SearchService#remove(java.lang.Object)
572 * @param o
573 * @return
574 */
575 public boolean remove(Object o)
576 {
577 Collection c = new ArrayList(1);
578 c.add(o);
579
580 return remove(c);
581 }
582
583 /***
584 *
585 * @see org.apache.jetspeed.services.search.SearchService#remove(java.lang.Collection)
586 * @param c
587 * @return
588 */
589 public boolean remove(Collection c)
590 {
591 boolean result = false;
592
593 try
594 {
595 IndexReader indexReader = IndexReader.open(this.rootDir);
596
597 Iterator it = c.iterator();
598 while (it.hasNext())
599 {
600 Object o = it.next();
601
602 ObjectHandler handler = HandlerFactory.getHandler(o);
603
604
605 ParsedObject parsedObject = handler.parseObject(o);
606
607
608 Term term = null;
609
610 if (parsedObject.getKey() != null)
611 {
612 term = new Term(ParsedObject.FIELDNAME_KEY, parsedObject.getKey());
613
614 int rc = indexReader.delete(term);
615 logger.info("Attempted to delete '" + term.toString() + "' from index, documents deleted = " + rc);
616
617 result = rc > 0;
618 }
619 }
620
621 indexReader.close();
622
623 IndexWriter indexWriter = new IndexWriter(rootDir, new StandardAnalyzer(), false);
624 indexWriter.optimize();
625 indexWriter.close();
626
627 }
628 catch (Exception e)
629 {
630 logger.error("Exception", e);
631 result = false;
632 }
633
634 return result;
635 }
636
637 /***
638 *
639 * @see org.apache.jetspeed.services.search.SearchService#update(java.lang.Object)
640 * @param o
641 * @return
642 */
643 public boolean update(Object o)
644 {
645 Collection c = new ArrayList(1);
646 c.add(o);
647
648 return update(c);
649 }
650 /***
651 * Updates an index entry. For now, it's a remove and add.
652 *
653 * @param c
654 * @return
655 * @see org.apache.jetspeed.services.search.SearchService#update(java.lang.Collection)
656 */
657 public boolean update(Collection c)
658 {
659 boolean result = false;
660
661 try
662 {
663
664 remove(c);
665 result = true;
666 }
667 catch (Throwable e)
668 {
669 logger.error("Exception", e);
670 }
671
672 try
673 {
674
675 add(c);
676 result = true;
677 }
678 catch (Throwable e)
679 {
680 logger.error("Exception", e);
681 }
682
683 return false;
684 }
685
686 }