View Javadoc

1   /*
2    * Copyright 2000-2004 The Apache Software Foundation.
3    * 
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * 
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    * 
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package org.apache.jetspeed.services.search.lucene;
18  
19  // Java imports
20  import java.io.File;
21  import java.io.IOException;
22  import java.net.URL;
23  import javax.servlet.ServletConfig;
24  import java.util.Collection;
25  import java.util.ArrayList;
26  import java.util.HashMap;
27  import java.util.Iterator;
28  import java.util.Map;
29  import java.util.Set;
30  
31  // Jetspeed imports
32  import org.apache.commons.collections.MultiHashMap;
33  import org.apache.commons.collections.MultiMap;
34  import org.apache.jetspeed.services.logging.JetspeedLogFactoryService;
35  import org.apache.jetspeed.services.logging.JetspeedLogger;
36  import org.apache.jetspeed.services.search.HandlerFactory;
37  import org.apache.jetspeed.services.search.ObjectHandler;
38  import org.apache.jetspeed.services.search.ParsedObject;
39  import org.apache.jetspeed.services.search.BaseParsedObject;
40  import org.apache.jetspeed.services.search.SearchResults;
41  import org.apache.jetspeed.services.search.SearchService;
42  
43  // Turbine imports
44  import org.apache.turbine.services.InitializationException;
45  import org.apache.turbine.services.resources.ResourceService;
46  import org.apache.turbine.services.servlet.TurbineServlet;
47  import org.apache.turbine.services.TurbineBaseService;
48  import org.apache.turbine.services.TurbineServices;
49  
50  // Lucene imports
51  import org.apache.lucene.analysis.Analyzer;
52  import org.apache.lucene.analysis.standard.StandardAnalyzer;
53  import org.apache.lucene.document.Document;
54  import org.apache.lucene.document.Field;
55  import org.apache.lucene.index.Term;
56  import org.apache.lucene.index.IndexWriter;
57  import org.apache.lucene.index.IndexReader;
58  import org.apache.lucene.queryParser.MultiFieldQueryParser;
59  import org.apache.lucene.queryParser.ParseException;
60  import org.apache.lucene.queryParser.QueryParser;
61  import org.apache.lucene.search.IndexSearcher;
62  import org.apache.lucene.search.Hits;
63  import org.apache.lucene.search.Query;
64  import org.apache.lucene.search.Searcher;
65  
66  /***
67   * Lucene implementation of search service.
68   *
69   * @author <a href="mailto:taylor@apache.org">David Sean taylor</a>
70   * @author <a href="mailto:caius1440@hotmail.com">Jeremy Ford</a>
71   * @author <a href="mailto:morciuch@apache.org">Mark Orciuch</a> 
72   * @version $Id: LuceneSearchService.java,v 1.10 2004/03/05 03:49:15 jford Exp $
73   */
74  public class LuceneSearchService extends TurbineBaseService implements SearchService
75  {
76      /***
77       * Static initialization of the logger for this class
78       */    
79      private static final JetspeedLogger logger = JetspeedLogFactoryService.getLogger(LuceneSearchService.class.getName());
80      
81      private static final int KEYWORD = 0;
82      private static final int TEXT = 1;
83      
84      private static final String CONFIG_DIRECTORY = "directory";
85      private File rootDir = null;
86      private String indexRoot = null;
87  
88      /***
89       * This is the early initialization method called by the
90       * Turbine <code>Service</code> framework
91       * @param conf The <code>ServletConfig</code>
92       * @exception throws a <code>InitializationException</code> if the service
93       * fails to initialize
94       */
95      public synchronized void init(ServletConfig conf) throws InitializationException
96      {
97  
98          // already initialized
99          if (getInit())
100         {
101             return;
102         }
103 
104         initConfiguration(conf);
105 
106         // initialization done
107         setInit(true);
108 
109     }
110 
111     /***
112      * This is the lateinitialization method called by the
113      * Turbine <code>Service</code> framework
114      *
115      * @exception throws a <code>InitializationException</code> if the service
116      * fails to initialize
117      */
118     public void init() throws InitializationException
119     {
120         logger.info("Late init for " + SearchService.SERVICE_NAME + " called");
121         while (!getInit())
122         {
123             //Not yet...
124             try
125             {
126                 Thread.sleep(100);
127                 logger.info("Waiting for init of " + SearchService.SERVICE_NAME + "...");
128             }
129             catch (InterruptedException ie)
130             {
131                 logger.error("Exception", ie);
132             }
133         }
134     }
135 
136     /***
137      * This is the shutdown method called by the
138      * Turbine <code>Service</code> framework
139      */
140     public void shutdown()
141     {
142     }
143 
144     /***
145      * Loads the configuration parameters for this service from the
146      * JetspeedResources.properties file.
147      *
148      * @exception throws a <code>InitializationException</code> if the service
149      * fails to initialize
150      */
151     private void initConfiguration(ServletConfig conf) throws InitializationException
152     {
153         if (getInit())
154         {
155             return;
156         }
157 
158         // get configuration parameters from Jetspeed Resources
159         ResourceService serviceConf = ((TurbineServices) TurbineServices.getInstance())
160                                       .getResources(SearchService.SERVICE_NAME);
161 
162         // Get config properties
163         indexRoot = serviceConf.getString(CONFIG_DIRECTORY);
164         //
165         // The following section opens or creates the search index
166         //
167         //
168         rootDir = new File(indexRoot);
169 
170         //If the rootDir does not exist, treat it as context relative
171         if (!rootDir.exists())
172         {
173             if (indexRoot != null)
174             {
175                 String rootDirPath = TurbineServlet.getRealPath("") + indexRoot;
176                 rootDir = new File(rootDirPath);
177                 if (!rootDir.exists())
178                 {
179                     rootDir.mkdir();
180                     logger.info("Created index directory '" + rootDir.getPath() + "'");
181                 }
182             }
183         }
184 
185         try
186         {
187             Searcher searcher = null;
188             searcher = new IndexSearcher(rootDir.getPath());
189             searcher.close();
190         }
191         catch (Exception e)
192         {
193             try
194             {
195                 IndexWriter indexWriter = new IndexWriter(rootDir, new StandardAnalyzer(), true);
196                 indexWriter.close();
197                 indexWriter = null;
198                 logger.info("Created Lucene Index in " + rootDir.getPath());
199             }
200             catch (Exception e1)
201             {
202                 logger.error(this.getClass().getName() + ".initConfiguration - Getting or creating IndexSearcher", e);
203                 throw new InitializationException("Getting or creating Index Searcher");
204             }
205         }
206 
207         //Mark that we are done
208         setInit(true);
209     }
210 
211     /***
212      * Search
213      * 
214      * @task Parse content into title and description fields
215      * @param searchString
216      *               is the what is being searched for
217      * @return Hits, if no hits then null.
218      */
219     public SearchResults search(String searchString)
220     {
221         Searcher searcher = null;
222         Hits hits = null;
223         
224         try
225         {
226             searcher = new IndexSearcher(rootDir.getPath());
227         }
228         catch (IOException e)
229         {
230             logger.error("Failed to create index search using path " + rootDir.getPath());
231             return null;
232         }
233         
234         Analyzer analyzer = new StandardAnalyzer();
235         
236         String[] searchFields = {ParsedObject.FIELDNAME_CONTENT, ParsedObject.FIELDNAME_DESCRIPTION, ParsedObject.FIELDNAME_FIELDS,
237                            ParsedObject.FIELDNAME_KEY, ParsedObject.FIELDNAME_KEYWORDS, ParsedObject.FIELDNAME_LANGUAGE,
238                            ParsedObject.FIELDNAME_SCORE, ParsedObject.FIELDNAME_TITLE, ParsedObject.FIELDNAME_TYPE,
239                            ParsedObject.FIELDNAME_URL, ParsedObject.FIELDNAME_CLASSNAME};
240                             
241         Query query= null;
242         try
243         {
244             query = MultiFieldQueryParser.parse(searchString, searchFields, analyzer);
245 //          Query query = QueryParser.parse(searchString, ParsedObject.FIELDNAME_CONTENT, analyzer);
246         }
247         catch (ParseException e)
248         {
249             logger.info("Failed to parse query " + searchString);
250             return null;
251         }
252         
253         try
254         {
255             hits = searcher.search(query);
256         }
257         catch (IOException e)
258         {
259            logger.error("Error while peforming search.", e);
260            return null;
261         }
262 
263         // Copy hits to the result list
264         int hitCount = hits.length();
265         Document doc = null;
266         SearchResults results = new SearchResults(hitCount);
267         for (int counter = 0; counter < hitCount; counter++)
268         {            
269             ParsedObject result = new BaseParsedObject();
270             try
271             {
272                 doc = hits.doc(counter);
273                 addFieldsToParsedObject(doc, result);
274                 
275                 result.setScore(hits.score(counter));
276                 result.setType(doc.getField(ParsedObject.FIELDNAME_TYPE).stringValue());
277                 result.setKey(doc.getField(ParsedObject.FIELDNAME_KEY).stringValue());
278                 result.setDescription(doc.getField(ParsedObject.FIELDNAME_DESCRIPTION).stringValue());
279                 result.setTitle(doc.getField(ParsedObject.FIELDNAME_TITLE).stringValue());
280                 result.setContent(doc.getField(ParsedObject.FIELDNAME_CLASSNAME).stringValue());
281                 Field language = doc.getField(ParsedObject.FIELDNAME_LANGUAGE);
282                 if (language != null)
283                 {
284                 	result.setLanguage(language.stringValue());
285                 }
286                 Field classname = doc.getField(ParsedObject.FIELDNAME_CLASSNAME);
287                 if (classname != null)
288                 {
289                 	result.setClassName(classname.stringValue());
290                 }
291                 Field url = doc.getField(ParsedObject.FIELDNAME_URL);
292                 if (url != null)
293                 {
294                     result.setURL(new URL(url.stringValue()));
295                 }
296                 
297                 results.add(counter, result);
298             }
299             catch (Exception ioe)
300             {
301                 logger.error("Exception", ioe);
302             }
303         }
304 
305         if (searcher != null)
306         {
307             try
308             {
309                 searcher.close();
310             }
311             catch (IOException ioe)
312             {
313                 logger.error("Closing Searcher", ioe);
314             }
315         }
316         return results;
317     }
318     
319     private void addFieldsToParsedObject(Document doc, ParsedObject o)
320     {
321         try
322         {
323             MultiMap multiKeywords = new MultiHashMap();
324             MultiMap multiFields = new MultiHashMap();
325             HashMap fieldMap = new HashMap();
326             
327             Field classNameField = doc.getField(ParsedObject.FIELDNAME_CLASSNAME);
328             if(classNameField != null)
329             {
330                 String className = classNameField.stringValue();
331                 o.setClassName(className);
332                 ObjectHandler handler = HandlerFactory.getHandler(className);
333                 
334                 Set fields = handler.getFields();
335                 addFieldsToMap(doc, fields, multiFields);
336                 addFieldsToMap(doc, fields, fieldMap);
337                 
338                 Set keywords = handler.getKeywords();
339                 addFieldsToMap(doc, keywords, multiKeywords);
340             }
341             
342             o.setMultiKeywords(multiKeywords);
343             o.setMultiFields(multiFields);
344             o.setFields(fieldMap);
345         }
346         catch(Exception e)
347         {
348             logger.error("Error trying to add fields to parsed object.", e);
349         }
350     }
351     
352     private void addFieldsToMap(Document doc, Set fieldNames, Map fields)
353     {
354         Iterator fieldIter = fieldNames.iterator();
355         while(fieldIter.hasNext())
356         {
357             String fieldName = (String)fieldIter.next();
358             Field[] docFields = doc.getFields(fieldName);
359             if(fields != null)
360             {
361                 for(int i=0; i<docFields.length; i++)
362                 {
363                     Field field = docFields[i];
364                     if(field != null)
365                     {
366                         String value = field.stringValue();
367                         fields.put(fieldName, value);
368                     }
369                 }
370             }
371         }
372     }
373 
374     /***
375      * 
376      * @return 
377      */
378     public String[] getSearchSets()
379     {
380         return null;
381     }
382 
383     /***
384      * 
385      * @see org.apache.jetspeed.services.search.SearchService#add(java.lang.Object)
386      * @param o
387      * @return 
388      */
389     public boolean add(Object o)
390     {
391         Collection c = new ArrayList(1);
392         c.add(o);
393 
394         return add(c);
395     }
396 
397     /***
398      * 
399      * @see org.apache.jetspeed.services.search.SearchService#add(java.lang.Collection)
400      * @param c
401      * @return 
402      */
403     public boolean add(Collection c)
404     {
405         boolean result = false;
406 
407         IndexWriter indexWriter;
408         try
409         {
410             indexWriter = new IndexWriter(rootDir, new StandardAnalyzer(), false);
411         }
412         catch (IOException e)
413         {
414             logger.error("Error while creating index writer. Skipping add...", e);
415             return result;
416         }
417 
418         Iterator it = c.iterator();
419         while (it.hasNext()) 
420         {
421             Object o = it.next();
422             // Look up appropriate handler
423             ObjectHandler handler = null;
424             try
425             {
426                 handler = HandlerFactory.getHandler(o);
427             }
428             catch (Exception e)
429             {
430                 logger.error("Failed to create hanlder for object " + o.getClass().getName());
431                 continue;
432             }
433 
434             // Parse the object
435             ParsedObject parsedObject = handler.parseObject(o);
436 
437             // Create document
438             Document doc = new Document();
439 
440             // Populate document from the parsed object
441             if (parsedObject.getKey() != null)
442             {
443                 doc.add(Field.Keyword(ParsedObject.FIELDNAME_KEY, parsedObject.getKey()));
444             }
445             if (parsedObject.getType() != null)
446             {
447                 doc.add(Field.Text(ParsedObject.FIELDNAME_TYPE, parsedObject.getType()));
448             }
449             if (parsedObject.getTitle() != null)
450             {
451                 doc.add(Field.Text(ParsedObject.FIELDNAME_TITLE, parsedObject.getTitle()));
452             }
453             if (parsedObject.getDescription() != null)
454             {
455                 doc.add(Field.Text(ParsedObject.FIELDNAME_DESCRIPTION, parsedObject.getDescription()));
456             }
457             if (parsedObject.getContent() != null)
458             {
459                 doc.add(Field.Text(ParsedObject.FIELDNAME_CONTENT, parsedObject.getContent()));
460             }
461             if (parsedObject.getLanguage() != null)
462             {
463                 doc.add(Field.Text(ParsedObject.FIELDNAME_LANGUAGE, parsedObject.getLanguage()));   
464             }
465             if (parsedObject.getURL() != null)
466             {
467                 doc.add(Field.Text(ParsedObject.FIELDNAME_URL, parsedObject.getURL().toString()));
468             }
469             if(parsedObject.getClassName() != null)
470             {
471                 doc.add(Field.Text(ParsedObject.FIELDNAME_CLASSNAME, parsedObject.getClassName()));
472             }
473 
474             MultiMap multiKeywords = parsedObject.getMultiKeywords();
475             addFieldsToDocument(doc, multiKeywords, KEYWORD);
476             
477             MultiMap multiFields = parsedObject.getMultiFields();
478             addFieldsToDocument(doc, multiFields, TEXT);
479             
480             Map fields = parsedObject.getFields();
481             addFieldsToDocument(doc, fields, TEXT);
482 
483             // Add the document to search index
484             try
485             {
486                 indexWriter.addDocument(doc);
487             }
488             catch (IOException e)
489             {
490                logger.error("Error adding document to index.", e);
491             }
492             logger.debug("Index Document Count = " + indexWriter.docCount());
493             logger.info("Added '" + parsedObject.getTitle() + "' to index");
494             result = true;
495         }
496 
497         try
498         {
499             indexWriter.optimize();
500         }
501         catch (IOException e)
502         {
503             logger.error("Error while trying to optimize index.");
504         }
505         finally
506         {
507             try
508             {
509                 indexWriter.close();
510             }
511             catch (IOException e)
512             {
513                logger.error("Error while closing index writer.", e);
514             }
515         }
516 
517         return result;
518     }
519     
520     private void addFieldsToDocument(Document doc, Map fields, int type)
521     {
522         if(fields != null)
523         {
524             Iterator keyIter = fields.keySet().iterator();
525             while(keyIter.hasNext())
526             {
527                 Object key = keyIter.next();
528                 if(key != null)
529                 {
530                     Object values = fields.get(key);
531                     if(values != null)
532                     {
533                         if(values instanceof Collection)
534                         {
535                             Iterator valueIter = ((Collection)values).iterator();
536                             while(valueIter.hasNext())
537                             {
538                                 Object value = valueIter.next();
539                                 if(value != null)
540                                 {
541                                     if(type == TEXT)
542                                     {
543                                         doc.add(Field.Text(key.toString(), value.toString()));
544                                     }
545                                     else
546                                     {
547                                         doc.add(Field.Keyword(key.toString(), value.toString()));
548                                     }
549                                 }
550                             }
551                         }
552                         else
553                         {
554                             if(type == TEXT)
555                             {
556                                 doc.add(Field.Text(key.toString(), values.toString()));
557                             }
558                             else
559                             {
560                                 doc.add(Field.Keyword(key.toString(), values.toString()));
561                             }
562                         }
563                     }
564                 }
565             } 
566         }
567     }
568 
569     /***
570      * 
571      * @see org.apache.jetspeed.services.search.SearchService#remove(java.lang.Object)
572      * @param o
573      * @return 
574      */
575     public boolean remove(Object o)
576     {
577         Collection c = new ArrayList(1);
578         c.add(o);
579 
580         return remove(c);
581     }
582 
583     /***
584      * 
585      * @see org.apache.jetspeed.services.search.SearchService#remove(java.lang.Collection)
586      * @param c
587      * @return 
588      */
589     public boolean remove(Collection c)
590     {
591         boolean result = false;
592 
593         try 
594         {
595             IndexReader indexReader = IndexReader.open(this.rootDir);
596 
597             Iterator it = c.iterator();
598             while (it.hasNext()) 
599             {
600                 Object o = it.next();
601                 // Look up appropriate handler
602                 ObjectHandler handler = HandlerFactory.getHandler(o);
603 
604                 // Parse the object
605                 ParsedObject parsedObject = handler.parseObject(o);
606 
607                 // Create term
608                 Term term = null;
609 
610                 if (parsedObject.getKey() != null)
611                 {
612                     term = new Term(ParsedObject.FIELDNAME_KEY, parsedObject.getKey());
613                     // Remove the document from search index
614                     int rc = indexReader.delete(term);
615                     logger.info("Attempted to delete '" + term.toString() + "' from index, documents deleted = " + rc);
616                     //System.out.println("Attempted to delete '" + term.toString() + "' from index, documents deleted = " + rc);
617                     result = rc > 0;
618                 }
619             }
620 
621             indexReader.close();
622 
623             IndexWriter indexWriter = new IndexWriter(rootDir, new StandardAnalyzer(), false);
624             indexWriter.optimize();
625             indexWriter.close();
626 
627         }
628         catch (Exception e)
629         {
630             logger.error("Exception", e);
631             result = false;
632         }
633 
634         return result;
635     }
636 
637     /***
638      * 
639      * @see org.apache.jetspeed.services.search.SearchService#update(java.lang.Object)
640      * @param o
641      * @return 
642      */
643     public boolean update(Object o)
644     {
645         Collection c = new ArrayList(1);
646         c.add(o);
647 
648         return update(c);
649     }
650     /***
651      * Updates an index entry. For now, it's a remove and add.
652      * 
653      * @param c
654      * @return 
655      * @see org.apache.jetspeed.services.search.SearchService#update(java.lang.Collection)
656      */
657     public boolean update(Collection c)
658     {
659         boolean result = false;
660 
661         try
662         {
663             // Delete entries from index
664             remove(c);
665             result = true;
666         }
667         catch (Throwable e)
668         {
669             logger.error("Exception",  e);
670         }
671 
672         try
673         {
674             // Add entries to index
675             add(c);
676             result = true;
677         }
678         catch (Throwable e)
679         {
680             logger.error("Exception",  e);
681         }
682 
683         return false;
684     }
685 
686 }