View Javadoc

1   package net.sf.cantina.search;
2   
3   import net.sf.cantina.ListIterator;
4   import net.sf.cantina.SearchEngine;
5   import net.sf.cantina.DataSource;
6   import net.sf.cantina.Document;
7   import net.sf.cantina.util.FileUtils;
8   import org.apache.lucene.search.IndexSearcher;
9   import org.apache.lucene.search.Searcher;
10  import org.apache.lucene.index.IndexReader;
11  import org.apache.lucene.index.IndexWriter;
12  import org.apache.lucene.index.Term;
13  import org.apache.lucene.analysis.standard.StandardAnalyzer;
14  import org.apache.lucene.document.Field;
15  import org.apache.log4j.Logger;
16  
17  import java.io.File;
18  import java.util.Collection;
19  import java.util.Iterator;
20  import java.util.Locale;
21  
22  /***
23   * @author Stephane JAIS
24   */
25  public class LuceneSearchEngine
26    extends SearchEngine
27  {
28    private static final Logger logger = Logger.getLogger(LuceneSearchEngine.class);
29    private String itsIndexPath;
30  
31  
32    public LuceneSearchEngine(String indexPath)
33    throws Exception
34    {
35      itsIndexPath = indexPath;
36    }
37  
38    private static IndexWriter newIndexWriter(String indexPath)
39    throws Exception
40    {
41      return new IndexWriter(
42        indexPath,
43        new StandardAnalyzer(),
44        !indexExists(indexPath));
45  
46    }
47  
48    public static boolean indexExists(String indexPath)
49    {
50      return IndexReader.indexExists(indexPath);
51    }
52  
53    public ListIterator searchDocuments(String query)
54      throws Exception
55    {
56      Searcher searcher = new IndexSearcher(itsIndexPath);
57      LuceneDocumentSearchHandler sh = new LuceneDocumentSearchHandler(
58        searcher,query);
59      sh.executeSearch();
60      return sh;
61    }
62  
63    public void indexAllDocuments(DataSource ds)
64    throws Exception
65    {
66      logger.debug("Indexing all documents");
67      Collection allDocIds = ds.selectAllDocumentIds();
68      for (Iterator i = allDocIds.iterator();i.hasNext();)
69      {
70        String docId = (String)i.next();
71        Document d = ds.loadDocument(docId);
72        updateDocument(d);
73      }
74    }
75  
76    public void updateDocument(Document d)
77    throws Exception
78    {
79      if (indexExists(itsIndexPath))
80      {
81        //deleting the doc
82        IndexReader reader = IndexReader.open(itsIndexPath);
83        deleteDocument(d,reader);
84        reader.close();
85      }
86      //writing the doc
87      IndexWriter writer = newIndexWriter(itsIndexPath);
88      addDocumentToIndex(d,writer);
89      writer.optimize();
90      writer.close();
91    }
92  
93    protected static void addDocumentToIndex(Document d, IndexWriter writer)
94    throws Exception
95    {
96      //we just index text documents
97      if (!d.getContentType().startsWith("text"))
98        return;
99      logger.debug("Indexing document ["+d.getDocumentId()+"]");
100     //create a lucene doc for each locale.
101     for (Iterator i = d.getAvailableLocales().iterator(); i.hasNext();)
102     {
103       Locale locale = (Locale) i.next();
104       org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
105       luceneDoc.add(Field.Keyword("documentId", d.getDocumentId()));
106       luceneDoc.add(Field.Keyword("locale", locale.toString()));
107       luceneDoc.add(Field.Text("content", d.getContentAsString(locale)));
108       luceneDoc.add(Field.Keyword("realm", d.getRealm().getName()));
109       writer.addDocument(luceneDoc);
110     }
111   }
112 
113   protected static void deleteDocument(Document d, IndexReader reader)
114   throws Exception
115   {
116     //we just index text documents
117     if (!d.getContentType().startsWith("text"))
118       return;
119     logger.debug("Deleting document ["+d.getDocumentId()+"] from search index");
120     int matched = reader.delete(new Term("documentId",d.getDocumentId()));
121     logger.debug("["+matched+"] entries removed from search index.");
122   }
123 
124   public void deleteIndex()
125   {
126     File indexDir = new File(itsIndexPath);
127     if (indexDir.exists())
128       FileUtils.deleteDir(new File(itsIndexPath));
129   }
130 }