1 package net.sf.cantina.search;
2
3 import net.sf.cantina.ListIterator;
4 import net.sf.cantina.SearchEngine;
5 import net.sf.cantina.DataSource;
6 import net.sf.cantina.Document;
7 import net.sf.cantina.util.FileUtils;
8 import org.apache.lucene.search.IndexSearcher;
9 import org.apache.lucene.search.Searcher;
10 import org.apache.lucene.index.IndexReader;
11 import org.apache.lucene.index.IndexWriter;
12 import org.apache.lucene.index.Term;
13 import org.apache.lucene.analysis.standard.StandardAnalyzer;
14 import org.apache.lucene.document.Field;
15 import org.apache.log4j.Logger;
16
17 import java.io.File;
18 import java.util.Collection;
19 import java.util.Iterator;
20 import java.util.Locale;
21
22 /***
23 * @author Stephane JAIS
24 */
25 public class LuceneSearchEngine
26 extends SearchEngine
27 {
28 private static final Logger logger = Logger.getLogger(LuceneSearchEngine.class);
29 private String itsIndexPath;
30
31
32 public LuceneSearchEngine(String indexPath)
33 throws Exception
34 {
35 itsIndexPath = indexPath;
36 }
37
38 private static IndexWriter newIndexWriter(String indexPath)
39 throws Exception
40 {
41 return new IndexWriter(
42 indexPath,
43 new StandardAnalyzer(),
44 !indexExists(indexPath));
45
46 }
47
48 public static boolean indexExists(String indexPath)
49 {
50 return IndexReader.indexExists(indexPath);
51 }
52
53 public ListIterator searchDocuments(String query)
54 throws Exception
55 {
56 Searcher searcher = new IndexSearcher(itsIndexPath);
57 LuceneDocumentSearchHandler sh = new LuceneDocumentSearchHandler(
58 searcher,query);
59 sh.executeSearch();
60 return sh;
61 }
62
63 public void indexAllDocuments(DataSource ds)
64 throws Exception
65 {
66 logger.debug("Indexing all documents");
67 Collection allDocIds = ds.selectAllDocumentIds();
68 for (Iterator i = allDocIds.iterator();i.hasNext();)
69 {
70 String docId = (String)i.next();
71 Document d = ds.loadDocument(docId);
72 updateDocument(d);
73 }
74 }
75
76 public void updateDocument(Document d)
77 throws Exception
78 {
79 if (indexExists(itsIndexPath))
80 {
81
82 IndexReader reader = IndexReader.open(itsIndexPath);
83 deleteDocument(d,reader);
84 reader.close();
85 }
86
87 IndexWriter writer = newIndexWriter(itsIndexPath);
88 addDocumentToIndex(d,writer);
89 writer.optimize();
90 writer.close();
91 }
92
93 protected static void addDocumentToIndex(Document d, IndexWriter writer)
94 throws Exception
95 {
96
97 if (!d.getContentType().startsWith("text"))
98 return;
99 logger.debug("Indexing document ["+d.getDocumentId()+"]");
100
101 for (Iterator i = d.getAvailableLocales().iterator(); i.hasNext();)
102 {
103 Locale locale = (Locale) i.next();
104 org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
105 luceneDoc.add(Field.Keyword("documentId", d.getDocumentId()));
106 luceneDoc.add(Field.Keyword("locale", locale.toString()));
107 luceneDoc.add(Field.Text("content", d.getContentAsString(locale)));
108 luceneDoc.add(Field.Keyword("realm", d.getRealm().getName()));
109 writer.addDocument(luceneDoc);
110 }
111 }
112
113 protected static void deleteDocument(Document d, IndexReader reader)
114 throws Exception
115 {
116
117 if (!d.getContentType().startsWith("text"))
118 return;
119 logger.debug("Deleting document ["+d.getDocumentId()+"] from search index");
120 int matched = reader.delete(new Term("documentId",d.getDocumentId()));
121 logger.debug("["+matched+"] entries removed from search index.");
122 }
123
124 public void deleteIndex()
125 {
126 File indexDir = new File(itsIndexPath);
127 if (indexDir.exists())
128 FileUtils.deleteDir(new File(itsIndexPath));
129 }
130 }