CAST supports Apache Lucene via its com.castsoftware.nosqljava extension. Details about the support provided for Java source code are explained below.
Supported Libraries
Library | Version | Supported |
---|---|---|
Apache Lucene | up to: 9.6.0 |
Supported Operations
Operation | Methods Supported |
---|---|
Insert | org.apache.lucene.index.IndexWriter.addDocument org.apache.lucene.index.IndexWriter.addDocuments org.apache.lucene.index.IndexWriter.addIndexes org.apache.lucene.search.IndexSearcher.setQueryCache org.apache.lucene.index.IndexWriter.commit |
Select | org.apache.lucene.index.IndexReader.numDocs org.apache.lucene.index.IndexReader.maxDoc org.apache.lucene.index.IndexReader.document org.apache.lucene.index.IndexReader.docFreq org.apache.lucene.index.IndexReader.getRefCount org.apache.lucene.index.IndexReader.numDeletedDocs org.apache.lucene.index.IndexReader.hasDeletions org.apache.lucene.index.IndexReader.getSumDocFreq org.apache.lucene.index.IndexReader.getDocCount org.apache.lucene.index.IndexReader.getSumTotalTermFreq org.apache.lucene.search.IndexSearcher.search org.apache.lucene.search.IndexSearcher.explain org.apache.lucene.search.IndexSearcher.count org.apache.lucene.search.IndexSearcher.totalTermFreq org.apache.lucene.search.IndexSearcher.getQueryCache org.apache.lucene.search.IndexSearcher.getIndexReader org.apache.lucene.search.IndexSearcher.doc org.apache.lucene.search.IndexSearcher.storedFields org.apache.lucene.search.IndexSearcher.getSlices org.apache.lucene.search.IndexSearcher.searchAfter |
Delete | org.apache.lucene.index.IndexWriter.deleteDocuments org.apache.lucene.index.IndexWriter.deleteAll |
Update | org.apache.lucene.index.IndexWriter.updateDocument |
Objects
Icon | Description |
---|---|
Java ApacheLucene Index | |
Java Unknown ApacheLucene Index |
Links
All links are created between the caller Java method object and the ApacheLucene Index object:
Link type | Methods Supported |
---|---|
useSelectLink | numDocs maxDoc document docFreq getRefCount numDeletedDocs hasDeletions getSumDocFreq getDocCount getSumTotalTermFreq search explain count totalTermFreq getQueryCache getIndexReader doc storedFields getSlices searchAfter |
useInsertLink | addDocument addDocuments addIndexes setQueryCache commit |
useDeleteLink | deleteDocuments deleteAll |
useUpdateLink | updateDocument |
What results can you expect?
Some example scenarios are shown below:
ApacheLucene Index
public class LuceneReadIndexExample { private static final String INDEX_DIR = "c:/temp/lucene6index"; public static void main(String[] args) throws Exception { IndexSearcher searcher = createSearcher(); //Search by ID TopDocs foundDocs = searchById(1, searcher); System.out.println("Toral Results :: " + foundDocs.totalHits); for (ScoreDoc sd : foundDocs.scoreDocs) { Document d = searcher.doc(sd.doc); System.out.println(String.format(d.get("firstName"))); } //Search by firstName TopDocs foundDocs2 = searchByFirstName("Brian", searcher); System.out.println("Toral Results :: " + foundDocs2.totalHits); for (ScoreDoc sd : foundDocs2.scoreDocs) { Document d = searcher.doc(sd.doc); System.out.println(String.format(d.get("id"))); } } private static IndexSearcher createSearcher() throws IOException { Directory dir = FSDirectory.open(Paths.get(INDEX_DIR)); IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = new IndexSearcher(reader); return searcher; }
public synchronized IndexWriter retrieveIndexWriter() throws LuceneAlertException { if (writer == null) { if (indexPath == null) { logger.fatal("No Lucene Index Path has been defined."); throw new LuceneAlertException( "No Lucene Index Path has been defined.Please define that in the configuration file."); } // using memory index if (indexPath != null) { logger.debug("inside of indexPath !=null"); try { if (indexPath.equalsIgnoreCase("RAM")) { if (ramDir == null) ramDir = new RAMDirectory(); writer = new IndexWriter(ramDir, new StandardAnalyzer(Version.LUCENE_29), IndexWriter.MaxFieldLength.LIMITED); writer.setWriteLockTimeout(WRITER_LOCK_TIMEDOUT); } else { FSDirectory fsDir = FSDirectory.open(new File(indexPath)); writer = new IndexWriter(fsDir, new StandardAnalyzer(Version.LUCENE_29), IndexWriter.MaxFieldLength.LIMITED); writer.setWriteLockTimeout(WRITER_LOCK_TIMEDOUT); } } catch (CorruptIndexException ce) { logger.error("CorruptIndexException thrown when retrieving writer." + ce.getMessage()); throw new LuceneAlertException(ce); } catch (IOException ioe) { logger.error(" IOException thrown when retrieving writer." + ioe.getMessage()); throw new LuceneAlertException(ioe); } } } return writer; }
Select Operation
public synchronized LuceneAlertSearchResult search(Query query, boolean updateRecord, IndexWriter writer, int index, int batch) throws IOException, LuceneAlertException { if (logger.isDebugEnabled()) logger.debug("Entering LuceneAlertSearchResult(), query=" + query + ", updateRecord=" + updateRecord + ", index=" + index + ", batch=" + batch); LuceneAlertSearchResult result = new LuceneAlertSearchResult(); IndexSearcher searcher = retrieveIndexSearcher(); if (logger.isDebugEnabled()) logger.debug("Hashcode for this=" + this + " ,for indexSearcher=" + searcher); TopDocs resultDocs = searcher.search(query, searcher.maxDoc()); // for clearAlert if (updateRecord) { updateAlertDoc(writer, resultDocs); setIndexUpdated(true); if (logger.isDebugEnabled()) logger.debug("setIndexUpdated to true"); } // for alert search else { List<Document> docList = new ArrayList<Document>(); ScoreDoc[] resultScoreDocs = resultDocs.scoreDocs; int totalHits = resultDocs.totalHits; int end = index + batch; int start = (index > 0) ? index : 0; end = (totalHits > end) ? end : totalHits; if (logger.isDebugEnabled()) logger.debug("Total hits=" + totalHits + ", start=" + start + ", end=" + end); for (int i = start; i < end; i++) docList.add(searcher.doc(resultScoreDocs[i].doc)); result.setDocs(docList); } if (logger.isDebugEnabled()) logger.debug("LuceneAlertSearchResult() DONE"); return result; }
Insert Operation
/**
 * Converts the alert to a Lucene document, adds it to the index and commits.
 *
 * @param alertDTO the alert to index
 * @throws LuceneAlertException if no index writer is available, or if adding or
 *         committing fails with a {@code CorruptIndexException} or {@code IOException}
 */
@Override
public void addDocument(AlertDTO alertDTO) throws LuceneAlertException {
    final Document doc = convertToDocument(alertDTO);
    if (logger.isDebugEnabled()) {
        logger.debug("Document object ready to be added into index: " + doc);
    }

    final IndexWriter indexWriter = luceneAlertHelper.retrieveIndexWriter();
    if (indexWriter == null) {
        logger.error("Failed to obtain index writer when trying to add doc to index");
        throw new LuceneAlertException("Failed to obtain index writer");
    }

    try {
        indexWriter.addDocument(doc);
        // Commit through the helper and flag the index as changed.
        luceneAlertHelper.commit();
        luceneAlertHelper.setIndexUpdated(true);
    } catch (CorruptIndexException corrupt) {
        throw new LuceneAlertException(corrupt);
    } catch (IOException io) {
        throw new LuceneAlertException(io);
    }

    if (logger.isInfoEnabled()) {
        logger.info("Document added for alert: " + alertDTO.getId());
    }
}
Update Operation
public static void main(String[] args) { //Input folder String docsPath = "inputFiles"; //Output folder String indexPath = "C:/index/indexedFiles"; //Input Path Variable final Path docDir = Paths.get(docsPath); try { //org.apache.lucene.store.Directory instance Directory dir = FSDirectory.open( Paths.get(indexPath) ); //analyzer with the default stop words Analyzer analyzer = new StandardAnalyzer(); //IndexWriter Configuration IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); //IndexWriter writes new index files to the directory IndexWriter writer = new IndexWriter(dir, iwc); //Its recursive method to iterate all files and directories indexDocs(writer, docDir); writer.close(); } catch (IOException e) { e.printStackTrace(); } } static void indexDocs(final IndexWriter writer, Path path) throws IOException { //Directory? if (Files.isDirectory(path)) { //Iterate directory Files.walkFileTree(path, new SimpleFileVisitor<Path>() { @Override public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { try { //Index this file indexDoc(writer, file, attrs.lastModifiedTime().toMillis()); } catch (IOException ioe) { ioe.printStackTrace(); } return FileVisitResult.CONTINUE; } }); } else { //Index this file indexDoc(writer, path, Files.getLastModifiedTime(path).toMillis()); } } static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { //Create lucene Document Document doc = new Document(); doc.add(new StringField("path", file.toString(), Field.Store.YES)); doc.add(new LongPoint("modified", lastModified)); doc.add(new TextField("contents", new String(Files.readAllBytes(file)), Store.YES)); //Updates a document by first deleting the document(s) //containing <code>term</code> and then adding the new //document. 
The delete and then add are atomic as seen //by a reader on the same index writer.updateDocument(new Term("path", file.toString()), doc); } } }
Delete Operation
/**
 * Deletes from the index every document whose {@code LuceneAlertFields.ID} term
 * matches the alert's ID, then commits.
 *
 * @param alertDTO the alert whose document(s) should be removed
 * @throws LuceneAlertException if no index writer is available, or if deleting or
 *         committing fails with a {@code CorruptIndexException} or {@code IOException}
 */
@Override
public void deleteDocument(AlertDTO alertDTO) throws LuceneAlertException {
    if (logger.isInfoEnabled()) {
        logger.info("delete document for alert:" + alertDTO.getId());
    }

    IndexWriter writer = luceneAlertHelper.retrieveIndexWriter();
    if (writer == null) {
        logger.error("Failed to obtain the index writer." + " Deleting document for alert: "
                + alertDTO.getId() + " failed.");
        throw new LuceneAlertException("Failed to obtain index writer");
    }

    // The former "new Long(id) == null" guard was removed: a freshly constructed
    // object is never null, so that branch could not execute.
    try {
        // Long.valueOf replaces the deprecated Long constructor.
        String idTerm = Long.valueOf(alertDTO.getId()).toString();
        writer.deleteDocuments(new Term(LuceneAlertFields.ID, idTerm));
        luceneAlertHelper.commit();
        luceneAlertHelper.setIndexUpdated(true);
    } catch (CorruptIndexException e) {
        logger.error("A CorruptIndexException is thrown when Document is deleted from Lucene Interface for "
                + alertDTO.getBan() + ". " + e.getMessage());
        throw new LuceneAlertException(e);
    } catch (IOException e) {
        logger.error("A IOException is thrown when Document is deleted from Lucene Interface for "
                + alertDTO.getBan() + ". " + e.getMessage());
        throw new LuceneAlertException(e);
    }

    if (logger.isInfoEnabled()) {
        logger.info(alertDTO.getId() + " deleted.");
    }
}
Known Limitations
- If the index location or path is not found in the source application, the analysis results in a Java Unknown ApacheLucene Index object.