Apache Lucene support for Java source code

Supported Libraries
Supported Operations
Objects
Links
What results can you expect?
Known Limitations

CAST supports Apache Lucene via its com.castsoftware.nosqljava extension. Details about the support provided for Java source code are explained below.

Supported Libraries

Library	Version	Supported
Apache Lucene	up to: 9.6.0

Supported Operations

Operation	Methods Supported
Insert	org.apache.lucene.index.IndexWriter.addDocument org.apache.lucene.index.IndexWriter.addDocuments org.apache.lucene.index.IndexWriter.addIndexes org.apache.lucene.search.IndexSearcher.setQueryCache org.apache.lucene.index.IndexWriter.commit
Select	org.apache.lucene.index.IndexReader.numDocs org.apache.lucene.index.IndexReader.maxDoc org.apache.lucene.index.IndexReader.document org.apache.lucene.index.IndexReader.docFreq org.apache.lucene.index.IndexReader.getRefCount org.apache.lucene.index.IndexReader.numDeletedDocs org.apache.lucene.index.IndexReader.hasDeletions org.apache.lucene.index.IndexReader.getSumDocFreq org.apache.lucene.index.IndexReader.getDocCount org.apache.lucene.index.IndexReader.getSumTotalTermFreq org.apache.lucene.search.IndexSearcher.search org.apache.lucene.search.IndexSearcher.explain org.apache.lucene.search.IndexSearcher.count org.apache.lucene.search.IndexSearcher.totalTermFreq org.apache.lucene.search.IndexSearcher.getQueryCache org.apache.lucene.search.IndexSearcher.getIndexReader org.apache.lucene.search.IndexSearcher.doc org.apache.lucene.search.IndexSearcher.storedFields org.apache.lucene.search.IndexSearcher.getSlices org.apache.lucene.search.IndexSearcher.searchAfter
Delete	org.apache.lucene.index.IndexWriter.deleteDocuments org.apache.lucene.index.IndexWriter.deleteAll
Update	org.apache.lucene.index.IndexWriter.updateDocument

Objects

Icon	Description
	Java ApacheLucene Index
	Java Unknown ApacheLucene Index

Links

All links are created between the caller Java method object and the ApacheLucene Index object:

Link type	Methods Supported
useSelectLink	numDocs maxDoc document docFreq getRefCount numDeletedDocs hasDeletions getSumDocFreq getDocCount getSumTotalTermFreq search explain count totalTermFreq getQueryCache getIndexReader doc storedFields getSlices searchAfter
useInsertLink	addDocument addDocuments addIndexes setQueryCache commit
useDeleteLink	deleteDocuments deleteAll
useUpdateLink	updateDocument

What results can you expect?

Some example scenarios are shown below:

ApacheLucene Index

public class LuceneReadIndexExample 
{
	private static final String INDEX_DIR = "c:/temp/lucene6index";

	/**
	 * Runs two sample lookups against the index and prints the requested stored
	 * field of every hit.
	 *
	 * The first lookup goes through {@code searchById}, the second through
	 * {@code searchByFirstName}; both helpers are defined outside this snippet.
	 *
	 * @param args command-line arguments (unused)
	 * @throws Exception if creating the searcher or running a lookup fails
	 */
	public static void main(String[] args) throws Exception 
	{
		IndexSearcher searcher = createSearcher();
		
		//Search by ID
		TopDocs foundDocs = searchById(1, searcher);
		
		System.out.println("Total Results :: " + foundDocs.totalHits);
		
		for (ScoreDoc sd : foundDocs.scoreDocs) 
		{
			Document d = searcher.doc(sd.doc);
			// Print the stored value as-is. Routing it through String.format would
			// interpret any '%' in the data as a format specifier and throw.
			System.out.println(d.get("firstName"));
		}
		
		//Search by firstName
		TopDocs foundDocs2 = searchByFirstName("Brian", searcher);
		
		System.out.println("Total Results :: " + foundDocs2.totalHits);
		
		for (ScoreDoc sd : foundDocs2.scoreDocs) 
		{
			Document d = searcher.doc(sd.doc);
			// Same as above: the field value is data, not a format string.
			System.out.println(d.get("id"));
		}
	}
/**
 * Builds a searcher over the index located at {@code INDEX_DIR}.
 *
 * @return a new searcher wrapping a reader opened on that directory
 * @throws IOException if opening the directory or the reader fails
 */
private static IndexSearcher createSearcher() throws IOException {
		// Open the on-disk directory, open a reader on it, then wrap the reader.
		IndexReader indexReader = DirectoryReader.open(FSDirectory.open(Paths.get(INDEX_DIR)));
		return new IndexSearcher(indexReader);
	}

/**
 * Returns the index writer held by this object, creating it on first use.
 *
 * When no writer exists yet, one is opened either on the in-memory directory
 * (when the configured index path is "RAM", compared case-insensitively) or on
 * the file-system directory at the configured path. The method is declared
 * {@code synchronized}.
 *
 * @return the existing or newly created writer
 * @throws LuceneAlertException if no index path is configured, or if opening
 *         the writer raises a CorruptIndexException or an IOException
 */
public synchronized IndexWriter retrieveIndexWriter() throws LuceneAlertException
   {
      // Reuse the writer once it has been created.
      if (writer != null)
      {
         return writer;
      }

      // Without a configured path there is nothing to open.
      if (indexPath == null)
      {
         logger.fatal("No Lucene Index Path has been defined.");
         throw new LuceneAlertException(
            "No Lucene Index Path has been defined.Please define that in the configuration file.");
      }

      logger.debug("inside of indexPath !=null");
      try
      {
         boolean inMemory = indexPath.equalsIgnoreCase("RAM");
         if (inMemory)
         {
            // The in-memory directory is created once and then reused.
            if (ramDir == null)
            {
               ramDir = new RAMDirectory();
            }
            writer = new IndexWriter(ramDir, new StandardAnalyzer(Version.LUCENE_29),
               IndexWriter.MaxFieldLength.LIMITED);
         }
         else
         {
            writer = new IndexWriter(FSDirectory.open(new File(indexPath)),
               new StandardAnalyzer(Version.LUCENE_29), IndexWriter.MaxFieldLength.LIMITED);
         }

         // Both kinds of writer get the same write-lock timeout.
         writer.setWriteLockTimeout(WRITER_LOCK_TIMEDOUT);
      }
      catch (CorruptIndexException corrupt)
      {
         logger.error("CorruptIndexException  thrown when retrieving writer." + corrupt.getMessage());
         throw new LuceneAlertException(corrupt);
      }
      catch (IOException ioFailure)
      {
         logger.error(" IOException  thrown when retrieving writer." + ioFailure.getMessage());
         throw new LuceneAlertException(ioFailure);
      }

      return writer;
   }

Select Operation

 /**
  * Runs {@code query} against the index and either updates the matching
  * documents or returns one page of them.
  *
  * @param query        the query to execute
  * @param updateRecord when {@code true}, the hits are handed to
  *                     {@code updateAlertDoc} and the index is flagged as updated;
  *                     when {@code false}, one page of hits is collected
  * @param writer       writer passed to {@code updateAlertDoc}; only used when
  *                     {@code updateRecord} is {@code true}
  * @param index        offset of the first hit to return; a negative value starts
  *                     at 0 while the page still ends at {@code index + batch}
  * @param batch        maximum number of hits in the page
  * @return the result object; its document list is only set on the paging path
  * @throws IOException          if the search or a document fetch fails
  * @throws LuceneAlertException declared for the helper calls made here
  */
 public synchronized LuceneAlertSearchResult search(Query query, boolean updateRecord, IndexWriter writer,
      int index, int batch)
      throws IOException, LuceneAlertException
   {

      if (logger.isDebugEnabled())
         logger.debug("Entering LuceneAlertSearchResult(), query=" + query + ", updateRecord=" + updateRecord
            + ", index=" + index + ", batch=" + batch);

      LuceneAlertSearchResult result = new LuceneAlertSearchResult();

      IndexSearcher searcher = retrieveIndexSearcher();

      if (logger.isDebugEnabled())
         logger.debug("Hashcode for this=" + this + " ,for indexSearcher=" + searcher);

      // IndexSearcher.search rejects a non-positive hit count, which is what
      // maxDoc() yields for an empty index, so always request at least one hit.
      TopDocs resultDocs = searcher.search(query, Math.max(1, searcher.maxDoc()));

      // for clearAlert
      if (updateRecord)
      {
         updateAlertDoc(writer, resultDocs);
         setIndexUpdated(true);

         if (logger.isDebugEnabled())
            logger.debug("setIndexUpdated to true");

      }
      // for alert search
      else
      {

         List<Document> docList = new ArrayList<Document>();

         ScoreDoc[] resultScoreDocs = resultDocs.scoreDocs;

         int totalHits = resultDocs.totalHits;
         int start = (index > 0) ? index : 0;

         // Never read past the hits that were actually collected.
         int available = Math.min(totalHits, resultScoreDocs.length);

         // Add in long so a large batch cannot overflow into a bogus end, then
         // clamp the page end into [0, available] before narrowing back to int.
         long requestedEnd = (long) index + batch;
         int end = (int) Math.max(0L, Math.min(requestedEnd, (long) available));

         if (logger.isDebugEnabled())
            logger.debug("Total hits=" + totalHits + ", start=" + start + ", end=" + end);

         for (int i = start; i < end; i++)
            docList.add(searcher.doc(resultScoreDocs[i].doc));

         result.setDocs(docList);
      }

      if (logger.isDebugEnabled())
         logger.debug("LuceneAlertSearchResult() DONE");

      return result;

   }

Insert Operation

    
   /**
    * Converts the alert into an index document, adds it through the shared
    * writer, commits, and flags the index as updated.
    *
    * @param alertDTO the alert to index
    * @throws LuceneAlertException if no writer can be obtained, or if adding or
    *         committing raises a CorruptIndexException or an IOException
    */
   @Override
   public void addDocument(AlertDTO alertDTO) throws LuceneAlertException
   {
      final Document doc = convertToDocument(alertDTO);
      if (logger.isDebugEnabled())
      {
         logger.debug("Document object ready to be added into index: " + doc);
      }

      final IndexWriter indexWriter = luceneAlertHelper.retrieveIndexWriter();
      if (indexWriter == null)
      {
         logger.error("Failed to obtain index writer when trying to add doc to index");
         throw new LuceneAlertException("Failed to obtain index writer");
      }

      try
      {
         indexWriter.addDocument(doc);
         luceneAlertHelper.commit();
         luceneAlertHelper.setIndexUpdated(true);
      }
      catch (CorruptIndexException corrupt)
      {
         // Index-level failures are reported to the caller as LuceneAlertException.
         throw new LuceneAlertException(corrupt);
      }
      catch (IOException ioFailure)
      {
         throw new LuceneAlertException(ioFailure);
      }

      if (logger.isInfoEnabled())
      {
         logger.info("Document added for alert: " + alertDTO.getId());
      }
   }

Update Operation

    /**
     * Indexes the files under the input folder into the index directory.
     *
     * The directory and the writer are opened in a try-with-resources statement
     * so that both are closed (and the write lock released) even when indexing
     * fails part-way through.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args)
    {
        //Input folder
        String docsPath = "inputFiles";
         
        //Output folder
        String indexPath = "C:/index/indexedFiles";
 
        //Input Path Variable
        final Path docDir = Paths.get(docsPath);
 
        //analyzer with the default stop words
        Analyzer analyzer = new StandardAnalyzer();
         
        //IndexWriter Configuration
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
 
        //org.apache.lucene.store.Directory instance, and the IndexWriter that
        //writes new index files to it; both are closed automatically
        try (Directory dir = FSDirectory.open( Paths.get(indexPath) );
             IndexWriter writer = new IndexWriter(dir, iwc))
        {
            //Its recursive method to iterate all files and directories
            indexDocs(writer, docDir);
        } 
        catch (IOException e) 
        {
            e.printStackTrace();
        }
    }
     
    /**
     * Indexes a single file, or every file found beneath a directory.
     *
     * @param writer the writer that receives the documents
     * @param path   a file to index, or a directory whose tree is walked
     * @throws IOException if indexing a single file or walking the tree fails
     */
    static void indexDocs(final IndexWriter writer, Path path) throws IOException 
    {
        // A plain file is indexed directly with its own modification time.
        if (!Files.isDirectory(path))
        {
            indexDoc(writer, path, Files.getLastModifiedTime(path).toMillis());
            return;
        }

        // Otherwise walk the tree and index each file that is visited.
        Files.walkFileTree(path, new SimpleFileVisitor<Path>()
        {
            @Override
            public FileVisitResult visitFile(Path visited, BasicFileAttributes fileAttrs) throws IOException
            {
                long modifiedMillis = fileAttrs.lastModifiedTime().toMillis();
                try
                {
                    indexDoc(writer, visited, modifiedMillis);
                }
                catch (IOException failure)
                {
                    // Report the failure for this file and keep walking the tree.
                    failure.printStackTrace();
                }
                return FileVisitResult.CONTINUE;
            }
        });
    }
 
    /**
     * Adds or replaces the index entry for one file, keyed by its path.
     *
     * The document stores the file path, the last-modified timestamp and the
     * file contents decoded as UTF-8.
     *
     * @param writer       the writer that receives the document
     * @param file         the file to index
     * @param lastModified the file's last-modified time in milliseconds
     * @throws IOException if the file cannot be read or the index update fails
     */
    static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException 
    {
        // Read the file once and decode it with an explicit charset, so the
        // indexed text does not depend on the platform default encoding.
        String contents = new String(Files.readAllBytes(file), java.nio.charset.StandardCharsets.UTF_8);

        //Create lucene Document
        Document doc = new Document();
         
        doc.add(new StringField("path", file.toString(), Field.Store.YES));
        doc.add(new LongPoint("modified", lastModified));
        doc.add(new TextField("contents", contents, Field.Store.YES));
         
        //Updates a document by first deleting the document(s) 
        //containing <code>term</code> and then adding the new
        //document.  The delete and then add are atomic as seen
        //by a reader on the same index
        writer.updateDocument(new Term("path", file.toString()), doc);
    }
}

Delete Operation

    /**
     * Deletes the index documents whose ID field matches the alert's ID, then
     * commits and flags the index as updated.
     *
     * @param alertDTO the alert whose document is removed
     * @throws LuceneAlertException if no writer can be obtained, or if deleting
     *         or committing raises a CorruptIndexException or an IOException
     */
    @Override
   public void deleteDocument(AlertDTO alertDTO) throws LuceneAlertException
   {

      if (logger.isInfoEnabled())
         logger.info("delete document for alert:" + alertDTO.getId());

      IndexWriter writer = luceneAlertHelper.retrieveIndexWriter();

      if (writer == null)
      {
         logger.error("Failed to obtain the index writer." + " Deleting document for  alert: " + alertDTO.getId()
            + " failed.");
         throw new LuceneAlertException("Failed to obtain index writer");
      }

      // The former "new Long(id) == null" validation could never be true (a freshly
      // constructed object is never null), so it has been removed. The ID is still
      // converted through Long; Long.valueOf replaces the deprecated constructor.
      String idText = Long.valueOf(alertDTO.getId()).toString();

      try
      {
         writer.deleteDocuments(new Term(LuceneAlertFields.ID, idText));
         luceneAlertHelper.commit();
         luceneAlertHelper.setIndexUpdated(true);
      }
      catch (CorruptIndexException e)
      {
         logger.error("A CorrupIndexException is thrown when Document is deleted from  Lucene Interface for " +
            alertDTO.getBan() + ". " + e.getMessage());
         throw new LuceneAlertException(e);
      }
      catch (IOException e)
      {
         logger.error("A IOException is thrown when Document is deleted from  Lucene Interface for " +
            alertDTO.getBan() + ". " + e.getMessage());
         throw new LuceneAlertException(e);
      }


      if (logger.isInfoEnabled())
         logger.info(alertDTO.getId() + " deleted.");


   }

Known Limitations

If the index location or path cannot be found in the source code of the application, a Java Unknown ApacheLucene Index object is created instead.