use of org.apache.nifi.provenance.index.EventIndexSearcher in project nifi by apache.
the class LineageQuery method computeLineageForFlowFiles.
/**
 * Searches a single Lucene provenance index for the events that belong to any of the given FlowFiles.
 *
 * <p>The query is a disjunction over the FlowFile UUID field: every UUID is added as a SHOULD clause and at
 * least one clause must match. {@code lineageIdentifier} only takes part in argument validation; it is not
 * part of the query that is executed.</p>
 *
 * @param indexManager the manager from which the index searcher is borrowed and to which it is returned
 * @param indexDirectory the directory of the index to search
 * @param lineageIdentifier the lineage identifier; may be {@code null} when FlowFile UUIDs are supplied
 * @param flowFileUuids the UUIDs of the FlowFiles of interest; must not be {@code null}
 * @param docsToEventConverter converts the matching Lucene documents into provenance events
 * @return the converted events; an empty set when there are no UUIDs to search for or when the index files
 *         cannot be found
 * @throws NullPointerException if {@code flowFileUuids} is {@code null}
 * @throws IllegalArgumentException if more than {@code MAX_LINEAGE_UUIDS} UUIDs are given, or if neither a
 *         lineage identifier nor any FlowFile UUID is given
 * @throws IOException if the index cannot be opened or read for a reason other than a missing file
 */
public static Set<ProvenanceEventRecord> computeLineageForFlowFiles(final IndexManager indexManager, final File indexDirectory, final String lineageIdentifier, final Collection<String> flowFileUuids, final DocumentToEventConverter docsToEventConverter) throws IOException {
    // requireNonNull rejects a null collection up front, so no further null checks are needed below.
    if (requireNonNull(flowFileUuids).size() > MAX_LINEAGE_UUIDS) {
        throw new IllegalArgumentException(String.format("Cannot compute lineage for more than %s FlowFiles. This lineage contains %s.", MAX_LINEAGE_UUIDS, flowFileUuids.size()));
    }

    if (lineageIdentifier == null && flowFileUuids.isEmpty()) {
        throw new IllegalArgumentException("Must specify either Lineage Identifier or FlowFile UUIDs to compute lineage");
    }

    if (flowFileUuids.isEmpty()) {
        // Only a lineage identifier was supplied. The query is built solely from FlowFile UUIDs, so there is
        // nothing to search for; previously a null query was handed to Lucene here, which fails with a
        // NullPointerException.
        return Collections.emptySet();
    }

    try {
        final EventIndexSearcher searcher = indexManager.borrowIndexSearcher(indexDirectory);
        try {
            // Create a query for all Events related to the FlowFiles of interest. We do this by adding all ID's as
            // "SHOULD" clauses and then setting the minimum required to 1.
            final BooleanQuery flowFileIdQuery = new BooleanQuery();
            for (final String flowFileUuid : flowFileUuids) {
                flowFileIdQuery.add(new TermQuery(new Term(SearchableFields.FlowFileUUID.getSearchableFieldName(), flowFileUuid)), Occur.SHOULD);
            }
            flowFileIdQuery.setMinimumNumberShouldMatch(1);

            final long searchStart = System.nanoTime();
            logger.debug("Searching {} for {}", indexDirectory, flowFileIdQuery);
            final TopDocs uuidQueryTopDocs = searcher.getIndexSearcher().search(flowFileIdQuery, MAX_QUERY_RESULTS);
            final long searchEnd = System.nanoTime();

            final Set<ProvenanceEventRecord> recs = docsToEventConverter.convert(uuidQueryTopDocs, searcher.getIndexSearcher().getIndexReader());
            final long readDocsEnd = System.nanoTime();

            logger.debug("Finished Lineage Query against {}; Lucene search took {} millis, reading records took {} millis", indexDirectory, TimeUnit.NANOSECONDS.toMillis(searchEnd - searchStart), TimeUnit.NANOSECONDS.toMillis(readDocsEnd - searchEnd));
            return recs;
        } finally {
            // Always hand the searcher back, whether the search succeeded or not.
            indexManager.returnIndexSearcher(searcher);
        }
    } catch (final FileNotFoundException fnfe) {
        // nothing has been indexed yet, or the data has already aged off
        logger.warn("Attempted to search Provenance Index {} but could not find the file due to {}", indexDirectory, fnfe);
        if (logger.isDebugEnabled()) {
            logger.warn("", fnfe);
        }
        return Collections.emptySet();
    }
}
use of org.apache.nifi.provenance.index.EventIndexSearcher in project nifi by apache.
the class SimpleIndexManager method borrowIndexSearcher.
/**
 * Creates a new index searcher for the given index directory.
 *
 * <p>If an index writer is currently registered for the directory, its usage count is incremented and the
 * reader is opened from that writer. Otherwise the reader is opened directly from the directory on disk.</p>
 *
 * @param indexDir the directory of the index to search
 * @return a new searcher backed by a freshly opened {@code DirectoryReader}
 * @throws IOException if the directory or the reader cannot be opened
 */
@Override
public EventIndexSearcher borrowIndexSearcher(final File indexDir) throws IOException {
    final File absoluteFile = indexDir.getAbsoluteFile();

    final IndexWriterCount writerCount;
    synchronized (writerCounts) {
        writerCount = writerCounts.remove(absoluteFile);
        if (writerCount != null) {
            // Increment writer count and create an Index Searcher based on the writer
            writerCounts.put(absoluteFile, new IndexWriterCount(writerCount.getWriter(), writerCount.getAnalyzer(), writerCount.getDirectory(), writerCount.getCount() + 1, writerCount.isCloseableWhenUnused()));
        }
    }

    final DirectoryReader directoryReader;
    if (writerCount == null) {
        logger.trace("Creating index searcher for {}", indexDir);
        final Directory directory = FSDirectory.open(indexDir);
        try {
            directoryReader = DirectoryReader.open(directory);
        } catch (final IOException e) {
            // Do not leave the directory open when no reader could be created on top of it.
            try {
                directory.close();
            } catch (final IOException ioe) {
                e.addSuppressed(ioe);
            }
            throw e;
        }
        // NOTE(review): on success the directory is not handed to the searcher (null is passed below), so it
        // is presumably never closed explicitly — confirm whether closing the reader is sufficient.
    } else {
        // NOTE(review): if opening the reader fails here, the writer count incremented above is not rolled
        // back in this method — confirm how callers are expected to recover.
        final EventIndexWriter eventIndexWriter = writerCount.getWriter();
        directoryReader = DirectoryReader.open(eventIndexWriter.getIndexWriter(), false);
    }

    final IndexSearcher searcher = new IndexSearcher(directoryReader, this.searchExecutor);
    logger.trace("Created index searcher {} for {}", searcher, indexDir);
    return new LuceneEventIndexSearcher(searcher, indexDir, null, directoryReader);
}
use of org.apache.nifi.provenance.index.EventIndexSearcher in project nifi by apache.
the class TestSimpleIndexManager method testMultipleWritersSimultaneouslySameIndex.
/**
 * Borrows two writers for the same index directory at the same time, indexes one document through each,
 * and verifies that a searcher borrowed afterwards sees both documents.
 *
 * <p>Writers and the searcher are returned in {@code finally} blocks so that a failed index call or a failed
 * assertion does not leave them borrowed while the directory is deleted during cleanup.</p>
 */
@Test
public void testMultipleWritersSimultaneouslySameIndex() throws IOException {
    final SimpleIndexManager mgr = new SimpleIndexManager(new RepositoryConfiguration());
    final File dir = new File("target/" + UUID.randomUUID().toString());
    try {
        final EventIndexWriter writer1 = mgr.borrowIndexWriter(dir);
        try {
            final EventIndexWriter writer2 = mgr.borrowIndexWriter(dir);
            try {
                final Document doc1 = new Document();
                doc1.add(new StringField("id", "1", Store.YES));

                final Document doc2 = new Document();
                doc2.add(new StringField("id", "2", Store.YES));

                writer1.index(doc1, 1000);
                writer2.index(doc2, 1000);
            } finally {
                // Same return order as before: the second writer goes back first.
                mgr.returnIndexWriter(writer2);
            }
        } finally {
            mgr.returnIndexWriter(writer1);
        }

        final EventIndexSearcher searcher = mgr.borrowIndexSearcher(dir);
        try {
            final TopDocs topDocs = searcher.getIndexSearcher().search(new MatchAllDocsQuery(), 2);
            assertEquals(2, topDocs.totalHits);
        } finally {
            mgr.returnIndexSearcher(searcher);
        }
    } finally {
        FileUtils.deleteFile(dir, true);
    }
}
use of org.apache.nifi.provenance.index.EventIndexSearcher in project nifi by apache.
the class LuceneCacheWarmer method run.
/**
 * Warms the Lucene index cache by borrowing, and immediately returning, an index searcher for every index
 * directory found under {@code storageDir}.
 *
 * <p>If the storage directory cannot be listed, the method logs that and returns. Any exception thrown
 * while warming is logged and ends the run; index directories after the failing one are not visited.</p>
 */
@Override
public void run() {
    try {
        final File[] indexDirs = storageDir.listFiles(DirectoryUtils.INDEX_FILE_FILTER);
        if (indexDirs == null) {
            logger.info("Cannot warm Lucene Index Cache for {} because the directory could not be read", storageDir);
            return;
        }

        logger.info("Beginning warming of Lucene Index Cache for {}", storageDir);
        final long startNanos = System.nanoTime();
        for (final File indexDir : indexDirs) {
            final long indexStartNanos = System.nanoTime();

            // Borrowing the searcher is the warming step; it is not used for searching, so return it right away.
            final EventIndexSearcher eventSearcher = indexManager.borrowIndexSearcher(indexDir);
            indexManager.returnIndexSearcher(eventSearcher);

            final long indexWarmMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - indexStartNanos);
            logger.debug("Took {} ms to warm Lucene Index {}", indexWarmMillis, indexDir);
        }

        final long warmSecs = TimeUnit.NANOSECONDS.toSeconds(System.nanoTime() - startNanos);
        logger.info("Finished warming all Lucene Indexes for {} in {} seconds", storageDir, warmSecs);
    } catch (final Exception e) {
        // The trailing Throwable is logged as the exception, not consumed by a placeholder.
        logger.error("Failed to warm Lucene Index Cache for {}", storageDir, e);
    }
}
use of org.apache.nifi.provenance.index.EventIndexSearcher in project nifi by apache.
the class CachingIndexManager method borrowIndexSearcher.
/**
 * Returns an index searcher for the given index directory, reusing a previously cached searcher when one
 * is still usable and creating a new one otherwise. The whole operation runs while holding {@code lock}.
 *
 * <p>When no cached searcher can be reused, the behavior depends on whether an index writer is registered
 * for the directory: without a writer, a reader is opened from the directory on disk and the resulting
 * searcher is tracked as a cached one; with a writer, the writer's count is incremented and a reader is
 * opened from the writer, and the resulting searcher is tracked as a non-cached one.</p>
 *
 * @param indexDir the directory of the index to search
 * @return a cached searcher whose reference count was incremented, or a newly created searcher
 * @throws IOException if the directory or the reader cannot be opened
 */
@Override
public EventIndexSearcher borrowIndexSearcher(final File indexDir) throws IOException {
final File absoluteFile = indexDir.getAbsoluteFile();
logger.trace("Borrowing index searcher for {}", indexDir);
lock.lock();
try {
// check if we already have a reader cached.
List<ActiveIndexSearcher> currentlyCached = activeSearchers.get(absoluteFile);
if (currentlyCached == null) {
// first borrow for this directory: start tracking its searchers
currentlyCached = new ArrayList<>();
activeSearchers.put(absoluteFile, currentlyCached);
} else {
// look through the searchers already tracked for this directory for one that can be reused.
for (final ActiveIndexSearcher searcher : currentlyCached) {
// only searchers flagged as cache entries are candidates for reuse
if (searcher.isCache()) {
// a poisoned searcher is never handed out again. It is only skipped here; this loop does not
// close it or remove it from the list.
if (searcher.isPoisoned()) {
continue;
}
// if there are no references to the reader, it will have been closed. Since there is no
// isClosed() method, this is how we determine whether it's been closed or not.
final int refCount = searcher.getSearcher().getIndexSearcher().getIndexReader().getRefCount();
if (refCount <= 0) {
// if refCount == 0, then the reader has been closed, so we cannot use the searcher
// NOTE(review): the log message says the searcher is being removed, but this loop only skips it;
// presumably the removal happens elsewhere — confirm.
logger.debug("Reference count for cached Index Searcher for {} is currently {}; " + "removing cached searcher", absoluteFile, refCount);
continue;
}
// usable cached searcher found: record one more borrower and hand it out
final int referenceCount = searcher.incrementReferenceCount();
logger.debug("Providing previously cached index searcher for {} and incrementing Reference Count to {}", indexDir, referenceCount);
return searcher.getSearcher();
}
}
}
// We found no cached Index Readers. Create a new one. To do this, we need to check
// if we have an Index Writer, and if so create a Reader based on the Index Writer.
// This will provide us a 'near real time' index reader.
final IndexWriterCount writerCount = writerCounts.remove(absoluteFile);
if (writerCount == null) {
final Directory directory = FSDirectory.open(absoluteFile);
logger.debug("No Index Writer currently exists for {}; creating a cachable reader", indexDir);
try {
final DirectoryReader directoryReader = DirectoryReader.open(directory);
final IndexSearcher searcher = new IndexSearcher(directoryReader);
final EventIndexSearcher eventIndexSearcher = new LuceneEventIndexSearcher(searcher, indexDir, directory, directoryReader);
// we want to cache the searcher that we create, since it's just a reader.
final ActiveIndexSearcher cached = new ActiveIndexSearcher(eventIndexSearcher, absoluteFile, directoryReader, directory, true);
currentlyCached.add(cached);
return cached.getSearcher();
} catch (final IOException e) {
logger.error("Failed to create Index Searcher for {} due to {}", absoluteFile, e.toString());
logger.error("", e);
// close the directory opened above so it is not left open, keeping any close failure
// attached to the original exception
try {
directory.close();
} catch (final IOException ioe) {
e.addSuppressed(ioe);
}
throw e;
}
} else {
logger.debug("Index Writer currently exists for {}; creating a non-cachable reader and incrementing " + "counter to {}", indexDir, writerCount.getCount() + 1);
// increment the writer count to ensure that it's kept open.
// NOTE(review): if opening the reader below throws, nothing in this method rolls the incremented
// count back.
writerCounts.put(absoluteFile, new IndexWriterCount(writerCount.getWriter(), writerCount.getAnalyzer(), writerCount.getDirectory(), writerCount.getCount() + 1));
// create a new Index Searcher from the writer so that we don't have an issue with trying
// to read from a directory that's locked. If we get the "no segments* file found" with
// Lucene, this indicates that an IndexWriter already has the directory open.
final EventIndexWriter writer = writerCount.getWriter();
final DirectoryReader directoryReader = DirectoryReader.open(writer.getIndexWriter(), false);
final IndexSearcher searcher = new IndexSearcher(directoryReader);
final EventIndexSearcher eventIndexSearcher = new LuceneEventIndexSearcher(searcher, indexDir, null, directoryReader);
// we don't want to cache this searcher because it's based on a writer, so we want to get
// new values the next time that we search.
final ActiveIndexSearcher activeSearcher = new ActiveIndexSearcher(eventIndexSearcher, absoluteFile, directoryReader, null, false);
currentlyCached.add(activeSearcher);
return activeSearcher.getSearcher();
}
} finally {
lock.unlock();
}
}
Aggregations