Search in sources :

Example 6 with EventIndexSearcher

use of org.apache.nifi.provenance.index.EventIndexSearcher in project nifi by apache.

the class LineageQuery method computeLineageForFlowFiles.

/**
 * Searches the Provenance index in {@code indexDirectory} for every event that references at least one of
 * the given FlowFile UUIDs and converts the matching Lucene documents into Provenance Event records.
 *
 * <p>The index is queried by FlowFile UUID only; {@code lineageIdentifier} takes part in argument
 * validation and logging but is not part of the Lucene query. When no FlowFile UUIDs are supplied
 * (null or empty) there is nothing to match, so an empty set is returned without touching the index.</p>
 *
 * @param indexManager the manager from which the index searcher is borrowed and to which it is returned
 * @param indexDirectory the directory of the index to search
 * @param lineageIdentifier the identifier of the lineage; may be null if {@code flowFileUuids} is non-empty
 * @param flowFileUuids the UUIDs of the FlowFiles of interest; may be null or empty if
 *            {@code lineageIdentifier} is given
 * @param docsToEventConverter converts the Lucene search hits into Provenance Event records
 * @return the matching events; an empty set if no UUIDs were supplied or the index could not be found
 * @throws IllegalArgumentException if more than {@code MAX_LINEAGE_UUIDS} UUIDs are supplied, or if neither
 *             a lineage identifier nor any FlowFile UUID is supplied
 * @throws IOException if the index cannot be searched or read
 */
public static Set<ProvenanceEventRecord> computeLineageForFlowFiles(final IndexManager indexManager, final File indexDirectory, final String lineageIdentifier, final Collection<String> flowFileUuids, final DocumentToEventConverter docsToEventConverter) throws IOException {
    // Treat null and empty identically so that the "either/or" contract below is actually reachable.
    final boolean hasFlowFileUuids = flowFileUuids != null && !flowFileUuids.isEmpty();
    if (hasFlowFileUuids && flowFileUuids.size() > MAX_LINEAGE_UUIDS) {
        throw new IllegalArgumentException(String.format("Cannot compute lineage for more than %s FlowFiles. This lineage contains %s.", MAX_LINEAGE_UUIDS, flowFileUuids.size()));
    }
    if (lineageIdentifier == null && !hasFlowFileUuids) {
        throw new IllegalArgumentException("Must specify either Lineage Identifier or FlowFile UUIDs to compute lineage");
    }
    if (!hasFlowFileUuids) {
        // The query is built purely from FlowFile UUIDs. Without any, no event can match, and a null
        // query must never be handed to the searcher.
        logger.debug("No FlowFile UUIDs supplied for lineage {}; skipping search of {}", lineageIdentifier, indexDirectory);
        return Collections.emptySet();
    }
    try {
        final EventIndexSearcher searcher = indexManager.borrowIndexSearcher(indexDirectory);
        try {
            // Create a query for all Events related to the FlowFiles of interest. We do this by adding all ID's as
            // "SHOULD" clauses and then setting the minimum required to 1.
            final BooleanQuery flowFileIdQuery = new BooleanQuery();
            for (final String flowFileUuid : flowFileUuids) {
                flowFileIdQuery.add(new TermQuery(new Term(SearchableFields.FlowFileUUID.getSearchableFieldName(), flowFileUuid)), Occur.SHOULD);
            }
            flowFileIdQuery.setMinimumNumberShouldMatch(1);

            final long searchStart = System.nanoTime();
            logger.debug("Searching {} for {}", indexDirectory, flowFileIdQuery);
            final TopDocs uuidQueryTopDocs = searcher.getIndexSearcher().search(flowFileIdQuery, MAX_QUERY_RESULTS);
            final long searchEnd = System.nanoTime();
            final Set<ProvenanceEventRecord> recs = docsToEventConverter.convert(uuidQueryTopDocs, searcher.getIndexSearcher().getIndexReader());
            final long readDocsEnd = System.nanoTime();
            logger.debug("Finished Lineage Query against {}; Lucene search took {} millis, reading records took {} millis", indexDirectory, TimeUnit.NANOSECONDS.toMillis(searchEnd - searchStart), TimeUnit.NANOSECONDS.toMillis(readDocsEnd - searchEnd));
            return recs;
        } finally {
            // Always hand the searcher back, even if the search or the conversion failed.
            indexManager.returnIndexSearcher(searcher);
        }
    } catch (final FileNotFoundException fnfe) {
        // nothing has been indexed yet, or the data has already aged off
        logger.warn("Attempted to search Provenance Index {} but could not find the file due to {}", indexDirectory, fnfe);
        if (logger.isDebugEnabled()) {
            logger.warn("", fnfe);
        }
        return Collections.emptySet();
    }
}
Also used : TopDocs(org.apache.lucene.search.TopDocs) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) EventIndexSearcher(org.apache.nifi.provenance.index.EventIndexSearcher) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) FileNotFoundException(java.io.FileNotFoundException) Term(org.apache.lucene.index.Term)

Example 7 with EventIndexSearcher

use of org.apache.nifi.provenance.index.EventIndexSearcher in project nifi by apache.

the class SimpleIndexManager method borrowIndexSearcher.

/**
 * Creates a new searcher for the index in {@code indexDir}.
 *
 * <p>If an index writer is currently registered for the directory, its usage count is incremented and the
 * reader is opened from that writer. Otherwise the reader is opened directly from the directory on disk.</p>
 *
 * @param indexDir the directory of the index to search
 * @return a new searcher backed by a freshly opened {@link DirectoryReader}
 * @throws IOException if the directory or the reader cannot be opened
 */
@Override
public EventIndexSearcher borrowIndexSearcher(final File indexDir) throws IOException {
    final File absoluteFile = indexDir.getAbsoluteFile();
    final IndexWriterCount writerCount;
    synchronized (writerCounts) {
        writerCount = writerCounts.remove(absoluteFile);
        if (writerCount != null) {
            // Increment writer count and create an Index Searcher based on the writer
            writerCounts.put(absoluteFile, new IndexWriterCount(writerCount.getWriter(), writerCount.getAnalyzer(), writerCount.getDirectory(), writerCount.getCount() + 1, writerCount.isCloseableWhenUnused()));
        }
    }
    final DirectoryReader directoryReader;
    if (writerCount == null) {
        logger.trace("Creating index searcher for {}", indexDir);
        final Directory directory = FSDirectory.open(indexDir);
        try {
            directoryReader = DirectoryReader.open(directory);
        } catch (final IOException | RuntimeException e) {
            // The Directory is local to this call; if no reader could be opened on it, nobody else
            // will ever close it, so release it here before propagating the failure.
            try {
                directory.close();
            } catch (final IOException ioe) {
                e.addSuppressed(ioe);
            }
            throw e;
        }
    } else {
        final EventIndexWriter eventIndexWriter = writerCount.getWriter();
        directoryReader = DirectoryReader.open(eventIndexWriter.getIndexWriter(), false);
    }
    final IndexSearcher searcher = new IndexSearcher(directoryReader, this.searchExecutor);
    logger.trace("Created index searcher {} for {}", searcher, indexDir);
    return new LuceneEventIndexSearcher(searcher, indexDir, null, directoryReader);
}
Also used : EventIndexSearcher(org.apache.nifi.provenance.index.EventIndexSearcher) IndexSearcher(org.apache.lucene.search.IndexSearcher) DirectoryReader(org.apache.lucene.index.DirectoryReader) EventIndexWriter(org.apache.nifi.provenance.index.EventIndexWriter) File(java.io.File) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory)

Example 8 with EventIndexSearcher

use of org.apache.nifi.provenance.index.EventIndexSearcher in project nifi by apache.

the class TestSimpleIndexManager method testMultipleWritersSimultaneouslySameIndex.

/**
 * Verifies that two writers borrowed at the same time for the same index directory both contribute
 * their documents: after both writers are returned, a searcher on that directory must see two hits.
 *
 * <p>Writers and the searcher are returned in {@code finally} blocks so that a failing assertion or
 * indexing error does not leave the index open while the directory is being deleted.</p>
 */
@Test
public void testMultipleWritersSimultaneouslySameIndex() throws IOException {
    final SimpleIndexManager mgr = new SimpleIndexManager(new RepositoryConfiguration());
    final File dir = new File("target/" + UUID.randomUUID().toString());
    try {
        final Document doc1 = new Document();
        doc1.add(new StringField("id", "1", Store.YES));
        final Document doc2 = new Document();
        doc2.add(new StringField("id", "2", Store.YES));

        final EventIndexWriter writer1 = mgr.borrowIndexWriter(dir);
        try {
            final EventIndexWriter writer2 = mgr.borrowIndexWriter(dir);
            try {
                writer1.index(doc1, 1000);
                writer2.index(doc2, 1000);
            } finally {
                // writer2 is returned before writer1, as in the original happy path
                mgr.returnIndexWriter(writer2);
            }
        } finally {
            mgr.returnIndexWriter(writer1);
        }

        final EventIndexSearcher searcher = mgr.borrowIndexSearcher(dir);
        try {
            final TopDocs topDocs = searcher.getIndexSearcher().search(new MatchAllDocsQuery(), 2);
            assertEquals(2, topDocs.totalHits);
        } finally {
            mgr.returnIndexSearcher(searcher);
        }
    } finally {
        FileUtils.deleteFile(dir, true);
    }
}
Also used : TopDocs(org.apache.lucene.search.TopDocs) EventIndexSearcher(org.apache.nifi.provenance.index.EventIndexSearcher) StringField(org.apache.lucene.document.StringField) EventIndexWriter(org.apache.nifi.provenance.index.EventIndexWriter) RepositoryConfiguration(org.apache.nifi.provenance.RepositoryConfiguration) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) File(java.io.File) Test(org.junit.Test)

Example 9 with EventIndexSearcher

use of org.apache.nifi.provenance.index.EventIndexSearcher in project nifi by apache.

the class LuceneCacheWarmer method run.

/**
 * Warms the Lucene index cache by borrowing and immediately returning an index searcher for every
 * index directory found under {@code storageDir}. Any exception aborts the remaining warm-up and is
 * logged rather than propagated.
 */
@Override
public void run() {
    try {
        final File[] indexDirectories = storageDir.listFiles(DirectoryUtils.INDEX_FILE_FILTER);
        if (indexDirectories == null) {
            // listFiles yields null when the directory cannot be listed
            logger.info("Cannot warm Lucene Index Cache for " + storageDir + " because the directory could not be read");
            return;
        }

        logger.info("Beginning warming of Lucene Index Cache for " + storageDir);
        final long overallStart = System.nanoTime();

        for (int i = 0; i < indexDirectories.length; i++) {
            final File directory = indexDirectories[i];
            final long directoryStart = System.nanoTime();

            // Borrowing is what warms the index; the searcher itself is not needed afterwards.
            final EventIndexSearcher borrowed = indexManager.borrowIndexSearcher(directory);
            indexManager.returnIndexSearcher(borrowed);

            final long elapsedNanos = System.nanoTime() - directoryStart;
            final long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(elapsedNanos);
            logger.debug("Took {} ms to warm Lucene Index {}", elapsedMillis, directory);
        }

        final long totalNanos = System.nanoTime() - overallStart;
        final long totalSeconds = TimeUnit.NANOSECONDS.toSeconds(totalNanos);
        logger.info("Finished warming all Lucene Indexes for {} in {} seconds", storageDir, totalSeconds);
    } catch (final Exception e) {
        logger.error("Failed to warm Lucene Index Cache for " + storageDir, e);
    }
}
Also used : EventIndexSearcher(org.apache.nifi.provenance.index.EventIndexSearcher) File(java.io.File)

Example 10 with EventIndexSearcher

use of org.apache.nifi.provenance.index.EventIndexSearcher in project nifi by apache.

the class CachingIndexManager method borrowIndexSearcher.

/**
 * Returns an index searcher for {@code indexDir}, reusing a cached one when possible.
 *
 * <p>The whole method runs while holding {@code lock}. It first looks through the searchers registered in
 * {@code activeSearchers} for the directory's absolute path and hands out the first one that is flagged as
 * a cache entry, is not poisoned, and whose underlying reader still has a positive reference count; that
 * searcher's reference count is incremented before it is returned. If none qualifies, a new searcher is
 * created: from the directory on disk when no entry exists in {@code writerCounts} for the directory, or
 * from the registered index writer otherwise (in which case the writer's count is incremented).</p>
 *
 * @param indexDir the directory of the index to search
 * @return a cached or newly created searcher for the index
 * @throws IOException if the directory or a reader cannot be opened
 */
@Override
public EventIndexSearcher borrowIndexSearcher(final File indexDir) throws IOException {
    final File absoluteFile = indexDir.getAbsoluteFile();
    logger.trace("Borrowing index searcher for {}", indexDir);
    lock.lock();
    try {
        // check if we already have a reader cached.
        List<ActiveIndexSearcher> currentlyCached = activeSearchers.get(absoluteFile);
        if (currentlyCached == null) {
            // first searcher for this directory: register an empty list that the code below adds to
            currentlyCached = new ArrayList<>();
            activeSearchers.put(absoluteFile, currentlyCached);
        } else {
            // Look for a reusable cached searcher. Unusable entries are only skipped here.
            // NOTE(review): nothing is removed from the list in this method, although the comments and
            // log message below speak of closing/removing - presumably that happens elsewhere; confirm.
            for (final ActiveIndexSearcher searcher : currentlyCached) {
                if (searcher.isCache()) {
                    // a poisoned searcher must not be handed out, so skip it.
                    if (searcher.isPoisoned()) {
                        continue;
                    }
                    // if there are no references to the reader, it will have been closed. Since there is no
                    // isClosed() method, this is how we determine whether it's been closed or not.
                    final int refCount = searcher.getSearcher().getIndexSearcher().getIndexReader().getRefCount();
                    if (refCount <= 0) {
                        // if refCount == 0, then the reader has been closed, so we cannot use the searcher
                        logger.debug("Reference count for cached Index Searcher for {} is currently {}; " + "removing cached searcher", absoluteFile, refCount);
                        continue;
                    }
                    // usable cached searcher found: record the additional borrower and hand it out
                    final int referenceCount = searcher.incrementReferenceCount();
                    logger.debug("Providing previously cached index searcher for {} and incrementing Reference Count to {}", indexDir, referenceCount);
                    return searcher.getSearcher();
                }
            }
        }
        // We found no cached Index Readers. Create a new one. To do this, we need to check
        // if we have an Index Writer, and if so create a Reader based on the Index Writer.
        // This will provide us a 'near real time' index reader.
        // The entry is removed here and, if present, re-inserted below with an incremented count.
        final IndexWriterCount writerCount = writerCounts.remove(absoluteFile);
        if (writerCount == null) {
            final Directory directory = FSDirectory.open(absoluteFile);
            logger.debug("No Index Writer currently exists for {}; creating a cachable reader", indexDir);
            try {
                final DirectoryReader directoryReader = DirectoryReader.open(directory);
                final IndexSearcher searcher = new IndexSearcher(directoryReader);
                final EventIndexSearcher eventIndexSearcher = new LuceneEventIndexSearcher(searcher, indexDir, directory, directoryReader);
                // we want to cache the searcher that we create, since it's just a reader.
                // (the trailing 'true' is presumably the flag reported by isCache() - confirm)
                final ActiveIndexSearcher cached = new ActiveIndexSearcher(eventIndexSearcher, absoluteFile, directoryReader, directory, true);
                currentlyCached.add(cached);
                return cached.getSearcher();
            } catch (final IOException e) {
                logger.error("Failed to create Index Searcher for {} due to {}", absoluteFile, e.toString());
                logger.error("", e);
                // release the directory opened above; a failure to close is attached to the original error
                try {
                    directory.close();
                } catch (final IOException ioe) {
                    e.addSuppressed(ioe);
                }
                throw e;
            }
        } else {
            logger.debug("Index Writer currently exists for {}; creating a non-cachable reader and incrementing " + "counter to {}", indexDir, writerCount.getCount() + 1);
            // increment the writer count to ensure that it's kept open.
            writerCounts.put(absoluteFile, new IndexWriterCount(writerCount.getWriter(), writerCount.getAnalyzer(), writerCount.getDirectory(), writerCount.getCount() + 1));
            // create a new Index Searcher from the writer so that we don't have an issue with trying
            // to read from a directory that's locked. If we get the "no segments* file found" with
            // Lucene, this indicates that an IndexWriter already has the directory open.
            final EventIndexWriter writer = writerCount.getWriter();
            final DirectoryReader directoryReader = DirectoryReader.open(writer.getIndexWriter(), false);
            final IndexSearcher searcher = new IndexSearcher(directoryReader);
            final EventIndexSearcher eventIndexSearcher = new LuceneEventIndexSearcher(searcher, indexDir, null, directoryReader);
            // we don't want to cache this searcher because it's based on a writer, so we want to get
            // new values the next time that we search.
            // It is still added to the list for this directory, but with the trailing flag set to false.
            final ActiveIndexSearcher activeSearcher = new ActiveIndexSearcher(eventIndexSearcher, absoluteFile, directoryReader, null, false);
            currentlyCached.add(activeSearcher);
            return activeSearcher.getSearcher();
        }
    } finally {
        lock.unlock();
    }
}
Also used : EventIndexSearcher(org.apache.nifi.provenance.index.EventIndexSearcher) IndexSearcher(org.apache.lucene.search.IndexSearcher) DirectoryReader(org.apache.lucene.index.DirectoryReader) IOException(java.io.IOException) EventIndexSearcher(org.apache.nifi.provenance.index.EventIndexSearcher) EventIndexWriter(org.apache.nifi.provenance.index.EventIndexWriter) File(java.io.File) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory)

Aggregations

EventIndexSearcher (org.apache.nifi.provenance.index.EventIndexSearcher): 10; File (java.io.File): 6; IOException (java.io.IOException): 5; EventIndexWriter (org.apache.nifi.provenance.index.EventIndexWriter): 5; TopDocs (org.apache.lucene.search.TopDocs): 4; FileNotFoundException (java.io.FileNotFoundException): 3; Document (org.apache.lucene.document.Document): 3; Test (org.junit.Test): 3; List (java.util.List): 2; StringField (org.apache.lucene.document.StringField): 2; DirectoryReader (org.apache.lucene.index.DirectoryReader): 2; IndexReader (org.apache.lucene.index.IndexReader): 2; IndexSearcher (org.apache.lucene.search.IndexSearcher): 2; Directory (org.apache.lucene.store.Directory): 2; FSDirectory (org.apache.lucene.store.FSDirectory): 2; ProvenanceEventRecord (org.apache.nifi.provenance.ProvenanceEventRecord): 2; Date (java.util.Date): 1; HashMap (java.util.HashMap): 1; Set (java.util.Set): 1; CountDownLatch (java.util.concurrent.CountDownLatch): 1