Search in sources :

Example 1 with EventIndexSearcher

Use of org.apache.nifi.provenance.index.EventIndexSearcher in project nifi by apache.

The class TestPersistentProvenanceRepository, method testModifyIndexWhileSearching.

/**
 * Exercises querying the repository while the index is being modified by a rollover.
 *
 * <p>Outline, as visible in this method:
 * <ol>
 *   <li>Build a repository whose index manager delays inside {@code borrowIndexSearcher}
 *       after the searcher has been borrowed from the real manager.</li>
 *   <li>Register 10 events and wait for a rollover so that there is something to query.</li>
 *   <li>Start two search threads and wait until both have borrowed a searcher.</li>
 *   <li>Register 10 more events, force another rollover, and wait for both searches to finish.</li>
 * </ol>
 *
 * <p>The test is skipped when {@code isWindowsEnvironment()} is true and is bounded by a
 * 10 second JUnit timeout.
 */
@Test(timeout = 10000)
public void testModifyIndexWhileSearching() throws IOException, InterruptedException, ParseException {
    assumeFalse(isWindowsEnvironment());
    final RepositoryConfiguration config = createConfiguration();
    config.setMaxRecordLife(30, TimeUnit.SECONDS);
    config.setMaxStorageCapacity(1024L * 1024L * 10);
    config.setMaxEventFileLife(500, TimeUnit.MILLISECONDS);
    config.setMaxEventFileCapacity(1024L * 1024L * 10);
    config.setSearchableFields(new ArrayList<>(SearchableFields.getStandardFields()));
    // Counted down once per borrowIndexSearcher call; the main thread waits on it (see below)
    // so that the index is only modified after two searchers have been borrowed.
    final CountDownLatch obtainIndexSearcherLatch = new CountDownLatch(2);
    repo = new PersistentProvenanceRepository(config, DEFAULT_ROLLOVER_MILLIS) {

        // Lazily created wrapper; guarded by the synchronized getIndexManager() below.
        private CachingIndexManager wrappedManager = null;

        // Create an IndexManager that adds a delay before returning the Index Searcher.
        @Override
        protected synchronized CachingIndexManager getIndexManager() {
            if (wrappedManager == null) {
                final IndexManager mgr = super.getIndexManager();
                final Logger logger = LoggerFactory.getLogger("IndexManager");
                // Every method of the wrapper delegates to mgr; only borrowIndexSearcher adds behavior.
                wrappedManager = new CachingIndexManager() {

                    // Number of searchers handed out so far; used to pick the delay for each borrower.
                    final AtomicInteger indexSearcherCount = new AtomicInteger(0);

                    @Override
                    public EventIndexSearcher borrowIndexSearcher(File indexDir) throws IOException {
                        final EventIndexSearcher searcher = mgr.borrowIndexSearcher(indexDir);
                        final int idx = indexSearcherCount.incrementAndGet();
                        obtainIndexSearcherLatch.countDown();
                        // Hold on to the already-borrowed searcher before handing it to the caller:
                        // the first borrower waits 3 seconds, every later borrower waits 5 seconds,
                        // so a later borrower is still holding its searcher after the first one proceeds.
                        try {
                            if (idx == 1) {
                                Thread.sleep(3000L);
                            } else {
                                Thread.sleep(5000L);
                            }
                        } catch (InterruptedException e) {
                            // NOTE(review): the interrupt flag is not restored here; the interruption is
                            // only surfaced to the caller as the cause of this IOException.
                            throw new IOException("Interrupted", e);
                        }
                        logger.info("Releasing index searcher");
                        return searcher;
                    }

                    @Override
                    public EventIndexWriter borrowIndexWriter(File indexingDirectory) throws IOException {
                        return mgr.borrowIndexWriter(indexingDirectory);
                    }

                    @Override
                    public void close() throws IOException {
                        mgr.close();
                    }

                    @Override
                    public boolean removeIndex(File indexDirectory) {
                        // NOTE(review): always reports true; whatever mgr.removeIndex returns is discarded.
                        mgr.removeIndex(indexDirectory);
                        return true;
                    }

                    @Override
                    public void returnIndexSearcher(EventIndexSearcher searcher) {
                        mgr.returnIndexSearcher(searcher);
                    }

                    @Override
                    public void returnIndexWriter(EventIndexWriter writer) {
                        mgr.returnIndexWriter(writer);
                    }
                };
            }
            return wrappedManager;
        }
    };
    repo.initialize(getEventReporter(), null, null, IdentifierLookup.EMPTY);
    // Attributes shared by every event registered in this test; they supply the values
    // that the query below matches on (e.g. the "file-*" filename pattern).
    final String uuid = "10000000-0000-0000-0000-000000000000";
    final Map<String, String> attributes = new HashMap<>();
    attributes.put("abc", "xyz");
    attributes.put("xyz", "abc");
    attributes.put("filename", "file-" + uuid);
    final ProvenanceEventBuilder builder = new StandardProvenanceEventRecord.Builder();
    builder.setEventTime(System.currentTimeMillis());
    builder.setEventType(ProvenanceEventType.RECEIVE);
    builder.setTransitUri("nifi://unit-test");
    attributes.put("uuid", uuid);
    builder.fromFlowFile(createFlowFile(3L, 3000L, attributes));
    builder.setComponentId("1234");
    builder.setComponentType("dummy processor");
    // Register the initial batch of 10 events.
    // NOTE(review): the "uuid" attribute is updated after fromFlowFile(...) in each iteration, so the
    // new value is presumably only picked up by the next iteration's flow file - confirm whether
    // createFlowFile copies the map.
    for (int i = 0; i < 10; i++) {
        builder.fromFlowFile(createFlowFile(i, 3000L, attributes));
        attributes.put("uuid", "00000000-0000-0000-0000-00000000000" + i);
        repo.registerEvent(builder.build());
    }
    repo.waitForRollover();
    // Build the query that the background search threads will run against the repository.
    final Query query = new Query(UUID.randomUUID().toString());
    query.addSearchTerm(SearchTerms.newSearchTerm(SearchableFields.Filename, "file-*"));
    query.addSearchTerm(SearchTerms.newSearchTerm(SearchableFields.ComponentID, "12?4"));
    query.addSearchTerm(SearchTerms.newSearchTerm(SearchableFields.TransitURI, "nifi://*"));
    query.setMaxResults(100);
    // Run the query in background threads. When a thread goes to obtain the IndexSearcher, it is delayed
    // (3 seconds for the first borrower, 5 seconds for later ones - see borrowIndexSearcher above).
    // That delay will occur as the main thread is updating the index. This should result in the search creating a new Index Reader
    // that can properly query the index.
    final int numThreads = 2;
    // Counted down once by each search thread that completes its query without an IOException.
    final CountDownLatch performSearchLatch = new CountDownLatch(numThreads);
    final Runnable searchRunnable = new Runnable() {

        @Override
        public void run() {
            QueryResult result;
            try {
                result = repo.queryEvents(query, createUser());
            } catch (IOException e) {
                e.printStackTrace();
                // NOTE(review): this runs on the search thread, not the JUnit thread, and
                // performSearchLatch is not counted down on this path; the main thread would
                // then keep waiting in performSearchLatch.await() (presumably until the
                // @Test timeout fires).
                Assert.fail(e.toString());
                return;
            }
            System.out.println("Finished search: " + result);
            performSearchLatch.countDown();
        }
    };
    // Kick off the searcher threads
    for (int i = 0; i < numThreads; i++) {
        final Thread searchThread = new Thread(searchRunnable);
        searchThread.start();
    }
    // Wait until we've obtained the Index Searchers before modifying the index.
    obtainIndexSearcherLatch.await();
    // add more events to the repo
    for (int i = 0; i < 10; i++) {
        builder.fromFlowFile(createFlowFile(i, 3000L, attributes));
        attributes.put("uuid", "00000000-0000-0000-0000-00000000000" + i);
        repo.registerEvent(builder.build());
    }
    // Force a rollover to occur. This will modify the index.
    repo.rolloverWithLock(true);
    // Wait for the repository to roll over.
    repo.waitForRollover();
    // Wait for the searches to complete.
    performSearchLatch.await();
}
Also used : Query(org.apache.nifi.provenance.search.Query) CachingIndexManager(org.apache.nifi.provenance.lucene.CachingIndexManager) HashMap(java.util.HashMap) IOException(java.io.IOException) CountDownLatch(java.util.concurrent.CountDownLatch) Logger(org.slf4j.Logger) IndexManager(org.apache.nifi.provenance.lucene.IndexManager) CachingIndexManager(org.apache.nifi.provenance.lucene.CachingIndexManager) QueryResult(org.apache.nifi.provenance.search.QueryResult) EventIndexSearcher(org.apache.nifi.provenance.index.EventIndexSearcher) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) EventIndexWriter(org.apache.nifi.provenance.index.EventIndexWriter) TestUtil.createFlowFile(org.apache.nifi.provenance.TestUtil.createFlowFile) FlowFile(org.apache.nifi.flowfile.FlowFile) File(java.io.File) Test(org.junit.Test)

Example 2 with EventIndexSearcher

Use of org.apache.nifi.provenance.index.EventIndexSearcher in project nifi by apache.

The class TestCachingIndexManager, method test.

/**
 * Checks that a searcher borrowed while no writer is open and a searcher borrowed while a
 * writer is open are distinct instances, and that both keep reporting the single indexed
 * document before and after the writer is returned.
 */
@Test
public void test() throws IOException {
    // Seed the index: borrow an IndexWriter, add one document, and hand the writer back
    // so that there is something to query.
    final EventIndexWriter seedWriter = manager.borrowIndexWriter(indexDir);
    final Document seedDocument = new Document();
    seedDocument.add(new StringField("unit test", "true", Store.YES));
    seedWriter.index(seedDocument, 1000);
    manager.returnIndexWriter(seedWriter);

    // Borrow a searcher while no writer is open; it must see the seeded document.
    final EventIndexSearcher plainSearcher = manager.borrowIndexSearcher(indexDir);
    assertCount(plainSearcher, 1);

    // Open a writer on the same index while the first searcher is still checked out.
    // The manager is expected to mark that searcher as poisoned as a result.
    final EventIndexWriter openWriter = manager.borrowIndexWriter(indexDir);

    // A searcher borrowed while the writer is open is expected to be a Near-Real-Time searcher,
    // and therefore must *NOT* be the instance handed out earlier.
    final EventIndexSearcher nearRealTimeSearcher = manager.borrowIndexSearcher(indexDir);
    assertNotSame(plainSearcher, nearRealTimeSearcher);
    assertCount(nearRealTimeSearcher, 1);

    // Give the writer back so that the index no longer has an active writer.
    manager.returnIndexWriter(openWriter);

    // The first searcher must still answer correctly; then it is returned.
    assertCount(plainSearcher, 1);
    manager.returnIndexSearcher(plainSearcher);

    // The same holds for the near-real-time searcher.
    assertCount(nearRealTimeSearcher, 1);
    manager.returnIndexSearcher(nearRealTimeSearcher);
}
Also used : EventIndexSearcher(org.apache.nifi.provenance.index.EventIndexSearcher) StringField(org.apache.lucene.document.StringField) EventIndexWriter(org.apache.nifi.provenance.index.EventIndexWriter) Document(org.apache.lucene.document.Document) Test(org.junit.Test)

Example 3 with EventIndexSearcher

Use of org.apache.nifi.provenance.index.EventIndexSearcher in project nifi by apache.

The class LuceneEventIndex, method getMaxEventId.

/**
 * Determines the approximate maximum event ID that has been indexed for the given partition.
 *
 * <p>The partition's index directories are visited in {@code DirectoryUtils.NEWEST_INDEX_FIRST}
 * order. For each directory the document with the highest Lucene document number is read and
 * its identifier field is returned. A directory that cannot be opened, cannot be read, or that
 * contains no documents is skipped and the next directory is tried.
 *
 * @param partitionName name of the partition whose index directories are inspected
 * @return the event ID stored in the last document of the first usable index,
 *         or {@code -1} if the partition has no index directories or none of them is usable
 */
long getMaxEventId(final String partitionName) {
    final List<File> allDirectories = getDirectoryManager().getDirectories(0L, Long.MAX_VALUE, partitionName);
    if (allDirectories.isEmpty()) {
        return -1L;
    }
    Collections.sort(allDirectories, DirectoryUtils.NEWEST_INDEX_FIRST);
    for (final File directory : allDirectories) {
        final EventIndexSearcher searcher;
        try {
            searcher = indexManager.borrowIndexSearcher(directory);
        } catch (final IOException ioe) {
            // Pass the exception to the logger so the cause is not lost (consistent with the warning below).
            logger.warn("Unable to read from Index Directory {}. Will assume that the index is incomplete and not consider this index when determining max event ID", directory, ioe);
            continue;
        }
        try {
            final IndexReader reader = searcher.getIndexSearcher().getIndexReader();
            final int maxDoc = reader.maxDoc();
            if (maxDoc <= 0) {
                // An index without any documents has no "last" document: asking for document -1
                // would raise an unchecked exception that bypasses the IOException handling below.
                // Treat it like any other unusable index and move on (the finally block still
                // returns the searcher).
                logger.debug("Index Directory {} contains no documents. Will not consider this index when determining max event ID", directory);
                continue;
            }
            final Document document = reader.document(maxDoc - 1);
            final long eventId = document.getField(SearchableFields.Identifier.getSearchableFieldName()).numericValue().longValue();
            logger.info("Determined that Max Event ID indexed for Partition {} is approximately {} based on index {}", partitionName, eventId, directory);
            return eventId;
        } catch (final IOException ioe) {
            logger.warn("Unable to search Index Directory {}. Will assume that the index is incomplete and not consider this index when determining max event ID", directory, ioe);
        } finally {
            // Always hand the searcher back, whether we returned, skipped, or failed.
            indexManager.returnIndexSearcher(searcher);
        }
    }
    return -1L;
}
Also used : EventIndexSearcher(org.apache.nifi.provenance.index.EventIndexSearcher) IndexReader(org.apache.lucene.index.IndexReader) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) File(java.io.File)

Example 4 with EventIndexSearcher

Use of org.apache.nifi.provenance.index.EventIndexSearcher in project nifi by apache.

The class QueryTask, method run.

/**
 * Runs the Lucene query against this task's index directory and folds the outcome into the
 * shared query result.
 *
 * <p>Before each stage (borrowing the searcher, querying Lucene, reading the matching
 * events) the task re-checks whether the shared result already holds the maximum number of
 * hits or has finished, and stops early if so. The borrowed searcher is always returned to
 * the index manager.
 */
@Override
public void run() {
    if (isSearchNoLongerNeeded(
            "Will not query lucene index {} because maximum results have already been obtained",
            "Will not query lucene index {} because the query is already finished")) {
        return;
    }

    final long borrowStartNanos = System.nanoTime();
    final EventIndexSearcher searcher;
    try {
        searcher = indexManager.borrowIndexSearcher(indexDir);
    } catch (final FileNotFoundException missingIndex) {
        // Not treated as an error: the index may have aged off and been deleted, or it may have just
        // been created without the writer having committed yet. Record this index as searched
        // with no results.
        queryResult.update(Collections.emptyList(), 0);
        logger.info("Attempted to search Provenance Index {} but could not find the directory or the directory did not contain a valid Lucene index. "
            + "This usually indicates that either the index was just created and hasn't fully been initialized, or that the index was recently aged off.", indexDir);
        return;
    } catch (final IOException borrowFailure) {
        queryResult.setError("Failed to query index " + indexDir + "; see logs for more details");
        logger.error("Failed to query index " + indexDir, borrowFailure);
        return;
    }

    try {
        final long borrowMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - borrowStartNanos);
        logger.debug("Borrowing index searcher for {} took {} ms", indexDir, borrowMillis);
        final long startNanos = System.nanoTime();

        // Borrowing may have taken a while; check again before bothering Lucene.
        if (isSearchNoLongerNeeded(
                "Will not query lucene index {} because maximum results have already been obtained",
                "Will not query lucene index {} because the query is already finished")) {
            return;
        }

        // Run the Lucene query, always logging how long it took.
        final IndexReader indexReader = searcher.getIndexSearcher().getIndexReader();
        final TopDocs topDocs;
        try {
            topDocs = searcher.getIndexSearcher().search(query, maxResults);
        } catch (final Exception luceneFailure) {
            logger.error("Failed to query Lucene for index " + indexDir, luceneFailure);
            queryResult.setError("Failed to query Lucene for index " + indexDir + " due to " + luceneFailure);
            return;
        } finally {
            final long luceneMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
            logger.debug("Querying Lucene for index {} took {} ms", indexDir, luceneMillis);
        }

        // Check once more before reading the matching documents.
        if (isSearchNoLongerNeeded(
                "Will not read events from store for {} because maximum results have already been obtained",
                "Will not read events from store for {} because the query has already finished")) {
            return;
        }

        final Tuple<List<ProvenanceEventRecord>, Integer> eventsAndTotalHits = readDocuments(topDocs, indexReader);
        if (eventsAndTotalHits != null) {
            queryResult.update(eventsAndTotalHits.getKey(), eventsAndTotalHits.getValue());
            final long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
            logger.info("Successfully queried index {} for query {}; retrieved {} events with a total of {} hits in {} millis", indexDir, query, eventsAndTotalHits.getKey().size(), eventsAndTotalHits.getValue(), elapsedMillis);
        } else {
            queryResult.update(Collections.emptyList(), 0L);
            logger.info("Will not update query results for queried index {} for query {} because the maximum number of results have been reached already", indexDir, query);
        }
    } catch (final Exception unexpected) {
        logger.error("Failed to query events against index " + indexDir, unexpected);
        queryResult.setError("Failed to complete query due to " + unexpected);
    } finally {
        indexManager.returnIndexSearcher(searcher);
    }
}

/**
 * Tells the caller whether the remaining work for this index can be skipped.
 *
 * <p>If the shared result already holds at least {@code maxResults} hits, an empty update is
 * recorded for this index; if the result is already finished, nothing is recorded. In both
 * cases the corresponding message is logged at debug level with the index directory as its
 * single argument.
 *
 * @param maxReachedMessage debug message used when the maximum number of results was reached
 * @param finishedMessage   debug message used when the query has already finished
 * @return {@code true} if the caller should stop, {@code false} if it should carry on
 */
private boolean isSearchNoLongerNeeded(final String maxReachedMessage, final String finishedMessage) {
    if (queryResult.getTotalHitCount() >= maxResults) {
        logger.debug(maxReachedMessage, indexDir);
        queryResult.update(Collections.emptyList(), 0L);
        return true;
    }
    if (queryResult.isFinished()) {
        logger.debug(finishedMessage, indexDir);
        return true;
    }
    return false;
}
Also used : TopDocs(org.apache.lucene.search.TopDocs) EventIndexSearcher(org.apache.nifi.provenance.index.EventIndexSearcher) FileNotFoundException(java.io.FileNotFoundException) IndexReader(org.apache.lucene.index.IndexReader) List(java.util.List) IOException(java.io.IOException) SearchFailedException(org.apache.nifi.provenance.index.SearchFailedException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException)

Example 5 with EventIndexSearcher

Use of org.apache.nifi.provenance.index.EventIndexSearcher in project nifi by apache.

The class IndexSearch, method search.

/**
 * Runs the given provenance query against this object's index directory.
 *
 * <p>The query's start date is raised to {@code firstEventTimestamp} when it is missing or
 * earlier, and a missing end date is set to the current time; both changes are applied to the
 * {@code provenanceQuery} object that was passed in.
 *
 * @param provenanceQuery     the query to execute
 * @param user                the user on whose behalf events are authorized
 * @param retrievedCount      running count of retrieved events; if it has already reached the
 *                            query's maximum, the index is not searched at all
 * @param firstEventTimestamp lower bound applied to the query's start date
 * @return a result holding the matching records, or an empty result if the search was skipped,
 *         produced no hits, or the index files could not be found
 * @throws IOException if the index directory cannot be created, is not a directory,
 *                     or the search fails for a reason other than a missing file
 */
public StandardQueryResult search(final org.apache.nifi.provenance.search.Query provenanceQuery, final NiFiUser user, final AtomicInteger retrievedCount, final long firstEventTimestamp) throws IOException {
    // Nothing to do if earlier searches already produced the maximum number of results.
    if (retrievedCount.get() >= provenanceQuery.getMaxResults()) {
        final StandardQueryResult skippedResult = new StandardQueryResult(provenanceQuery, 1);
        skippedResult.update(Collections.<ProvenanceEventRecord>emptyList(), 0L);
        logger.info("Skipping search of Provenance Index {} for {} because the max number of results ({}) has already been retrieved", indexDirectory, provenanceQuery, provenanceQuery.getMaxResults());
        return skippedResult;
    }

    final long overallStartNanos = System.nanoTime();

    final boolean directoryAvailable = indexDirectory.exists() || indexDirectory.mkdirs();
    if (!directoryAvailable) {
        throw new IOException("Unable to create Indexing Directory " + indexDirectory);
    }
    if (!indexDirectory.isDirectory()) {
        throw new IOException("Indexing Directory specified is " + indexDirectory + ", but this is not a directory");
    }

    final StandardQueryResult result = new StandardQueryResult(provenanceQuery, 1);

    // Never search from before firstEventTimestamp: per the original note, events that are no longer
    // in the repository must not count toward the total number of matches.
    if (provenanceQuery.getStartDate() == null || provenanceQuery.getStartDate().getTime() < firstEventTimestamp) {
        provenanceQuery.setStartDate(new Date(firstEventTimestamp));
    }
    if (provenanceQuery.getEndDate() == null) {
        provenanceQuery.setEndDate(new Date());
    }

    final Query luceneQuery = LuceneUtil.convertQuery(provenanceQuery);
    final long borrowStartNanos = System.nanoTime();
    EventIndexSearcher searcher = null;
    try {
        searcher = indexManager.borrowIndexSearcher(indexDirectory);
        final long searchStartNanos = System.nanoTime();
        final long openSearcherNanos = searchStartNanos - borrowStartNanos;

        logger.debug("Searching {} for {}", this, provenanceQuery);
        final TopDocs topDocs = searcher.getIndexSearcher().search(luceneQuery, provenanceQuery.getMaxResults());
        final long searchFinishedNanos = System.nanoTime();
        final long searchNanos = searchFinishedNanos - searchStartNanos;
        logger.debug("Searching {} for {} took {} millis; opening searcher took {} millis", this, provenanceQuery, TimeUnit.NANOSECONDS.toMillis(searchNanos), TimeUnit.NANOSECONDS.toMillis(openSearcherNanos));

        if (topDocs.totalHits == 0) {
            result.update(Collections.<ProvenanceEventRecord>emptyList(), 0);
            return result;
        }

        // Read the matching events, applying authorization on behalf of the requesting user.
        final DocsReader docsReader = new DocsReader();
        final EventAuthorizer authorizer = newRepositoryAuthorizer(user);
        final Set<ProvenanceEventRecord> matchingRecords = docsReader.read(topDocs, authorizer, searcher.getIndexSearcher().getIndexReader(), repository.getAllLogFiles(), retrievedCount, provenanceQuery.getMaxResults(), maxAttributeChars);
        final long readRecordsNanos = System.nanoTime() - searchFinishedNanos;
        logger.debug("Reading {} records took {} millis for {}", matchingRecords.size(), TimeUnit.NANOSECONDS.toMillis(readRecordsNanos), this);

        result.update(matchingRecords, topDocs.totalHits);
        final long queryNanos = System.nanoTime() - overallStartNanos;
        logger.info("Successfully executed {} against Index {}; Search took {} milliseconds; Total Hits = {}", provenanceQuery, indexDirectory, TimeUnit.NANOSECONDS.toMillis(queryNanos), topDocs.totalHits);
        return result;
    } catch (final FileNotFoundException missingFile) {
        // Either nothing has been indexed yet or the data has already aged off; report no matches.
        logger.warn("Attempted to search Provenance Index {} but could not find the file due to {}", indexDirectory, missingFile);
        if (logger.isDebugEnabled()) {
            logger.warn("", missingFile);
        }
        result.update(Collections.<ProvenanceEventRecord>emptyList(), 0);
        return result;
    } finally {
        // The searcher is only non-null if the borrow succeeded.
        if (searcher != null) {
            indexManager.returnIndexSearcher(searcher);
        }
    }
}

/**
 * Builds an authorizer that forwards every decision to the repository for the given user.
 *
 * @param user the user whose access is being checked
 * @return an {@code EventAuthorizer} whose methods delegate to {@code repository}
 */
private EventAuthorizer newRepositoryAuthorizer(final NiFiUser user) {
    return new EventAuthorizer() {

        @Override
        public boolean isAuthorized(ProvenanceEventRecord event) {
            return repository.isAuthorized(event, user);
        }

        @Override
        public void authorize(ProvenanceEventRecord event) throws AccessDeniedException {
            repository.authorize(event, user);
        }

        @Override
        public List<ProvenanceEventRecord> filterUnauthorizedEvents(List<ProvenanceEventRecord> events) {
            return repository.filterUnauthorizedEvents(events, user);
        }

        @Override
        public Set<ProvenanceEventRecord> replaceUnauthorizedWithPlaceholders(Set<ProvenanceEventRecord> events) {
            return repository.replaceUnauthorizedWithPlaceholders(events, user);
        }
    };
}
Also used : Set(java.util.Set) Query(org.apache.lucene.search.Query) EventAuthorizer(org.apache.nifi.provenance.authorization.EventAuthorizer) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) Date(java.util.Date) TopDocs(org.apache.lucene.search.TopDocs) EventIndexSearcher(org.apache.nifi.provenance.index.EventIndexSearcher) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) List(java.util.List) StandardQueryResult(org.apache.nifi.provenance.StandardQueryResult)

Aggregations

EventIndexSearcher (org.apache.nifi.provenance.index.EventIndexSearcher)10 File (java.io.File)6 IOException (java.io.IOException)5 EventIndexWriter (org.apache.nifi.provenance.index.EventIndexWriter)5 TopDocs (org.apache.lucene.search.TopDocs)4 FileNotFoundException (java.io.FileNotFoundException)3 Document (org.apache.lucene.document.Document)3 Test (org.junit.Test)3 List (java.util.List)2 StringField (org.apache.lucene.document.StringField)2 DirectoryReader (org.apache.lucene.index.DirectoryReader)2 IndexReader (org.apache.lucene.index.IndexReader)2 IndexSearcher (org.apache.lucene.search.IndexSearcher)2 Directory (org.apache.lucene.store.Directory)2 FSDirectory (org.apache.lucene.store.FSDirectory)2 ProvenanceEventRecord (org.apache.nifi.provenance.ProvenanceEventRecord)2 Date (java.util.Date)1 HashMap (java.util.HashMap)1 Set (java.util.Set)1 CountDownLatch (java.util.concurrent.CountDownLatch)1