
Example 46 with ProvenanceEventRecord

Use of org.apache.nifi.provenance.ProvenanceEventRecord in project nifi by apache.

In the class TestWriteAheadStorePartition, method testReindex.

@Test
@SuppressWarnings("unchecked")
public void testReindex() throws IOException {
    final RepositoryConfiguration repoConfig = createConfig(1, "testReindex");
    repoConfig.setMaxEventFileCount(5);
    final String partitionName = repoConfig.getStorageDirectories().keySet().iterator().next();
    final File storageDirectory = repoConfig.getStorageDirectories().values().iterator().next();
    final RecordWriterFactory recordWriterFactory = (file, idGenerator, compressed, createToc) -> {
        final TocWriter tocWriter = createToc ? new StandardTocWriter(TocUtil.getTocFile(file), false, false) : null;
        return new EventIdFirstSchemaRecordWriter(file, idGenerator, tocWriter, compressed, 32 * 1024, IdentifierLookup.EMPTY);
    };
    final RecordReaderFactory recordReaderFactory = (file, logs, maxChars) -> RecordReaders.newRecordReader(file, logs, maxChars);
    final WriteAheadStorePartition partition = new WriteAheadStorePartition(storageDirectory, partitionName, repoConfig, recordWriterFactory, recordReaderFactory, new LinkedBlockingQueue<>(), new AtomicLong(0L), EventReporter.NO_OP);
    for (int i = 0; i < 100; i++) {
        partition.addEvents(Collections.singleton(TestUtil.createEvent()));
    }
    final Map<ProvenanceEventRecord, StorageSummary> reindexedEvents = new ConcurrentHashMap<>();
    final EventIndex eventIndex = Mockito.mock(EventIndex.class);
    Mockito.doAnswer(new Answer<Object>() {

        @Override
        public Object answer(final InvocationOnMock invocation) throws Throwable {
            final Map<ProvenanceEventRecord, StorageSummary> events = invocation.getArgumentAt(0, Map.class);
            reindexedEvents.putAll(events);
            return null;
        }
    }).when(eventIndex).reindexEvents(Mockito.anyMap());
    Mockito.doReturn(18L).when(eventIndex).getMinimumEventIdToReindex("1");
    partition.reindexLatestEvents(eventIndex);
    final List<Long> eventIdsReindexed = reindexedEvents.values().stream().map(StorageSummary::getEventId).sorted().collect(Collectors.toList());
    assertEquals(82, eventIdsReindexed.size());
    for (int i = 0; i < eventIdsReindexed.size(); i++) {
        assertEquals(18 + i, eventIdsReindexed.get(i).intValue());
    }
}
Also used : StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) IdentifierLookup(org.apache.nifi.provenance.IdentifierLookup) RecordReaders(org.apache.nifi.provenance.serialization.RecordReaders) TocWriter(org.apache.nifi.provenance.toc.TocWriter) Answer(org.mockito.stubbing.Answer) InvocationOnMock(org.mockito.invocation.InvocationOnMock) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) Map(java.util.Map) TocUtil(org.apache.nifi.provenance.toc.TocUtil) EventIndex(org.apache.nifi.provenance.index.EventIndex) StorageSummary(org.apache.nifi.provenance.serialization.StorageSummary) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) IOException(java.io.IOException) Test(org.junit.Test) UUID(java.util.UUID) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Collectors(java.util.stream.Collectors) File(java.io.File) AtomicLong(java.util.concurrent.atomic.AtomicLong) Mockito(org.mockito.Mockito) List(java.util.List) EventReporter(org.apache.nifi.events.EventReporter) RepositoryConfiguration(org.apache.nifi.provenance.RepositoryConfiguration) TestUtil(org.apache.nifi.provenance.TestUtil) Collections(java.util.Collections) EventIdFirstSchemaRecordWriter(org.apache.nifi.provenance.EventIdFirstSchemaRecordWriter) Assert.assertEquals(org.junit.Assert.assertEquals)
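
TestUtil.createEvent() above is a test helper whose body is not shown here. As a rough sketch of what such a helper can look like, the block below builds a minimal RECEIVE event with StandardProvenanceEventRecord.Builder; the attribute names, content-claim values, and exact set of required fields are assumptions for illustration, not the NiFi test's actual implementation.

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceEventType;
import org.apache.nifi.provenance.StandardProvenanceEventRecord;

// Hypothetical stand-in for TestUtil.createEvent(): builds one minimal RECEIVE event.
private ProvenanceEventRecord createSampleEvent() {
    final String uuid = UUID.randomUUID().toString();
    final Map<String, String> attributes = new HashMap<>();
    attributes.put("uuid", uuid);
    attributes.put("filename", "file-" + uuid);

    return new StandardProvenanceEventRecord.Builder()
        .setEventType(ProvenanceEventType.RECEIVE)
        .setEventTime(System.currentTimeMillis())
        .setFlowFileEntryDate(System.currentTimeMillis())
        .setLineageStartDate(System.currentTimeMillis())
        .setFlowFileUUID(uuid)
        .setComponentId("1234")
        .setComponentType("dummy processor")
        .setTransitUri("nifi://unit-test")
        .setCurrentContentClaim("container", "section", "identifier", 0L, 0L)
        .setAttributes(Collections.emptyMap(), attributes)
        .build();
}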

Example 47 with ProvenanceEventRecord

Use of org.apache.nifi.provenance.ProvenanceEventRecord in project nifi by apache.

In the class TestSelectiveRecordReaderEventIterator, method time.

private void time(final Callable<ProvenanceEventRecord> task, final long id) throws Exception {
    final long start = System.nanoTime();
    final ProvenanceEventRecord event = task.call();
    Assert.assertNotNull(event);
    Assert.assertEquals(id, event.getEventId());
    // Timing is computed for ad-hoc inspection; the println calls are intentionally left commented out.
    // System.out.println(event);
    final long nanos = System.nanoTime() - start;
    final long millis = TimeUnit.NANOSECONDS.toMillis(nanos);
    // System.out.println("Took " + millis + " ms to retrieve event " + id);
}
Also used : ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord)
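
A hedged sketch of how a timing helper like this could be driven. The in-memory event list below stands in for the record-reader iterators the real test exercises; timeLookups and its trivial Callable are illustrative names, not part of the NiFi test.

import java.util.List;
import java.util.concurrent.Callable;
import org.apache.nifi.provenance.ProvenanceEventRecord;

// Hypothetical driver for the time(...) helper above: wrap each lookup in a Callable and
// assert the expected event id. A real test would read the events back from disk instead.
private void timeLookups(final List<ProvenanceEventRecord> events) throws Exception {
    for (final ProvenanceEventRecord expected : events) {
        final Callable<ProvenanceEventRecord> task = () -> expected; // trivial stand-in for a read task
        time(task, expected.getEventId());
    }
}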

Example 48 with ProvenanceEventRecord

Use of org.apache.nifi.provenance.ProvenanceEventRecord in project nifi by apache.

In the class QueryTask, method readDocuments.

private Tuple<List<ProvenanceEventRecord>, Integer> readDocuments(final TopDocs topDocs, final IndexReader indexReader) {
    // If no topDocs is supplied, just provide a Tuple that has no records and a hit count of 0.
    if (topDocs == null || topDocs.totalHits == 0) {
        return new Tuple<>(Collections.<ProvenanceEventRecord>emptyList(), 0);
    }
    final long start = System.nanoTime();
    final List<Long> eventIds = Arrays.stream(topDocs.scoreDocs).mapToInt(scoreDoc -> scoreDoc.doc).mapToObj(docId -> {
        try {
            return indexReader.document(docId, LUCENE_FIELDS_TO_LOAD);
        } catch (final Exception e) {
            throw new SearchFailedException("Failed to read Provenance Events from Event File", e);
        }
    }).map(doc -> doc.getField(SearchableFields.Identifier.getSearchableFieldName()).numericValue().longValue()).collect(Collectors.toList());
    final long endConvert = System.nanoTime();
    final long ms = TimeUnit.NANOSECONDS.toMillis(endConvert - start);
    logger.debug("Converting documents took {} ms", ms);
    List<ProvenanceEventRecord> events;
    try {
        events = eventStore.getEvents(eventIds, authorizer, transformer);
    } catch (IOException e) {
        throw new SearchFailedException("Unable to retrieve events from the Provenance Store", e);
    }
    final long fetchEventNanos = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - endConvert);
    logger.debug("Fetching {} events from Event Store took {} ms ({} events actually fetched)", eventIds.size(), fetchEventNanos, events.size());
    final int totalHits = topDocs.totalHits;
    return new Tuple<>(events, totalHits);
}
Also used : Query(org.apache.lucene.search.Query) TopDocs(org.apache.lucene.search.TopDocs) EventIndexSearcher(org.apache.nifi.provenance.index.EventIndexSearcher) Arrays(java.util.Arrays) EventTransformer(org.apache.nifi.provenance.authorization.EventTransformer) Logger(org.slf4j.Logger) SearchFailedException(org.apache.nifi.provenance.index.SearchFailedException) LoggerFactory(org.slf4j.LoggerFactory) Set(java.util.Set) IOException(java.io.IOException) SearchableFields(org.apache.nifi.provenance.SearchableFields) Collectors(java.util.stream.Collectors) File(java.io.File) FileNotFoundException(java.io.FileNotFoundException) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) Tuple(org.apache.nifi.util.Tuple) IndexManager(org.apache.nifi.provenance.lucene.IndexManager) EventStore(org.apache.nifi.provenance.store.EventStore) ProgressiveResult(org.apache.nifi.provenance.ProgressiveResult) Collections(java.util.Collections) IndexReader(org.apache.lucene.index.IndexReader) EventAuthorizer(org.apache.nifi.provenance.authorization.EventAuthorizer)
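
A hedged sketch of the caller side: run the Lucene search against a borrowed IndexSearcher and hand the resulting TopDocs to readDocuments(...) as defined above. The method name queryEvents and its parameters are placeholders, not QueryTask's actual call path.

import java.io.IOException;
import java.util.List;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.util.Tuple;

// Hypothetical caller: execute the Lucene query, then delegate document-to-event resolution
// to readDocuments(...) shown above.
private List<ProvenanceEventRecord> queryEvents(final IndexSearcher searcher, final Query query, final int maxResults) throws IOException {
    final TopDocs topDocs = searcher.search(query, maxResults);
    final IndexReader indexReader = searcher.getIndexReader();
    final Tuple<List<ProvenanceEventRecord>, Integer> result = readDocuments(topDocs, indexReader);
    // result.getValue() carries the total hit count; only the fetched events are returned here.
    return result.getKey();
}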

Example 49 with ProvenanceEventRecord

Use of org.apache.nifi.provenance.ProvenanceEventRecord in project nifi by apache.

In the class DocsReader, method read.

public Set<ProvenanceEventRecord> read(final List<Document> docs, final EventAuthorizer authorizer, final Collection<Path> allProvenanceLogFiles, final AtomicInteger retrievalCount, final int maxResults, final int maxAttributeChars) throws IOException {
    if (retrievalCount.get() >= maxResults) {
        return Collections.emptySet();
    }
    final long start = System.nanoTime();
    final Set<ProvenanceEventRecord> matchingRecords = new LinkedHashSet<>();
    final Map<String, List<Document>> byStorageNameDocGroups = LuceneUtil.groupDocsByStorageFileName(docs);
    int eventsReadThisFile = 0;
    int logFileCount = 0;
    for (String storageFileName : byStorageNameDocGroups.keySet()) {
        final File provenanceEventFile = LuceneUtil.getProvenanceLogFile(storageFileName, allProvenanceLogFiles);
        if (provenanceEventFile == null) {
            logger.warn("Could not find Provenance Log File with " + "basename {} in the Provenance Repository; assuming " + "file has expired and continuing without it", storageFileName);
            continue;
        }
        try (final RecordReader reader = RecordReaders.newRecordReader(provenanceEventFile, allProvenanceLogFiles, maxAttributeChars)) {
            final Iterator<Document> docIter = byStorageNameDocGroups.get(storageFileName).iterator();
            while (docIter.hasNext() && retrievalCount.getAndIncrement() < maxResults) {
                final ProvenanceEventRecord event = getRecord(docIter.next(), reader);
                if (event != null && authorizer.isAuthorized(event)) {
                    matchingRecords.add(event);
                    eventsReadThisFile++;
                }
            }
        } catch (final Exception e) {
            logger.warn("Failed to read Provenance Events. The event file '" + provenanceEventFile.getAbsolutePath() + "' may be missing or corrupt.", e);
        }
        logFileCount++;
    }
    logger.debug("Read {} records from previous file", eventsReadThisFile);
    final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
    logger.debug("Took {} ms to read {} events from {} prov log files", millis, matchingRecords.size(), logFileCount);
    return matchingRecords;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) Document(org.apache.lucene.document.Document) IOException(java.io.IOException) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) StandardProvenanceEventRecord(org.apache.nifi.provenance.StandardProvenanceEventRecord) ArrayList(java.util.ArrayList) List(java.util.List) File(java.io.File)
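
A hedged sketch of invoking read(...) with a permissive EventAuthorizer that grants every event, mirroring the four overrides shown in Example 50 below. The readAll wrapper, the limits (1000 results, 64 KB of attributes), and the package assumed for AccessDeniedException are illustrative choices, not code from the NiFi repository.

import java.io.IOException;
import java.nio.file.Path;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.document.Document;
import org.apache.nifi.authorization.AccessDeniedException;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.authorization.EventAuthorizer;

// Hypothetical caller: read all matching documents with an authorizer that lets everything through.
private Set<ProvenanceEventRecord> readAll(final DocsReader docsReader, final List<Document> docs,
        final Collection<Path> allProvenanceLogFiles) throws IOException {

    final EventAuthorizer grantAll = new EventAuthorizer() {
        @Override
        public boolean isAuthorized(final ProvenanceEventRecord event) {
            return true;
        }

        @Override
        public void authorize(final ProvenanceEventRecord event) throws AccessDeniedException {
            // no-op: every event is authorized
        }

        @Override
        public List<ProvenanceEventRecord> filterUnauthorizedEvents(final List<ProvenanceEventRecord> events) {
            return events;
        }

        @Override
        public Set<ProvenanceEventRecord> replaceUnauthorizedWithPlaceholders(final Set<ProvenanceEventRecord> events) {
            return events;
        }
    };

    // Placeholder limits: retrieve up to 1000 events, truncating attributes at 64 KB.
    return docsReader.read(docs, grantAll, allProvenanceLogFiles, new AtomicInteger(0), 1000, 65536);
}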

Example 50 with ProvenanceEventRecord

Use of org.apache.nifi.provenance.ProvenanceEventRecord in project nifi by apache.

In the class IndexSearch, method search.

public StandardQueryResult search(final org.apache.nifi.provenance.search.Query provenanceQuery, final NiFiUser user, final AtomicInteger retrievedCount, final long firstEventTimestamp) throws IOException {
    if (retrievedCount.get() >= provenanceQuery.getMaxResults()) {
        final StandardQueryResult sqr = new StandardQueryResult(provenanceQuery, 1);
        sqr.update(Collections.<ProvenanceEventRecord>emptyList(), 0L);
        logger.info("Skipping search of Provenance Index {} for {} because the max number of results ({}) has already been retrieved", indexDirectory, provenanceQuery, provenanceQuery.getMaxResults());
        return sqr;
    }
    final long startNanos = System.nanoTime();
    if (!indexDirectory.exists() && !indexDirectory.mkdirs()) {
        throw new IOException("Unable to create Indexing Directory " + indexDirectory);
    }
    if (!indexDirectory.isDirectory()) {
        throw new IOException("Indexing Directory specified is " + indexDirectory + ", but this is not a directory");
    }
    final StandardQueryResult sqr = new StandardQueryResult(provenanceQuery, 1);
    final Set<ProvenanceEventRecord> matchingRecords;
    // If the query has no start date, or one earlier than the first event in the repository, clamp it to the
    // first event's timestamp; events before that point do not exist in the repository, and we don't want
    // those events to count toward the total number of matches.
    if (provenanceQuery.getStartDate() == null || provenanceQuery.getStartDate().getTime() < firstEventTimestamp) {
        provenanceQuery.setStartDate(new Date(firstEventTimestamp));
    }
    if (provenanceQuery.getEndDate() == null) {
        provenanceQuery.setEndDate(new Date());
    }
    final Query luceneQuery = LuceneUtil.convertQuery(provenanceQuery);
    final long start = System.nanoTime();
    EventIndexSearcher searcher = null;
    try {
        searcher = indexManager.borrowIndexSearcher(indexDirectory);
        final long searchStartNanos = System.nanoTime();
        final long openSearcherNanos = searchStartNanos - start;
        logger.debug("Searching {} for {}", this, provenanceQuery);
        final TopDocs topDocs = searcher.getIndexSearcher().search(luceneQuery, provenanceQuery.getMaxResults());
        final long finishSearch = System.nanoTime();
        final long searchNanos = finishSearch - searchStartNanos;
        logger.debug("Searching {} for {} took {} millis; opening searcher took {} millis", this, provenanceQuery, TimeUnit.NANOSECONDS.toMillis(searchNanos), TimeUnit.NANOSECONDS.toMillis(openSearcherNanos));
        if (topDocs.totalHits == 0) {
            sqr.update(Collections.<ProvenanceEventRecord>emptyList(), 0);
            return sqr;
        }
        final DocsReader docsReader = new DocsReader();
        final EventAuthorizer authorizer = new EventAuthorizer() {

            @Override
            public boolean isAuthorized(ProvenanceEventRecord event) {
                return repository.isAuthorized(event, user);
            }

            @Override
            public void authorize(ProvenanceEventRecord event) throws AccessDeniedException {
                repository.authorize(event, user);
            }

            @Override
            public List<ProvenanceEventRecord> filterUnauthorizedEvents(List<ProvenanceEventRecord> events) {
                return repository.filterUnauthorizedEvents(events, user);
            }

            @Override
            public Set<ProvenanceEventRecord> replaceUnauthorizedWithPlaceholders(Set<ProvenanceEventRecord> events) {
                return repository.replaceUnauthorizedWithPlaceholders(events, user);
            }
        };
        matchingRecords = docsReader.read(topDocs, authorizer, searcher.getIndexSearcher().getIndexReader(), repository.getAllLogFiles(), retrievedCount, provenanceQuery.getMaxResults(), maxAttributeChars);
        final long readRecordsNanos = System.nanoTime() - finishSearch;
        logger.debug("Reading {} records took {} millis for {}", matchingRecords.size(), TimeUnit.NANOSECONDS.toMillis(readRecordsNanos), this);
        sqr.update(matchingRecords, topDocs.totalHits);
        final long queryNanos = System.nanoTime() - startNanos;
        logger.info("Successfully executed {} against Index {}; Search took {} milliseconds; Total Hits = {}", provenanceQuery, indexDirectory, TimeUnit.NANOSECONDS.toMillis(queryNanos), topDocs.totalHits);
        return sqr;
    } catch (final FileNotFoundException e) {
        // nothing has been indexed yet, or the data has already aged off
        logger.warn("Attempted to search Provenance Index {} but could not find the file due to {}", indexDirectory, e);
        if (logger.isDebugEnabled()) {
            logger.warn("", e);
        }
        sqr.update(Collections.<ProvenanceEventRecord>emptyList(), 0);
        return sqr;
    } finally {
        if (searcher != null) {
            indexManager.returnIndexSearcher(searcher);
        }
    }
}
Also used : Set(java.util.Set) Query(org.apache.lucene.search.Query) EventAuthorizer(org.apache.nifi.provenance.authorization.EventAuthorizer) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) Date(java.util.Date) TopDocs(org.apache.lucene.search.TopDocs) EventIndexSearcher(org.apache.nifi.provenance.index.EventIndexSearcher) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) List(java.util.List) StandardQueryResult(org.apache.nifi.provenance.StandardQueryResult)
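
A hedged sketch of a caller building the provenance-level query that search(...) expects and collecting the matching events. The Query, SearchTerms, and SearchableFields usage follows NiFi's provenance search API as it appears elsewhere in the repository; findEventsForComponent, the component id, and the remaining parameters are placeholders.

import java.io.IOException;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.nifi.authorization.user.NiFiUser;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.SearchableFields;
import org.apache.nifi.provenance.StandardQueryResult;
import org.apache.nifi.provenance.search.Query;
import org.apache.nifi.provenance.search.SearchTerms;

// Hypothetical caller: search the index for events emitted by a single component.
private List<ProvenanceEventRecord> findEventsForComponent(final IndexSearch indexSearch, final NiFiUser user,
        final long firstEventTimestamp) throws IOException {

    final Query provenanceQuery = new Query(UUID.randomUUID().toString());
    provenanceQuery.addSearchTerm(SearchTerms.newSearchTerm(SearchableFields.ComponentID, "1234"));
    provenanceQuery.setMaxResults(100);

    final StandardQueryResult result = indexSearch.search(provenanceQuery, user, new AtomicInteger(0), firstEventTimestamp);
    return result.getMatchingEvents();
}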

Aggregations

ProvenanceEventRecord (org.apache.nifi.provenance.ProvenanceEventRecord): 194
Test (org.junit.Test): 118
StandardProvenanceEventRecord (org.apache.nifi.provenance.StandardProvenanceEventRecord): 69
HashMap (java.util.HashMap): 57
MockFlowFile (org.apache.nifi.util.MockFlowFile): 52
ArrayList (java.util.ArrayList): 36
IOException (java.io.IOException): 32
TestRunner (org.apache.nifi.util.TestRunner): 24
FlowFileHandlingException (org.apache.nifi.processor.exception.FlowFileHandlingException): 23
DataSetRefs (org.apache.nifi.atlas.provenance.DataSetRefs): 21
AnalysisContext (org.apache.nifi.atlas.provenance.AnalysisContext): 20
Referenceable (org.apache.atlas.typesystem.Referenceable): 19
NiFiProvenanceEventAnalyzer (org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer): 18
ClusterResolvers (org.apache.nifi.atlas.resolver.ClusterResolvers): 18
RepositoryConfiguration (org.apache.nifi.provenance.RepositoryConfiguration): 17
File (java.io.File): 16
List (java.util.List): 16
AtomicLong (java.util.concurrent.atomic.AtomicLong): 16
PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor): 15
Map (java.util.Map): 12