Search in sources :

Example 11 with StorageSummary

use of org.apache.nifi.provenance.serialization.StorageSummary in project nifi by apache.

the class TestLuceneEventIndex method addThenQueryWithEmptyQuery.

/**
 * Adds a single event to a {@link LuceneEventIndex} and verifies that a query with no
 * search terms eventually returns exactly that event.
 *
 * Skipped when {@code isWindowsEnvironment()} reports true.
 *
 * @throws InterruptedException if the thread is interrupted while awaiting a result or sleeping
 */
@Test(timeout = 60000)
public void addThenQueryWithEmptyQuery() throws InterruptedException {
    assumeFalse(isWindowsEnvironment());

    final RepositoryConfiguration config = createConfig();
    final IndexManager manager = new SimpleIndexManager(config);
    final LuceneEventIndex eventIndex = new LuceneEventIndex(config, manager, 1, EventReporter.NO_OP);

    // Index one event, then back the index with a store that holds that same event.
    final ProvenanceEventRecord expected = createEvent();
    final StorageSummary location = new StorageSummary(expected.getEventId(), "1.prov", "1", 1, 2L, 2L);
    eventIndex.addEvent(expected, location);

    // No search terms are added, so this is the "empty" query under test.
    final Query emptyQuery = new Query(UUID.randomUUID().toString());

    final ArrayListEventStore store = new ArrayListEventStore();
    store.addEvent(expected);
    eventIndex.initialize(store);

    // Indexing completes at an unknown time, so poll until the query yields something.
    // The JUnit timeout declared on this test (60 seconds) bounds the total wait.
    List<ProvenanceEventRecord> found;
    do {
        final QuerySubmission submitted = eventIndex.submitQuery(emptyQuery, EventAuthorizer.GRANT_ALL, "unit test user");
        assertNotNull(submitted);

        final QueryResult outcome = submitted.getResult();
        assertNotNull(outcome);
        outcome.awaitCompletion(100, TimeUnit.MILLISECONDS);
        assertTrue(outcome.isFinished());
        assertNull(outcome.getError());

        found = outcome.getMatchingEvents();
        assertNotNull(found);

        // Pause between attempts so the polling loop does not monopolise the CPU.
        Thread.sleep(100L);
    } while (found.isEmpty());

    assertEquals(1, found.size());
    assertEquals(expected, found.get(0));
}
Also used : SimpleIndexManager(org.apache.nifi.provenance.lucene.SimpleIndexManager) IndexManager(org.apache.nifi.provenance.lucene.IndexManager) QueryResult(org.apache.nifi.provenance.search.QueryResult) StorageSummary(org.apache.nifi.provenance.serialization.StorageSummary) QuerySubmission(org.apache.nifi.provenance.search.QuerySubmission) Query(org.apache.nifi.provenance.search.Query) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) StandardProvenanceEventRecord(org.apache.nifi.provenance.StandardProvenanceEventRecord) RepositoryConfiguration(org.apache.nifi.provenance.RepositoryConfiguration) SimpleIndexManager(org.apache.nifi.provenance.lucene.SimpleIndexManager) ArrayListEventStore(org.apache.nifi.provenance.store.ArrayListEventStore) Test(org.junit.Test)

Example 12 with StorageSummary

use of org.apache.nifi.provenance.serialization.StorageSummary in project nifi by apache.

the class TestLuceneEventIndex method testQuerySpecificField.

/**
 * Indexes two events and verifies that a query restricted to the first event's FlowFile UUID
 * eventually returns exactly that first event.
 *
 * @throws InterruptedException if the thread is interrupted while awaiting a result or sleeping
 */
@Test(timeout = 50000)
public void testQuerySpecificField() throws InterruptedException {
    final RepositoryConfiguration config = createConfig();
    final IndexManager manager = new SimpleIndexManager(config);
    final LuceneEventIndex eventIndex = new LuceneEventIndex(config, manager, 2, EventReporter.NO_OP);

    // Two events go into the index; only the first one is the target of the query.
    final ProvenanceEventRecord target = createEvent();
    final StorageSummary targetLocation = new StorageSummary(target.getEventId(), "1.prov", "1", 1, 2L, 2L);
    eventIndex.addEvent(target, targetLocation);

    final ProvenanceEventRecord other = createEvent();
    final StorageSummary otherLocation = new StorageSummary(2L, "1.prov", "1", 1, 2L, 2L);
    eventIndex.addEvent(other, otherLocation);

    // Restrict the search to the FlowFile UUID carried by the target event.
    final Query uuidQuery = new Query(UUID.randomUUID().toString());
    uuidQuery.addSearchTerm(SearchTerms.newSearchTerm(SearchableFields.FlowFileUUID, target.getFlowFileUuid()));

    // Only the target event is placed in the backing store.
    final ArrayListEventStore store = new ArrayListEventStore();
    store.addEvent(target);
    eventIndex.initialize(store);

    // Indexing completes at an unknown time, so poll until the query yields something.
    // The JUnit timeout declared on this test (50 seconds) bounds the total wait.
    List<ProvenanceEventRecord> found;
    do {
        final QuerySubmission submitted = eventIndex.submitQuery(uuidQuery, EventAuthorizer.GRANT_ALL, "unit test user");
        assertNotNull(submitted);

        final QueryResult outcome = submitted.getResult();
        assertNotNull(outcome);
        outcome.awaitCompletion(100, TimeUnit.MILLISECONDS);
        assertTrue(outcome.isFinished());
        assertNull(outcome.getError());

        found = outcome.getMatchingEvents();
        assertNotNull(found);

        // Pause between attempts so the polling loop does not monopolise the CPU.
        Thread.sleep(100L);
    } while (found.isEmpty());

    assertEquals(1, found.size());
    assertEquals(target, found.get(0));
}
Also used : SimpleIndexManager(org.apache.nifi.provenance.lucene.SimpleIndexManager) IndexManager(org.apache.nifi.provenance.lucene.IndexManager) QueryResult(org.apache.nifi.provenance.search.QueryResult) StorageSummary(org.apache.nifi.provenance.serialization.StorageSummary) QuerySubmission(org.apache.nifi.provenance.search.QuerySubmission) Query(org.apache.nifi.provenance.search.Query) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) StandardProvenanceEventRecord(org.apache.nifi.provenance.StandardProvenanceEventRecord) RepositoryConfiguration(org.apache.nifi.provenance.RepositoryConfiguration) SimpleIndexManager(org.apache.nifi.provenance.lucene.SimpleIndexManager) ArrayListEventStore(org.apache.nifi.provenance.store.ArrayListEventStore) Test(org.junit.Test)

Example 13 with StorageSummary

use of org.apache.nifi.provenance.serialization.StorageSummary in project nifi by apache.

the class LuceneEventIndex method reindexEvents.

/**
 * Re-indexes the given events.
 *
 * For every entry, all cached queries are updated with the event and its storage summary,
 * the event is converted to a Lucene {@link Document}, and (when the conversion produced a
 * document) it is paired with an index directory and queued for re-indexing. The whole batch
 * is then handed to an {@link EventIndexTask} with {@code CommitPreference.PREVENT_COMMIT}.
 *
 * An {@link IOException} raised by the re-index call is not propagated: it is logged and
 * reported through the event reporter.
 *
 * @param events the events to re-index, mapped to the storage summary of each event
 */
@Override
public void reindexEvents(final Map<ProvenanceEventRecord, StorageSummary> events) {
    final EventIndexTask indexTask = new EventIndexTask(documentQueue, config, indexManager, directoryManager, EventIndexTask.DEFAULT_MAX_EVENTS_PER_COMMIT, eventReporter);

    // One-entry cache of the most recent directory lookup, keyed by event time.
    // -2L is the initial "nothing cached yet" value.
    File lastIndexDir = null;
    long lastEventTime = -2L;

    final List<IndexableDocument> indexableDocs = new ArrayList<>(events.size());
    for (final Map.Entry<ProvenanceEventRecord, StorageSummary> entry : events.entrySet()) {
        final ProvenanceEventRecord event = entry.getKey();
        final StorageSummary summary = entry.getValue();

        for (final CachedQuery cachedQuery : cachedQueries) {
            cachedQuery.update(event, summary);
        }

        final Document document = eventConverter.convert(event, summary);
        if (document == null) {
            logger.debug("Received Provenance Event {} to index but it contained no information that should be indexed, so skipping it", event.getEventId());
            continue;
        }

        final long eventTime = event.getEventTime();
        final File indexDir;
        if (eventTime == lastEventTime) {
            // Same timestamp as the previous lookup: reuse its result.
            indexDir = lastIndexDir;
        } else {
            final List<File> files = getDirectoryManager().getDirectories(eventTime, null);
            indexDir = files.isEmpty() ? null : files.get(0);
            lastIndexDir = indexDir;
            // Record which time the cached directory belongs to. Without this assignment
            // the cache key never changes from its initial value and the reuse branch
            // above can never be taken.
            lastEventTime = eventTime;
        }

        indexableDocs.add(new IndexableDocument(document, summary, indexDir));
    }

    try {
        indexTask.reIndex(indexableDocs, CommitPreference.PREVENT_COMMIT);
    } catch (final IOException ioe) {
        logger.error("Failed to reindex some Provenance Events", ioe);
        eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to re-index some Provenance Events. " + "Some Provenance Events may not be available for querying. See logs for more information.");
    }
}
Also used : ArrayList(java.util.ArrayList) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) StorageSummary(org.apache.nifi.provenance.serialization.StorageSummary) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) File(java.io.File) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap)

Example 14 with StorageSummary

use of org.apache.nifi.provenance.serialization.StorageSummary in project nifi by apache.

the class WriteAheadStorePartition method addEvents.

/**
 * Writes the given events through a claimed record-writer lease and reports where each
 * event was stored.
 *
 * The claim on the lease is always relinquished once the write attempt finishes, whether
 * it succeeded or threw. Afterwards, if the lease reports that it should roll, a rollover
 * is attempted; an {@link IOException} from that attempt is logged rather than propagated.
 *
 * @param events the events to persist
 * @return a result exposing the storage location of each event, whether a rollover was
 *         triggered, and the number of events rolled over ({@code null} when no rollover
 *         took place)
 * @throws IOException if this partition is closed, or if claiming a lease or writing the
 *         events fails
 */
@Override
public StorageResult addEvents(final Iterable<ProvenanceEventRecord> events) throws IOException {
    if (closed) {
        throw new IOException(this + " is closed");
    }

    // Keep fetching leases until one is successfully claimed. A lease that could not be
    // claimed but reports that it should roll gets a rollover attempt before retrying.
    RecordWriterLease lease;
    while (true) {
        lease = getLease();
        if (lease.tryClaim()) {
            break;
        }
        if (lease.shouldRoll()) {
            tryRollover(lease);
        }
    }

    // Write through the leased writer; the claim must be given back no matter what.
    final RecordWriter writer = lease.getWriter();
    final Map<ProvenanceEventRecord, StorageSummary> storageMap;
    try {
        storageMap = addEvents(events, writer);
    } finally {
        lease.relinquishClaim();
    }

    // Attempt a rollover if the lease asks for one. Failure here is logged only.
    Integer recordsRolled = null;
    final boolean rollRequested = lease.shouldRoll();
    try {
        if (rollRequested && tryRollover(lease)) {
            recordsRolled = writer.getRecordsWritten();
        }
    } catch (final IOException ioe) {
        logger.error("Updated {} but failed to rollover to a new Event File", this, ioe);
    }

    final Integer rolloverCount = recordsRolled;
    return new StorageResult() {

        @Override
        public Map<ProvenanceEventRecord, StorageSummary> getStorageLocations() {
            return storageMap;
        }

        @Override
        public boolean triggeredRollover() {
            // A non-null count is only ever set when a rollover succeeded.
            return rolloverCount != null;
        }

        @Override
        public Integer getEventsRolledOver() {
            return rolloverCount;
        }

        @Override
        public String toString() {
            return storageMap.toString();
        }
    };
}
Also used : StorageSummary(org.apache.nifi.provenance.serialization.StorageSummary) RecordWriter(org.apache.nifi.provenance.serialization.RecordWriter) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) StandardProvenanceEventRecord(org.apache.nifi.provenance.StandardProvenanceEventRecord) IOException(java.io.IOException)

Example 15 with StorageSummary

use of org.apache.nifi.provenance.serialization.StorageSummary in project nifi by apache.

the class WriteAheadStorePartition method addEvents.

/**
 * Writes each event with the given writer and returns the storage summary of every event,
 * rewritten to carry this partition's name.
 *
 * When at least one event was written, the writer is flushed, the partition's max event id
 * is raised to the id of the last event written if that is larger, and the writer is synced
 * when the configuration requests it. If anything fails, the writer is marked dirty before
 * the exception is rethrown.
 *
 * @param events the events to write
 * @param writer the writer to write them with
 * @return a map from each event to its storage summary; empty when no events were supplied
 * @throws IOException if writing, flushing or syncing fails
 */
private Map<ProvenanceEventRecord, StorageSummary> addEvents(final Iterable<ProvenanceEventRecord> events, final RecordWriter writer) throws IOException {
    final Map<ProvenanceEventRecord, StorageSummary> summariesByEvent = new HashMap<>();

    try {
        long lastWrittenId = -1L;
        for (final ProvenanceEventRecord record : events) {
            final StorageSummary fromWriter = writer.writeRecord(record);

            // Same summary as the writer produced, but stamped with this partition's name.
            final StorageSummary withPartition = new StorageSummary(
                fromWriter.getEventId(),
                fromWriter.getStorageLocation(),
                this.partitionName,
                fromWriter.getBlockIndex(),
                fromWriter.getSerializedLength(),
                fromWriter.getBytesWritten());

            summariesByEvent.put(record, withPartition);
            lastWrittenId = withPartition.getEventId();
        }

        // Every iteration adds an entry, so an empty map means nothing was written
        // and there is nothing to flush.
        if (summariesByEvent.isEmpty()) {
            return summariesByEvent;
        }

        writer.flush();

        // Raise the max event id to the id just written, unless it is already larger.
        final long candidateMax = lastWrittenId;
        this.maxEventId.getAndUpdate(current -> Math.max(current, candidateMax));

        if (config.isAlwaysSync()) {
            writer.sync();
        }
    } catch (final Exception failure) {
        // The journal may now hold only part of a record. Mark the writer dirty before
        // the lock on this journal is released; otherwise another thread could append
        // after the partial record and corrupt the journal.
        writer.markDirty();
        throw failure;
    }

    return summariesByEvent;
}
Also used : StorageSummary(org.apache.nifi.provenance.serialization.StorageSummary) HashMap(java.util.HashMap) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) StandardProvenanceEventRecord(org.apache.nifi.provenance.StandardProvenanceEventRecord) IOException(java.io.IOException) EOFException(java.io.EOFException) ExecutionException(java.util.concurrent.ExecutionException)

Aggregations

StorageSummary (org.apache.nifi.provenance.serialization.StorageSummary)16 IOException (java.io.IOException)10 ProvenanceEventRecord (org.apache.nifi.provenance.ProvenanceEventRecord)9 File (java.io.File)6 StandardProvenanceEventRecord (org.apache.nifi.provenance.StandardProvenanceEventRecord)6 Test (org.junit.Test)5 EOFException (java.io.EOFException)4 ExecutionException (java.util.concurrent.ExecutionException)4 RepositoryConfiguration (org.apache.nifi.provenance.RepositoryConfiguration)4 HashMap (java.util.HashMap)3 AtomicLong (java.util.concurrent.atomic.AtomicLong)3 IndexManager (org.apache.nifi.provenance.lucene.IndexManager)3 TocWriter (org.apache.nifi.provenance.toc.TocWriter)3 ByteArrayOutputStream (java.io.ByteArrayOutputStream)2 DataOutputStream (java.io.DataOutputStream)2 FileNotFoundException (java.io.FileNotFoundException)2 ArrayList (java.util.ArrayList)2 Map (java.util.Map)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 LinkedBlockingQueue (java.util.concurrent.LinkedBlockingQueue)2