Search in sources :

Example 1 with StandardProvenanceEventRecord

use of org.apache.nifi.provenance.StandardProvenanceEventRecord in project nifi by apache.

Source: the class EventRecord, method getEvent.

@SuppressWarnings("unchecked")
public static StandardProvenanceEventRecord getEvent(final Record record, final String storageFilename, final long storageByteOffset, final int maxAttributeLength) {
    // Reconstructs a provenance event from its schema-based Record form.
    // Attribute maps are truncated to maxAttributeLength per value, and the
    // event's on-disk location (file name + byte offset) is attached so that
    // the event can be located again later.
    final StandardProvenanceEventRecord.Builder builder = new StandardProvenanceEventRecord.Builder();
    builder.setAlternateIdentifierUri((String) record.getFieldValue(EventFieldNames.ALTERNATE_IDENTIFIER));
    builder.setChildUuids((List<String>) record.getFieldValue(EventFieldNames.CHILD_UUIDS));
    builder.setComponentId((String) record.getFieldValue(EventFieldNames.COMPONENT_ID));
    builder.setComponentType((String) record.getFieldValue(EventFieldNames.COMPONENT_TYPE));
    builder.setDetails((String) record.getFieldValue(EventFieldNames.EVENT_DETAILS));
    builder.setEventDuration((Long) record.getFieldValue(EventFieldNames.EVENT_DURATION));
    builder.setEventTime((Long) record.getFieldValue(EventFieldNames.EVENT_TIME));
    builder.setEventType(ProvenanceEventType.valueOf((String) record.getFieldValue(EventFieldNames.EVENT_TYPE)));
    builder.setFlowFileEntryDate((Long) record.getFieldValue(EventFieldNames.FLOWFILE_ENTRY_DATE));
    builder.setFlowFileUUID((String) record.getFieldValue(EventFieldNames.FLOWFILE_UUID));
    builder.setLineageStartDate((Long) record.getFieldValue(EventFieldNames.LINEAGE_START_DATE));
    builder.setParentUuids((List<String>) record.getFieldValue(EventFieldNames.PARENT_UUIDS));
    builder.setPreviousAttributes(truncateAttributes((Map<String, String>) record.getFieldValue(EventFieldNames.PREVIOUS_ATTRIBUTES), maxAttributeLength));
    builder.setRelationship((String) record.getFieldValue(EventFieldNames.RELATIONSHIP));
    builder.setSourceQueueIdentifier((String) record.getFieldValue(EventFieldNames.SOURCE_QUEUE_IDENTIFIER));
    builder.setSourceSystemFlowFileIdentifier((String) record.getFieldValue(EventFieldNames.SOURCE_SYSTEM_FLOWFILE_IDENTIFIER));
    builder.setTransitUri((String) record.getFieldValue(EventFieldNames.TRANSIT_URI));
    builder.setUpdatedAttributes(truncateAttributes((Map<String, String>) record.getFieldValue(EventFieldNames.UPDATED_ATTRIBUTES), maxAttributeLength));
    // Event ID is optional in the serialized form; only set it when present.
    final Long eventId = (Long) record.getFieldValue(EventFieldNames.EVENT_IDENTIFIER);
    if (eventId != null) {
        builder.setEventId(eventId);
    }
    builder.setStorageLocation(storageFilename, storageByteOffset);
    // A missing current-claim sub-record still requires setCurrentContentClaim
    // to be invoked (with nulls) so the builder records an empty claim of size 0.
    final Record currentClaimRecord = (Record) record.getFieldValue(EventFieldNames.CONTENT_CLAIM);
    if (currentClaimRecord == null) {
        builder.setCurrentContentClaim(null, null, null, null, 0L);
    } else {
        applyContentClaim(builder, currentClaimRecord, true);
    }
    // The previous claim, by contrast, is simply omitted when absent.
    final Record previousClaimRecord = (Record) record.getFieldValue(EventFieldNames.PREVIOUS_CONTENT_CLAIM);
    if (previousClaimRecord != null) {
        applyContentClaim(builder, previousClaimRecord, false);
    }
    return builder.build();
}

/**
 * Extracts the five content-claim fields (container, section, identifier, offset, size)
 * from the given claim sub-record and applies them to the builder, either as the
 * current claim or the previous claim.
 */
private static void applyContentClaim(final StandardProvenanceEventRecord.Builder builder, final Record claimRecord, final boolean current) {
    final String container = (String) claimRecord.getFieldValue(EventFieldNames.CONTENT_CLAIM_CONTAINER);
    final String section = (String) claimRecord.getFieldValue(EventFieldNames.CONTENT_CLAIM_SECTION);
    final String identifier = (String) claimRecord.getFieldValue(EventFieldNames.CONTENT_CLAIM_IDENTIFIER);
    final Long offset = (Long) claimRecord.getFieldValue(EventFieldNames.CONTENT_CLAIM_OFFSET);
    final Long size = (Long) claimRecord.getFieldValue(EventFieldNames.CONTENT_CLAIM_SIZE);
    if (current) {
        builder.setCurrentContentClaim(container, section, identifier, offset, size);
    } else {
        builder.setPreviousContentClaim(container, section, identifier, offset, size);
    }
}
Also used : StandardProvenanceEventRecord(org.apache.nifi.provenance.StandardProvenanceEventRecord) Record(org.apache.nifi.repository.schema.Record) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) StandardProvenanceEventRecord(org.apache.nifi.provenance.StandardProvenanceEventRecord) FieldMapRecord(org.apache.nifi.repository.schema.FieldMapRecord) Map(java.util.Map) HashMap(java.util.HashMap)

Example 2 with StandardProvenanceEventRecord

use of org.apache.nifi.provenance.StandardProvenanceEventRecord in project nifi by apache.

Source: the class CompressableRecordReader, method nextRecord.

@Override
public StandardProvenanceEventRecord nextRecord() throws IOException {
    // If a previously-read event was pushed back, serve it before reading new data.
    final StandardProvenanceEventRecord pushedBack = pushbackEvent;
    if (pushedBack != null) {
        pushbackEvent = null;
        return pushedBack;
    }
    // No pushed-back event and no more data: the stream is exhausted.
    if (!isData()) {
        return null;
    }
    // Keep attempting to deserialize until a valid event is produced or a real
    // I/O failure occurs.
    while (true) {
        try {
            return nextRecord(dis, serializationVersion);
        } catch (final IOException ioe) {
            // Genuine I/O problems must propagate unchanged; only
            // deserialization failures below are skipped.
            throw ioe;
        } catch (final Exception e) {
            // This would only happen if a bug were to exist such that an 'invalid' event were written
            // out. For example an Event that has no FlowFile UUID. While there is in fact an underlying
            // cause that would need to be sorted out in this case, the Provenance Repository should be
            // resilient enough to handle this. Otherwise, we end up throwing an Exception, which may
            // prevent iterating over additional events in the repository.
            logger.error("Failed to read Provenance Event from " + filename + "; will skip this event and continue reading subsequent events", e);
        }
    }
}
Also used : StandardProvenanceEventRecord(org.apache.nifi.provenance.StandardProvenanceEventRecord) IOException(java.io.IOException) IOException(java.io.IOException) EOFException(java.io.EOFException)

Example 3 with StandardProvenanceEventRecord

use of org.apache.nifi.provenance.StandardProvenanceEventRecord in project nifi by apache.

Source: the class WriteAheadStorePartition, method reindexLatestEvents.

/**
 * Re-indexes the most recently written events of this partition so that the given
 * Event Index is consistent with what has actually been persisted to disk. Events
 * with IDs at or above the index's minimum-reindex watermark are read back from the
 * on-disk event files (in parallel, one reader per file) and handed to the index in
 * batches of up to 1000.
 */
void reindexLatestEvents(final EventIndex eventIndex) {
    final List<File> eventFiles = getEventFilesFromDisk().sorted(DirectoryUtils.SMALLEST_ID_FIRST).collect(Collectors.toList());
    if (eventFiles.isEmpty()) {
        return;
    }
    final long minEventIdToReindex = eventIndex.getMinimumEventIdToReindex(partitionName);
    final long maxEventId = getMaxEventId();
    final long eventsToReindex = maxEventId - minEventIdToReindex;
    // NOTE: the original format string had only four placeholders for five arguments,
    // silently dropping partitionDirectory; a fifth placeholder has been added.
    logger.info("The last Provenance Event indexed for partition {} is {}, but the last event written to partition has ID {}. " + "Re-indexing up to the last {} events in {} to ensure that the Event Index is accurate and up-to-date", partitionName, minEventIdToReindex, maxEventId, eventsToReindex, partitionDirectory);
    // Find the first event file that we care about: scan newest-to-oldest and stop at
    // the first file whose minimum event ID is at or below the reindex watermark.
    int firstEventFileIndex = 0;
    for (int i = eventFiles.size() - 1; i >= 0; i--) {
        final File eventFile = eventFiles.get(i);
        final long minIdInFile = DirectoryUtils.getMinId(eventFile);
        if (minIdInFile <= minEventIdToReindex) {
            firstEventFileIndex = i;
            break;
        }
    }
    // Create a subList that contains the files of interest
    final List<File> eventFilesToReindex = eventFiles.subList(firstEventFileIndex, eventFiles.size());
    final ExecutorService executor = Executors.newFixedThreadPool(Math.min(4, eventFilesToReindex.size()), new NamedThreadFactory("Re-Index Provenance Events", true));
    final List<Future<?>> futures = new ArrayList<>(eventFilesToReindex.size());
    final AtomicLong reindexedCount = new AtomicLong(0L);
    // Re-Index the last bunch of events.
    // We don't use an Event Iterator here because it's possible that one of the event files could be corrupt (for example, if NiFi dies while
    // writing to the file, a record may be incomplete). We don't want to prevent us from moving on and continuing to index the rest of the
    // un-indexed events. So we just use a List of files and create a reader for each one.
    final long start = System.nanoTime();
    int fileCount = 0;
    try {
        for (final File eventFile : eventFilesToReindex) {
            // Only the first (oldest) file may contain already-indexed events, so only
            // that reader needs to skip forward to the watermark.
            final boolean skipToEvent = fileCount++ == 0;
            final Runnable reindexTask = new Runnable() {

                @Override
                public void run() {
                    final Map<ProvenanceEventRecord, StorageSummary> storageMap = new HashMap<>(1000);
                    try (final RecordReader recordReader = recordReaderFactory.newRecordReader(eventFile, Collections.emptyList(), Integer.MAX_VALUE)) {
                        if (skipToEvent) {
                            final Optional<ProvenanceEventRecord> eventOption = recordReader.skipToEvent(minEventIdToReindex);
                            if (!eventOption.isPresent()) {
                                return;
                            }
                        }
                        StandardProvenanceEventRecord event = null;
                        while (true) {
                            final long startBytesConsumed = recordReader.getBytesConsumed();
                            event = recordReader.nextRecord();
                            if (event == null) {
                                // End of file: flush whatever is batched, then stop
                                // reading from this file.
                                eventIndex.reindexEvents(storageMap);
                                reindexedCount.addAndGet(storageMap.size());
                                storageMap.clear();
                                break;
                            } else {
                                final long eventSize = recordReader.getBytesConsumed() - startBytesConsumed;
                                storageMap.put(event, new StorageSummary(event.getEventId(), eventFile.getName(), partitionName, recordReader.getBlockIndex(), eventSize, 0L));
                                // Hand events to the index in batches of 1000 to bound memory.
                                if (storageMap.size() == 1000) {
                                    eventIndex.reindexEvents(storageMap);
                                    reindexedCount.addAndGet(storageMap.size());
                                    storageMap.clear();
                                }
                            }
                        }
                    } catch (final EOFException eof) {
                        // Ran out of data. Continue on.
                        logger.warn("Failed to find event with ID {} in Event File {} due to {}", minEventIdToReindex, eventFile, eof.toString());
                    } catch (final Exception e) {
                        logger.error("Failed to index Provenance Events found in {}", eventFile, e);
                    }
                }
            };
            futures.add(executor.submit(reindexTask));
        }
        for (final Future<?> future : futures) {
            try {
                future.get();
            } catch (final ExecutionException ee) {
                logger.error("Failed to re-index some Provenance events. These events may not be query-able via the Provenance interface", ee.getCause());
            } catch (final InterruptedException e) {
                // Restore the interrupt flag and stop waiting on remaining files.
                Thread.currentThread().interrupt();
                logger.error("Interrupted while waiting for Provenance events to be re-indexed", e);
                break;
            }
        }
        try {
            eventIndex.commitChanges(partitionName);
        } catch (final IOException e) {
            logger.error("Failed to re-index Provenance Events for partition " + partitionName, e);
        }
    } finally {
        // Always release the pool's threads, even if submission or waiting failed.
        executor.shutdown();
    }
    final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
    final long seconds = millis / 1000L;
    final long millisRemainder = millis % 1000L;
    logger.info("Finished re-indexing {} events across {} files for {} in {}.{} seconds", reindexedCount.get(), eventFilesToReindex.size(), partitionDirectory, seconds, millisRemainder);
}
Also used : HashMap(java.util.HashMap) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) ArrayList(java.util.ArrayList) StandardProvenanceEventRecord(org.apache.nifi.provenance.StandardProvenanceEventRecord) StorageSummary(org.apache.nifi.provenance.serialization.StorageSummary) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) StandardProvenanceEventRecord(org.apache.nifi.provenance.StandardProvenanceEventRecord) EOFException(java.io.EOFException) ExecutionException(java.util.concurrent.ExecutionException) NamedThreadFactory(org.apache.nifi.provenance.util.NamedThreadFactory) IOException(java.io.IOException) IOException(java.io.IOException) EOFException(java.io.EOFException) ExecutionException(java.util.concurrent.ExecutionException) AtomicLong(java.util.concurrent.atomic.AtomicLong) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) File(java.io.File)

Example 4 with StandardProvenanceEventRecord

use of org.apache.nifi.provenance.StandardProvenanceEventRecord in project nifi by apache.

Source: the class StandardProcessSession, method enrich (public override).

@Override
public StandardProvenanceEventRecord enrich(final ProvenanceEventRecord rawEvent, final FlowFile flowFile) {
    // Augments a raw provenance event with session-local knowledge about the
    // FlowFile: its current and original content claims, the queue it came
    // from, and its original/updated attribute maps.
    verifyTaskActive();
    final StandardRepositoryRecord repoRecord = records.get(flowFile);
    if (repoRecord == null) {
        throw new FlowFileHandlingException(flowFile + " is not known in this session (" + toString() + ")");
    }
    final StandardProvenanceEventRecord.Builder enriched = new StandardProvenanceEventRecord.Builder().fromEvent(rawEvent);
    // Attach the current content claim, when one exists for the record.
    if (repoRecord.getCurrent() != null && repoRecord.getCurrentClaim() != null) {
        final ContentClaim claim = repoRecord.getCurrentClaim();
        final ResourceClaim resource = claim.getResourceClaim();
        enriched.setCurrentContentClaim(resource.getContainer(), resource.getSection(), resource.getId(),
            repoRecord.getCurrentClaimOffset() + claim.getOffset(), flowFile.getSize());
    }
    // Attach the previous (original) content claim, when one exists.
    if (repoRecord.getOriginal() != null && repoRecord.getOriginalClaim() != null) {
        final ContentClaim priorClaim = repoRecord.getOriginalClaim();
        final ResourceClaim priorResource = priorClaim.getResourceClaim();
        enriched.setPreviousContentClaim(priorResource.getContainer(), priorResource.getSection(), priorResource.getId(),
            repoRecord.getOriginal().getContentClaimOffset() + priorClaim.getOffset(), repoRecord.getOriginal().getSize());
    }
    // Record which queue the FlowFile was pulled from, if known.
    final FlowFileQueue sourceQueue = repoRecord.getOriginalQueue();
    if (sourceQueue != null) {
        enriched.setSourceQueueIdentifier(sourceQueue.getIdentifier());
    }
    enriched.setAttributes(repoRecord.getOriginalAttributes(), repoRecord.getUpdatedAttributes());
    return enriched.build();
}
Also used : StandardProvenanceEventRecord(org.apache.nifi.provenance.StandardProvenanceEventRecord) ContentClaim(org.apache.nifi.controller.repository.claim.ContentClaim) FlowFileHandlingException(org.apache.nifi.processor.exception.FlowFileHandlingException) ResourceClaim(org.apache.nifi.controller.repository.claim.ResourceClaim) FlowFileQueue(org.apache.nifi.controller.queue.FlowFileQueue)

Example 5 with StandardProvenanceEventRecord

use of org.apache.nifi.provenance.StandardProvenanceEventRecord in project nifi by apache.

Source: the class StandardProcessSession, method enrich (private overload).

/**
 * Enriches a raw provenance event with content-claim, source-queue, and
 * (optionally) attribute information drawn from the supplied per-UUID
 * FlowFile map and repository-record map. Events whose FlowFile UUID is not
 * present in the map are returned with only the raw event's data.
 */
private StandardProvenanceEventRecord enrich(final ProvenanceEventRecord rawEvent, final Map<String, FlowFileRecord> flowFileRecordMap, final Map<FlowFileRecord, StandardRepositoryRecord> records, final boolean updateAttributes) {
    final StandardProvenanceEventRecord.Builder recordBuilder = new StandardProvenanceEventRecord.Builder().fromEvent(rawEvent);
    final FlowFileRecord eventFlowFile = flowFileRecordMap.get(rawEvent.getFlowFileUuid());
    if (eventFlowFile != null) {
        final StandardRepositoryRecord repoRecord = records.get(eventFlowFile);
        if (repoRecord.getCurrent() != null && repoRecord.getCurrentClaim() != null) {
            final ContentClaim currentClaim = repoRecord.getCurrentClaim();
            final long currentOffset = repoRecord.getCurrentClaimOffset();
            final long size = eventFlowFile.getSize();
            final ResourceClaim resourceClaim = currentClaim.getResourceClaim();
            recordBuilder.setCurrentContentClaim(resourceClaim.getContainer(), resourceClaim.getSection(), resourceClaim.getId(), currentOffset + currentClaim.getOffset(), size);
        }
        if (repoRecord.getOriginal() != null && repoRecord.getOriginalClaim() != null) {
            final ContentClaim originalClaim = repoRecord.getOriginalClaim();
            final long originalOffset = repoRecord.getOriginal().getContentClaimOffset();
            final long originalSize = repoRecord.getOriginal().getSize();
            final ResourceClaim resourceClaim = originalClaim.getResourceClaim();
            recordBuilder.setPreviousContentClaim(resourceClaim.getContainer(), resourceClaim.getSection(), resourceClaim.getId(), originalOffset + originalClaim.getOffset(), originalSize);
        }
        final FlowFileQueue originalQueue = repoRecord.getOriginalQueue();
        if (originalQueue != null) {
            recordBuilder.setSourceQueueIdentifier(originalQueue.getIdentifier());
        }
        // The original code re-looked-up the FlowFile and repository record here;
        // both lookups are redundant (same key, same maps), so the values above
        // are reused. repoRecord is necessarily non-null at this point because it
        // was already dereferenced above.
        if (updateAttributes) {
            recordBuilder.setAttributes(repoRecord.getOriginalAttributes(), repoRecord.getUpdatedAttributes());
        }
    }
    return recordBuilder.build();
}
Also used : StandardProvenanceEventRecord(org.apache.nifi.provenance.StandardProvenanceEventRecord) ContentClaim(org.apache.nifi.controller.repository.claim.ContentClaim) ResourceClaim(org.apache.nifi.controller.repository.claim.ResourceClaim) FlowFileQueue(org.apache.nifi.controller.queue.FlowFileQueue)

Aggregations

StandardProvenanceEventRecord (org.apache.nifi.provenance.StandardProvenanceEventRecord)8 ProvenanceEventRecord (org.apache.nifi.provenance.ProvenanceEventRecord)3 EOFException (java.io.EOFException)2 File (java.io.File)2 IOException (java.io.IOException)2 HashMap (java.util.HashMap)2 FlowFileQueue (org.apache.nifi.controller.queue.FlowFileQueue)2 ContentClaim (org.apache.nifi.controller.repository.claim.ContentClaim)2 ResourceClaim (org.apache.nifi.controller.repository.claim.ResourceClaim)2 RecordReader (org.apache.nifi.provenance.serialization.RecordReader)2 FieldMapRecord (org.apache.nifi.repository.schema.FieldMapRecord)2 Record (org.apache.nifi.repository.schema.Record)2 ArrayList (java.util.ArrayList)1 List (java.util.List)1 Map (java.util.Map)1 ExecutionException (java.util.concurrent.ExecutionException)1 ExecutorService (java.util.concurrent.ExecutorService)1 Future (java.util.concurrent.Future)1 AtomicLong (java.util.concurrent.atomic.AtomicLong)1 IndexableField (org.apache.lucene.index.IndexableField)1