Use of org.apache.nifi.provenance.StandardProvenanceEventRecord in project nifi by apache.
Class EventRecord, method getEvent:
@SuppressWarnings("unchecked")
public static StandardProvenanceEventRecord getEvent(final Record record, final String storageFilename, final long storageByteOffset, final int maxAttributeLength) {
    final StandardProvenanceEventRecord.Builder builder = new StandardProvenanceEventRecord.Builder();
    builder.setAlternateIdentifierUri((String) record.getFieldValue(EventFieldNames.ALTERNATE_IDENTIFIER));
    builder.setChildUuids((List<String>) record.getFieldValue(EventFieldNames.CHILD_UUIDS));
    builder.setComponentId((String) record.getFieldValue(EventFieldNames.COMPONENT_ID));
    builder.setComponentType((String) record.getFieldValue(EventFieldNames.COMPONENT_TYPE));
    builder.setDetails((String) record.getFieldValue(EventFieldNames.EVENT_DETAILS));
    builder.setEventDuration((Long) record.getFieldValue(EventFieldNames.EVENT_DURATION));
    builder.setEventTime((Long) record.getFieldValue(EventFieldNames.EVENT_TIME));
    builder.setEventType(ProvenanceEventType.valueOf((String) record.getFieldValue(EventFieldNames.EVENT_TYPE)));
    builder.setFlowFileEntryDate((Long) record.getFieldValue(EventFieldNames.FLOWFILE_ENTRY_DATE));
    builder.setFlowFileUUID((String) record.getFieldValue(EventFieldNames.FLOWFILE_UUID));
    builder.setLineageStartDate((Long) record.getFieldValue(EventFieldNames.LINEAGE_START_DATE));
    builder.setParentUuids((List<String>) record.getFieldValue(EventFieldNames.PARENT_UUIDS));
    builder.setPreviousAttributes(truncateAttributes((Map<String, String>) record.getFieldValue(EventFieldNames.PREVIOUS_ATTRIBUTES), maxAttributeLength));
    builder.setRelationship((String) record.getFieldValue(EventFieldNames.RELATIONSHIP));
    builder.setSourceQueueIdentifier((String) record.getFieldValue(EventFieldNames.SOURCE_QUEUE_IDENTIFIER));
    builder.setSourceSystemFlowFileIdentifier((String) record.getFieldValue(EventFieldNames.SOURCE_SYSTEM_FLOWFILE_IDENTIFIER));
    builder.setTransitUri((String) record.getFieldValue(EventFieldNames.TRANSIT_URI));
    builder.setUpdatedAttributes(truncateAttributes((Map<String, String>) record.getFieldValue(EventFieldNames.UPDATED_ATTRIBUTES), maxAttributeLength));
    final Long eventId = (Long) record.getFieldValue(EventFieldNames.EVENT_IDENTIFIER);
    if (eventId != null) {
        builder.setEventId(eventId);
    }
    builder.setStorageLocation(storageFilename, storageByteOffset);
    final Record currentClaimRecord = (Record) record.getFieldValue(EventFieldNames.CONTENT_CLAIM);
    if (currentClaimRecord == null) {
        builder.setCurrentContentClaim(null, null, null, null, 0L);
    } else {
        builder.setCurrentContentClaim(
            (String) currentClaimRecord.getFieldValue(EventFieldNames.CONTENT_CLAIM_CONTAINER),
            (String) currentClaimRecord.getFieldValue(EventFieldNames.CONTENT_CLAIM_SECTION),
            (String) currentClaimRecord.getFieldValue(EventFieldNames.CONTENT_CLAIM_IDENTIFIER),
            (Long) currentClaimRecord.getFieldValue(EventFieldNames.CONTENT_CLAIM_OFFSET),
            (Long) currentClaimRecord.getFieldValue(EventFieldNames.CONTENT_CLAIM_SIZE));
    }
    final Record previousClaimRecord = (Record) record.getFieldValue(EventFieldNames.PREVIOUS_CONTENT_CLAIM);
    if (previousClaimRecord != null) {
        builder.setPreviousContentClaim(
            (String) previousClaimRecord.getFieldValue(EventFieldNames.CONTENT_CLAIM_CONTAINER),
            (String) previousClaimRecord.getFieldValue(EventFieldNames.CONTENT_CLAIM_SECTION),
            (String) previousClaimRecord.getFieldValue(EventFieldNames.CONTENT_CLAIM_IDENTIFIER),
            (Long) previousClaimRecord.getFieldValue(EventFieldNames.CONTENT_CLAIM_OFFSET),
            (Long) previousClaimRecord.getFieldValue(EventFieldNames.CONTENT_CLAIM_SIZE));
    }
    return builder.build();
}
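For orientation, getEvent() is simply replaying serialized fields into a StandardProvenanceEventRecord.Builder. The same builder can be driven directly. The following is a minimal sketch that uses only the setters that appear above, with a hypothetical FlowFile UUID and component id, and assuming these fields are enough to satisfy the builder's required-field checks:

```java
import org.apache.nifi.provenance.ProvenanceEventType;
import org.apache.nifi.provenance.StandardProvenanceEventRecord;

public class BuilderSketch {
    public static void main(String[] args) {
        final long now = System.currentTimeMillis();
        final StandardProvenanceEventRecord.Builder builder = new StandardProvenanceEventRecord.Builder();
        builder.setEventType(ProvenanceEventType.CREATE);
        builder.setEventTime(now);
        builder.setFlowFileUUID("11111111-2222-3333-4444-555555555555"); // hypothetical UUID
        builder.setFlowFileEntryDate(now);
        builder.setLineageStartDate(now);
        builder.setComponentId("generate-1"); // hypothetical component id
        builder.setComponentType("GenerateFlowFile");
        // Mirror the null-claim branch above: no content has been written yet.
        builder.setCurrentContentClaim(null, null, null, null, 0L);

        final StandardProvenanceEventRecord event = builder.build();
        System.out.println(event.getEventType() + " event for FlowFile " + event.getFlowFileUuid());
    }
}
```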
Use of org.apache.nifi.provenance.StandardProvenanceEventRecord in project nifi by apache.
Class CompressableRecordReader, method nextRecord:
@Override
public StandardProvenanceEventRecord nextRecord() throws IOException {
    if (pushbackEvent != null) {
        final StandardProvenanceEventRecord toReturn = pushbackEvent;
        pushbackEvent = null;
        return toReturn;
    }
    if (isData()) {
        while (true) {
            try {
                return nextRecord(dis, serializationVersion);
            } catch (final IOException ioe) {
                throw ioe;
            } catch (final Exception e) {
                // This would only happen if a bug were to exist such that an 'invalid' event were written
                // out. For example, an Event that has no FlowFile UUID. While there is in fact an underlying
                // cause that would need to be sorted out in this case, the Provenance Repository should be
                // resilient enough to handle this. Otherwise, we end up throwing an Exception, which may
                // prevent iterating over additional events in the repository.
                logger.error("Failed to read Provenance Event from " + filename + "; will skip this event and continue reading subsequent events", e);
            }
        }
    } else {
        return null;
    }
}
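The contract worth noting here is that nextRecord() returns null at end-of-data and logs-and-skips malformed events rather than propagating them, so a caller can drain a reader with a plain loop. A minimal sketch under that assumption (the countEvents helper is hypothetical, and the RecordReader import path is an assumption):

```java
import java.io.IOException;

import org.apache.nifi.provenance.StandardProvenanceEventRecord;
import org.apache.nifi.provenance.serialization.RecordReader;

public class DrainSketch {

    // Hypothetical helper: count every event remaining in a reader.
    static long countEvents(final RecordReader reader) throws IOException {
        long count = 0;
        StandardProvenanceEventRecord event;
        // nextRecord() returns null once the data is exhausted; events the reader
        // could not parse have already been skipped rather than rethrown.
        while ((event = reader.nextRecord()) != null) {
            count++;
        }
        return count;
    }
}
```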
Use of org.apache.nifi.provenance.StandardProvenanceEventRecord in project nifi by apache.
Class WriteAheadStorePartition, method reindexLatestEvents:
void reindexLatestEvents(final EventIndex eventIndex) {
    final List<File> eventFiles = getEventFilesFromDisk().sorted(DirectoryUtils.SMALLEST_ID_FIRST).collect(Collectors.toList());
    if (eventFiles.isEmpty()) {
        return;
    }
    final long minEventIdToReindex = eventIndex.getMinimumEventIdToReindex(partitionName);
    final long maxEventId = getMaxEventId();
    final long eventsToReindex = maxEventId - minEventIdToReindex;
    logger.info("The last Provenance Event indexed for partition {} is {}, but the last event written to the partition has ID {}. "
        + "Re-indexing up to the last {} events in {} to ensure that the Event Index is accurate and up-to-date",
        partitionName, minEventIdToReindex, maxEventId, eventsToReindex, partitionDirectory);
    // Find the first event file that we care about.
    int firstEventFileIndex = 0;
    for (int i = eventFiles.size() - 1; i >= 0; i--) {
        final File eventFile = eventFiles.get(i);
        final long minIdInFile = DirectoryUtils.getMinId(eventFile);
        if (minIdInFile <= minEventIdToReindex) {
            firstEventFileIndex = i;
            break;
        }
    }
    // Create a subList that contains the files of interest.
    final List<File> eventFilesToReindex = eventFiles.subList(firstEventFileIndex, eventFiles.size());
    final ExecutorService executor = Executors.newFixedThreadPool(Math.min(4, eventFilesToReindex.size()), new NamedThreadFactory("Re-Index Provenance Events", true));
    final List<Future<?>> futures = new ArrayList<>(eventFilesToReindex.size());
    final AtomicLong reindexedCount = new AtomicLong(0L);
    // Re-index the last batch of events.
    // We don't use an Event Iterator here because it's possible that one of the event files could be corrupt (for example, if NiFi dies while
    // writing to the file, a record may be incomplete). We don't want that to prevent us from moving on and continuing to index the rest of the
    // un-indexed events. So we just use a List of files and create a reader for each one.
    final long start = System.nanoTime();
    int fileCount = 0;
    for (final File eventFile : eventFilesToReindex) {
        // Only the first (oldest) file needs to seek forward to the first event that requires re-indexing.
        final boolean skipToEvent = fileCount++ == 0;
        final Runnable reindexTask = new Runnable() {
            @Override
            public void run() {
                final Map<ProvenanceEventRecord, StorageSummary> storageMap = new HashMap<>(1000);
                try (final RecordReader recordReader = recordReaderFactory.newRecordReader(eventFile, Collections.emptyList(), Integer.MAX_VALUE)) {
                    if (skipToEvent) {
                        final Optional<ProvenanceEventRecord> eventOption = recordReader.skipToEvent(minEventIdToReindex);
                        if (!eventOption.isPresent()) {
                            return;
                        }
                    }
                    StandardProvenanceEventRecord event = null;
                    while (true) {
                        final long startBytesConsumed = recordReader.getBytesConsumed();
                        event = recordReader.nextRecord();
                        if (event == null) {
                            eventIndex.reindexEvents(storageMap);
                            reindexedCount.addAndGet(storageMap.size());
                            storageMap.clear();
                            // Stop reading from this file.
                            break;
                        } else {
                            final long eventSize = recordReader.getBytesConsumed() - startBytesConsumed;
                            storageMap.put(event, new StorageSummary(event.getEventId(), eventFile.getName(), partitionName, recordReader.getBlockIndex(), eventSize, 0L));
                            // Flush to the index in batches of 1000 events.
                            if (storageMap.size() == 1000) {
                                eventIndex.reindexEvents(storageMap);
                                reindexedCount.addAndGet(storageMap.size());
                                storageMap.clear();
                            }
                        }
                    }
                } catch (final EOFException eof) {
                    // Ran out of data. Continue on.
                    logger.warn("Failed to find event with ID {} in Event File {} due to {}", minEventIdToReindex, eventFile, eof.toString());
                } catch (final Exception e) {
                    logger.error("Failed to index Provenance Events found in {}", eventFile, e);
                }
            }
        };
        futures.add(executor.submit(reindexTask));
    }
    for (final Future<?> future : futures) {
        try {
            future.get();
        } catch (final ExecutionException ee) {
            logger.error("Failed to re-index some Provenance events. These events may not be query-able via the Provenance interface", ee.getCause());
        } catch (final InterruptedException e) {
            Thread.currentThread().interrupt();
            logger.error("Interrupted while waiting for Provenance events to be re-indexed", e);
            break;
        }
    }
    try {
        eventIndex.commitChanges(partitionName);
    } catch (final IOException e) {
        logger.error("Failed to re-index Provenance Events for partition " + partitionName, e);
    }
    executor.shutdown();
    final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
    final long seconds = millis / 1000L;
    final long millisRemainder = millis % 1000L;
    logger.info("Finished re-indexing {} events across {} files for {} in {}.{} seconds", reindexedCount.get(), eventFilesToReindex.size(), partitionDirectory, seconds, millisRemainder);
}
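Stripped of the provenance specifics, reindexLatestEvents() is a standard submit-then-join pattern: a bounded pool, one task per file, a join on every Future so failures surface, then a final commit. A self-contained sketch of just that skeleton, with hypothetical task bodies:

```java
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class SubmitJoinSketch {
    public static void main(String[] args) {
        final int taskCount = 8; // stands in for the number of event files
        final ExecutorService executor = Executors.newFixedThreadPool(Math.min(4, taskCount));
        final List<Future<?>> futures = new ArrayList<>(taskCount);
        for (int i = 0; i < taskCount; i++) {
            final int chunk = i;
            futures.add(executor.submit(() -> System.out.println("re-indexing chunk " + chunk)));
        }
        for (final Future<?> future : futures) {
            try {
                future.get(); // a failed task surfaces here as an ExecutionException
            } catch (final ExecutionException ee) {
                System.err.println("chunk failed: " + ee.getCause());
            } catch (final InterruptedException ie) {
                Thread.currentThread().interrupt();
                break; // stop waiting, as the method above does
            }
        }
        executor.shutdown(); // the commit/cleanup step would follow here
    }
}
```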
Use of org.apache.nifi.provenance.StandardProvenanceEventRecord in project nifi by apache.
Class StandardProcessSession, method enrich:
@Override
public StandardProvenanceEventRecord enrich(final ProvenanceEventRecord rawEvent, final FlowFile flowFile) {
    verifyTaskActive();
    final StandardRepositoryRecord repoRecord = records.get(flowFile);
    if (repoRecord == null) {
        throw new FlowFileHandlingException(flowFile + " is not known in this session (" + toString() + ")");
    }
    final StandardProvenanceEventRecord.Builder recordBuilder = new StandardProvenanceEventRecord.Builder().fromEvent(rawEvent);
    if (repoRecord.getCurrent() != null && repoRecord.getCurrentClaim() != null) {
        final ContentClaim currentClaim = repoRecord.getCurrentClaim();
        final long currentOffset = repoRecord.getCurrentClaimOffset();
        final long size = flowFile.getSize();
        final ResourceClaim resourceClaim = currentClaim.getResourceClaim();
        recordBuilder.setCurrentContentClaim(resourceClaim.getContainer(), resourceClaim.getSection(), resourceClaim.getId(), currentOffset + currentClaim.getOffset(), size);
    }
    if (repoRecord.getOriginal() != null && repoRecord.getOriginalClaim() != null) {
        final ContentClaim originalClaim = repoRecord.getOriginalClaim();
        final long originalOffset = repoRecord.getOriginal().getContentClaimOffset();
        final long originalSize = repoRecord.getOriginal().getSize();
        final ResourceClaim resourceClaim = originalClaim.getResourceClaim();
        recordBuilder.setPreviousContentClaim(resourceClaim.getContainer(), resourceClaim.getSection(), resourceClaim.getId(), originalOffset + originalClaim.getOffset(), originalSize);
    }
    final FlowFileQueue originalQueue = repoRecord.getOriginalQueue();
    if (originalQueue != null) {
        recordBuilder.setSourceQueueIdentifier(originalQueue.getIdentifier());
    }
    recordBuilder.setAttributes(repoRecord.getOriginalAttributes(), repoRecord.getUpdatedAttributes());
    return recordBuilder.build();
}
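The offset arithmetic deserves a note: a FlowFile's content sits at an offset inside its ContentClaim, and the ContentClaim itself starts at an offset inside a shared ResourceClaim file, so setCurrentContentClaim() receives the sum of the two. A plain-Java illustration with hypothetical numbers:

```java
public class ClaimOffsetSketch {
    public static void main(String[] args) {
        final long contentClaimOffset = 4096L; // hypothetical: claim begins 4 KiB into the resource file
        final long flowFileOffset = 128L;      // hypothetical: content begins 128 bytes into the claim
        final long size = 512L;                // hypothetical content length

        // This sum is what the enrich() method above passes as the claim offset.
        final long absoluteStart = contentClaimOffset + flowFileOffset;
        System.out.printf("content occupies bytes [%d, %d) of the resource file%n",
                absoluteStart, absoluteStart + size);
    }
}
```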
Use of org.apache.nifi.provenance.StandardProvenanceEventRecord in project nifi by apache.
Class StandardProcessSession, method enrich (private overload):
private StandardProvenanceEventRecord enrich(final ProvenanceEventRecord rawEvent, final Map<String, FlowFileRecord> flowFileRecordMap, final Map<FlowFileRecord, StandardRepositoryRecord> records, final boolean updateAttributes) {
    final StandardProvenanceEventRecord.Builder recordBuilder = new StandardProvenanceEventRecord.Builder().fromEvent(rawEvent);
    final FlowFileRecord eventFlowFile = flowFileRecordMap.get(rawEvent.getFlowFileUuid());
    if (eventFlowFile != null) {
        final StandardRepositoryRecord repoRecord = records.get(eventFlowFile);
        if (repoRecord.getCurrent() != null && repoRecord.getCurrentClaim() != null) {
            final ContentClaim currentClaim = repoRecord.getCurrentClaim();
            final long currentOffset = repoRecord.getCurrentClaimOffset();
            final long size = eventFlowFile.getSize();
            final ResourceClaim resourceClaim = currentClaim.getResourceClaim();
            recordBuilder.setCurrentContentClaim(resourceClaim.getContainer(), resourceClaim.getSection(), resourceClaim.getId(), currentOffset + currentClaim.getOffset(), size);
        }
        if (repoRecord.getOriginal() != null && repoRecord.getOriginalClaim() != null) {
            final ContentClaim originalClaim = repoRecord.getOriginalClaim();
            final long originalOffset = repoRecord.getOriginal().getContentClaimOffset();
            final long originalSize = repoRecord.getOriginal().getSize();
            final ResourceClaim resourceClaim = originalClaim.getResourceClaim();
            recordBuilder.setPreviousContentClaim(resourceClaim.getContainer(), resourceClaim.getSection(), resourceClaim.getId(), originalOffset + originalClaim.getOffset(), originalSize);
        }
        final FlowFileQueue originalQueue = repoRecord.getOriginalQueue();
        if (originalQueue != null) {
            recordBuilder.setSourceQueueIdentifier(originalQueue.getIdentifier());
        }
    }
    if (updateAttributes) {
        final FlowFileRecord flowFileRecord = flowFileRecordMap.get(rawEvent.getFlowFileUuid());
        if (flowFileRecord != null) {
            final StandardRepositoryRecord record = records.get(flowFileRecord);
            if (record != null) {
                recordBuilder.setAttributes(record.getOriginalAttributes(), record.getUpdatedAttributes());
            }
        }
    }
    return recordBuilder.build();
}
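Unlike the public overload, this variant tolerates events whose FlowFile is not (or is no longer) part of the session: if eventFlowFile is null, the raw event comes back with only the fromEvent() copy applied. That makes it natural to call in a batch. A hypothetical driver, not part of StandardProcessSession, sketching that usage:

```java
// Hypothetical helper: enrich a whole batch of raw events with the private
// overload above. Events whose FlowFile is unknown still come back, just
// without content-claim or source-queue enrichment.
private List<StandardProvenanceEventRecord> enrichAll(final List<ProvenanceEventRecord> rawEvents,
        final Map<String, FlowFileRecord> flowFileRecordMap,
        final Map<FlowFileRecord, StandardRepositoryRecord> records) {
    final List<StandardProvenanceEventRecord> enriched = new ArrayList<>(rawEvents.size());
    for (final ProvenanceEventRecord rawEvent : rawEvents) {
        enriched.add(enrich(rawEvent, flowFileRecordMap, records, true));
    }
    return enriched;
}
```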