Use of org.apache.nifi.controller.queue.FlowFileQueue in project nifi by apache.
The class NiFiRegistryFlowMapper, method mapConnection:
public VersionedConnection mapConnection(final Connection connection) {
final FlowFileQueue queue = connection.getFlowFileQueue();
final VersionedConnection versionedConnection = new InstantiatedVersionedConnection(connection.getIdentifier(), connection.getProcessGroup().getIdentifier());
versionedConnection.setIdentifier(getId(connection.getVersionedComponentId(), connection.getIdentifier()));
versionedConnection.setGroupIdentifier(getGroupId(connection.getProcessGroup().getIdentifier()));
versionedConnection.setName(connection.getName());
versionedConnection.setBackPressureDataSizeThreshold(queue.getBackPressureDataSizeThreshold());
versionedConnection.setBackPressureObjectThreshold(queue.getBackPressureObjectThreshold());
versionedConnection.setFlowFileExpiration(queue.getFlowFileExpiration());
versionedConnection.setLabelIndex(connection.getLabelIndex());
versionedConnection.setPrioritizers(queue.getPriorities().stream().map(p -> p.getClass().getName()).collect(Collectors.toList()));
versionedConnection.setSelectedRelationships(connection.getRelationships().stream().map(Relationship::getName).collect(Collectors.toSet()));
versionedConnection.setzIndex(connection.getZIndex());
versionedConnection.setBends(connection.getBendPoints().stream().map(this::mapPosition).collect(Collectors.toList()));
versionedConnection.setSource(mapConnectable(connection.getSource()));
versionedConnection.setDestination(mapConnectable(connection.getDestination()));
return versionedConnection;
}
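The back-pressure thresholds, FlowFile expiration, and prioritizers above are read from the FlowFileQueue rather than from the Connection itself. Below is a minimal sketch of just that part of the mapping, assuming only the accessors used in mapConnection; the helper class and method names are hypothetical, and the import locations are assumed from the NiFi and NiFi Registry data-model modules.

import java.util.stream.Collectors;
import org.apache.nifi.controller.queue.FlowFileQueue;
import org.apache.nifi.registry.flow.VersionedConnection;

public final class QueueSettingsMapper {
    // Copies the settings that live on the FlowFileQueue onto the versioned model.
    public static void copyQueueSettings(final FlowFileQueue queue, final VersionedConnection versioned) {
        versioned.setBackPressureDataSizeThreshold(queue.getBackPressureDataSizeThreshold());
        versioned.setBackPressureObjectThreshold(queue.getBackPressureObjectThreshold());
        versioned.setFlowFileExpiration(queue.getFlowFileExpiration());
        // Prioritizers are recorded by fully qualified class name, as in mapConnection above,
        // because the versioned model stores configuration rather than live instances.
        versioned.setPrioritizers(queue.getPriorities().stream()
                .map(p -> p.getClass().getName())
                .collect(Collectors.toList()));
    }
}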
Use of org.apache.nifi.controller.queue.FlowFileQueue in project nifi by apache.
The class StandardProcessSession, method commit:
@SuppressWarnings({ "unchecked", "rawtypes" })
private void commit(final Checkpoint checkpoint) {
try {
final long commitStartNanos = System.nanoTime();
resetReadClaim();
try {
claimCache.flush();
} finally {
claimCache.reset();
}
final long updateProvenanceStart = System.nanoTime();
updateProvenanceRepo(checkpoint);
final long claimRemovalStart = System.nanoTime();
final long updateProvenanceNanos = claimRemovalStart - updateProvenanceStart;
/*
* Figure out which content claims can be released. At this point,
* we will decrement the Claimant Count for the claims via the
* Content Repository. We do not actually destroy the content
* because otherwise, we could remove the Original Claim and
* crash/restart before the FlowFileRepository is updated. This will
* result in the FlowFile being restored such that the content claim
* points to the Original Claim -- which has already been removed!
*/
for (final Map.Entry<FlowFileRecord, StandardRepositoryRecord> entry : checkpoint.records.entrySet()) {
final FlowFile flowFile = entry.getKey();
final StandardRepositoryRecord record = entry.getValue();
if (record.isMarkedForDelete()) {
// if the working claim is not the same as the original claim, we can immediately destroy the working claim
// because it was created in this session and is to be deleted. We don't need to wait for the FlowFile Repo to sync.
decrementClaimCount(record.getWorkingClaim());
if (record.getOriginalClaim() != null && !record.getOriginalClaim().equals(record.getWorkingClaim())) {
// if working & original claim are same, don't remove twice; we only want to remove the original
// if it's different from the working. Otherwise, we remove two claimant counts. This causes
// an issue if we only updated the FlowFile attributes.
decrementClaimCount(record.getOriginalClaim());
}
final long flowFileLife = System.currentTimeMillis() - flowFile.getEntryDate();
final Connectable connectable = context.getConnectable();
final Object terminator = connectable instanceof ProcessorNode ? ((ProcessorNode) connectable).getProcessor() : connectable;
LOG.info("{} terminated by {}; life of FlowFile = {} ms", new Object[] { flowFile, terminator, flowFileLife });
} else if (record.isWorking() && record.getWorkingClaim() != record.getOriginalClaim()) {
// records which have been updated - remove original if exists
decrementClaimCount(record.getOriginalClaim());
}
}
final long claimRemovalFinishNanos = System.nanoTime();
final long claimRemovalNanos = claimRemovalFinishNanos - claimRemovalStart;
// Update the FlowFile Repository
try {
final Collection<StandardRepositoryRecord> repoRecords = checkpoint.records.values();
context.getFlowFileRepository().updateRepository((Collection) repoRecords);
} catch (final IOException ioe) {
// if we fail to commit the session, we need to roll back
// the checkpoints as well because none of the checkpoints
// were ever committed.
rollback(false, true);
throw new ProcessException("FlowFile Repository failed to update", ioe);
}
final long flowFileRepoUpdateFinishNanos = System.nanoTime();
final long flowFileRepoUpdateNanos = flowFileRepoUpdateFinishNanos - claimRemovalFinishNanos;
updateEventRepository(checkpoint);
final long updateEventRepositoryFinishNanos = System.nanoTime();
final long updateEventRepositoryNanos = updateEventRepositoryFinishNanos - flowFileRepoUpdateFinishNanos;
// transfer the flowfiles to the connections' queues.
final Map<FlowFileQueue, Collection<FlowFileRecord>> recordMap = new HashMap<>();
for (final StandardRepositoryRecord record : checkpoint.records.values()) {
if (record.isMarkedForAbort() || record.isMarkedForDelete()) {
// these don't need to be transferred
continue;
}
// record.getCurrent() may be null here; in this case, we just ignore it, and it will be cleaned up by clearing the records map.
if (record.getCurrent() != null) {
Collection<FlowFileRecord> collection = recordMap.get(record.getDestination());
if (collection == null) {
collection = new ArrayList<>();
recordMap.put(record.getDestination(), collection);
}
collection.add(record.getCurrent());
}
}
for (final Map.Entry<FlowFileQueue, Collection<FlowFileRecord>> entry : recordMap.entrySet()) {
entry.getKey().putAll(entry.getValue());
}
final long enqueueFlowFileFinishNanos = System.nanoTime();
final long enqueueFlowFileNanos = enqueueFlowFileFinishNanos - updateEventRepositoryFinishNanos;
// Delete any files from disk that need to be removed.
for (final Path path : checkpoint.deleteOnCommit.values()) {
try {
Files.deleteIfExists(path);
} catch (final IOException e) {
throw new FlowFileAccessException("Unable to delete " + path.toFile().getAbsolutePath(), e);
}
}
checkpoint.deleteOnCommit.clear();
if (LOG.isInfoEnabled()) {
final String sessionSummary = summarizeEvents(checkpoint);
if (!sessionSummary.isEmpty()) {
LOG.info("{} for {}, committed the following events: {}", new Object[] { this, connectableDescription, sessionSummary });
}
}
for (final Map.Entry<String, Long> entry : checkpoint.countersOnCommit.entrySet()) {
context.adjustCounter(entry.getKey(), entry.getValue());
}
acknowledgeRecords();
resetState();
if (LOG.isDebugEnabled()) {
final StringBuilder timingInfo = new StringBuilder();
timingInfo.append("Session commit for ").append(this).append(" [").append(connectableDescription).append("]").append(" took ");
final long commitNanos = System.nanoTime() - commitStartNanos;
formatNanos(commitNanos, timingInfo);
timingInfo.append("; FlowFile Repository Update took ");
formatNanos(flowFileRepoUpdateNanos, timingInfo);
timingInfo.append("; Claim Removal took ");
formatNanos(claimRemovalNanos, timingInfo);
timingInfo.append("; FlowFile Event Update took ");
formatNanos(updateEventRepositoryNanos, timingInfo);
timingInfo.append("; Enqueuing FlowFiles took ");
formatNanos(enqueueFlowFileNanos, timingInfo);
timingInfo.append("; Updating Provenance Event Repository took ");
formatNanos(updateProvenanceNanos, timingInfo);
LOG.debug(timingInfo.toString());
}
} catch (final Exception e) {
try {
// if we fail to commit the session, we need to roll back
// the checkpoints as well because none of the checkpoints
// were ever committed.
rollback(false, true);
} catch (final Exception e1) {
e.addSuppressed(e1);
}
if (e instanceof RuntimeException) {
throw (RuntimeException) e;
} else {
throw new ProcessException(e);
}
}
}
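The final transfer step in commit groups the surviving records by destination queue so that each queue receives its FlowFiles in a single putAll call. Here is a simplified, standalone sketch of that grouping, assuming the record and queue types used above; computeIfAbsent is used purely for brevity, whereas the listing builds the map by hand.

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import org.apache.nifi.controller.queue.FlowFileQueue;
import org.apache.nifi.controller.repository.FlowFileRecord;
import org.apache.nifi.controller.repository.StandardRepositoryRecord;

final class TransferSketch {
    // Groups records by destination queue, then enqueues each group in one call.
    static void transfer(final Collection<StandardRepositoryRecord> records) {
        final Map<FlowFileQueue, Collection<FlowFileRecord>> recordMap = new HashMap<>();
        for (final StandardRepositoryRecord record : records) {
            if (record.isMarkedForAbort() || record.isMarkedForDelete() || record.getCurrent() == null) {
                continue; // aborted or deleted records are never enqueued
            }
            recordMap.computeIfAbsent(record.getDestination(), queue -> new ArrayList<>())
                     .add(record.getCurrent());
        }
        for (final Map.Entry<FlowFileQueue, Collection<FlowFileRecord>> entry : recordMap.entrySet()) {
            entry.getKey().putAll(entry.getValue());
        }
    }
}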
Use of org.apache.nifi.controller.queue.FlowFileQueue in project nifi by apache.
The class StandardProcessSession, method enrich:
@Override
public StandardProvenanceEventRecord enrich(final ProvenanceEventRecord rawEvent, final FlowFile flowFile) {
verifyTaskActive();
final StandardRepositoryRecord repoRecord = records.get(flowFile);
if (repoRecord == null) {
throw new FlowFileHandlingException(flowFile + " is not known in this session (" + toString() + ")");
}
final StandardProvenanceEventRecord.Builder recordBuilder = new StandardProvenanceEventRecord.Builder().fromEvent(rawEvent);
if (repoRecord.getCurrent() != null && repoRecord.getCurrentClaim() != null) {
final ContentClaim currentClaim = repoRecord.getCurrentClaim();
final long currentOffset = repoRecord.getCurrentClaimOffset();
final long size = flowFile.getSize();
final ResourceClaim resourceClaim = currentClaim.getResourceClaim();
recordBuilder.setCurrentContentClaim(resourceClaim.getContainer(), resourceClaim.getSection(), resourceClaim.getId(), currentOffset + currentClaim.getOffset(), size);
}
if (repoRecord.getOriginal() != null && repoRecord.getOriginalClaim() != null) {
final ContentClaim originalClaim = repoRecord.getOriginalClaim();
final long originalOffset = repoRecord.getOriginal().getContentClaimOffset();
final long originalSize = repoRecord.getOriginal().getSize();
final ResourceClaim resourceClaim = originalClaim.getResourceClaim();
recordBuilder.setPreviousContentClaim(resourceClaim.getContainer(), resourceClaim.getSection(), resourceClaim.getId(), originalOffset + originalClaim.getOffset(), originalSize);
}
final FlowFileQueue originalQueue = repoRecord.getOriginalQueue();
if (originalQueue != null) {
recordBuilder.setSourceQueueIdentifier(originalQueue.getIdentifier());
}
recordBuilder.setAttributes(repoRecord.getOriginalAttributes(), repoRecord.getUpdatedAttributes());
return recordBuilder.build();
}
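The setCurrentContentClaim and setPreviousContentClaim calls translate a FlowFile-relative offset into a position inside the backing resource claim by adding the content claim's own offset. The following is a small sketch of that arithmetic, assuming the ContentClaim and ResourceClaim accessors used above; the ClaimLocation holder is hypothetical.

import org.apache.nifi.controller.repository.claim.ContentClaim;
import org.apache.nifi.controller.repository.claim.ResourceClaim;

final class ClaimLocation {
    final String container;
    final String section;
    final String identifier;
    final long offsetIntoResourceClaim;

    ClaimLocation(final String container, final String section, final String identifier, final long offset) {
        this.container = container;
        this.section = section;
        this.identifier = identifier;
        this.offsetIntoResourceClaim = offset;
    }

    // The FlowFile's offset is relative to the ContentClaim, which in turn begins at
    // claim.getOffset() within the ResourceClaim, hence the addition below.
    static ClaimLocation of(final ContentClaim claim, final long flowFileOffset) {
        final ResourceClaim resourceClaim = claim.getResourceClaim();
        return new ClaimLocation(resourceClaim.getContainer(), resourceClaim.getSection(),
                resourceClaim.getId(), flowFileOffset + claim.getOffset());
    }
}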
Use of org.apache.nifi.controller.queue.FlowFileQueue in project nifi by apache.
The class StandardProcessSession, method migrate:
private void migrate(final StandardProcessSession newOwner, Collection<FlowFile> flowFiles) {
// We don't call validateRecordState() here because we want to allow migration of FlowFiles that have already been marked as removed or transferred, etc.
flowFiles = flowFiles.stream().map(this::getMostRecent).collect(Collectors.toList());
for (final FlowFile flowFile : flowFiles) {
if (openInputStreams.containsKey(flowFile)) {
throw new IllegalStateException(flowFile + " cannot be migrated to a new Process Session because this session currently " + "has an open InputStream for the FlowFile, created by calling ProcessSession.read(FlowFile)");
}
if (openOutputStreams.containsKey(flowFile)) {
throw new IllegalStateException(flowFile + " cannot be migrated to a new Process Session because this session currently " + "has an open OutputStream for the FlowFile, created by calling ProcessSession.write(FlowFile)");
}
if (readRecursionSet.containsKey(flowFile)) {
throw new IllegalStateException(flowFile + " already in use for an active callback or InputStream created by ProcessSession.read(FlowFile) has not been closed");
}
if (writeRecursionSet.contains(flowFile)) {
throw new IllegalStateException(flowFile + " already in use for an active callback or OutputStream created by ProcessSession.write(FlowFile) has not been closed");
}
final StandardRepositoryRecord record = records.get(flowFile);
if (record == null) {
throw new FlowFileHandlingException(flowFile + " is not known in this session (" + toString() + ")");
}
}
// If we have a FORK event for one of the given FlowFiles, then all children must also be migrated. Otherwise, we
// could have a case where we have FlowFile A transferred and eventually exiting the flow and later the 'newOwner'
// ProcessSession is committed, claiming to have created FlowFiles from the parent, which is no longer even in
// the flow. This would be very confusing when looking at the provenance for the FlowFile, so it is best to avoid this.
final Set<String> flowFileIds = flowFiles.stream().map(ff -> ff.getAttribute(CoreAttributes.UUID.key())).collect(Collectors.toSet());
for (final Map.Entry<FlowFile, ProvenanceEventBuilder> entry : forkEventBuilders.entrySet()) {
final FlowFile eventFlowFile = entry.getKey();
if (flowFiles.contains(eventFlowFile)) {
final ProvenanceEventBuilder eventBuilder = entry.getValue();
for (final String childId : eventBuilder.getChildFlowFileIds()) {
if (!flowFileIds.contains(childId)) {
throw new IllegalStateException("Cannot migrate " + eventFlowFile + " to a new session because it was forked to create " + eventBuilder.getChildFlowFileIds().size() + " children and not all children are being migrated. If any FlowFile is forked, all of its children must also be migrated at the same time as the forked FlowFile");
}
}
}
}
// For each FORK event that references one of the migrated children, create a copy of the event builder for the new owner of the FlowFile and remove the child from our fork event builder.
for (final Map.Entry<FlowFile, ProvenanceEventBuilder> entry : forkEventBuilders.entrySet()) {
final FlowFile eventFlowFile = entry.getKey();
final ProvenanceEventBuilder eventBuilder = entry.getValue();
final Set<String> childrenIds = new HashSet<>(eventBuilder.getChildFlowFileIds());
ProvenanceEventBuilder copy = null;
for (final FlowFile flowFile : flowFiles) {
final String flowFileId = flowFile.getAttribute(CoreAttributes.UUID.key());
if (childrenIds.contains(flowFileId)) {
eventBuilder.removeChildFlowFile(flowFile);
if (copy == null) {
copy = eventBuilder.copy();
copy.getChildFlowFileIds().clear();
}
copy.addChildFlowFile(flowFileId);
}
}
if (copy != null) {
newOwner.forkEventBuilders.put(eventFlowFile, copy);
}
}
newOwner.processingStartTime = Math.min(newOwner.processingStartTime, processingStartTime);
for (final FlowFile flowFile : flowFiles) {
final FlowFileRecord flowFileRecord = (FlowFileRecord) flowFile;
final StandardRepositoryRecord repoRecord = this.records.remove(flowFile);
newOwner.records.put(flowFileRecord, repoRecord);
// Adjust the counts for Connections for each FlowFile that was pulled from a Connection.
// We do not have to worry about accounting for 'input counts' on connections because those
// are incremented only during a checkpoint, and anything that's been checkpointed has
// also been committed above.
final FlowFileQueue inputQueue = repoRecord.getOriginalQueue();
if (inputQueue != null) {
final String connectionId = inputQueue.getIdentifier();
incrementConnectionOutputCounts(connectionId, -1, -repoRecord.getOriginal().getSize());
newOwner.incrementConnectionOutputCounts(connectionId, 1, repoRecord.getOriginal().getSize());
unacknowledgedFlowFiles.get(inputQueue).remove(flowFile);
newOwner.unacknowledgedFlowFiles.computeIfAbsent(inputQueue, queue -> new HashSet<>()).add(flowFileRecord);
flowFilesIn--;
contentSizeIn -= flowFile.getSize();
newOwner.flowFilesIn++;
newOwner.contentSizeIn += flowFile.getSize();
}
final String flowFileId = flowFile.getAttribute(CoreAttributes.UUID.key());
if (removedFlowFiles.remove(flowFileId)) {
newOwner.removedFlowFiles.add(flowFileId);
newOwner.removedCount++;
newOwner.removedBytes += flowFile.getSize();
removedCount--;
removedBytes -= flowFile.getSize();
}
if (createdFlowFiles.remove(flowFileId)) {
newOwner.createdFlowFiles.add(flowFileId);
}
if (repoRecord.getTransferRelationship() != null) {
flowFilesOut--;
contentSizeOut -= flowFile.getSize();
newOwner.flowFilesOut++;
newOwner.contentSizeOut += flowFile.getSize();
}
final List<ProvenanceEventRecord> events = generatedProvenanceEvents.remove(flowFile);
if (events != null) {
newOwner.generatedProvenanceEvents.put(flowFile, events);
}
final ContentClaim currentClaim = repoRecord.getCurrentClaim();
if (currentClaim != null) {
final ByteCountingOutputStream appendableStream = appendableStreams.remove(currentClaim);
if (appendableStream != null) {
newOwner.appendableStreams.put(currentClaim, appendableStream);
}
}
final Path toDelete = deleteOnCommit.remove(flowFile);
if (toDelete != null) {
newOwner.deleteOnCommit.put(flowFile, toDelete);
}
}
provenanceReporter.migrate(newOwner.provenanceReporter, flowFileIds);
}
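From a processor's point of view, this private method sits behind the public ProcessSession.migrate(ProcessSession, Collection) call, which lets FlowFiles pulled in a short-lived session be handed to a longer-lived one (the pattern used by bin-based processors). Below is a hedged sketch of that calling pattern; the class and method names are hypothetical.

import java.util.Collections;
import java.util.List;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;

final class MigrationSketch {
    // Moves the given FlowFile from the short-lived session into the long-lived one.
    // After migrate() returns, only longLivedSession may transfer, remove, or commit it.
    static void handOff(final ProcessSession shortLivedSession, final ProcessSession longLivedSession, final FlowFile flowFile) {
        final List<FlowFile> toMigrate = Collections.singletonList(flowFile);
        shortLivedSession.migrate(longLivedSession, toMigrate);
        // the short-lived session can now be committed without affecting the migrated FlowFile
        shortLivedSession.commit();
    }
}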
Use of org.apache.nifi.controller.queue.FlowFileQueue in project nifi by apache.
The class WriteAheadFlowFileRepository, method loadFlowFiles:
@Override
public long loadFlowFiles(final QueueProvider queueProvider, final long minimumSequenceNumber) throws IOException {
final Map<String, FlowFileQueue> queueMap = new HashMap<>();
for (final FlowFileQueue queue : queueProvider.getAllQueues()) {
queueMap.put(queue.getIdentifier(), queue);
}
serdeFactory.setQueueMap(queueMap);
// Since we used to use the MinimalLockingWriteAheadRepository, we need to ensure that if the FlowFile
// Repo was written using that impl, that we properly recover from the implementation.
Collection<RepositoryRecord> recordList = wal.recoverRecords();
// If no records were recovered, check the other write-ahead log implementation in case the configured impl was changed; if records are found there, then we will update the new WAL (with fsync()) and delete the old repository so that we won't recover it again.
if (recordList == null || recordList.isEmpty()) {
if (walImplementation.equals(SEQUENTIAL_ACCESS_WAL)) {
// Configured to use Sequential Access WAL but it has no records. Check if there are records in
// a MinimalLockingWriteAheadLog that we can recover.
recordList = migrateFromMinimalLockingLog(wal).orElse(new ArrayList<>());
} else {
// Configured to use Minimal Locking WAL but it has no records. Check if there are records in
// a SequentialAccess Log that we can recover.
recordList = migrateFromSequentialAccessLog(wal).orElse(new ArrayList<>());
}
}
serdeFactory.setQueueMap(null);
for (final RepositoryRecord record : recordList) {
final ContentClaim claim = record.getCurrentClaim();
if (claim != null) {
claimManager.incrementClaimantCount(claim.getResourceClaim());
}
}
// Determine the next sequence number for FlowFiles
int numFlowFilesMissingQueue = 0;
long maxId = minimumSequenceNumber;
for (final RepositoryRecord record : recordList) {
final long recordId = serdeFactory.getRecordIdentifier(record);
if (recordId > maxId) {
maxId = recordId;
}
final FlowFileRecord flowFile = record.getCurrent();
final FlowFileQueue queue = record.getOriginalQueue();
if (queue == null) {
numFlowFilesMissingQueue++;
} else {
queue.put(flowFile);
}
}
// Set the AtomicLong to 1 more than the max ID so that calls to #getNextFlowFileSequence() will
// return the appropriate number.
flowFileSequenceGenerator.set(maxId + 1);
logger.info("Successfully restored {} FlowFiles", recordList.size() - numFlowFilesMissingQueue);
if (numFlowFilesMissingQueue > 0) {
logger.warn("On recovery, found {} FlowFiles whose queue no longer exists. These FlowFiles will be dropped.", numFlowFilesMissingQueue);
}
final Runnable checkpointRunnable = new Runnable() {
@Override
public void run() {
try {
logger.info("Initiating checkpoint of FlowFile Repository");
final long start = System.nanoTime();
final int numRecordsCheckpointed = checkpoint();
final long end = System.nanoTime();
final long millis = TimeUnit.MILLISECONDS.convert(end - start, TimeUnit.NANOSECONDS);
logger.info("Successfully checkpointed FlowFile Repository with {} records in {} milliseconds", numRecordsCheckpointed, millis);
} catch (final Throwable t) {
logger.error("Unable to checkpoint FlowFile Repository due to " + t.toString(), t);
}
}
};
checkpointFuture = checkpointExecutor.scheduleWithFixedDelay(checkpointRunnable, checkpointDelayMillis, checkpointDelayMillis, TimeUnit.MILLISECONDS);
return maxId;
}
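Recovery re-enqueues each record on its original queue and drops (after counting) any record whose queue no longer exists in the flow. Here is a standalone sketch of that re-enqueue step, assuming only the RepositoryRecord and FlowFileQueue accessors used above; the class name is hypothetical.

import java.util.Collection;
import org.apache.nifi.controller.queue.FlowFileQueue;
import org.apache.nifi.controller.repository.FlowFileRecord;
import org.apache.nifi.controller.repository.RepositoryRecord;

final class RequeueSketch {
    // Puts each recovered FlowFile back on its original queue and returns how many
    // records had to be dropped because their queue no longer exists in the flow.
    static int requeue(final Collection<RepositoryRecord> recovered) {
        int missingQueue = 0;
        for (final RepositoryRecord record : recovered) {
            final FlowFileRecord flowFile = record.getCurrent();
            final FlowFileQueue queue = record.getOriginalQueue();
            if (queue == null) {
                missingQueue++;
            } else {
                queue.put(flowFile);
            }
        }
        return missingQueue;
    }
}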