Use of org.apache.nifi.stream.io.ByteCountingOutputStream in project nifi by apache.
In the class StandardProcessSession, method write (the OutputStream-returning overload).
@Override
public OutputStream write(FlowFile source) {
    verifyTaskActive();

    source = validateRecordState(source);
    final StandardRepositoryRecord record = records.get(source);

    ContentClaim newClaim = null;
    try {
        newClaim = claimCache.getContentClaim();
        claimLog.debug("Creating ContentClaim {} for 'write' for {}", newClaim, source);
        ensureNotAppending(newClaim);

        final OutputStream rawStream = claimCache.write(newClaim);
        final OutputStream disableOnClose = new DisableOnCloseOutputStream(rawStream);
        final ByteCountingOutputStream countingOut = new ByteCountingOutputStream(disableOnClose);

        final FlowFile sourceFlowFile = source;
        final ContentClaim updatedClaim = newClaim;
        final OutputStream errorHandlingOutputStream = new OutputStream() {
            private boolean closed = false;

            @Override
            public void write(final int b) throws IOException {
                try {
                    countingOut.write(b);
                } catch (final IOException ioe) {
                    LOG.error("Failed to write content to " + sourceFlowFile + "; rolling back session", ioe);
                    rollback(true);
                    close();
                    throw new FlowFileAccessException("Failed to write to Content Repository for " + sourceFlowFile, ioe);
                }
            }

            @Override
            public void write(final byte[] b) throws IOException {
                try {
                    countingOut.write(b);
                } catch (final IOException ioe) {
                    LOG.error("Failed to write content to " + sourceFlowFile + "; rolling back session", ioe);
                    rollback(true);
                    close();
                    throw new FlowFileAccessException("Failed to write to Content Repository for " + sourceFlowFile, ioe);
                }
            }

            @Override
            public void write(final byte[] b, final int off, final int len) throws IOException {
                try {
                    countingOut.write(b, off, len);
                } catch (final IOException ioe) {
                    LOG.error("Failed to write content to " + sourceFlowFile + "; rolling back session", ioe);
                    rollback(true);
                    close();
                    throw new FlowFileAccessException("Failed to write to Content Repository for " + sourceFlowFile, ioe);
                }
            }

            @Override
            public void flush() throws IOException {
                try {
                    countingOut.flush();
                } catch (final IOException ioe) {
                    LOG.error("Failed to write content to " + sourceFlowFile + "; rolling back session", ioe);
                    rollback(true);
                    close();
                    throw new FlowFileAccessException("Failed to write to Content Repository for " + sourceFlowFile, ioe);
                }
            }

            @Override
            public void close() throws IOException {
                if (closed) {
                    return;
                }
                closed = true;

                writeRecursionSet.remove(sourceFlowFile);

                final long bytesWritten = countingOut.getBytesWritten();
                StandardProcessSession.this.bytesWritten += bytesWritten;

                final OutputStream removed = openOutputStreams.remove(sourceFlowFile);
                if (removed == null) {
                    LOG.error("Closed Session's OutputStream but there was no entry for it in the map; sourceFlowFile={}; map={}", sourceFlowFile, openOutputStreams);
                }

                flush();
                removeTemporaryClaim(record);

                final FlowFileRecord newFile = new StandardFlowFileRecord.Builder()
                    .fromFlowFile(record.getCurrent())
                    .contentClaim(updatedClaim)
                    .contentClaimOffset(Math.max(0, updatedClaim.getLength() - bytesWritten))
                    .size(bytesWritten)
                    .build();
                record.setWorking(newFile);
            }
        };

        writeRecursionSet.add(source);
        openOutputStreams.put(source, errorHandlingOutputStream);

        return createTaskTerminationStream(errorHandlingOutputStream);
    } catch (final ContentNotFoundException nfe) {
        // need to reset write claim before we can remove the claim
        resetWriteClaims();
        destroyContent(newClaim);
        handleContentNotFound(nfe, record);
        throw nfe;
    } catch (final FlowFileAccessException ffae) {
        // need to reset write claim before we can remove the claim
        resetWriteClaims();
        destroyContent(newClaim);
        throw ffae;
    } catch (final IOException ioe) {
        // need to reset write claim before we can remove the claim
        resetWriteClaims();
        destroyContent(newClaim);
        throw new ProcessException("IOException thrown from " + connectableDescription + ": " + ioe.toString(), ioe);
    } catch (final Throwable t) {
        // need to reset write claim before we can remove the claim
        resetWriteClaims();
        destroyContent(newClaim);
        throw t;
    }
}
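For context, a minimal sketch of how processor code might consume this OutputStream-returning overload. The relationship name REL_SUCCESS and the payload are illustrative assumptions, not taken from the source above:

// Hypothetical onTrigger body; session.write(flowFile) hands back the
// error-handling stream built above, and closing it finalizes the new content claim.
FlowFile flowFile = session.get();
if (flowFile == null) {
    return;
}
try (final OutputStream out = session.write(flowFile)) {
    out.write("example payload".getBytes(StandardCharsets.UTF_8));
} catch (final IOException e) {
    throw new ProcessException("Failed to write content for " + flowFile, e);
}
session.transfer(flowFile, REL_SUCCESS);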
Use of org.apache.nifi.stream.io.ByteCountingOutputStream in project nifi by apache.
In the class StandardProcessSession, method migrate.
private void migrate(final StandardProcessSession newOwner, Collection<FlowFile> flowFiles) {
    // We don't call validateRecordState() here because we want to allow migration of FlowFiles
    // that have already been marked as removed or transferred, etc.
    flowFiles = flowFiles.stream().map(this::getMostRecent).collect(Collectors.toList());

    for (final FlowFile flowFile : flowFiles) {
        if (openInputStreams.containsKey(flowFile)) {
            throw new IllegalStateException(flowFile + " cannot be migrated to a new Process Session because this session currently "
                + "has an open InputStream for the FlowFile, created by calling ProcessSession.read(FlowFile)");
        }

        if (openOutputStreams.containsKey(flowFile)) {
            throw new IllegalStateException(flowFile + " cannot be migrated to a new Process Session because this session currently "
                + "has an open OutputStream for the FlowFile, created by calling ProcessSession.write(FlowFile)");
        }

        if (readRecursionSet.containsKey(flowFile)) {
            throw new IllegalStateException(flowFile + " already in use for an active callback or InputStream created by ProcessSession.read(FlowFile) has not been closed");
        }
        if (writeRecursionSet.contains(flowFile)) {
            throw new IllegalStateException(flowFile + " already in use for an active callback or OutputStream created by ProcessSession.write(FlowFile) has not been closed");
        }

        final StandardRepositoryRecord record = records.get(flowFile);
        if (record == null) {
            throw new FlowFileHandlingException(flowFile + " is not known in this session (" + toString() + ")");
        }
    }

    // If we have a FORK event for one of the given FlowFiles, then all children must also be migrated. Otherwise, we
    // could have a case where we have FlowFile A transferred and eventually exiting the flow and later the 'newOwner'
    // ProcessSession is committed, claiming to have created FlowFiles from the parent, which is no longer even in
    // the flow. This would be very confusing when looking at the provenance for the FlowFile, so it is best to avoid this.
    final Set<String> flowFileIds = flowFiles.stream().map(ff -> ff.getAttribute(CoreAttributes.UUID.key())).collect(Collectors.toSet());
    for (final Map.Entry<FlowFile, ProvenanceEventBuilder> entry : forkEventBuilders.entrySet()) {
        final FlowFile eventFlowFile = entry.getKey();
        if (flowFiles.contains(eventFlowFile)) {
            final ProvenanceEventBuilder eventBuilder = entry.getValue();
            for (final String childId : eventBuilder.getChildFlowFileIds()) {
                if (!flowFileIds.contains(childId)) {
                    throw new IllegalStateException("Cannot migrate " + eventFlowFile + " to a new session because it was forked to create "
                        + eventBuilder.getChildFlowFileIds().size() + " children and not all children are being migrated. If any FlowFile is forked, "
                        + "all of its children must also be migrated at the same time as the forked FlowFile");
                }
            }
        }
    }
    // For each FlowFile being migrated that is a child in a FORK event, copy the fork
    // event builder for the new owner of the FlowFile and remove the child from our fork event builder.
    for (final Map.Entry<FlowFile, ProvenanceEventBuilder> entry : forkEventBuilders.entrySet()) {
        final FlowFile eventFlowFile = entry.getKey();
        final ProvenanceEventBuilder eventBuilder = entry.getValue();

        final Set<String> childrenIds = new HashSet<>(eventBuilder.getChildFlowFileIds());

        ProvenanceEventBuilder copy = null;
        for (final FlowFile flowFile : flowFiles) {
            final String flowFileId = flowFile.getAttribute(CoreAttributes.UUID.key());
            if (childrenIds.contains(flowFileId)) {
                eventBuilder.removeChildFlowFile(flowFile);

                if (copy == null) {
                    copy = eventBuilder.copy();
                    copy.getChildFlowFileIds().clear();
                }
                copy.addChildFlowFile(flowFileId);
            }
        }

        if (copy != null) {
            newOwner.forkEventBuilders.put(eventFlowFile, copy);
        }
    }

    newOwner.processingStartTime = Math.min(newOwner.processingStartTime, processingStartTime);

    for (final FlowFile flowFile : flowFiles) {
        final FlowFileRecord flowFileRecord = (FlowFileRecord) flowFile;

        final StandardRepositoryRecord repoRecord = this.records.remove(flowFile);
        newOwner.records.put(flowFileRecord, repoRecord);

        // Adjust the counts for Connections for each FlowFile that was pulled from a Connection.
        // We do not have to worry about accounting for 'input counts' on connections because those
        // are incremented only during a checkpoint, and anything that's been checkpointed has
        // also been committed above.
        final FlowFileQueue inputQueue = repoRecord.getOriginalQueue();
        if (inputQueue != null) {
            final String connectionId = inputQueue.getIdentifier();
            incrementConnectionOutputCounts(connectionId, -1, -repoRecord.getOriginal().getSize());
            newOwner.incrementConnectionOutputCounts(connectionId, 1, repoRecord.getOriginal().getSize());

            unacknowledgedFlowFiles.get(inputQueue).remove(flowFile);
            newOwner.unacknowledgedFlowFiles.computeIfAbsent(inputQueue, queue -> new HashSet<>()).add(flowFileRecord);

            flowFilesIn--;
            contentSizeIn -= flowFile.getSize();

            newOwner.flowFilesIn++;
            newOwner.contentSizeIn += flowFile.getSize();
        }

        final String flowFileId = flowFile.getAttribute(CoreAttributes.UUID.key());
        if (removedFlowFiles.remove(flowFileId)) {
            newOwner.removedFlowFiles.add(flowFileId);
            newOwner.removedCount++;
            newOwner.removedBytes += flowFile.getSize();

            removedCount--;
            removedBytes -= flowFile.getSize();
        }

        if (createdFlowFiles.remove(flowFileId)) {
            newOwner.createdFlowFiles.add(flowFileId);
        }

        if (repoRecord.getTransferRelationship() != null) {
            flowFilesOut--;
            contentSizeOut -= flowFile.getSize();

            newOwner.flowFilesOut++;
            newOwner.contentSizeOut += flowFile.getSize();
        }

        final List<ProvenanceEventRecord> events = generatedProvenanceEvents.remove(flowFile);
        if (events != null) {
            newOwner.generatedProvenanceEvents.put(flowFile, events);
        }

        final ContentClaim currentClaim = repoRecord.getCurrentClaim();
        if (currentClaim != null) {
            final ByteCountingOutputStream appendableStream = appendableStreams.remove(currentClaim);
            if (appendableStream != null) {
                newOwner.appendableStreams.put(currentClaim, appendableStream);
            }
        }

        final Path toDelete = deleteOnCommit.remove(flowFile);
        if (toDelete != null) {
            newOwner.deleteOnCommit.put(flowFile, toDelete);
        }
    }

    provenanceReporter.migrate(newOwner.provenanceReporter, flowFileIds);
}
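For context, a hedged sketch of how this is reached from processor code: the public ProcessSession.migrate(ProcessSession, Collection&lt;FlowFile&gt;) call delegates here. In this sketch, batchSession is an assumed field obtained from a ProcessSessionFactory, used to batch FlowFiles across onTrigger invocations:

// Hypothetical batching pattern; the field and relationship names are illustrative.
final FlowFile flowFile = session.get();
if (flowFile != null) {
    // Hand ownership of the FlowFile (its repository record, counts, provenance
    // builders, and any appendable stream) over to the longer-lived session.
    session.migrate(batchSession, Collections.singleton(flowFile));
}
// batchSession.commit() would be called later, once the batch is complete.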
Use of org.apache.nifi.stream.io.ByteCountingOutputStream in project nifi by apache.
In the class StandardProcessSession, method ensureNotAppending.
private void ensureNotAppending(final ContentClaim claim) throws IOException {
    if (claim == null) {
        return;
    }

    final ByteCountingOutputStream outStream = appendableStreams.remove(claim);
    if (outStream == null) {
        return;
    }

    outStream.flush();
    outStream.close();
}
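For reference, ByteCountingOutputStream itself simply delegates writes to the wrapped stream while counting bytes; the getBytesWritten() value is what the session code above uses to size FlowFiles and to measure how many bytes an append actually added. A self-contained sketch (not from the NiFi source above):

// Wrap any OutputStream and track how many bytes pass through it.
final ByteArrayOutputStream target = new ByteArrayOutputStream();
final ByteCountingOutputStream counting = new ByteCountingOutputStream(target);
counting.write(new byte[] {1, 2, 3});
counting.write(4);
// counting.getBytesWritten() now returns 4L, while 'target' holds the same four bytes.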
Use of org.apache.nifi.stream.io.ByteCountingOutputStream in project nifi by apache.
In the class StandardProcessSession, method write (the StreamCallback overload).
@Override
public FlowFile write(FlowFile source, final StreamCallback writer) {
    verifyTaskActive();

    source = validateRecordState(source);
    final StandardRepositoryRecord record = records.get(source);
    final ContentClaim currClaim = record.getCurrentClaim();

    long writtenToFlowFile = 0L;
    ContentClaim newClaim = null;
    try {
        newClaim = claimCache.getContentClaim();
        claimLog.debug("Creating ContentClaim {} for 'write' for {}", newClaim, source);

        ensureNotAppending(newClaim);

        if (currClaim != null) {
            claimCache.flush(currClaim.getResourceClaim());
        }

        try (final InputStream is = getInputStream(source, currClaim, record.getCurrentClaimOffset(), true);
            final InputStream limitedIn = new LimitedInputStream(is, source.getSize());
            final InputStream disableOnCloseIn = new DisableOnCloseInputStream(limitedIn);
            final ByteCountingInputStream countingIn = new ByteCountingInputStream(disableOnCloseIn, bytesRead);
            final OutputStream os = claimCache.write(newClaim);
            final OutputStream disableOnCloseOut = new DisableOnCloseOutputStream(os);
            final ByteCountingOutputStream countingOut = new ByteCountingOutputStream(disableOnCloseOut)) {

            writeRecursionSet.add(source);
            // We want to differentiate between IOExceptions thrown by the repository and IOExceptions thrown from
            // Processor code. As a result, we have the FlowFileAccessInputStream that catches IOException from the repository
            // and translates it into either FlowFileAccessException or ContentNotFoundException. We keep track of any
            // ContentNotFoundException because if it is thrown, the Processor code may catch it and do something else with it,
            // but in reality, if it is thrown, we want to know about it and handle it, even if the Processor code catches it.
            final FlowFileAccessInputStream ffais = new FlowFileAccessInputStream(countingIn, source, currClaim);
            final FlowFileAccessOutputStream ffaos = new FlowFileAccessOutputStream(countingOut, source);

            boolean cnfeThrown = false;
            try {
                writer.process(createTaskTerminationStream(ffais), createTaskTerminationStream(ffaos));
            } catch (final ContentNotFoundException cnfe) {
                cnfeThrown = true;
                throw cnfe;
            } finally {
                writtenToFlowFile = countingOut.getBytesWritten();
                this.bytesWritten += writtenToFlowFile;
                this.bytesRead += countingIn.getBytesRead();
                writeRecursionSet.remove(source);
                // if cnfeThrown is true, we don't need to re-throw the Exception; it will propagate.
                if (!cnfeThrown && ffais.getContentNotFoundException() != null) {
                    throw ffais.getContentNotFoundException();
                }
            }
        }
    } catch (final ContentNotFoundException nfe) {
        destroyContent(newClaim);
        handleContentNotFound(nfe, record);
    } catch (final IOException ioe) {
        destroyContent(newClaim);
        throw new ProcessException("IOException thrown from " + connectableDescription + ": " + ioe.toString(), ioe);
    } catch (final FlowFileAccessException ffae) {
        destroyContent(newClaim);
        throw ffae;
    } catch (final Throwable t) {
        destroyContent(newClaim);
        throw t;
    }

    removeTemporaryClaim(record);
    final FlowFileRecord newFile = new StandardFlowFileRecord.Builder()
        .fromFlowFile(record.getCurrent())
        .contentClaim(newClaim)
        .contentClaimOffset(Math.max(0L, newClaim.getLength() - writtenToFlowFile))
        .size(writtenToFlowFile)
        .build();
    record.setWorking(newFile);
    return newFile;
}
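A hedged usage sketch of this StreamCallback overload, as a processor would call it; the pass-through copy is illustrative and would normally be replaced by a real transformation:

flowFile = session.write(flowFile, new StreamCallback() {
    @Override
    public void process(final InputStream in, final OutputStream out) throws IOException {
        // Read the existing content and write the (possibly transformed) bytes back out.
        final byte[] buffer = new byte[8192];
        int len;
        while ((len = in.read(buffer)) != -1) {
            out.write(buffer, 0, len);
        }
    }
});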
Use of org.apache.nifi.stream.io.ByteCountingOutputStream in project nifi by apache.
In the class StandardProcessSession, method append.
@Override
public FlowFile append(FlowFile source, final OutputStreamCallback writer) {
    verifyTaskActive();

    source = validateRecordState(source);
    final StandardRepositoryRecord record = records.get(source);
    long newSize = 0L;

    // Get the current Content Claim from the record and see if we already have
    // an OutputStream that we can append to.
    final ContentClaim oldClaim = record.getCurrentClaim();
    ByteCountingOutputStream outStream = oldClaim == null ? null : appendableStreams.get(oldClaim);
    long originalByteWrittenCount = 0;

    ContentClaim newClaim = null;
    try {
        if (outStream == null) {
            claimCache.flush(oldClaim);

            try (final InputStream oldClaimIn = context.getContentRepository().read(oldClaim)) {
                newClaim = context.getContentRepository().create(context.getConnectable().isLossTolerant());
                claimLog.debug("Creating ContentClaim {} for 'append' for {}", newClaim, source);

                final OutputStream rawOutStream = context.getContentRepository().write(newClaim);
                final OutputStream bufferedOutStream = new BufferedOutputStream(rawOutStream);
                outStream = new ByteCountingOutputStream(bufferedOutStream);
                originalByteWrittenCount = 0;

                appendableStreams.put(newClaim, outStream);

                // We need to copy all of the data from the old claim to the new claim
                StreamUtils.copy(oldClaimIn, outStream);

                // wrap our OutputStreams so that the processor cannot close it
                try (final OutputStream disableOnClose = new DisableOnCloseOutputStream(outStream)) {
                    writeRecursionSet.add(source);
                    writer.process(new FlowFileAccessOutputStream(disableOnClose, source));
                } finally {
                    writeRecursionSet.remove(source);
                }
            }
        } else {
            newClaim = oldClaim;
            originalByteWrittenCount = outStream.getBytesWritten();

            // wrap our OutputStreams so that the processor cannot close it
            try (final OutputStream disableOnClose = new DisableOnCloseOutputStream(outStream);
                final OutputStream flowFileAccessOutStream = new FlowFileAccessOutputStream(disableOnClose, source)) {
                writeRecursionSet.add(source);
                writer.process(flowFileAccessOutStream);
            } finally {
                writeRecursionSet.remove(source);
            }
        }

        // update the newSize to reflect the number of bytes written
        newSize = outStream.getBytesWritten();
    } catch (final ContentNotFoundException nfe) {
        // need to reset write claim before we can remove the claim
        resetWriteClaims();
        // Destroy the new claim only if it differs from the old claim: the old claim's
        // claimant count is decremented when the FlowFile record itself is cleaned up, so if we destroyed
        // it here also, we would be decrementing the claimant count twice!
        if (newClaim != oldClaim) {
            destroyContent(newClaim);
        }

        handleContentNotFound(nfe, record);
    } catch (final IOException ioe) {
        // need to reset write claim before we can remove the claim
        resetWriteClaims();

        // See above explanation for why this is done only if newClaim != oldClaim
        if (newClaim != oldClaim) {
            destroyContent(newClaim);
        }

        throw new ProcessException("IOException thrown from " + connectableDescription + ": " + ioe.toString(), ioe);
    } catch (final Throwable t) {
        // need to reset write claim before we can remove the claim
        resetWriteClaims();

        // See above explanation for why this is done only if newClaim != oldClaim
        if (newClaim != oldClaim) {
            destroyContent(newClaim);
        }

        throw t;
    } finally {
        if (outStream != null) {
            final long bytesWrittenThisIteration = outStream.getBytesWritten() - originalByteWrittenCount;
            bytesWritten += bytesWrittenThisIteration;
        }
    }
    // Remove the temporary claim only if the content claim changed; a temporary claim exists if
    // the FlowFile was written to, via #write(), and then append() was called.
    if (newClaim != oldClaim) {
        removeTemporaryClaim(record);
    }

    final FlowFileRecord newFile = new StandardFlowFileRecord.Builder()
        .fromFlowFile(record.getCurrent())
        .contentClaim(newClaim)
        .contentClaimOffset(0)
        .size(newSize)
        .build();
    record.setWorking(newFile);
    return newFile;
}
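A hedged usage sketch of append as a processor would call it; the payload is illustrative. Repeated calls within one session reuse the cached stream held in appendableStreams above, avoiding a full content copy on each call:

flowFile = session.append(flowFile, new OutputStreamCallback() {
    @Override
    public void process(final OutputStream out) throws IOException {
        out.write("another record\n".getBytes(StandardCharsets.UTF_8));
    }
});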