Use of org.apache.nifi.processor.exception.FlowFileAccessException in project nifi by apache.
The skip method of the class FlowFileAccessInputStream:
@Override
public long skip(final long n) throws IOException {
    try {
        final long count = super.skip(n);
        bytesConsumed += count;
        return count;
    } catch (final IOException ioe) {
        throw new FlowFileAccessException("Could not skip data in " + flowFile, ioe);
    }
}
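FlowFileAccessException extends RuntimeException, so the wrapper above can surface repository I/O failures to processor code without a throws clause. A minimal sketch of how a processor callback might consume such a stream, assuming session and a final flowFile are in scope as in a typical onTrigger (illustrative only, not code from the NiFi repository):

session.read(flowFile, (final InputStream in) -> {
    long remaining = flowFile.getSize();
    while (remaining > 0) {
        // may throw the unchecked FlowFileAccessException if the repository fails
        final long skipped = in.skip(remaining);
        if (skipped <= 0) {
            break; // end of stream or no forward progress
        }
        remaining -= skipped;
    }
});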
Use of org.apache.nifi.processor.exception.FlowFileAccessException in project nifi by apache.
The onTrigger method of the class AbstractFetchHDFSRecord:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // do this before getting a flow file so that we always get a chance to attempt Kerberos relogin
    final FileSystem fileSystem = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();
    if (configuration == null || fileSystem == null || ugi == null) {
        getLogger().error("Processor not configured properly because Configuration, FileSystem, or UserGroupInformation was null");
        context.yield();
        return;
    }
    final FlowFile originalFlowFile = session.get();
    if (originalFlowFile == null) {
        context.yield();
        return;
    }
    ugi.doAs((PrivilegedAction<Object>) () -> {
        FlowFile child = null;
        final String filenameValue = context.getProperty(FILENAME).evaluateAttributeExpressions(originalFlowFile).getValue();
        try {
            final Path path = new Path(filenameValue);
            final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);
            final AtomicReference<WriteResult> writeResult = new AtomicReference<>();
            final RecordSetWriterFactory recordSetWriterFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
            final StopWatch stopWatch = new StopWatch(true);
            // use a child FlowFile so that if any error occurs we can route the original untouched FlowFile to retry/failure
            child = session.create(originalFlowFile);
            final AtomicReference<String> mimeTypeRef = new AtomicReference<>();
            child = session.write(child, (final OutputStream rawOut) -> {
                try (final BufferedOutputStream out = new BufferedOutputStream(rawOut);
                     final HDFSRecordReader recordReader = createHDFSRecordReader(context, originalFlowFile, configuration, path)) {
                    Record record = recordReader.nextRecord();
                    final RecordSchema schema = recordSetWriterFactory.getSchema(originalFlowFile.getAttributes(), record == null ? null : record.getSchema());
                    try (final RecordSetWriter recordSetWriter = recordSetWriterFactory.createWriter(getLogger(), schema, out)) {
                        recordSetWriter.beginRecordSet();
                        if (record != null) {
                            recordSetWriter.write(record);
                        }
                        while ((record = recordReader.nextRecord()) != null) {
                            recordSetWriter.write(record);
                        }
                        writeResult.set(recordSetWriter.finishRecordSet());
                        mimeTypeRef.set(recordSetWriter.getMimeType());
                    }
                } catch (Exception e) {
                    exceptionHolder.set(e);
                }
            });
            stopWatch.stop();
            // if any errors happened within the session.write then throw the exception so we jump
            // into one of the appropriate catch blocks below
            if (exceptionHolder.get() != null) {
                throw exceptionHolder.get();
            }
            FlowFile successFlowFile = postProcess(context, session, child, path);
            final Map<String, String> attributes = new HashMap<>(writeResult.get().getAttributes());
            attributes.put(RECORD_COUNT_ATTR, String.valueOf(writeResult.get().getRecordCount()));
            attributes.put(CoreAttributes.MIME_TYPE.key(), mimeTypeRef.get());
            successFlowFile = session.putAllAttributes(successFlowFile, attributes);
            final Path qualifiedPath = path.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
            getLogger().info("Successfully received content from {} for {} in {} milliseconds", new Object[] { qualifiedPath, successFlowFile, stopWatch.getDuration() });
            session.getProvenanceReporter().fetch(successFlowFile, qualifiedPath.toString(), stopWatch.getDuration(TimeUnit.MILLISECONDS));
            session.transfer(successFlowFile, REL_SUCCESS);
            session.remove(originalFlowFile);
            return null;
        } catch (final FileNotFoundException | AccessControlException e) {
            getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure", new Object[] { filenameValue, originalFlowFile, e });
            final FlowFile failureFlowFile = session.putAttribute(originalFlowFile, FETCH_FAILURE_REASON_ATTR, e.getMessage() == null ? e.toString() : e.getMessage());
            session.transfer(failureFlowFile, REL_FAILURE);
        } catch (final IOException | FlowFileAccessException e) {
            getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to retry", new Object[] { filenameValue, originalFlowFile, e });
            session.transfer(session.penalize(originalFlowFile), REL_RETRY);
            context.yield();
        } catch (final Throwable t) {
            getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure", new Object[] { filenameValue, originalFlowFile, t });
            final FlowFile failureFlowFile = session.putAttribute(originalFlowFile, FETCH_FAILURE_REASON_ATTR, t.getMessage() == null ? t.toString() : t.getMessage());
            session.transfer(failureFlowFile, REL_FAILURE);
        }
        // if we got this far then we weren't successful so we need to clean up the child flow file if it got initialized
        if (child != null) {
            session.remove(child);
        }
        return null;
    });
}
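The AtomicReference<Throwable> works around the fact that the session.write callback cannot throw checked exceptions directly: the callback captures any failure, and the caller rethrows it afterwards so the ordinary catch blocks can route the original FlowFile. A stripped-down sketch of the same capture-and-rethrow pattern, assuming session, context, and the REL_* relationships are in scope inside a void onTrigger (not code from the NiFi repository):

FlowFile flowFile = session.get();
if (flowFile == null) {
    return;
}
final byte[] payload = "example".getBytes(StandardCharsets.UTF_8);
final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);
try {
    flowFile = session.write(flowFile, (final OutputStream out) -> {
        try {
            out.write(payload);
        } catch (final Exception e) {
            exceptionHolder.set(e); // capture rather than let it escape the callback
        }
    });
    // rethrow after the callback returns so the failure is visible to routing logic
    if (exceptionHolder.get() != null) {
        throw exceptionHolder.get();
    }
    session.transfer(flowFile, REL_SUCCESS);
} catch (final IOException | FlowFileAccessException e) {
    session.transfer(session.penalize(flowFile), REL_RETRY); // likely transient
    context.yield();
} catch (final Throwable t) {
    session.transfer(flowFile, REL_FAILURE); // anything else is a hard failure
}

Note that throwing the captured Throwable compiles only because the final catch clause handles Throwable within the same method, the same trick the processor above relies on.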
Use of org.apache.nifi.processor.exception.FlowFileAccessException in project nifi by apache.
The onTrigger method of the class AbstractPutHDFSRecord:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // do this before getting a flow file so that we always get a chance to attempt Kerberos relogin
    final FileSystem fileSystem = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();
    if (configuration == null || fileSystem == null || ugi == null) {
        getLogger().error("Processor not configured properly because Configuration, FileSystem, or UserGroupInformation was null");
        context.yield();
        return;
    }
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        context.yield();
        return;
    }
    ugi.doAs((PrivilegedAction<Object>) () -> {
        Path tempDotCopyFile = null;
        FlowFile putFlowFile = flowFile;
        try {
            // TODO codec extension
            final String filenameValue = putFlowFile.getAttribute(CoreAttributes.FILENAME.key());
            final String directoryValue = context.getProperty(DIRECTORY).evaluateAttributeExpressions(putFlowFile).getValue();
            // create the directory if it doesn't exist
            final Path directoryPath = new Path(directoryValue);
            createDirectory(fileSystem, directoryPath, remoteOwner, remoteGroup);
            // write to tempFile first and on success rename to destFile
            final Path tempFile = new Path(directoryPath, "." + filenameValue);
            final Path destFile = new Path(directoryPath, filenameValue);
            final boolean destinationExists = fileSystem.exists(destFile) || fileSystem.exists(tempFile);
            final boolean shouldOverwrite = context.getProperty(OVERWRITE).asBoolean();
            // if the tempFile or destFile already exist, and overwrite is set to false, then transfer to failure
            if (destinationExists && !shouldOverwrite) {
                session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                getLogger().warn("penalizing {} and routing to failure because file with same name already exists", new Object[] { putFlowFile });
                return null;
            }
            final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);
            final AtomicReference<WriteResult> writeResult = new AtomicReference<>();
            final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
            final FlowFile flowFileIn = putFlowFile;
            final StopWatch stopWatch = new StopWatch(true);
            // Read records from the incoming FlowFile and write them to the tempFile
            session.read(putFlowFile, (final InputStream rawIn) -> {
                RecordReader recordReader = null;
                HDFSRecordWriter recordWriter = null;
                try (final BufferedInputStream in = new BufferedInputStream(rawIn)) {
                    // handle this separately from the other IOExceptions which normally route to retry
                    try {
                        recordReader = recordReaderFactory.createRecordReader(flowFileIn, in, getLogger());
                    } catch (Exception e) {
                        final RecordReaderFactoryException rrfe = new RecordReaderFactoryException("Unable to create RecordReader", e);
                        exceptionHolder.set(rrfe);
                        return;
                    }
                    final RecordSet recordSet = recordReader.createRecordSet();
                    recordWriter = createHDFSRecordWriter(context, flowFile, configuration, tempFile, recordReader.getSchema());
                    writeResult.set(recordWriter.write(recordSet));
                } catch (Exception e) {
                    exceptionHolder.set(e);
                } finally {
                    IOUtils.closeQuietly(recordReader);
                    IOUtils.closeQuietly(recordWriter);
                }
            });
            stopWatch.stop();
            final String dataRate = stopWatch.calculateDataRate(putFlowFile.getSize());
            final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
            tempDotCopyFile = tempFile;
            // if any errors happened within the session.read then throw the exception so we jump
            // into one of the appropriate catch blocks below
            if (exceptionHolder.get() != null) {
                throw exceptionHolder.get();
            }
            // Attempt to rename from the tempFile to destFile, and change owner if successfully renamed
            rename(fileSystem, tempFile, destFile);
            changeOwner(fileSystem, destFile, remoteOwner, remoteGroup);
            getLogger().info("Wrote {} to {} in {} milliseconds at a rate of {}", new Object[] { putFlowFile, destFile, millis, dataRate });
            putFlowFile = postProcess(context, session, putFlowFile, destFile);
            final String newFilename = destFile.getName();
            final String hdfsPath = destFile.getParent().toString();
            // Update the filename and absolute path attributes
            final Map<String, String> attributes = new HashMap<>(writeResult.get().getAttributes());
            attributes.put(CoreAttributes.FILENAME.key(), newFilename);
            attributes.put(ABSOLUTE_HDFS_PATH_ATTRIBUTE, hdfsPath);
            attributes.put(RECORD_COUNT_ATTR, String.valueOf(writeResult.get().getRecordCount()));
            putFlowFile = session.putAllAttributes(putFlowFile, attributes);
            // Send a provenance event and transfer to success
            final Path qualifiedPath = destFile.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
            session.getProvenanceReporter().send(putFlowFile, qualifiedPath.toString());
            session.transfer(putFlowFile, REL_SUCCESS);
        } catch (IOException | FlowFileAccessException e) {
            deleteQuietly(fileSystem, tempDotCopyFile);
            getLogger().error("Failed to write due to {}", new Object[] { e });
            session.transfer(session.penalize(putFlowFile), REL_RETRY);
            context.yield();
        } catch (Throwable t) {
            deleteQuietly(fileSystem, tempDotCopyFile);
            getLogger().error("Failed to write due to {}", new Object[] { t });
            session.transfer(putFlowFile, REL_FAILURE);
        }
        return null;
    });
}
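The dot-prefixed tempFile is the usual HDFS convention for hiding in-progress writes: consumers that skip hidden files never observe a partial file, and the final rename is atomic within a directory. A hedged, stand-alone sketch of that convention using the Hadoop FileSystem API, assuming fileSystem, directoryPath, filename, and a byte[] content are in scope in a method that throws IOException (this is not the processor's actual helper code):

final Path tempFile = new Path(directoryPath, "." + filename);
final Path destFile = new Path(directoryPath, filename);
// write everything to the hidden temp file first
try (final FSDataOutputStream out = fileSystem.create(tempFile, true)) {
    out.write(content);
}
// rename is atomic within a single HDFS directory, so readers see either
// nothing or the complete file, never a partial write
if (!fileSystem.rename(tempFile, destFile)) {
    fileSystem.delete(tempFile, false); // best-effort cleanup
    throw new IOException("Failed to rename " + tempFile + " to " + destFile);
}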
Use of org.apache.nifi.processor.exception.FlowFileAccessException in project nifi by apache.
The importFrom method of the class StandardProcessSession:
@Override
public FlowFile importFrom(final Path source, final boolean keepSourceFile, FlowFile destination) {
    verifyTaskActive();
    destination = validateRecordState(destination);
    // TODO: find a better solution. With Windows 7 and Java 7 (very early update, at least), Files.isWritable(source.getParent()) returns false, even when it should be true.
    if (!keepSourceFile && !Files.isWritable(source.getParent()) && !source.getParent().toFile().canWrite()) {
        // If we do NOT want to keep the file, ensure that we can delete it, or else error.
        throw new FlowFileAccessException("Cannot write to path " + source.getParent().toFile().getAbsolutePath() + " so cannot delete file; will not import.");
    }
    final StandardRepositoryRecord record = records.get(destination);
    final ContentClaim newClaim;
    final long claimOffset;
    try {
        newClaim = context.getContentRepository().create(context.getConnectable().isLossTolerant());
        claimLog.debug("Creating ContentClaim {} for 'importFrom' for {}", newClaim, destination);
    } catch (final IOException e) {
        throw new FlowFileAccessException("Unable to create ContentClaim due to " + e.toString(), e);
    }
    claimOffset = 0L;
    long newSize = 0L;
    try {
        newSize = context.getContentRepository().importFrom(source, newClaim);
        bytesWritten += newSize;
        bytesRead += newSize;
    } catch (final Throwable t) {
        destroyContent(newClaim);
        throw new FlowFileAccessException("Failed to import data from " + source + " for " + destination + " due to " + t.toString(), t);
    }
    removeTemporaryClaim(record);
    final FlowFileRecord newFile = new StandardFlowFileRecord.Builder()
        .fromFlowFile(record.getCurrent())
        .contentClaim(newClaim)
        .contentClaimOffset(claimOffset)
        .size(newSize)
        .addAttribute(CoreAttributes.FILENAME.key(), source.toFile().getName())
        .build();
    record.setWorking(newFile, CoreAttributes.FILENAME.key(), source.toFile().getName());
    if (!keepSourceFile) {
        deleteOnCommit.put(newFile, source);
    }
    return newFile;
}
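From the caller's side, importFrom either returns the new FlowFile or throws the unchecked FlowFileAccessException after destroying the partially written claim, so a processor never cleans up repository content itself. A hedged sketch of processor code using it, with session and context assumed in scope and the file path a purely illustrative placeholder:

FlowFile flowFile = session.create();
try {
    // keepSourceFile = true: leave the source file on disk after importing
    flowFile = session.importFrom(java.nio.file.Paths.get("/tmp/example.dat"), true, flowFile);
    session.transfer(flowFile, REL_SUCCESS);
} catch (final FlowFileAccessException ffae) {
    // the session has already destroyed the partial content claim
    session.remove(flowFile);
    context.yield();
}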
Use of org.apache.nifi.processor.exception.FlowFileAccessException in project nifi by apache.
The write method of the class StandardProcessSession:
@Override
public FlowFile write(FlowFile source, final StreamCallback writer) {
    verifyTaskActive();
    source = validateRecordState(source);
    final StandardRepositoryRecord record = records.get(source);
    final ContentClaim currClaim = record.getCurrentClaim();
    long writtenToFlowFile = 0L;
    ContentClaim newClaim = null;
    try {
        newClaim = claimCache.getContentClaim();
        claimLog.debug("Creating ContentClaim {} for 'write' for {}", newClaim, source);
        ensureNotAppending(newClaim);
        if (currClaim != null) {
            claimCache.flush(currClaim.getResourceClaim());
        }
        try (final InputStream is = getInputStream(source, currClaim, record.getCurrentClaimOffset(), true);
             final InputStream limitedIn = new LimitedInputStream(is, source.getSize());
             final InputStream disableOnCloseIn = new DisableOnCloseInputStream(limitedIn);
             final ByteCountingInputStream countingIn = new ByteCountingInputStream(disableOnCloseIn, bytesRead);
             final OutputStream os = claimCache.write(newClaim);
             final OutputStream disableOnCloseOut = new DisableOnCloseOutputStream(os);
             final ByteCountingOutputStream countingOut = new ByteCountingOutputStream(disableOnCloseOut)) {
            writeRecursionSet.add(source);
            // We want to differentiate between IOExceptions thrown by the repository and IOExceptions thrown from
            // Processor code. As a result, we have the FlowFileAccessInputStream that catches IOException from the repository
            // and translates it into either FlowFileAccessException or ContentNotFoundException. We keep track of any
            // ContentNotFoundException because, if it is thrown, the Processor code may catch it and do something else with it,
            // but in reality, if it is thrown, we want to know about it and handle it, even if the Processor code catches it.
            final FlowFileAccessInputStream ffais = new FlowFileAccessInputStream(countingIn, source, currClaim);
            final FlowFileAccessOutputStream ffaos = new FlowFileAccessOutputStream(countingOut, source);
            boolean cnfeThrown = false;
            try {
                writer.process(createTaskTerminationStream(ffais), createTaskTerminationStream(ffaos));
            } catch (final ContentNotFoundException cnfe) {
                cnfeThrown = true;
                throw cnfe;
            } finally {
                writtenToFlowFile = countingOut.getBytesWritten();
                this.bytesWritten += writtenToFlowFile;
                this.bytesRead += countingIn.getBytesRead();
                writeRecursionSet.remove(source);
                // if cnfeThrown is true, we don't need to re-throw the Exception; it will propagate.
                if (!cnfeThrown && ffais.getContentNotFoundException() != null) {
                    throw ffais.getContentNotFoundException();
                }
            }
        }
    } catch (final ContentNotFoundException nfe) {
        destroyContent(newClaim);
        handleContentNotFound(nfe, record);
    } catch (final IOException ioe) {
        destroyContent(newClaim);
        throw new ProcessException("IOException thrown from " + connectableDescription + ": " + ioe.toString(), ioe);
    } catch (final FlowFileAccessException ffae) {
        destroyContent(newClaim);
        throw ffae;
    } catch (final Throwable t) {
        destroyContent(newClaim);
        throw t;
    }
    removeTemporaryClaim(record);
    final FlowFileRecord newFile = new StandardFlowFileRecord.Builder()
        .fromFlowFile(record.getCurrent())
        .contentClaim(newClaim)
        .contentClaimOffset(Math.max(0L, newClaim.getLength() - writtenToFlowFile))
        .size(writtenToFlowFile)
        .build();
    record.setWorking(newFile);
    return newFile;
}
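From a processor's perspective, all of this stream layering is invisible: the StreamCallback simply reads the old content and writes the replacement, and any repository failure surfaces as FlowFileAccessException after the session has destroyed the new claim. A hedged usage sketch, with session and flowFile assumed in scope and a deliberately toy byte-wise transform:

flowFile = session.write(flowFile, (final InputStream in, final OutputStream out) -> {
    int b;
    while ((b = in.read()) != -1) {
        out.write(Character.toUpperCase(b)); // toy transform: upper-case each byte
    }
});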