Use of org.apache.nifi.processors.hadoop.record.HDFSRecordReader in project nifi by apache.
From the class FetchParquetTest, method testIOExceptionWhileReadingShouldRouteToRetry:
@Test
public void testIOExceptionWhileReadingShouldRouteToRetry() throws IOException, InitializationException {
    final FetchParquet proc = new FetchParquet() {
        @Override
        public HDFSRecordReader createHDFSRecordReader(ProcessContext context, FlowFile flowFile, Configuration conf, Path path) throws IOException {
            return new HDFSRecordReader() {
                @Override
                public Record nextRecord() throws IOException {
                    throw new IOException("IOException");
                }

                @Override
                public void close() throws IOException {
                }
            };
        }
    };
    configure(proc);

    final File parquetDir = new File(DIRECTORY);
    final File parquetFile = new File(parquetDir, "testFetchParquetToCSV.parquet");
    final int numUsers = 10;
    writeParquetUsers(parquetFile, numUsers);

    final Map<String, String> attributes = new HashMap<>();
    attributes.put(CoreAttributes.PATH.key(), parquetDir.getAbsolutePath());
    attributes.put(CoreAttributes.FILENAME.key(), parquetFile.getName());

    testRunner.enqueue("TRIGGER", attributes);
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(FetchParquet.REL_RETRY, 1);

    final MockFlowFile flowFile = testRunner.getFlowFilesForRelationship(FetchParquet.REL_RETRY).get(0);
    flowFile.assertContentEquals("TRIGGER");
}
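The anonymous reader above exists only to force the error path. For contrast, a reader following the normal HDFSRecordReader contract hands back records until it runs out and then returns null from nextRecord(). The sketch below illustrates that contract with an in-memory stub; the class name QueueBackedRecordReader and its constructor are invented for this example and are not part of the NiFi codebase, and the import paths assume the standard NiFi 1.x packages.

import java.io.IOException;
import java.util.Collection;
import java.util.LinkedList;
import java.util.Queue;

import org.apache.nifi.processors.hadoop.record.HDFSRecordReader;
import org.apache.nifi.serialization.record.Record;

// Hypothetical in-memory stub used only to illustrate the HDFSRecordReader contract the test relies on:
// nextRecord() returns records until none remain, then returns null; close() releases resources.
public class QueueBackedRecordReader implements HDFSRecordReader {

    private final Queue<Record> records;

    public QueueBackedRecordReader(final Collection<Record> records) {
        this.records = new LinkedList<>(records);
    }

    @Override
    public Record nextRecord() throws IOException {
        // returning null signals that the record set has been fully consumed
        return records.poll();
    }

    @Override
    public void close() throws IOException {
        // nothing to release for an in-memory stub
    }
}

Overriding createHDFSRecordReader to return a stub like this, instead of the throwing reader above, is the same injection technique the test uses, only driving the success path rather than the retry path.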
Use of org.apache.nifi.processors.hadoop.record.HDFSRecordReader in project nifi by apache.
From the class AbstractFetchHDFSRecord, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // do this before getting a flow file so that we always get a chance to attempt Kerberos relogin
    final FileSystem fileSystem = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();

    if (configuration == null || fileSystem == null || ugi == null) {
        getLogger().error("Processor not configured properly because Configuration, FileSystem, or UserGroupInformation was null");
        context.yield();
        return;
    }

    final FlowFile originalFlowFile = session.get();
    if (originalFlowFile == null) {
        context.yield();
        return;
    }
    ugi.doAs((PrivilegedAction<Object>) () -> {
        FlowFile child = null;
        final String filenameValue = context.getProperty(FILENAME).evaluateAttributeExpressions(originalFlowFile).getValue();
        try {
            final Path path = new Path(filenameValue);
            final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);
            final AtomicReference<WriteResult> writeResult = new AtomicReference<>();
            final RecordSetWriterFactory recordSetWriterFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
            final StopWatch stopWatch = new StopWatch(true);

            // use a child FlowFile so that if any error occurs we can route the original untouched FlowFile to retry/failure
            child = session.create(originalFlowFile);

            final AtomicReference<String> mimeTypeRef = new AtomicReference<>();
            child = session.write(child, (final OutputStream rawOut) -> {
                try (final BufferedOutputStream out = new BufferedOutputStream(rawOut);
                     final HDFSRecordReader recordReader = createHDFSRecordReader(context, originalFlowFile, configuration, path)) {

                    Record record = recordReader.nextRecord();
                    final RecordSchema schema = recordSetWriterFactory.getSchema(originalFlowFile.getAttributes(), record == null ? null : record.getSchema());

                    try (final RecordSetWriter recordSetWriter = recordSetWriterFactory.createWriter(getLogger(), schema, out)) {
                        recordSetWriter.beginRecordSet();
                        if (record != null) {
                            recordSetWriter.write(record);
                        }
                        while ((record = recordReader.nextRecord()) != null) {
                            recordSetWriter.write(record);
                        }
                        writeResult.set(recordSetWriter.finishRecordSet());
                        mimeTypeRef.set(recordSetWriter.getMimeType());
                    }
                } catch (Exception e) {
                    exceptionHolder.set(e);
                }
            });
            stopWatch.stop();
            // if any errors happened within the session.write callback, rethrow the captured exception
            // so that it gets routed into one of the appropriate catch blocks below
            if (exceptionHolder.get() != null) {
                throw exceptionHolder.get();
            }
            FlowFile successFlowFile = postProcess(context, session, child, path);

            final Map<String, String> attributes = new HashMap<>(writeResult.get().getAttributes());
            attributes.put(RECORD_COUNT_ATTR, String.valueOf(writeResult.get().getRecordCount()));
            attributes.put(CoreAttributes.MIME_TYPE.key(), mimeTypeRef.get());
            successFlowFile = session.putAllAttributes(successFlowFile, attributes);

            final Path qualifiedPath = path.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
            getLogger().info("Successfully received content from {} for {} in {} milliseconds", new Object[] {qualifiedPath, successFlowFile, stopWatch.getDuration()});
            session.getProvenanceReporter().fetch(successFlowFile, qualifiedPath.toString(), stopWatch.getDuration(TimeUnit.MILLISECONDS));
            session.transfer(successFlowFile, REL_SUCCESS);
            session.remove(originalFlowFile);
            return null;
        } catch (final FileNotFoundException | AccessControlException e) {
            getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure", new Object[] {filenameValue, originalFlowFile, e});
            final FlowFile failureFlowFile = session.putAttribute(originalFlowFile, FETCH_FAILURE_REASON_ATTR, e.getMessage() == null ? e.toString() : e.getMessage());
            session.transfer(failureFlowFile, REL_FAILURE);
        } catch (final IOException | FlowFileAccessException e) {
            getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to retry", new Object[] {filenameValue, originalFlowFile, e});
            session.transfer(session.penalize(originalFlowFile), REL_RETRY);
            context.yield();
        } catch (final Throwable t) {
            getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure", new Object[] {filenameValue, originalFlowFile, t});
            final FlowFile failureFlowFile = session.putAttribute(originalFlowFile, FETCH_FAILURE_REASON_ATTR, t.getMessage() == null ? t.toString() : t.getMessage());
            session.transfer(failureFlowFile, REL_FAILURE);
        }

        // if we got this far then we weren't successful so we need to clean up the child flow file if it got initialized
        if (child != null) {
            session.remove(child);
        }

        return null;
    });
}
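Because the session.write callback cannot let checked exceptions escape, onTrigger stashes any failure in an AtomicReference and rethrows it once the callback returns, which is what sends the flow file to the retry or failure catch blocks. Below is a self-contained sketch of that capture-and-rethrow pattern; the names IoCallback and writeWithCapturedErrors are invented for the illustration and are not NiFi APIs.

import java.io.IOException;
import java.util.concurrent.atomic.AtomicReference;

public class CapturedErrorExample {

    // Mirrors the session.write callback above: a callback that may throw a checked IOException.
    @FunctionalInterface
    interface IoCallback {
        void process(String payload) throws IOException;
    }

    // Runs the callback in a context that cannot propagate checked exceptions, captures whatever
    // it throws, and rethrows it afterwards so ordinary catch blocks can route the error.
    static void writeWithCapturedErrors(final IoCallback callback) throws Throwable {
        final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);

        final Runnable session = () -> {
            try {
                callback.process("payload");
            } catch (Exception e) {
                exceptionHolder.set(e);
            }
        };
        session.run();

        // the counterpart of the "throw exceptionHolder.get()" step in onTrigger
        if (exceptionHolder.get() != null) {
            throw exceptionHolder.get();
        }
    }

    public static void main(String[] args) {
        try {
            writeWithCapturedErrors(payload -> {
                throw new IOException("simulated read failure");
            });
        } catch (Throwable t) {
            // in the processor, this is where the IOException catch block penalizes the flow file and routes it to REL_RETRY
            System.out.println("captured and rethrown: " + t);
        }
    }
}

The same reasoning explains the test at the top of this page: the injected reader throws an IOException inside the callback, the holder rethrows it after the write completes, and the IOException catch block penalizes the original flow file and transfers it to REL_RETRY.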