Use of org.apache.nifi.processors.hadoop.record.HDFSRecordWriter in the project nifi by apache.
The method onTrigger of the class AbstractPutHDFSRecord.
/**
 * Reads the records of one incoming FlowFile and writes them to a file in the directory
 * given by the DIRECTORY property, running the whole operation as the processor's
 * UserGroupInformation.
 *
 * <p>The records are first written to a temporary file named {@code "." + filename} in the
 * target directory, which is then renamed to the final filename. Outcomes:
 * <ul>
 *   <li>temp or destination file already exists and OVERWRITE is false: the FlowFile is
 *       penalized and routed to REL_FAILURE;</li>
 *   <li>success: the filename, absolute HDFS path and record count attributes are set, a
 *       provenance SEND event is reported and the FlowFile is routed to REL_SUCCESS;</li>
 *   <li>{@code IOException} or {@code FlowFileAccessException}: the temp file is deleted, the
 *       FlowFile is penalized and routed to REL_RETRY, and the processor yields;</li>
 *   <li>any other {@code Throwable}: the temp file is deleted and the FlowFile is routed to
 *       REL_FAILURE.</li>
 * </ul>
 *
 * @param context the process context used to read property values and to yield
 * @param session the session the FlowFile is pulled from and transferred through
 * @throws ProcessException declared by the overridden method
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // do this before getting a flow file so that we always get a chance to attempt Kerberos relogin
    final FileSystem fileSystem = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();

    if (configuration == null || fileSystem == null || ugi == null) {
        getLogger().error("Processor not configured properly because Configuration, FileSystem, or UserGroupInformation was null");
        context.yield();
        return;
    }

    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        context.yield();
        return;
    }

    ugi.doAs((PrivilegedAction<Object>) () -> {
        // remembered outside the try so the catch blocks can clean up the temp file
        Path tempDotCopyFile = null;
        FlowFile putFlowFile = flowFile;
        try {
            // TODO codec extension
            final String filenameValue = putFlowFile.getAttribute(CoreAttributes.FILENAME.key());
            final String directoryValue = context.getProperty(DIRECTORY).evaluateAttributeExpressions(putFlowFile).getValue();

            // create the directory if it doesn't exist
            final Path directoryPath = new Path(directoryValue);
            createDirectory(fileSystem, directoryPath, remoteOwner, remoteGroup);

            // write to tempFile first and on success rename to destFile
            final Path tempFile = new Path(directoryPath, "." + filenameValue);
            final Path destFile = new Path(directoryPath, filenameValue);

            final boolean destinationExists = fileSystem.exists(destFile) || fileSystem.exists(tempFile);
            final boolean shouldOverwrite = context.getProperty(OVERWRITE).asBoolean();

            // if the tempFile or destFile already exist, and overwrite is set to false, then transfer to failure
            if (destinationExists && !shouldOverwrite) {
                session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                getLogger().warn("penalizing {} and routing to failure because file with same name already exists", new Object[] { putFlowFile });
                return null;
            }

            final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);
            final AtomicReference<WriteResult> writeResult = new AtomicReference<>();
            final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);

            final FlowFile flowFileIn = putFlowFile;
            final StopWatch stopWatch = new StopWatch(true);

            // From here on the temp file may exist, so register it for cleanup before reading;
            // otherwise an exception thrown by session.read itself would leave it behind
            tempDotCopyFile = tempFile;

            // Read records from the incoming FlowFile and write them the tempFile
            session.read(putFlowFile, (final InputStream rawIn) -> {
                RecordReader recordReader = null;
                HDFSRecordWriter recordWriter = null;

                try (final BufferedInputStream in = new BufferedInputStream(rawIn)) {
                    // handle this separately from the other IOExceptions which normally route to retry
                    try {
                        recordReader = recordReaderFactory.createRecordReader(flowFileIn, in, getLogger());
                    } catch (Exception e) {
                        final RecordReaderFactoryException rrfe = new RecordReaderFactoryException("Unable to create RecordReader", e);
                        exceptionHolder.set(rrfe);
                        return;
                    }

                    final RecordSet recordSet = recordReader.createRecordSet();
                    recordWriter = createHDFSRecordWriter(context, flowFile, configuration, tempFile, recordReader.getSchema());
                    writeResult.set(recordWriter.write(recordSet));
                } catch (Exception e) {
                    // held rather than thrown so the outer catch blocks pick the relationship
                    exceptionHolder.set(e);
                } finally {
                    IOUtils.closeQuietly(recordReader);
                    IOUtils.closeQuietly(recordWriter);
                }
            });
            stopWatch.stop();

            final String dataRate = stopWatch.calculateDataRate(putFlowFile.getSize());
            final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);

            // if any errors happened within the session.read then throw the exception so we jump
            // into one of the appropriate catch blocks below
            if (exceptionHolder.get() != null) {
                throw exceptionHolder.get();
            }

            // Hadoop's FileSystem.rename does not replace an existing destination file, so when
            // overwriting is enabled the old file has to be removed before the rename below
            if (shouldOverwrite && fileSystem.exists(destFile)) {
                if (fileSystem.delete(destFile, false)) {
                    getLogger().info("deleted {} in order to replace with the contents of {}", new Object[] { destFile, putFlowFile });
                }
            }

            // Attempt to rename from the tempFile to destFile, and change owner if successfully renamed
            rename(fileSystem, tempFile, destFile);
            changeOwner(fileSystem, destFile, remoteOwner, remoteGroup);

            getLogger().info("Wrote {} to {} in {} milliseconds at a rate of {}", new Object[] { putFlowFile, destFile, millis, dataRate });

            putFlowFile = postProcess(context, session, putFlowFile, destFile);

            final String newFilename = destFile.getName();
            final String hdfsPath = destFile.getParent().toString();

            // Update the filename and absolute path attributes
            final Map<String, String> attributes = new HashMap<>(writeResult.get().getAttributes());
            attributes.put(CoreAttributes.FILENAME.key(), newFilename);
            attributes.put(ABSOLUTE_HDFS_PATH_ATTRIBUTE, hdfsPath);
            attributes.put(RECORD_COUNT_ATTR, String.valueOf(writeResult.get().getRecordCount()));
            putFlowFile = session.putAllAttributes(putFlowFile, attributes);

            // Send a provenance event and transfer to success
            final Path qualifiedPath = destFile.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
            session.getProvenanceReporter().send(putFlowFile, qualifiedPath.toString());
            session.transfer(putFlowFile, REL_SUCCESS);

        } catch (IOException | FlowFileAccessException e) {
            // I/O problems are treated as retryable: clean up, penalize, and back off
            deleteQuietly(fileSystem, tempDotCopyFile);
            getLogger().error("Failed to write due to {}", new Object[] { e });
            session.transfer(session.penalize(putFlowFile), REL_RETRY);
            context.yield();
        } catch (Throwable t) {
            // anything else (including a failure to create the RecordReader) goes to failure
            deleteQuietly(fileSystem, tempDotCopyFile);
            getLogger().error("Failed to write due to {}", new Object[] { t });
            session.transfer(putFlowFile, REL_FAILURE);
        }

        return null;
    });
}
Aggregations