Use of org.apache.nifi.util.StopWatch in project nifi by apache.
The class CreateHadoopSequenceFile, method onTrigger.
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    String mimeType = flowFile.getAttribute(CoreAttributes.MIME_TYPE.key());
    String packagingFormat = NOT_PACKAGED;
    if (null != mimeType) {
        switch (mimeType.toLowerCase()) {
            case "application/tar":
                packagingFormat = TAR_FORMAT;
                break;
            case "application/zip":
                packagingFormat = ZIP_FORMAT;
                break;
            case "application/flowfile-v3":
                packagingFormat = FLOWFILE_STREAM_FORMAT_V3;
                break;
            default:
                getLogger().warn("Cannot unpack {} because its mime.type attribute is set to '{}', which is not a format that can be unpacked", new Object[] { flowFile, mimeType });
        }
    }
    final SequenceFileWriter sequenceFileWriter;
    switch (packagingFormat) {
        case TAR_FORMAT:
            sequenceFileWriter = new TarUnpackerSequenceFileWriter();
            break;
        case ZIP_FORMAT:
            sequenceFileWriter = new ZipUnpackerSequenceFileWriter();
            break;
        case FLOWFILE_STREAM_FORMAT_V3:
            sequenceFileWriter = new FlowFileStreamUnpackerSequenceFileWriter();
            break;
        default:
            sequenceFileWriter = new SequenceFileWriterImpl();
    }
    final Configuration configuration = getConfiguration();
    if (configuration == null) {
        getLogger().error("HDFS not configured properly");
        session.transfer(flowFile, RELATIONSHIP_FAILURE);
        context.yield();
        return;
    }
    final CompressionCodec codec = getCompressionCodec(context, configuration);
    final String value = context.getProperty(COMPRESSION_TYPE).getValue();
    final SequenceFile.CompressionType compressionType = value == null ? SequenceFile.CompressionType.valueOf(DEFAULT_COMPRESSION_TYPE) : SequenceFile.CompressionType.valueOf(value);
    final String fileName = flowFile.getAttribute(CoreAttributes.FILENAME.key()) + ".sf";
    flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), fileName);
    try {
        StopWatch stopWatch = new StopWatch(true);
        flowFile = sequenceFileWriter.writeSequenceFile(flowFile, session, configuration, compressionType, codec);
        session.getProvenanceReporter().modifyContent(flowFile, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        session.transfer(flowFile, RELATIONSHIP_SUCCESS);
        getLogger().info("Transferred flowfile {} to {}", new Object[] { flowFile, RELATIONSHIP_SUCCESS });
    } catch (ProcessException e) {
        getLogger().error("Failed to create Sequence File. Transferring {} to 'failure'", new Object[] { flowFile }, e);
        session.transfer(flowFile, RELATIONSHIP_FAILURE);
    }
}
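
The StopWatch usage above relies on the auto-start constructor: the watch is created with true, the sequence file is written, and the elapsed time is read straight into the provenance event, without an explicit stop(). A minimal standalone sketch of that pattern; the class name and the Thread.sleep stand-in for the real work are hypothetical, not part of the NiFi source:

import java.util.concurrent.TimeUnit;

import org.apache.nifi.util.StopWatch;

public class StopWatchElapsedSketch {

    public static void main(String[] args) throws InterruptedException {
        // true = start timing immediately, exactly as in onTrigger above
        final StopWatch stopWatch = new StopWatch(true);
        // stand-in for the real work (writing the sequence file)
        Thread.sleep(50);
        // read while the watch is still running, as onTrigger does; no stop() call
        final long millis = stopWatch.getElapsed(TimeUnit.MILLISECONDS);
        System.out.println("work took " + millis + " ms");
    }
}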
The class FetchHDFS, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final FileSystem hdfs = getFileSystem();
    final UserGroupInformation ugi = getUserGroupInformation();
    final String filenameValue = context.getProperty(FILENAME).evaluateAttributeExpressions(flowFile).getValue();
    final Path path;
    try {
        path = new Path(filenameValue);
    } catch (IllegalArgumentException e) {
        getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure", new Object[] { filenameValue, flowFile, e });
        flowFile = session.putAttribute(flowFile, "hdfs.failure.reason", e.getMessage());
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    final StopWatch stopWatch = new StopWatch(true);
    final FlowFile finalFlowFile = flowFile;
    ugi.doAs(new PrivilegedAction<Object>() {

        @Override
        public Object run() {
            InputStream stream = null;
            CompressionCodec codec = null;
            Configuration conf = getConfiguration();
            final CompressionCodecFactory compressionCodecFactory = new CompressionCodecFactory(conf);
            final CompressionType compressionType = CompressionType.valueOf(context.getProperty(COMPRESSION_CODEC).toString());
            final boolean inferCompressionCodec = compressionType == CompressionType.AUTOMATIC;
            if (inferCompressionCodec) {
                codec = compressionCodecFactory.getCodec(path);
            } else if (compressionType != CompressionType.NONE) {
                codec = getCompressionCodec(context, getConfiguration());
            }
            FlowFile flowFile = finalFlowFile;
            final Path qualifiedPath = path.makeQualified(hdfs.getUri(), hdfs.getWorkingDirectory());
            try {
                final String outputFilename;
                final String originalFilename = path.getName();
                stream = hdfs.open(path, 16384);
                // Check if compression codec is defined (inferred or otherwise)
                if (codec != null) {
                    stream = codec.createInputStream(stream);
                    outputFilename = StringUtils.removeEnd(originalFilename, codec.getDefaultExtension());
                } else {
                    outputFilename = originalFilename;
                }
                flowFile = session.importFrom(stream, finalFlowFile);
                flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), outputFilename);
                stopWatch.stop();
                getLogger().info("Successfully received content from {} for {} in {}", new Object[] { qualifiedPath, flowFile, stopWatch.getDuration() });
                session.getProvenanceReporter().fetch(flowFile, qualifiedPath.toString(), stopWatch.getDuration(TimeUnit.MILLISECONDS));
                session.transfer(flowFile, REL_SUCCESS);
            } catch (final FileNotFoundException | AccessControlException e) {
                getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure", new Object[] { qualifiedPath, flowFile, e });
                flowFile = session.putAttribute(flowFile, "hdfs.failure.reason", e.getMessage());
                flowFile = session.penalize(flowFile);
                session.transfer(flowFile, REL_FAILURE);
            } catch (final IOException e) {
                getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to comms.failure", new Object[] { qualifiedPath, flowFile, e });
                flowFile = session.penalize(flowFile);
                session.transfer(flowFile, REL_COMMS_FAILURE);
            } finally {
                IOUtils.closeQuietly(stream);
            }
            return null;
        }
    });
}
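
Here the watch is stopped once the content has been imported, and the result is read twice: the no-argument getDuration() is placed into the log message, while getDuration(TimeUnit.MILLISECONDS) supplies the numeric duration for the provenance fetch event. A minimal sketch of that stop-then-read pattern; the class name and the Thread.sleep standing in for the HDFS read are hypothetical:

import java.util.concurrent.TimeUnit;

import org.apache.nifi.util.StopWatch;

public class StopWatchDurationSketch {

    public static void main(String[] args) throws InterruptedException {
        final StopWatch stopWatch = new StopWatch(true);
        // stand-in for opening the HDFS stream and importing the content
        Thread.sleep(50);
        stopWatch.stop();
        // no-argument form: used above inside the log message
        System.out.println("received content in " + stopWatch.getDuration());
        // TimeUnit form: used above for the provenance fetch event
        System.out.println("millis for provenance: " + stopWatch.getDuration(TimeUnit.MILLISECONDS));
    }
}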
The class GetHDFS, method onTrigger.
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    int batchSize = context.getProperty(BATCH_SIZE).asInteger();
    final List<Path> files = new ArrayList<>(batchSize);
    // retrieve new file names from HDFS and place them into work queue
    if (filePathQueue.size() < MAX_WORKING_QUEUE_SIZE / 2) {
        try {
            final StopWatch stopWatch = new StopWatch(true);
            Set<Path> listedFiles = performListing(context);
            stopWatch.stop();
            final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
            if (listedFiles != null) {
                // place files into the work queue
                int newItems = 0;
                queueLock.lock();
                try {
                    for (Path file : listedFiles) {
                        if (!filePathQueue.contains(file) && !processing.contains(file)) {
                            if (!filePathQueue.offer(file)) {
                                break;
                            }
                            newItems++;
                        }
                    }
                } catch (Exception e) {
                    getLogger().warn("Could not add to processing queue due to {}", new Object[] { e });
                } finally {
                    queueLock.unlock();
                }
                if (listedFiles.size() > 0) {
                    logEmptyListing.set(3L);
                }
                if (logEmptyListing.getAndDecrement() > 0) {
                    getLogger().info("Obtained file listing in {} milliseconds; listing had {} items, {} of which were new", new Object[] { millis, listedFiles.size(), newItems });
                }
            }
        } catch (IOException e) {
            context.yield();
            getLogger().warn("Error while retrieving list of files due to {}", new Object[] { e });
            return;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            context.yield();
            getLogger().warn("Interrupted while retrieving files", e);
            return;
        }
    }
    // prepare to process a batch of files in the queue
    queueLock.lock();
    try {
        filePathQueue.drainTo(files, batchSize);
        if (files.isEmpty()) {
            // nothing to do!
            context.yield();
            return;
        }
        processing.addAll(files);
    } finally {
        queueLock.unlock();
    }
    processBatchOfFiles(files, context, session);
    queueLock.lock();
    try {
        processing.removeAll(files);
    } finally {
        queueLock.unlock();
    }
}
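
Besides the timing, the core structure of this method is the lock-protected handoff between listing and batch processing: the queue and the in-flight set are only touched while queueLock is held, a batch is claimed with drainTo, processed outside the lock, and then removed from the in-flight set. A minimal sketch of that handoff; the field types (BlockingQueue, HashSet, ReentrantLock) and the String paths are hypothetical stand-ins that mirror the fields referenced above:

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.locks.ReentrantLock;

public class WorkQueueHandoffSketch {

    private final BlockingQueue<String> filePathQueue = new LinkedBlockingQueue<>();
    private final Set<String> processing = new HashSet<>();
    private final ReentrantLock queueLock = new ReentrantLock();

    public void enqueue(String path) {
        queueLock.lock();
        try {
            // skip paths that are already queued or currently being processed
            if (!filePathQueue.contains(path) && !processing.contains(path)) {
                filePathQueue.offer(path);
            }
        } finally {
            queueLock.unlock();
        }
    }

    public void processBatch(int batchSize) {
        final List<String> files = new ArrayList<>(batchSize);
        queueLock.lock();
        try {
            // claim up to batchSize items and mark them in-flight
            filePathQueue.drainTo(files, batchSize);
            if (files.isEmpty()) {
                return;
            }
            processing.addAll(files);
        } finally {
            queueLock.unlock();
        }
        // ... process the claimed files outside the lock ...
        queueLock.lock();
        try {
            processing.removeAll(files);
        } finally {
            queueLock.unlock();
        }
    }

    public static void main(String[] args) {
        WorkQueueHandoffSketch sketch = new WorkQueueHandoffSketch();
        sketch.enqueue("/data/file-1");
        sketch.enqueue("/data/file-2");
        sketch.processBatch(10);
    }
}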
The class GetHDFSSequenceFile, method processBatchOfFiles.
@Override
protected void processBatchOfFiles(final List<Path> files, final ProcessContext context, final ProcessSession session) {
    final Configuration conf = getConfiguration();
    final FileSystem hdfs = getFileSystem();
    final String flowFileContentValue = context.getProperty(FLOWFILE_CONTENT).getValue();
    final boolean keepSourceFiles = context.getProperty(KEEP_SOURCE_FILE).asBoolean();
    final Double bufferSizeProp = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B);
    if (bufferSizeProp != null) {
        int bufferSize = bufferSizeProp.intValue();
        conf.setInt(BUFFER_SIZE_KEY, bufferSize);
    }
    ComponentLog logger = getLogger();
    final SequenceFileReader<Set<FlowFile>> reader;
    if (flowFileContentValue.equalsIgnoreCase(VALUE_ONLY)) {
        reader = new ValueReader(session);
    } else {
        reader = new KeyValueReader(session);
    }
    Set<FlowFile> flowFiles = Collections.emptySet();
    for (final Path file : files) {
        if (!this.isScheduled()) {
            // This processor should stop running immediately.
            break;
        }
        final StopWatch stopWatch = new StopWatch(false);
        try {
            stopWatch.start();
            if (!hdfs.exists(file)) {
                // If file is no longer here move on.
                continue;
            }
            logger.debug("Reading file");
            flowFiles = getFlowFiles(conf, hdfs, reader, file);
            if (!keepSourceFiles && !hdfs.delete(file, false)) {
                logger.warn("Unable to delete path " + file.toString() + " from HDFS. Will likely be picked up over and over...");
            }
        } catch (Throwable t) {
            logger.error("Error retrieving file {} from HDFS due to {}", new Object[] { file, t });
            session.rollback();
            context.yield();
        } finally {
            stopWatch.stop();
            long totalSize = 0;
            for (FlowFile flowFile : flowFiles) {
                totalSize += flowFile.getSize();
                session.getProvenanceReporter().receive(flowFile, file.toString());
            }
            if (totalSize > 0) {
                final String dataRate = stopWatch.calculateDataRate(totalSize);
                final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
                logger.info("Created {} flowFiles from SequenceFile {}. Ingested in {} milliseconds at a rate of {}", new Object[] { flowFiles.size(), file.toUri().toASCIIString(), millis, dataRate });
                logger.info("Transferred flowFiles {} to success", new Object[] { flowFiles });
                session.transfer(flowFiles, REL_SUCCESS);
            }
        }
    }
}
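
This example starts the watch manually (constructed with false, then start()) and, in the finally block, combines getDuration(TimeUnit.MILLISECONDS) with calculateDataRate(totalSize) to log both the ingest latency and a throughput figure. A minimal sketch of that pattern; the class name, the Thread.sleep standing in for the SequenceFile read, and the byte count are invented for illustration:

import java.util.concurrent.TimeUnit;

import org.apache.nifi.util.StopWatch;

public class StopWatchDataRateSketch {

    public static void main(String[] args) throws InterruptedException {
        // false = construct without starting, then start explicitly as above
        final StopWatch stopWatch = new StopWatch(false);
        stopWatch.start();
        // stand-in for reading the SequenceFile into flow files
        Thread.sleep(50);
        stopWatch.stop();
        // hypothetical total size of the created flow files
        final long totalSize = 4L * 1024 * 1024;
        System.out.println("Ingested in " + stopWatch.getDuration(TimeUnit.MILLISECONDS)
                + " milliseconds at a rate of " + stopWatch.calculateDataRate(totalSize));
    }
}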
The class MoveHDFS, method onTrigger.
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    // MoveHDFS
    FlowFile parentFlowFile = session.get();
    if (parentFlowFile == null) {
        return;
    }
    final FileSystem hdfs = getFileSystem();
    final String filenameValue = context.getProperty(INPUT_DIRECTORY_OR_FILE).evaluateAttributeExpressions(parentFlowFile).getValue();
    Path inputPath = null;
    try {
        inputPath = new Path(filenameValue);
        if (!hdfs.exists(inputPath)) {
            throw new IOException("Input Directory or File does not exist in HDFS");
        }
    } catch (Exception e) {
        getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure", new Object[] { filenameValue, parentFlowFile, e });
        parentFlowFile = session.putAttribute(parentFlowFile, "hdfs.failure.reason", e.getMessage());
        parentFlowFile = session.penalize(parentFlowFile);
        session.transfer(parentFlowFile, REL_FAILURE);
        return;
    }
    List<Path> files = new ArrayList<Path>();
    try {
        final StopWatch stopWatch = new StopWatch(true);
        Set<Path> listedFiles = performListing(context, inputPath);
        stopWatch.stop();
        final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
        if (listedFiles != null) {
            // place files into the work queue
            int newItems = 0;
            queueLock.lock();
            try {
                for (Path file : listedFiles) {
                    if (!filePathQueue.contains(file) && !processing.contains(file)) {
                        if (!filePathQueue.offer(file)) {
                            break;
                        }
                        newItems++;
                    }
                }
            } catch (Exception e) {
                getLogger().warn("Could not add to processing queue due to {}", new Object[] { e.getMessage() }, e);
            } finally {
                queueLock.unlock();
            }
            if (listedFiles.size() > 0) {
                logEmptyListing.set(3L);
            }
            if (logEmptyListing.getAndDecrement() > 0) {
                getLogger().info("Obtained file listing in {} milliseconds; listing had {} items, {} of which were new", new Object[] { millis, listedFiles.size(), newItems });
            }
        }
    } catch (IOException e) {
        context.yield();
        getLogger().warn("Error while retrieving list of files due to {}", new Object[] { e });
        return;
    }
    // prepare to process a batch of files in the queue
    queueLock.lock();
    try {
        filePathQueue.drainTo(files);
        if (files.isEmpty()) {
            // nothing to do!
            session.remove(parentFlowFile);
            context.yield();
            return;
        }
    } finally {
        queueLock.unlock();
    }
    processBatchOfFiles(files, context, session, parentFlowFile);
    queueLock.lock();
    try {
        processing.removeAll(files);
    } finally {
        queueLock.unlock();
    }
    session.remove(parentFlowFile);
}