Example 66 with StopWatch

Use of org.apache.nifi.util.StopWatch in project nifi by apache.

The class CreateHadoopSequenceFile, method onTrigger.

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    String mimeType = flowFile.getAttribute(CoreAttributes.MIME_TYPE.key());
    String packagingFormat = NOT_PACKAGED;
    if (null != mimeType) {
        switch(mimeType.toLowerCase()) {
            case "application/tar":
                packagingFormat = TAR_FORMAT;
                break;
            case "application/zip":
                packagingFormat = ZIP_FORMAT;
                break;
            case "application/flowfile-v3":
                packagingFormat = FLOWFILE_STREAM_FORMAT_V3;
                break;
            default:
                getLogger().warn("Cannot unpack {} because its mime.type attribute is set to '{}', which is not a format that can be unpacked", new Object[] { flowFile, mimeType });
        }
    }
    final SequenceFileWriter sequenceFileWriter;
    switch(packagingFormat) {
        case TAR_FORMAT:
            sequenceFileWriter = new TarUnpackerSequenceFileWriter();
            break;
        case ZIP_FORMAT:
            sequenceFileWriter = new ZipUnpackerSequenceFileWriter();
            break;
        case FLOWFILE_STREAM_FORMAT_V3:
            sequenceFileWriter = new FlowFileStreamUnpackerSequenceFileWriter();
            break;
        default:
            sequenceFileWriter = new SequenceFileWriterImpl();
    }
    final Configuration configuration = getConfiguration();
    if (configuration == null) {
        getLogger().error("HDFS not configured properly");
        session.transfer(flowFile, RELATIONSHIP_FAILURE);
        context.yield();
        return;
    }
    final CompressionCodec codec = getCompressionCodec(context, configuration);
    final String value = context.getProperty(COMPRESSION_TYPE).getValue();
    final SequenceFile.CompressionType compressionType = value == null ? SequenceFile.CompressionType.valueOf(DEFAULT_COMPRESSION_TYPE) : SequenceFile.CompressionType.valueOf(value);
    final String fileName = flowFile.getAttribute(CoreAttributes.FILENAME.key()) + ".sf";
    flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), fileName);
    try {
        StopWatch stopWatch = new StopWatch(true);
        flowFile = sequenceFileWriter.writeSequenceFile(flowFile, session, configuration, compressionType, codec);
        session.getProvenanceReporter().modifyContent(flowFile, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        session.transfer(flowFile, RELATIONSHIP_SUCCESS);
        getLogger().info("Transferred flowfile {} to {}", new Object[] { flowFile, RELATIONSHIP_SUCCESS });
    } catch (ProcessException e) {
        getLogger().error("Failed to create Sequence File. Transferring {} to 'failure'", new Object[] { flowFile }, e);
        session.transfer(flowFile, RELATIONSHIP_FAILURE);
    }
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), Configuration (org.apache.hadoop.conf.Configuration), SequenceFileWriter (org.apache.nifi.processors.hadoop.util.SequenceFileWriter), StopWatch (org.apache.nifi.util.StopWatch), ProcessException (org.apache.nifi.processor.exception.ProcessException), SequenceFile (org.apache.hadoop.io.SequenceFile), CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec)
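
Every example on this page follows the same basic timing pattern: construct the StopWatch, do the work, then report the elapsed time (here, to NiFi's provenance reporter). A minimal standalone sketch of that pattern, using only StopWatch methods that appear in these examples:

import java.util.concurrent.TimeUnit;

import org.apache.nifi.util.StopWatch;

public class StopWatchSketch {

    public static void main(String[] args) throws InterruptedException {
        // Passing true starts the watch immediately, as in the example above.
        final StopWatch stopWatch = new StopWatch(true);
        // Stand-in for the real work, e.g. writing the sequence file.
        Thread.sleep(50);
        // getElapsed can be read while the watch is still running...
        final long runningMillis = stopWatch.getElapsed(TimeUnit.MILLISECONDS);
        stopWatch.stop();
        // ...whereas getDuration reports the time between start and stop.
        final long totalMillis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
        System.out.println(runningMillis + " ms elapsed, " + totalMillis + " ms total");
    }
}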

Example 67 with StopWatch

Use of org.apache.nifi.util.StopWatch in project nifi by apache.

The class FetchHDFS, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final FileSystem hdfs = getFileSystem();
    final UserGroupInformation ugi = getUserGroupInformation();
    final String filenameValue = context.getProperty(FILENAME).evaluateAttributeExpressions(flowFile).getValue();
    final Path path;
    try {
        path = new Path(filenameValue);
    } catch (IllegalArgumentException e) {
        getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure", new Object[] { filenameValue, flowFile, e });
        flowFile = session.putAttribute(flowFile, "hdfs.failure.reason", e.getMessage());
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    final StopWatch stopWatch = new StopWatch(true);
    final FlowFile finalFlowFile = flowFile;
    ugi.doAs(new PrivilegedAction<Object>() {

        @Override
        public Object run() {
            InputStream stream = null;
            CompressionCodec codec = null;
            Configuration conf = getConfiguration();
            final CompressionCodecFactory compressionCodecFactory = new CompressionCodecFactory(conf);
            final CompressionType compressionType = CompressionType.valueOf(context.getProperty(COMPRESSION_CODEC).toString());
            final boolean inferCompressionCodec = compressionType == CompressionType.AUTOMATIC;
            if (inferCompressionCodec) {
                codec = compressionCodecFactory.getCodec(path);
            } else if (compressionType != CompressionType.NONE) {
                codec = getCompressionCodec(context, getConfiguration());
            }
            FlowFile flowFile = finalFlowFile;
            final Path qualifiedPath = path.makeQualified(hdfs.getUri(), hdfs.getWorkingDirectory());
            try {
                final String outputFilename;
                final String originalFilename = path.getName();
                stream = hdfs.open(path, 16384);
                // Check if compression codec is defined (inferred or otherwise)
                if (codec != null) {
                    stream = codec.createInputStream(stream);
                    outputFilename = StringUtils.removeEnd(originalFilename, codec.getDefaultExtension());
                } else {
                    outputFilename = originalFilename;
                }
                flowFile = session.importFrom(stream, finalFlowFile);
                flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), outputFilename);
                stopWatch.stop();
                getLogger().info("Successfully received content from {} for {} in {}", new Object[] { qualifiedPath, flowFile, stopWatch.getDuration() });
                session.getProvenanceReporter().fetch(flowFile, qualifiedPath.toString(), stopWatch.getDuration(TimeUnit.MILLISECONDS));
                session.transfer(flowFile, REL_SUCCESS);
            } catch (final FileNotFoundException | AccessControlException e) {
                getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure", new Object[] { qualifiedPath, flowFile, e });
                flowFile = session.putAttribute(flowFile, "hdfs.failure.reason", e.getMessage());
                flowFile = session.penalize(flowFile);
                session.transfer(flowFile, REL_FAILURE);
            } catch (final IOException e) {
                getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to comms.failure", new Object[] { qualifiedPath, flowFile, e });
                flowFile = session.penalize(flowFile);
                session.transfer(flowFile, REL_COMMS_FAILURE);
            } finally {
                IOUtils.closeQuietly(stream);
            }
            return null;
        }
    });
}
Also used: Path (org.apache.hadoop.fs.Path), FlowFile (org.apache.nifi.flowfile.FlowFile), Configuration (org.apache.hadoop.conf.Configuration), InputStream (java.io.InputStream), IOException (java.io.IOException), StopWatch (org.apache.nifi.util.StopWatch), CompressionCodecFactory (org.apache.hadoop.io.compress.CompressionCodecFactory), FileSystem (org.apache.hadoop.fs.FileSystem), CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec), UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)
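
When COMPRESSION_CODEC is set to AUTOMATIC, FetchHDFS infers the decompression codec from the file's extension via Hadoop's CompressionCodecFactory. A small sketch of just that inference step, assuming a hypothetical /data/logs.gz input file:

import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class CodecInferenceSketch {

    public static void main(String[] args) throws Exception {
        final Configuration conf = new Configuration();
        final FileSystem fs = FileSystem.get(conf);
        // Hypothetical input path; the factory maps known extensions (.gz, .bz2, ...) to codecs.
        final Path path = new Path("/data/logs.gz");
        final CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
        InputStream stream = fs.open(path);
        if (codec != null) {
            // Wrap the raw stream so callers read decompressed bytes,
            // mirroring the codec.createInputStream(stream) call above.
            stream = codec.createInputStream(stream);
        }
        stream.close();
    }
}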

Example 68 with StopWatch

Use of org.apache.nifi.util.StopWatch in project nifi by apache.

The class GetHDFS, method onTrigger.

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    int batchSize = context.getProperty(BATCH_SIZE).asInteger();
    final List<Path> files = new ArrayList<>(batchSize);
    // retrieve new file names from HDFS and place them into work queue
    if (filePathQueue.size() < MAX_WORKING_QUEUE_SIZE / 2) {
        try {
            final StopWatch stopWatch = new StopWatch(true);
            Set<Path> listedFiles = performListing(context);
            stopWatch.stop();
            final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
            if (listedFiles != null) {
                // place files into the work queue
                int newItems = 0;
                queueLock.lock();
                try {
                    for (Path file : listedFiles) {
                        if (!filePathQueue.contains(file) && !processing.contains(file)) {
                            if (!filePathQueue.offer(file)) {
                                break;
                            }
                            newItems++;
                        }
                    }
                } catch (Exception e) {
                    getLogger().warn("Could not add to processing queue due to {}", new Object[] { e });
                } finally {
                    queueLock.unlock();
                }
                if (listedFiles.size() > 0) {
                    logEmptyListing.set(3L);
                }
                if (logEmptyListing.getAndDecrement() > 0) {
                    getLogger().info("Obtained file listing in {} milliseconds; listing had {} items, {} of which were new", new Object[] { millis, listedFiles.size(), newItems });
                }
            }
        } catch (IOException e) {
            context.yield();
            getLogger().warn("Error while retrieving list of files due to {}", new Object[] { e });
            return;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            context.yield();
            getLogger().warn("Interrupted while retrieving files", e);
            return;
        }
    }
    // prepare to process a batch of files in the queue
    queueLock.lock();
    try {
        filePathQueue.drainTo(files, batchSize);
        if (files.isEmpty()) {
            // nothing to do!
            context.yield();
            return;
        }
        processing.addAll(files);
    } finally {
        queueLock.unlock();
    }
    processBatchOfFiles(files, context, session);
    queueLock.lock();
    try {
        processing.removeAll(files);
    } finally {
        queueLock.unlock();
    }
}
Also used: Path (org.apache.hadoop.fs.Path), ArrayList (java.util.ArrayList), IOException (java.io.IOException), ProcessException (org.apache.nifi.processor.exception.ProcessException), StopWatch (org.apache.nifi.util.StopWatch)
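
GetHDFS guards its shared work queue with an explicit lock plus a separate in-flight set, so concurrent onTrigger invocations never hand the same path to two threads. A stripped-down sketch of that dedupe-and-drain pattern, using plain strings in place of HDFS paths:

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.locks.ReentrantLock;

public class WorkQueueSketch {

    private final BlockingQueue<String> filePathQueue = new LinkedBlockingQueue<>(1000);
    private final Set<String> processing = new HashSet<>();
    private final ReentrantLock queueLock = new ReentrantLock();

    // Enqueue only items that are neither already queued nor currently in flight.
    public void enqueue(final Set<String> listedFiles) {
        queueLock.lock();
        try {
            for (final String file : listedFiles) {
                if (!filePathQueue.contains(file) && !processing.contains(file)
                        && !filePathQueue.offer(file)) {
                    break; // queue is full; pick the rest up on a later listing
                }
            }
        } finally {
            queueLock.unlock();
        }
    }

    // Drain up to batchSize items and mark them in flight before releasing the lock.
    public List<String> takeBatch(final int batchSize) {
        final List<String> batch = new ArrayList<>(batchSize);
        queueLock.lock();
        try {
            filePathQueue.drainTo(batch, batchSize);
            processing.addAll(batch);
        } finally {
            queueLock.unlock();
        }
        return batch;
    }
}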

Example 69 with StopWatch

Use of org.apache.nifi.util.StopWatch in project nifi by apache.

The class GetHDFSSequenceFile, method processBatchOfFiles.

@Override
protected void processBatchOfFiles(final List<Path> files, final ProcessContext context, final ProcessSession session) {
    final Configuration conf = getConfiguration();
    final FileSystem hdfs = getFileSystem();
    final String flowFileContentValue = context.getProperty(FLOWFILE_CONTENT).getValue();
    final boolean keepSourceFiles = context.getProperty(KEEP_SOURCE_FILE).asBoolean();
    final Double bufferSizeProp = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B);
    if (bufferSizeProp != null) {
        int bufferSize = bufferSizeProp.intValue();
        conf.setInt(BUFFER_SIZE_KEY, bufferSize);
    }
    ComponentLog logger = getLogger();
    final SequenceFileReader<Set<FlowFile>> reader;
    if (flowFileContentValue.equalsIgnoreCase(VALUE_ONLY)) {
        reader = new ValueReader(session);
    } else {
        reader = new KeyValueReader(session);
    }
    Set<FlowFile> flowFiles = Collections.emptySet();
    for (final Path file : files) {
        if (!this.isScheduled()) {
            // This processor should stop running immediately.
            break;
        }
        final StopWatch stopWatch = new StopWatch(false);
        try {
            stopWatch.start();
            if (!hdfs.exists(file)) {
                // If file is no longer here move on.
                continue;
            }
            logger.debug("Reading file");
            flowFiles = getFlowFiles(conf, hdfs, reader, file);
            if (!keepSourceFiles && !hdfs.delete(file, false)) {
                logger.warn("Unable to delete path " + file.toString() + " from HDFS.  Will likely be picked up over and over...");
            }
        } catch (Throwable t) {
            logger.error("Error retrieving file {} from HDFS due to {}", new Object[] { file, t });
            session.rollback();
            context.yield();
        } finally {
            stopWatch.stop();
            long totalSize = 0;
            for (FlowFile flowFile : flowFiles) {
                totalSize += flowFile.getSize();
                session.getProvenanceReporter().receive(flowFile, file.toString());
            }
            if (totalSize > 0) {
                final String dataRate = stopWatch.calculateDataRate(totalSize);
                final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
                logger.info("Created {} flowFiles from SequenceFile {}. Ingested in {} milliseconds at a rate of {}", new Object[] { flowFiles.size(), file.toUri().toASCIIString(), millis, dataRate });
                logger.info("Transferred flowFiles {}  to success", new Object[] { flowFiles });
                session.transfer(flowFiles, REL_SUCCESS);
            }
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), FlowFile (org.apache.nifi.flowfile.FlowFile), Set (java.util.Set), Configuration (org.apache.hadoop.conf.Configuration), ComponentLog (org.apache.nifi.logging.ComponentLog), StopWatch (org.apache.nifi.util.StopWatch), FileSystem (org.apache.hadoop.fs.FileSystem)
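
Unlike the other examples, GetHDFSSequenceFile constructs the watch stopped (new StopWatch(false)) and brackets each file with an explicit start/stop, which keeps the finally block correct even when the read throws. A sketch of that bracket together with the calculateDataRate helper; the byte count here is a made-up stand-in:

import java.util.concurrent.TimeUnit;

import org.apache.nifi.util.StopWatch;

public class DataRateSketch {

    public static void main(String[] args) {
        // Created stopped; started explicitly once there is work to time.
        final StopWatch stopWatch = new StopWatch(false);
        long totalSize = 0;
        try {
            stopWatch.start();
            totalSize = 4L * 1024 * 1024; // stand-in for bytes actually transferred
        } finally {
            stopWatch.stop();
            if (totalSize > 0) {
                // calculateDataRate formats throughput as a human-readable rate string.
                System.out.println(totalSize + " bytes in "
                        + stopWatch.getDuration(TimeUnit.MILLISECONDS) + " ms at "
                        + stopWatch.calculateDataRate(totalSize));
            }
        }
    }
}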

Example 70 with StopWatch

Use of org.apache.nifi.util.StopWatch in project nifi by apache.

The class MoveHDFS, method onTrigger.

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    // MoveHDFS
    FlowFile parentFlowFile = session.get();
    if (parentFlowFile == null) {
        return;
    }
    final FileSystem hdfs = getFileSystem();
    final String filenameValue = context.getProperty(INPUT_DIRECTORY_OR_FILE).evaluateAttributeExpressions(parentFlowFile).getValue();
    Path inputPath = null;
    try {
        inputPath = new Path(filenameValue);
        if (!hdfs.exists(inputPath)) {
            throw new IOException("Input Directory or File does not exist in HDFS");
        }
    } catch (Exception e) {
        getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure", new Object[] { filenameValue, parentFlowFile, e });
        parentFlowFile = session.putAttribute(parentFlowFile, "hdfs.failure.reason", e.getMessage());
        parentFlowFile = session.penalize(parentFlowFile);
        session.transfer(parentFlowFile, REL_FAILURE);
        return;
    }
    List<Path> files = new ArrayList<Path>();
    try {
        final StopWatch stopWatch = new StopWatch(true);
        Set<Path> listedFiles = performListing(context, inputPath);
        stopWatch.stop();
        final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
        if (listedFiles != null) {
            // place files into the work queue
            int newItems = 0;
            queueLock.lock();
            try {
                for (Path file : listedFiles) {
                    if (!filePathQueue.contains(file) && !processing.contains(file)) {
                        if (!filePathQueue.offer(file)) {
                            break;
                        }
                        newItems++;
                    }
                }
            } catch (Exception e) {
                getLogger().warn("Could not add to processing queue due to {}", new Object[] { e.getMessage() }, e);
            } finally {
                queueLock.unlock();
            }
            if (listedFiles.size() > 0) {
                logEmptyListing.set(3L);
            }
            if (logEmptyListing.getAndDecrement() > 0) {
                getLogger().info("Obtained file listing in {} milliseconds; listing had {} items, {} of which were new", new Object[] { millis, listedFiles.size(), newItems });
            }
        }
    } catch (IOException e) {
        context.yield();
        getLogger().warn("Error while retrieving list of files due to {}", new Object[] { e });
        return;
    }
    // prepare to process a batch of files in the queue
    queueLock.lock();
    try {
        filePathQueue.drainTo(files);
        if (files.isEmpty()) {
            // nothing to do!
            session.remove(parentFlowFile);
            context.yield();
            return;
        }
    } finally {
        queueLock.unlock();
    }
    processBatchOfFiles(files, context, session, parentFlowFile);
    queueLock.lock();
    try {
        processing.removeAll(files);
    } finally {
        queueLock.unlock();
    }
    session.remove(parentFlowFile);
}
Also used: Path (org.apache.hadoop.fs.Path), FlowFile (org.apache.nifi.flowfile.FlowFile), ArrayList (java.util.ArrayList), IOException (java.io.IOException), ProcessException (org.apache.nifi.processor.exception.ProcessException), FileNotFoundException (java.io.FileNotFoundException), StopWatch (org.apache.nifi.util.StopWatch), FileSystem (org.apache.hadoop.fs.FileSystem)
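
MoveHDFS validates the incoming path before queuing any work: Path's constructor rejects malformed strings with an IllegalArgumentException, and a missing path is promoted to an IOException so both cases land in the same failure handling. A sketch of just that validation, with /data/incoming as a hypothetical input:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PathValidationSketch {

    public static void main(String[] args) {
        try {
            final Path inputPath = new Path("/data/incoming"); // hypothetical directory
            final FileSystem hdfs = FileSystem.get(new Configuration());
            if (!hdfs.exists(inputPath)) {
                throw new IOException("Input Directory or File does not exist in HDFS");
            }
            System.out.println("Path is valid and present: " + inputPath);
        } catch (IllegalArgumentException | IOException e) {
            // In the processor this is where the FlowFile gets the
            // hdfs.failure.reason attribute, is penalized, and is routed to REL_FAILURE.
            System.err.println("routing to failure: " + e.getMessage());
        }
    }
}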

Aggregations

StopWatch (org.apache.nifi.util.StopWatch): 72
FlowFile (org.apache.nifi.flowfile.FlowFile): 59
IOException (java.io.IOException): 41
ProcessException (org.apache.nifi.processor.exception.ProcessException): 37
InputStream (java.io.InputStream): 27
ComponentLog (org.apache.nifi.logging.ComponentLog): 27
OutputStream (java.io.OutputStream): 21
HashMap (java.util.HashMap): 16
ArrayList (java.util.ArrayList): 13
Map (java.util.Map): 11
ProcessSession (org.apache.nifi.processor.ProcessSession): 11
AtomicLong (java.util.concurrent.atomic.AtomicLong): 10
InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback): 10
StreamCallback (org.apache.nifi.processor.io.StreamCallback): 10
HashSet (java.util.HashSet): 9
Path (org.apache.hadoop.fs.Path): 9
Charset (java.nio.charset.Charset): 8
AtomicReference (java.util.concurrent.atomic.AtomicReference): 8
FileSystem (org.apache.hadoop.fs.FileSystem): 8
PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor): 8