
Example 26 with StopWatch

Use of org.apache.nifi.util.StopWatch in project nifi by apache.

The class ExtractGrok, method onTrigger().

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
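    // Start the stopwatch immediately; the elapsed time is reported to provenance if the FlowFile content is replaced below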
    final StopWatch stopWatch = new StopWatch(true);
    final Charset charset = Charset.forName(context.getProperty(CHARACTER_SET).getValue());
    final String contentString;
    byte[] buffer = bufferQueue.poll();
    if (buffer == null) {
        final int maxBufferSize = context.getProperty(MAX_BUFFER_SIZE).asDataSize(DataUnit.B).intValue();
        buffer = new byte[maxBufferSize];
    }
    try {
        final byte[] byteBuffer = buffer;
        session.read(flowFile, new InputStreamCallback() {

            @Override
            public void process(InputStream in) throws IOException {
                StreamUtils.fillBuffer(in, byteBuffer, false);
            }
        });
        final long len = Math.min(byteBuffer.length, flowFile.getSize());
        contentString = new String(byteBuffer, 0, (int) len, charset);
    } finally {
        bufferQueue.offer(buffer);
    }
    final Match gm = grok.match(contentString);
    gm.captures();
    if (gm.toMap().isEmpty()) {
        session.transfer(flowFile, REL_NO_MATCH);
        getLogger().info("Did not match any Grok Expressions for FlowFile {}", new Object[] { flowFile });
        return;
    }
    final ObjectMapper objectMapper = new ObjectMapper();
    switch(context.getProperty(DESTINATION).getValue()) {
        case FLOWFILE_ATTRIBUTE:
            Map<String, String> grokResults = new HashMap<>();
            for (Map.Entry<String, Object> entry : gm.toMap().entrySet()) {
                if (null != entry.getValue()) {
                    grokResults.put("grok." + entry.getKey(), entry.getValue().toString());
                }
            }
            flowFile = session.putAllAttributes(flowFile, grokResults);
            session.getProvenanceReporter().modifyAttributes(flowFile);
            session.transfer(flowFile, REL_MATCH);
            getLogger().info("Matched {} Grok Expressions and added attributes to FlowFile {}", new Object[] { grokResults.size(), flowFile });
            break;
        case FLOWFILE_CONTENT:
            FlowFile conFlowfile = session.write(flowFile, new StreamCallback() {

                @Override
                public void process(InputStream in, OutputStream out) throws IOException {
                    out.write(objectMapper.writeValueAsBytes(gm.toMap()));
                }
            });
            conFlowfile = session.putAttribute(conFlowfile, CoreAttributes.MIME_TYPE.key(), APPLICATION_JSON);
            session.getProvenanceReporter().modifyContent(conFlowfile, "Replaced content with parsed Grok fields and values", stopWatch.getElapsed(TimeUnit.MILLISECONDS));
            session.transfer(conFlowfile, REL_MATCH);
            break;
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) HashMap(java.util.HashMap) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) Charset(java.nio.charset.Charset) IOException(java.io.IOException) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) StreamCallback(org.apache.nifi.processor.io.StreamCallback) StopWatch(org.apache.nifi.util.StopWatch) Match(io.thekraken.grok.api.Match) Map(java.util.Map) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper)

Example 27 with StopWatch

Use of org.apache.nifi.util.StopWatch in project nifi by apache.

The class GetFileTransfer, method onTrigger().

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final long pollingIntervalMillis = context.getProperty(FileTransfer.POLLING_INTERVAL).asTimePeriod(TimeUnit.MILLISECONDS);
    final long nextPollTime = lastPollTime.get() + pollingIntervalMillis;
    BlockingQueue<FileInfo> fileQueue = fileQueueRef.get();
    final ComponentLog logger = getLogger();
    // do not do the listing if there are already 100 or more items in our queue
    // 100 is really just a magic number that seems to work out well in practice
    FileTransfer transfer = null;
    if (System.currentTimeMillis() >= nextPollTime && (fileQueue == null || fileQueue.size() < 100) && listingLock.tryLock()) {
        try {
            transfer = getFileTransfer(context);
            try {
                fetchListing(context, session, transfer);
                lastPollTime.set(System.currentTimeMillis());
            } catch (final IOException e) {
                context.yield();
                try {
                    transfer.close();
                } catch (final IOException e1) {
                    logger.warn("Unable to close connection due to {}", new Object[] { e1 });
                }
                logger.error("Unable to fetch listing from remote server due to {}", new Object[] { e });
                return;
            }
        } finally {
            listingLock.unlock();
        }
    }
    fileQueue = fileQueueRef.get();
    if (fileQueue == null || fileQueue.isEmpty()) {
        // nothing to do!
        context.yield();
        if (transfer != null) {
            try {
                transfer.close();
            } catch (final IOException e1) {
                logger.warn("Unable to close connection due to {}", new Object[] { e1 });
            }
        }
        return;
    }
    final String hostname = context.getProperty(FileTransfer.HOSTNAME).evaluateAttributeExpressions().getValue();
    final boolean deleteOriginal = context.getProperty(FileTransfer.DELETE_ORIGINAL).asBoolean();
    final int maxSelects = context.getProperty(FileTransfer.MAX_SELECTS).asInteger();
    if (transfer == null) {
        transfer = getFileTransfer(context);
    }
    try {
        for (int i = 0; i < maxSelects && isScheduled(); i++) {
            final FileInfo file;
            sharableTransferLock.lock();
            try {
                file = fileQueue.poll();
                if (file == null) {
                    return;
                }
                processing.add(file);
            } finally {
                sharableTransferLock.unlock();
            }
            File relativeFile = new File(file.getFullPathFileName());
            final String parentRelativePath = (null == relativeFile.getParent()) ? "" : relativeFile.getParent();
            final String parentRelativePathString = parentRelativePath + "/";
            final Path absPath = relativeFile.toPath().toAbsolutePath();
            final String absPathString = absPath.getParent().toString() + "/";
            try {
                FlowFile flowFile = session.create();
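                // Create the stopwatch unstarted and time only the transfer itself, so the data rate reflects just the remote read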
                final StopWatch stopWatch = new StopWatch(false);
                try (final InputStream in = transfer.getInputStream(file.getFullPathFileName())) {
                    stopWatch.start();
                    flowFile = session.importFrom(in, flowFile);
                    stopWatch.stop();
                }
                transfer.flush();
                final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
                final String dataRate = stopWatch.calculateDataRate(flowFile.getSize());
                flowFile = session.putAttribute(flowFile, this.getClass().getSimpleName().toLowerCase() + ".remote.source", hostname);
                flowFile = session.putAttribute(flowFile, CoreAttributes.PATH.key(), parentRelativePathString);
                flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), relativeFile.getName());
                flowFile = session.putAttribute(flowFile, CoreAttributes.ABSOLUTE_PATH.key(), absPathString);
                Map<String, String> attributes = getAttributesFromFile(file);
                if (attributes.size() > 0) {
                    flowFile = session.putAllAttributes(flowFile, attributes);
                }
                if (deleteOriginal) {
                    try {
                        transfer.deleteFile(flowFile, null, file.getFullPathFileName());
                    } catch (final IOException e) {
                        logger.error("Failed to remove remote file {} due to {}; deleting local copy", new Object[] { file.getFullPathFileName(), e });
                        session.remove(flowFile);
                        return;
                    }
                }
                session.getProvenanceReporter().receive(flowFile, transfer.getProtocolName() + "://" + hostname + "/" + file.getFullPathFileName(), millis);
                session.transfer(flowFile, REL_SUCCESS);
                logger.info("Successfully retrieved {} from {} in {} milliseconds at a rate of {} and transferred to success", new Object[] { flowFile, hostname, millis, dataRate });
                session.commit();
            } catch (final IOException e) {
                context.yield();
                logger.error("Unable to retrieve file {} due to {}", new Object[] { file.getFullPathFileName(), e });
                try {
                    transfer.close();
                } catch (IOException e1) {
                    logger.warn("Unable to close connection to remote host due to {}", new Object[] { e1 });
                }
                session.rollback();
                return;
            } catch (final FlowFileAccessException e) {
                context.yield();
                logger.error("Unable to retrieve file {} due to {}", new Object[] { file.getFullPathFileName(), e.getCause() }, e);
                try {
                    transfer.close();
                } catch (IOException e1) {
                    logger.warn("Unable to close connection to remote host due to {}", e1);
                }
                session.rollback();
                return;
            } finally {
                processing.remove(file);
            }
        }
    } finally {
        try {
            transfer.close();
        } catch (final IOException e) {
            logger.warn("Failed to close connection to {} due to {}", new Object[] { hostname, e });
        }
    }
}
Also used : Path(java.nio.file.Path) FlowFile(org.apache.nifi.flowfile.FlowFile) FlowFileAccessException(org.apache.nifi.processor.exception.FlowFileAccessException) InputStream(java.io.InputStream) FileTransfer(org.apache.nifi.processors.standard.util.FileTransfer) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) StopWatch(org.apache.nifi.util.StopWatch) FileInfo(org.apache.nifi.processors.standard.util.FileInfo) File(java.io.File)

Example 28 with StopWatch

Use of org.apache.nifi.util.StopWatch in project nifi by apache.

The class PutSolrContentStream, method onTrigger().

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final AtomicReference<Exception> error = new AtomicReference<>(null);
    final AtomicReference<Exception> connectionError = new AtomicReference<>(null);
    final boolean isSolrCloud = SOLR_TYPE_CLOUD.equals(context.getProperty(SOLR_TYPE).getValue());
    final String collection = context.getProperty(COLLECTION).evaluateAttributeExpressions(flowFile).getValue();
    final Long commitWithin = context.getProperty(COMMIT_WITHIN).evaluateAttributeExpressions(flowFile).asLong();
    final String contentStreamPath = context.getProperty(CONTENT_STREAM_PATH).evaluateAttributeExpressions(flowFile).getValue();
    final MultiMapSolrParams requestParams = new MultiMapSolrParams(getRequestParams(context, flowFile));
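    // Time the entire read-and-send to Solr; the duration is reported to provenance on success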
    StopWatch timer = new StopWatch(true);
    session.read(flowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream in) throws IOException {
            ContentStreamUpdateRequest request = new ContentStreamUpdateRequest(contentStreamPath);
            request.setParams(new ModifiableSolrParams());
            // add the extra params, don't use 'set' in case of repeating params
            Iterator<String> paramNames = requestParams.getParameterNamesIterator();
            while (paramNames.hasNext()) {
                String paramName = paramNames.next();
                for (String paramValue : requestParams.getParams(paramName)) {
                    request.getParams().add(paramName, paramValue);
                }
            }
            // specify the collection for SolrCloud
            if (isSolrCloud) {
                request.setParam(COLLECTION_PARAM_NAME, collection);
            }
            if (commitWithin != null && commitWithin > 0) {
                request.setParam(COMMIT_WITHIN_PARAM_NAME, commitWithin.toString());
            }
            // if a username and password were provided then pass them for basic auth
            if (isBasicAuthEnabled()) {
                request.setBasicAuthCredentials(getUsername(), getPassword());
            }
            try (final BufferedInputStream bufferedIn = new BufferedInputStream(in)) {
                // add the FlowFile's content on the UpdateRequest
                request.addContentStream(new ContentStreamBase() {

                    @Override
                    public InputStream getStream() throws IOException {
                        return bufferedIn;
                    }

                    @Override
                    public String getContentType() {
                        return context.getProperty(CONTENT_TYPE).evaluateAttributeExpressions().getValue();
                    }
                });
                UpdateResponse response = request.process(getSolrClient());
                getLogger().debug("Got {} response from Solr", new Object[] { response.getStatus() });
            } catch (SolrException e) {
                error.set(e);
            } catch (SolrServerException e) {
                if (causedByIOException(e)) {
                    connectionError.set(e);
                } else {
                    error.set(e);
                }
            } catch (IOException e) {
                connectionError.set(e);
            }
        }
    });
    timer.stop();
    if (error.get() != null) {
        getLogger().error("Failed to send {} to Solr due to {}; routing to failure", new Object[] { flowFile, error.get() });
        session.transfer(flowFile, REL_FAILURE);
    } else if (connectionError.get() != null) {
        getLogger().error("Failed to send {} to Solr due to {}; routing to connection_failure", new Object[] { flowFile, connectionError.get() });
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_CONNECTION_FAILURE);
    } else {
        StringBuilder transitUri = new StringBuilder("solr://");
        transitUri.append(getSolrLocation());
        if (isSolrCloud) {
            transitUri.append(":").append(collection);
        }
        final long duration = timer.getDuration(TimeUnit.MILLISECONDS);
        session.getProvenanceReporter().send(flowFile, transitUri.toString(), duration, true);
        getLogger().info("Successfully sent {} to Solr in {} millis", new Object[] { flowFile, duration });
        session.transfer(flowFile, REL_SUCCESS);
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) MultiMapSolrParams(org.apache.solr.common.params.MultiMapSolrParams) BufferedInputStream(java.io.BufferedInputStream) InputStream(java.io.InputStream) SolrServerException(org.apache.solr.client.solrj.SolrServerException) AtomicReference(java.util.concurrent.atomic.AtomicReference) ContentStreamUpdateRequest(org.apache.solr.client.solrj.request.ContentStreamUpdateRequest) IOException(java.io.IOException) SolrException(org.apache.solr.common.SolrException) ProcessException(org.apache.nifi.processor.exception.ProcessException) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) StopWatch(org.apache.nifi.util.StopWatch) UpdateResponse(org.apache.solr.client.solrj.response.UpdateResponse) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) Iterator(java.util.Iterator) ContentStreamBase(org.apache.solr.common.util.ContentStreamBase)

Example 29 with StopWatch

Use of org.apache.nifi.util.StopWatch in project nifi by apache.

The class IPLookupService, method loadDatabase().

private void loadDatabase(final File dbFile, final String dbFileChecksum) throws IOException {
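    // Time how long it takes to load the MaxMind database from disk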
    final StopWatch stopWatch = new StopWatch(true);
    final DatabaseReader reader = new DatabaseReader.Builder(dbFile).build();
    stopWatch.stop();
    getLogger().info("Completed loading of Maxmind Database.  Elapsed time was {} milliseconds.", new Object[] { stopWatch.getDuration(TimeUnit.MILLISECONDS) });
    databaseReader = reader;
    databaseChecksum = dbFileChecksum;
}
Also used : StopWatch(org.apache.nifi.util.StopWatch)

Example 30 with StopWatch

Use of org.apache.nifi.util.StopWatch in project nifi by apache.

The class GetHDFS, method processBatchOfFiles().

protected void processBatchOfFiles(final List<Path> files, final ProcessContext context, final ProcessSession session) {
    // process the batch of files
    InputStream stream = null;
    CompressionCodec codec = null;
    Configuration conf = getConfiguration();
    FileSystem hdfs = getFileSystem();
    final boolean keepSourceFiles = context.getProperty(KEEP_SOURCE_FILE).asBoolean();
    final Double bufferSizeProp = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B);
    int bufferSize = bufferSizeProp != null ? bufferSizeProp.intValue() : conf.getInt(BUFFER_SIZE_KEY, BUFFER_SIZE_DEFAULT);
    final Path rootDir = new Path(context.getProperty(DIRECTORY).evaluateAttributeExpressions().getValue());
    final CompressionType compressionType = CompressionType.valueOf(context.getProperty(COMPRESSION_CODEC).toString());
    final boolean inferCompressionCodec = compressionType == CompressionType.AUTOMATIC;
    if (inferCompressionCodec || compressionType != CompressionType.NONE) {
        codec = getCompressionCodec(context, getConfiguration());
    }
    final CompressionCodecFactory compressionCodecFactory = new CompressionCodecFactory(conf);
    for (final Path file : files) {
        try {
            if (!getUserGroupInformation().doAs((PrivilegedExceptionAction<Boolean>) () -> hdfs.exists(file))) {
                // if file is no longer there then move on
                continue;
            }
            final String originalFilename = file.getName();
            final String relativePath = getPathDifference(rootDir, file);
            stream = getUserGroupInformation().doAs((PrivilegedExceptionAction<FSDataInputStream>) () -> hdfs.open(file, bufferSize));
            final String outputFilename;
            // Check if we should infer compression codec
            if (inferCompressionCodec) {
                codec = compressionCodecFactory.getCodec(file);
            }
            // Check if compression codec is defined (inferred or otherwise)
            if (codec != null) {
                stream = codec.createInputStream(stream);
                outputFilename = StringUtils.removeEnd(originalFilename, codec.getDefaultExtension());
            } else {
                outputFilename = originalFilename;
            }
            FlowFile flowFile = session.create();
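            // Time the import of the HDFS stream so both duration and data rate can be reported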
            final StopWatch stopWatch = new StopWatch(true);
            flowFile = session.importFrom(stream, flowFile);
            stopWatch.stop();
            final String dataRate = stopWatch.calculateDataRate(flowFile.getSize());
            final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
            flowFile = session.putAttribute(flowFile, CoreAttributes.PATH.key(), relativePath.isEmpty() ? "." : relativePath);
            flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), outputFilename);
            if (!keepSourceFiles && !getUserGroupInformation().doAs((PrivilegedExceptionAction<Boolean>) () -> hdfs.delete(file, false))) {
                getLogger().warn("Could not remove {} from HDFS. Not ingesting this file ...", new Object[] { file });
                session.remove(flowFile);
                continue;
            }
            session.getProvenanceReporter().receive(flowFile, file.toString());
            session.transfer(flowFile, REL_SUCCESS);
            getLogger().info("retrieved {} from HDFS {} in {} milliseconds at a rate of {}", new Object[] { flowFile, file, millis, dataRate });
            session.commit();
        } catch (final Throwable t) {
            getLogger().error("Error retrieving file {} from HDFS due to {}", new Object[] { file, t });
            session.rollback();
            context.yield();
        } finally {
            IOUtils.closeQuietly(stream);
            stream = null;
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FlowFile(org.apache.nifi.flowfile.FlowFile) Configuration(org.apache.hadoop.conf.Configuration) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) InputStream(java.io.InputStream) PrivilegedExceptionAction(java.security.PrivilegedExceptionAction) StopWatch(org.apache.nifi.util.StopWatch) CompressionCodecFactory(org.apache.hadoop.io.compress.CompressionCodecFactory) FileSystem(org.apache.hadoop.fs.FileSystem) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec)

Aggregations

StopWatch (org.apache.nifi.util.StopWatch): 72
FlowFile (org.apache.nifi.flowfile.FlowFile): 59
IOException (java.io.IOException): 41
ProcessException (org.apache.nifi.processor.exception.ProcessException): 37
InputStream (java.io.InputStream): 27
ComponentLog (org.apache.nifi.logging.ComponentLog): 27
OutputStream (java.io.OutputStream): 21
HashMap (java.util.HashMap): 16
ArrayList (java.util.ArrayList): 13
Map (java.util.Map): 11
ProcessSession (org.apache.nifi.processor.ProcessSession): 11
AtomicLong (java.util.concurrent.atomic.AtomicLong): 10
InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback): 10
StreamCallback (org.apache.nifi.processor.io.StreamCallback): 10
HashSet (java.util.HashSet): 9
Path (org.apache.hadoop.fs.Path): 9
Charset (java.nio.charset.Charset): 8
AtomicReference (java.util.concurrent.atomic.AtomicReference): 8
FileSystem (org.apache.hadoop.fs.FileSystem): 8
PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor): 8
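
The recurring pattern across these examples: create a StopWatch around the I/O work, stop it, then report the elapsed time (and, where useful, the data rate) to the provenance reporter. Below is a minimal sketch of that pattern, using only the StopWatch calls that appear in the examples above; the session, flowFile, in, and transitUri variables are placeholders standing in for whatever the surrounding processor's onTrigger method provides.

final StopWatch stopWatch = new StopWatch(true);
// ... perform the timed work, e.g. stream remote content into the FlowFile ...
flowFile = session.importFrom(in, flowFile);
stopWatch.stop();
final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
final String dataRate = stopWatch.calculateDataRate(flowFile.getSize());
session.getProvenanceReporter().receive(flowFile, transitUri, millis);
getLogger().info("Retrieved {} in {} milliseconds at a rate of {}", new Object[] { flowFile, millis, dataRate });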