Search in sources :

Example 6 with FlowFileFilter

use of org.apache.nifi.processor.FlowFileFilter in project nifi by apache.

the class PostHTTP method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile firstFlowFile = session.get();
    if (firstFlowFile == null) {
        return;
    }
    final ComponentLog logger = getLogger();
    final String url = context.getProperty(URL).evaluateAttributeExpressions(firstFlowFile).getValue();
    try {
        new java.net.URL(url);
    } catch (final MalformedURLException e) {
        logger.error("After substituting attribute values for {}, URL is {}; this is not a valid URL, so routing to failure", new Object[] { firstFlowFile, url });
        firstFlowFile = session.penalize(firstFlowFile);
        session.transfer(firstFlowFile, REL_FAILURE);
        return;
    }
    final List<FlowFile> toSend = new ArrayList<>();
    toSend.add(firstFlowFile);
    final boolean sendAsFlowFile = context.getProperty(SEND_AS_FLOWFILE).asBoolean();
    final int compressionLevel = context.getProperty(COMPRESSION_LEVEL).asInteger();
    final String userAgent = context.getProperty(USER_AGENT).getValue();
    final RequestConfig.Builder requestConfigBuilder = RequestConfig.custom();
    requestConfigBuilder.setConnectionRequestTimeout(context.getProperty(DATA_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).intValue());
    requestConfigBuilder.setConnectTimeout(context.getProperty(CONNECTION_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).intValue());
    requestConfigBuilder.setRedirectsEnabled(false);
    requestConfigBuilder.setSocketTimeout(context.getProperty(DATA_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).intValue());
    final RequestConfig requestConfig = requestConfigBuilder.build();
    final StreamThrottler throttler = throttlerRef.get();
    final Double maxBatchBytes = context.getProperty(MAX_BATCH_SIZE).asDataSize(DataUnit.B);
    final AtomicLong bytesToSend = new AtomicLong(firstFlowFile.getSize());
    DestinationAccepts destinationAccepts = null;
    CloseableHttpClient client = null;
    final String transactionId = UUID.randomUUID().toString();
    final AtomicReference<String> dnHolder = new AtomicReference<>("none");
    final Config config = getConfig(url, context);
    final HttpClientConnectionManager conMan = config.getConnectionManager();
    final HttpClientBuilder clientBuilder = HttpClientBuilder.create();
    clientBuilder.setConnectionManager(conMan);
    clientBuilder.setUserAgent(userAgent);
    clientBuilder.addInterceptorFirst(new HttpResponseInterceptor() {

        @Override
        public void process(final HttpResponse response, final HttpContext httpContext) throws HttpException, IOException {
            final HttpCoreContext coreContext = HttpCoreContext.adapt(httpContext);
            final ManagedHttpClientConnection conn = coreContext.getConnection(ManagedHttpClientConnection.class);
            if (!conn.isOpen()) {
                return;
            }
            final SSLSession sslSession = conn.getSSLSession();
            if (sslSession != null) {
                final Certificate[] certChain = sslSession.getPeerCertificates();
                if (certChain == null || certChain.length == 0) {
                    throw new SSLPeerUnverifiedException("No certificates found");
                }
                try {
                    final X509Certificate cert = CertificateUtils.convertAbstractX509Certificate(certChain[0]);
                    dnHolder.set(cert.getSubjectDN().getName().trim());
                } catch (CertificateException e) {
                    final String msg = "Could not extract subject DN from SSL session peer certificate";
                    logger.warn(msg);
                    throw new SSLPeerUnverifiedException(msg);
                }
            }
        }
    });
    clientBuilder.disableAutomaticRetries();
    clientBuilder.disableContentCompression();
    final String username = context.getProperty(USERNAME).getValue();
    final String password = context.getProperty(PASSWORD).getValue();
    // set the credentials if appropriate
    if (username != null) {
        final CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
        if (password == null) {
            credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(username));
        } else {
            credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(username, password));
        }
        clientBuilder.setDefaultCredentialsProvider(credentialsProvider);
    }
    // Set the proxy if specified
    if (context.getProperty(PROXY_HOST).isSet() && context.getProperty(PROXY_PORT).isSet()) {
        final String host = context.getProperty(PROXY_HOST).getValue();
        final int port = context.getProperty(PROXY_PORT).asInteger();
        clientBuilder.setProxy(new HttpHost(host, port));
    }
    client = clientBuilder.build();
    // determine whether or not destination accepts flowfile/gzip
    destinationAccepts = config.getDestinationAccepts();
    if (destinationAccepts == null) {
        try {
            destinationAccepts = getDestinationAcceptance(sendAsFlowFile, client, url, getLogger(), transactionId);
            config.setDestinationAccepts(destinationAccepts);
        } catch (final IOException e) {
            firstFlowFile = session.penalize(firstFlowFile);
            session.transfer(firstFlowFile, REL_FAILURE);
            logger.error("Unable to communicate with destination {} to determine whether or not it can accept " + "flowfiles/gzip; routing {} to failure due to {}", new Object[] { url, firstFlowFile, e });
            context.yield();
            return;
        }
    }
    // then we can get more flowfiles from the session up to MAX_BATCH_SIZE for the same URL
    if (sendAsFlowFile && (destinationAccepts.isFlowFileV3Accepted() || destinationAccepts.isFlowFileV2Accepted())) {
        toSend.addAll(session.get(new FlowFileFilter() {

            @Override
            public FlowFileFilterResult filter(FlowFile flowFile) {
                // if over MAX_BATCH_SIZE, then stop adding files
                if (bytesToSend.get() + flowFile.getSize() > maxBatchBytes) {
                    return FlowFileFilterResult.REJECT_AND_TERMINATE;
                }
                // check URL to see if this flowfile can be included in the batch
                final String urlToCheck = context.getProperty(URL).evaluateAttributeExpressions(flowFile).getValue();
                if (url.equals(urlToCheck)) {
                    bytesToSend.addAndGet(flowFile.getSize());
                    return FlowFileFilterResult.ACCEPT_AND_CONTINUE;
                } else {
                    return FlowFileFilterResult.REJECT_AND_CONTINUE;
                }
            }
        }));
    }
    final HttpPost post = new HttpPost(url);
    final DestinationAccepts accepts = destinationAccepts;
    final boolean isDestinationLegacyNiFi = accepts.getProtocolVersion() == null;
    final EntityTemplate entity = new EntityTemplate(new ContentProducer() {

        @Override
        public void writeTo(final OutputStream rawOut) throws IOException {
            final OutputStream throttled = throttler == null ? rawOut : throttler.newThrottledOutputStream(rawOut);
            OutputStream wrappedOut = new BufferedOutputStream(throttled);
            if (compressionLevel > 0 && accepts.isGzipAccepted()) {
                wrappedOut = new GZIPOutputStream(wrappedOut, compressionLevel);
            }
            try (final OutputStream out = wrappedOut) {
                for (final FlowFile flowFile : toSend) {
                    session.read(flowFile, new InputStreamCallback() {

                        @Override
                        public void process(final InputStream rawIn) throws IOException {
                            try (final InputStream in = new BufferedInputStream(rawIn)) {
                                FlowFilePackager packager = null;
                                if (!sendAsFlowFile) {
                                    packager = null;
                                } else if (accepts.isFlowFileV3Accepted()) {
                                    packager = new FlowFilePackagerV3();
                                } else if (accepts.isFlowFileV2Accepted()) {
                                    packager = new FlowFilePackagerV2();
                                } else if (accepts.isFlowFileV1Accepted()) {
                                    packager = new FlowFilePackagerV1();
                                }
                                // formats is acceptable if sending as FlowFile.
                                if (packager == null) {
                                    StreamUtils.copy(in, out);
                                } else {
                                    final Map<String, String> flowFileAttributes;
                                    if (isDestinationLegacyNiFi) {
                                        // Old versions of NiFi expect nf.file.name and nf.file.path to indicate filename & path;
                                        // in order to maintain backward compatibility, we copy the filename & path to those attribute keys.
                                        flowFileAttributes = new HashMap<>(flowFile.getAttributes());
                                        flowFileAttributes.put("nf.file.name", flowFile.getAttribute(CoreAttributes.FILENAME.key()));
                                        flowFileAttributes.put("nf.file.path", flowFile.getAttribute(CoreAttributes.PATH.key()));
                                    } else {
                                        flowFileAttributes = flowFile.getAttributes();
                                    }
                                    packager.packageFlowFile(in, out, flowFileAttributes, flowFile.getSize());
                                }
                            }
                        }
                    });
                }
                out.flush();
            }
        }
    }) {

        @Override
        public long getContentLength() {
            if (compressionLevel == 0 && !sendAsFlowFile && !context.getProperty(CHUNKED_ENCODING).asBoolean()) {
                return toSend.get(0).getSize();
            } else {
                return -1;
            }
        }
    };
    if (context.getProperty(CHUNKED_ENCODING).isSet()) {
        entity.setChunked(context.getProperty(CHUNKED_ENCODING).asBoolean());
    }
    post.setEntity(entity);
    post.setConfig(requestConfig);
    final String contentType;
    if (sendAsFlowFile) {
        if (accepts.isFlowFileV3Accepted()) {
            contentType = APPLICATION_FLOW_FILE_V3;
        } else if (accepts.isFlowFileV2Accepted()) {
            contentType = APPLICATION_FLOW_FILE_V2;
        } else if (accepts.isFlowFileV1Accepted()) {
            contentType = APPLICATION_FLOW_FILE_V1;
        } else {
            logger.error("Cannot send data to {} because the destination does not accept FlowFiles and this processor is " + "configured to deliver FlowFiles; rolling back session", new Object[] { url });
            session.rollback();
            context.yield();
            IOUtils.closeQuietly(client);
            return;
        }
    } else {
        final String contentTypeValue = context.getProperty(CONTENT_TYPE).evaluateAttributeExpressions(toSend.get(0)).getValue();
        contentType = StringUtils.isBlank(contentTypeValue) ? DEFAULT_CONTENT_TYPE : contentTypeValue;
    }
    final String attributeHeaderRegex = context.getProperty(ATTRIBUTES_AS_HEADERS_REGEX).getValue();
    if (attributeHeaderRegex != null && !sendAsFlowFile && toSend.size() == 1) {
        final Pattern pattern = Pattern.compile(attributeHeaderRegex);
        final Map<String, String> attributes = toSend.get(0).getAttributes();
        for (final Map.Entry<String, String> entry : attributes.entrySet()) {
            final String key = entry.getKey();
            if (pattern.matcher(key).matches()) {
                post.setHeader(entry.getKey(), entry.getValue());
            }
        }
    }
    post.setHeader(CONTENT_TYPE_HEADER, contentType);
    post.setHeader(FLOWFILE_CONFIRMATION_HEADER, "true");
    post.setHeader(PROTOCOL_VERSION_HEADER, PROTOCOL_VERSION);
    post.setHeader(TRANSACTION_ID_HEADER, transactionId);
    if (compressionLevel > 0 && accepts.isGzipAccepted()) {
        if (sendAsFlowFile) {
            post.setHeader(GZIPPED_HEADER, "true");
        } else {
            post.setHeader(CONTENT_ENCODING_HEADER, CONTENT_ENCODING_GZIP_VALUE);
        }
    }
    // Do the actual POST
    final String flowFileDescription = toSend.size() <= 10 ? toSend.toString() : toSend.size() + " FlowFiles";
    final String uploadDataRate;
    final long uploadMillis;
    CloseableHttpResponse response = null;
    try {
        final StopWatch stopWatch = new StopWatch(true);
        response = client.execute(post);
        // consume input stream entirely, ignoring its contents. If we
        // don't do this, the Connection will not be returned to the pool
        EntityUtils.consume(response.getEntity());
        stopWatch.stop();
        uploadDataRate = stopWatch.calculateDataRate(bytesToSend.get());
        uploadMillis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
    } catch (final IOException e) {
        logger.error("Failed to Post {} due to {}; transferring to failure", new Object[] { flowFileDescription, e });
        context.yield();
        for (FlowFile flowFile : toSend) {
            flowFile = session.penalize(flowFile);
            session.transfer(flowFile, REL_FAILURE);
        }
        return;
    } finally {
        if (response != null) {
            try {
                response.close();
            } catch (final IOException e) {
                getLogger().warn("Failed to close HTTP Response due to {}", new Object[] { e });
            }
        }
    }
    // If we get a 'SEE OTHER' status code and an HTTP header that indicates that the intent
    // of the Location URI is a flowfile hold, we will store this holdUri. This prevents us
    // from posting to some other webservice and then attempting to delete some resource to which
    // we are redirected
    final int responseCode = response.getStatusLine().getStatusCode();
    final String responseReason = response.getStatusLine().getReasonPhrase();
    String holdUri = null;
    if (responseCode == HttpServletResponse.SC_SEE_OTHER) {
        final Header locationUriHeader = response.getFirstHeader(LOCATION_URI_INTENT_NAME);
        if (locationUriHeader != null) {
            if (LOCATION_URI_INTENT_VALUE.equals(locationUriHeader.getValue())) {
                final Header holdUriHeader = response.getFirstHeader(LOCATION_HEADER_NAME);
                if (holdUriHeader != null) {
                    holdUri = holdUriHeader.getValue();
                }
            }
        }
        if (holdUri == null) {
            for (FlowFile flowFile : toSend) {
                flowFile = session.penalize(flowFile);
                logger.error("Failed to Post {} to {}: sent content and received status code {}:{} but no Hold URI", new Object[] { flowFile, url, responseCode, responseReason });
                session.transfer(flowFile, REL_FAILURE);
            }
            return;
        }
    }
    if (holdUri == null) {
        if (responseCode == HttpServletResponse.SC_SERVICE_UNAVAILABLE) {
            for (FlowFile flowFile : toSend) {
                flowFile = session.penalize(flowFile);
                logger.error("Failed to Post {} to {}: response code was {}:{}; will yield processing, " + "since the destination is temporarily unavailable", new Object[] { flowFile, url, responseCode, responseReason });
                session.transfer(flowFile, REL_FAILURE);
            }
            context.yield();
            return;
        }
        if (responseCode >= 300) {
            for (FlowFile flowFile : toSend) {
                flowFile = session.penalize(flowFile);
                logger.error("Failed to Post {} to {}: response code was {}:{}", new Object[] { flowFile, url, responseCode, responseReason });
                session.transfer(flowFile, REL_FAILURE);
            }
            return;
        }
        logger.info("Successfully Posted {} to {} in {} at a rate of {}", new Object[] { flowFileDescription, url, FormatUtils.formatMinutesSeconds(uploadMillis, TimeUnit.MILLISECONDS), uploadDataRate });
        for (final FlowFile flowFile : toSend) {
            session.getProvenanceReporter().send(flowFile, url, "Remote DN=" + dnHolder.get(), uploadMillis, true);
            session.transfer(flowFile, REL_SUCCESS);
        }
        return;
    }
    // 
    // the response indicated a Hold URI; delete the Hold.
    // 
    // determine the full URI of the Flow File's Hold; Unfortunately, the responses that are returned have
    // changed over the past, so we have to take into account a few different possibilities.
    String fullHoldUri = holdUri;
    if (holdUri.startsWith("/contentListener")) {
        // If the Hold URI that we get starts with /contentListener, it may not really be /contentListener,
        // as this really indicates that it should be whatever we posted to -- if posting directly to the
        // ListenHTTP component, it will be /contentListener, but if posting to a proxy/load balancer, we may
        // be posting to some other URL.
        fullHoldUri = url + holdUri.substring(16);
    } else if (holdUri.startsWith("/")) {
        // URL indicates the full path but not hostname or port; use the same hostname & port that we posted
        // to but use the full path indicated by the response.
        int firstSlash = url.indexOf("/", 8);
        if (firstSlash < 0) {
            firstSlash = url.length();
        }
        final String beforeSlash = url.substring(0, firstSlash);
        fullHoldUri = beforeSlash + holdUri;
    } else if (!holdUri.startsWith("http")) {
        // Absolute URL
        fullHoldUri = url + (url.endsWith("/") ? "" : "/") + holdUri;
    }
    final HttpDelete delete = new HttpDelete(fullHoldUri);
    delete.setHeader(TRANSACTION_ID_HEADER, transactionId);
    while (true) {
        try {
            final HttpResponse holdResponse = client.execute(delete);
            EntityUtils.consume(holdResponse.getEntity());
            final int holdStatusCode = holdResponse.getStatusLine().getStatusCode();
            final String holdReason = holdResponse.getStatusLine().getReasonPhrase();
            if (holdStatusCode >= 300) {
                logger.error("Failed to delete Hold that destination placed on {}: got response code {}:{}; routing to failure", new Object[] { flowFileDescription, holdStatusCode, holdReason });
                for (FlowFile flowFile : toSend) {
                    flowFile = session.penalize(flowFile);
                    session.transfer(flowFile, REL_FAILURE);
                }
                return;
            }
            logger.info("Successfully Posted {} to {} in {} milliseconds at a rate of {}", new Object[] { flowFileDescription, url, uploadMillis, uploadDataRate });
            for (final FlowFile flowFile : toSend) {
                session.getProvenanceReporter().send(flowFile, url);
                session.transfer(flowFile, REL_SUCCESS);
            }
            return;
        } catch (final IOException e) {
            logger.warn("Failed to delete Hold that destination placed on {} due to {}", new Object[] { flowFileDescription, e });
        }
        if (!isScheduled()) {
            context.yield();
            logger.warn("Failed to delete Hold that destination placed on {}; Processor has been stopped so routing FlowFile(s) to failure", new Object[] { flowFileDescription });
            for (FlowFile flowFile : toSend) {
                flowFile = session.penalize(flowFile);
                session.transfer(flowFile, REL_FAILURE);
            }
            return;
        }
    }
}
Also used : HttpPost(org.apache.http.client.methods.HttpPost) MalformedURLException(java.net.MalformedURLException) BasicCredentialsProvider(org.apache.http.impl.client.BasicCredentialsProvider) GZIPOutputStream(org.apache.nifi.stream.io.GZIPOutputStream) BufferedOutputStream(java.io.BufferedOutputStream) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) EntityTemplate(org.apache.http.entity.EntityTemplate) CertificateException(java.security.cert.CertificateException) FlowFilePackager(org.apache.nifi.util.FlowFilePackager) GZIPOutputStream(org.apache.nifi.stream.io.GZIPOutputStream) BufferedInputStream(java.io.BufferedInputStream) HttpHost(org.apache.http.HttpHost) CloseableHttpResponse(org.apache.http.client.methods.CloseableHttpResponse) HttpClientConnectionManager(org.apache.http.conn.HttpClientConnectionManager) PoolingHttpClientConnectionManager(org.apache.http.impl.conn.PoolingHttpClientConnectionManager) FlowFile(org.apache.nifi.flowfile.FlowFile) RequestConfig(org.apache.http.client.config.RequestConfig) FlowFileFilter(org.apache.nifi.processor.FlowFileFilter) ComponentLog(org.apache.nifi.logging.ComponentLog) X509Certificate(java.security.cert.X509Certificate) UsernamePasswordCredentials(org.apache.http.auth.UsernamePasswordCredentials) Header(org.apache.http.Header) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) LeakyBucketStreamThrottler(org.apache.nifi.stream.io.LeakyBucketStreamThrottler) StreamThrottler(org.apache.nifi.stream.io.StreamThrottler) ContentProducer(org.apache.http.entity.ContentProducer) HttpDelete(org.apache.http.client.methods.HttpDelete) RequestConfig(org.apache.http.client.config.RequestConfig) HttpClientBuilder(org.apache.http.impl.client.HttpClientBuilder) HttpException(org.apache.http.HttpException) BufferedOutputStream(java.io.BufferedOutputStream) CloseableHttpClient(org.apache.http.impl.client.CloseableHttpClient) Pattern(java.util.regex.Pattern) BufferedInputStream(java.io.BufferedInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) SSLPeerUnverifiedException(javax.net.ssl.SSLPeerUnverifiedException) HttpContext(org.apache.http.protocol.HttpContext) SSLSession(javax.net.ssl.SSLSession) HttpResponse(org.apache.http.HttpResponse) CloseableHttpResponse(org.apache.http.client.methods.CloseableHttpResponse) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) CredentialsProvider(org.apache.http.client.CredentialsProvider) BasicCredentialsProvider(org.apache.http.impl.client.BasicCredentialsProvider) StopWatch(org.apache.nifi.util.StopWatch) ManagedHttpClientConnection(org.apache.http.conn.ManagedHttpClientConnection) AtomicLong(java.util.concurrent.atomic.AtomicLong) HttpCoreContext(org.apache.http.protocol.HttpCoreContext) HttpResponseInterceptor(org.apache.http.HttpResponseInterceptor) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) FlowFilePackagerV3(org.apache.nifi.util.FlowFilePackagerV3) FlowFilePackagerV2(org.apache.nifi.util.FlowFilePackagerV2) FlowFilePackagerV1(org.apache.nifi.util.FlowFilePackagerV1)

Aggregations

FlowFileFilter (org.apache.nifi.processor.FlowFileFilter)6 FlowFile (org.apache.nifi.flowfile.FlowFile)4 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)3 HashSet (java.util.HashSet)3 Map (java.util.Map)3 Test (org.junit.Test)3 FileInputStream (java.io.FileInputStream)2 InputStream (java.io.InputStream)2 OutputStream (java.io.OutputStream)2 Path (java.nio.file.Path)2 Collection (java.util.Collection)2 List (java.util.List)2 Set (java.util.Set)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 ConcurrentMap (java.util.concurrent.ConcurrentMap)2 AtomicLong (java.util.concurrent.atomic.AtomicLong)2 AtomicReference (java.util.concurrent.atomic.AtomicReference)2 Pattern (java.util.regex.Pattern)2