
Example 56 with StopWatch

Use of org.apache.nifi.util.StopWatch in project nifi by apache.

From the class GetFileTransfer, method fetchListing:

// must be called while holding the listingLock
private void fetchListing(final ProcessContext context, final ProcessSession session, final FileTransfer transfer) throws IOException {
    BlockingQueue<FileInfo> queue = fileQueueRef.get();
    if (queue == null) {
        final boolean useNaturalOrdering = context.getProperty(FileTransfer.USE_NATURAL_ORDERING).asBoolean();
        queue = useNaturalOrdering ? new PriorityBlockingQueue<FileInfo>(25000) : new LinkedBlockingQueue<FileInfo>(25000);
        fileQueueRef.set(queue);
    }
    final StopWatch stopWatch = new StopWatch(true);
    final List<FileInfo> listing = transfer.getListing();
    final long millis = stopWatch.getElapsed(TimeUnit.MILLISECONDS);
    int newItems = 0;
    mutuallyExclusiveTransferLock.lock();
    try {
        for (final FileInfo file : listing) {
            if (!queue.contains(file) && !processing.contains(file)) {
                if (!queue.offer(file)) {
                    break;
                }
                newItems++;
            }
        }
    } finally {
        mutuallyExclusiveTransferLock.unlock();
    }
    getLogger().info("Obtained file listing in {} milliseconds; listing had {} items, {} of which were new", new Object[] { millis, listing.size(), newItems });
}
Also used : FileInfo(org.apache.nifi.processors.standard.util.FileInfo) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) PriorityBlockingQueue(java.util.concurrent.PriorityBlockingQueue) StopWatch(org.apache.nifi.util.StopWatch)
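
Across these examples the StopWatch API surfaces in a consistent way: constructing it with true starts timing immediately, getElapsed can be read while the watch is still running (as above), and after stop() both getDuration and calculateDataRate report on the full interval (as in Examples 57 and 60 below). The following is a minimal standalone sketch of that lifecycle, not NiFi source; the Thread.sleep calls are stand-ins for real work such as fetching a listing or streaming bytes:

import java.util.concurrent.TimeUnit;

import org.apache.nifi.util.StopWatch;

public class StopWatchSketch {

    public static void main(final String[] args) throws InterruptedException {
        // passing true starts the watch immediately, as every example on this page does
        final StopWatch stopWatch = new StopWatch(true);

        // stand-in for real work, e.g. transfer.getListing()
        Thread.sleep(50);

        // getElapsed may be called while the watch is still running (Example 56)
        System.out.println("elapsed so far: " + stopWatch.getElapsed(TimeUnit.MILLISECONDS) + " ms");

        // stand-in for streaming 1024 bytes
        Thread.sleep(50);

        // after stop(), report the full interval and a data rate for a byte count
        // (the stop/calculateDataRate/getDuration sequence in Examples 57 and 60)
        stopWatch.stop();
        System.out.println("took " + stopWatch.getDuration(TimeUnit.MILLISECONDS) + " ms at "
                + stopWatch.calculateDataRate(1024L));
    }
}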

Example 57 with StopWatch

Use of org.apache.nifi.util.StopWatch in project nifi by apache.

From the class GetHTTP, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
    final ComponentLog logger = getLogger();
    final ProcessSession session = sessionFactory.createSession();
    final FlowFile incomingFlowFile = session.get();
    if (incomingFlowFile != null) {
        session.transfer(incomingFlowFile, REL_SUCCESS);
        logger.warn("found FlowFile {} in input queue; transferring to success", new Object[] { incomingFlowFile });
    }
    // get the URL
    final String url = context.getProperty(URL).evaluateAttributeExpressions().getValue();
    final URI uri;
    String source = url;
    try {
        uri = new URI(url);
        source = uri.getHost();
    } catch (final URISyntaxException swallow) {
        // this won't happen, as the url has already been validated
    }
    // get the ssl context service
    final SSLContextService sslContextService = context.getProperty(SSL_CONTEXT_SERVICE).asControllerService(SSLContextService.class);
    // create the connection manager
    final HttpClientConnectionManager conMan;
    if (sslContextService == null) {
        conMan = new BasicHttpClientConnectionManager();
    } else {
        final SSLContext sslContext;
        try {
            sslContext = createSSLContext(sslContextService);
        } catch (final Exception e) {
            throw new ProcessException(e);
        }
        final SSLConnectionSocketFactory sslsf = new SSLConnectionSocketFactory(sslContext);
        // Also include a plain socket factory for regular http connections (especially proxies)
        final Registry<ConnectionSocketFactory> socketFactoryRegistry = RegistryBuilder.<ConnectionSocketFactory>create().register("https", sslsf).register("http", PlainConnectionSocketFactory.getSocketFactory()).build();
        conMan = new BasicHttpClientConnectionManager(socketFactoryRegistry);
    }
    try {
        // build the request configuration
        final RequestConfig.Builder requestConfigBuilder = RequestConfig.custom();
        requestConfigBuilder.setConnectionRequestTimeout(context.getProperty(DATA_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).intValue());
        requestConfigBuilder.setConnectTimeout(context.getProperty(CONNECTION_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).intValue());
        requestConfigBuilder.setSocketTimeout(context.getProperty(DATA_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).intValue());
        requestConfigBuilder.setRedirectsEnabled(context.getProperty(FOLLOW_REDIRECTS).asBoolean());
        switch(context.getProperty(REDIRECT_COOKIE_POLICY).getValue()) {
            case STANDARD_COOKIE_POLICY_STR:
                requestConfigBuilder.setCookieSpec(CookieSpecs.STANDARD);
                break;
            case STRICT_COOKIE_POLICY_STR:
                requestConfigBuilder.setCookieSpec(CookieSpecs.STANDARD_STRICT);
                break;
            case NETSCAPE_COOKIE_POLICY_STR:
                requestConfigBuilder.setCookieSpec(CookieSpecs.NETSCAPE);
                break;
            case IGNORE_COOKIE_POLICY_STR:
                requestConfigBuilder.setCookieSpec(CookieSpecs.IGNORE_COOKIES);
                break;
            case DEFAULT_COOKIE_POLICY_STR:
            default:
                requestConfigBuilder.setCookieSpec(CookieSpecs.DEFAULT);
        }
        // build the http client
        final HttpClientBuilder clientBuilder = HttpClientBuilder.create();
        clientBuilder.setConnectionManager(conMan);
        // include the user agent
        final String userAgent = context.getProperty(USER_AGENT).getValue();
        if (userAgent != null) {
            clientBuilder.setUserAgent(userAgent);
        }
        // set the ssl context if necessary
        if (sslContextService != null) {
            clientBuilder.setSslcontext(sslContextService.createSSLContext(ClientAuth.REQUIRED));
        }
        final String username = context.getProperty(USERNAME).getValue();
        final String password = context.getProperty(PASSWORD).getValue();
        // set the credentials if appropriate
        if (username != null) {
            final CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
            if (password == null) {
                credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(username));
            } else {
                credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(username, password));
            }
            clientBuilder.setDefaultCredentialsProvider(credentialsProvider);
        }
        // Set the proxy if specified
        if (context.getProperty(PROXY_HOST).isSet() && context.getProperty(PROXY_PORT).isSet()) {
            final String host = context.getProperty(PROXY_HOST).getValue();
            final int port = context.getProperty(PROXY_PORT).asInteger();
            clientBuilder.setProxy(new HttpHost(host, port));
        }
        // create request
        final HttpGet get = new HttpGet(url);
        get.setConfig(requestConfigBuilder.build());
        final StateMap beforeStateMap;
        try {
            beforeStateMap = context.getStateManager().getState(Scope.LOCAL);
            final String lastModified = beforeStateMap.get(LAST_MODIFIED + ":" + url);
            if (lastModified != null) {
                get.addHeader(HEADER_IF_MODIFIED_SINCE, parseStateValue(lastModified).getValue());
            }
            final String etag = beforeStateMap.get(ETAG + ":" + url);
            if (etag != null) {
                get.addHeader(HEADER_IF_NONE_MATCH, parseStateValue(etag).getValue());
            }
        } catch (final IOException ioe) {
            throw new ProcessException(ioe);
        }
        final String accept = context.getProperty(ACCEPT_CONTENT_TYPE).getValue();
        if (accept != null) {
            get.addHeader(HEADER_ACCEPT, accept);
        }
        // Add dynamic headers
        PropertyValue customHeaderValue;
        for (PropertyDescriptor customProperty : customHeaders) {
            customHeaderValue = context.getProperty(customProperty).evaluateAttributeExpressions();
            if (StringUtils.isNotBlank(customHeaderValue.getValue())) {
                get.addHeader(customProperty.getName(), customHeaderValue.getValue());
            }
        }
        // create the http client
        try (final CloseableHttpClient client = clientBuilder.build()) {
            // NOTE: including this inner try in order to swallow exceptions on close
            try {
                final StopWatch stopWatch = new StopWatch(true);
                final HttpResponse response = client.execute(get);
                final int statusCode = response.getStatusLine().getStatusCode();
                if (statusCode == NOT_MODIFIED) {
                    logger.info("content not retrieved because server returned HTTP Status Code {}: Not Modified", new Object[] { NOT_MODIFIED });
                    context.yield();
                    // doing a commit in case there were flow files in the input queue
                    session.commit();
                    return;
                }
                final String statusExplanation = response.getStatusLine().getReasonPhrase();
                if ((statusCode >= 300) || (statusCode == 204)) {
                    logger.error("received status code {}:{} from {}", new Object[] { statusCode, statusExplanation, url });
                    // doing a commit in case there were flow files in the input queue
                    session.commit();
                    return;
                }
                FlowFile flowFile = session.create();
                flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), context.getProperty(FILENAME).evaluateAttributeExpressions().getValue());
                flowFile = session.putAttribute(flowFile, this.getClass().getSimpleName().toLowerCase() + ".remote.source", source);
                flowFile = session.importFrom(response.getEntity().getContent(), flowFile);
                final Header contentTypeHeader = response.getFirstHeader("Content-Type");
                if (contentTypeHeader != null) {
                    final String contentType = contentTypeHeader.getValue();
                    if (!contentType.trim().isEmpty()) {
                        flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), contentType.trim());
                    }
                }
                final long flowFileSize = flowFile.getSize();
                stopWatch.stop();
                final String dataRate = stopWatch.calculateDataRate(flowFileSize);
                session.getProvenanceReporter().receive(flowFile, url, stopWatch.getDuration(TimeUnit.MILLISECONDS));
                session.transfer(flowFile, REL_SUCCESS);
                logger.info("Successfully received {} from {} at a rate of {}; transferred to success", new Object[] { flowFile, url, dataRate });
                session.commit();
                updateStateMap(context, response, beforeStateMap, url);
            } catch (final IOException e) {
                context.yield();
                session.rollback();
                logger.error("Failed to retrieve file from {} due to {}; rolling back session", new Object[] { url, e.getMessage() }, e);
                throw new ProcessException(e);
            } catch (final Throwable t) {
                context.yield();
                session.rollback();
                logger.error("Failed to process due to {}; rolling back session", new Object[] { t.getMessage() }, t);
                throw t;
            }
        } catch (final IOException e) {
            logger.debug("Error closing client due to {}, continuing.", new Object[] { e.getMessage() });
        }
    } finally {
        conMan.shutdown();
    }
}
Also used : ProcessSession(org.apache.nifi.processor.ProcessSession) BasicCredentialsProvider(org.apache.http.impl.client.BasicCredentialsProvider) HttpGet(org.apache.http.client.methods.HttpGet) StateMap(org.apache.nifi.components.state.StateMap) URISyntaxException(java.net.URISyntaxException) HttpClientBuilder(org.apache.http.impl.client.HttpClientBuilder) URI(java.net.URI) SSLConnectionSocketFactory(org.apache.http.conn.ssl.SSLConnectionSocketFactory) ConnectionSocketFactory(org.apache.http.conn.socket.ConnectionSocketFactory) PlainConnectionSocketFactory(org.apache.http.conn.socket.PlainConnectionSocketFactory) HttpHost(org.apache.http.HttpHost) HttpClientConnectionManager(org.apache.http.conn.HttpClientConnectionManager) BasicHttpClientConnectionManager(org.apache.http.impl.conn.BasicHttpClientConnectionManager) FlowFile(org.apache.nifi.flowfile.FlowFile) RequestConfig(org.apache.http.client.config.RequestConfig) CloseableHttpClient(org.apache.http.impl.client.CloseableHttpClient) PropertyDescriptor(org.apache.nifi.components.PropertyDescriptor) PropertyValue(org.apache.nifi.components.PropertyValue) HttpResponse(org.apache.http.HttpResponse) SSLContext(javax.net.ssl.SSLContext) CredentialsProvider(org.apache.http.client.CredentialsProvider) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) KeyStoreException(java.security.KeyStoreException) KeyManagementException(java.security.KeyManagementException) NoSuchAlgorithmException(java.security.NoSuchAlgorithmException) ProcessException(org.apache.nifi.processor.exception.ProcessException) UnrecoverableKeyException(java.security.UnrecoverableKeyException) CertificateException(java.security.cert.CertificateException) UsernamePasswordCredentials(org.apache.http.auth.UsernamePasswordCredentials) StopWatch(org.apache.nifi.util.StopWatch) Header(org.apache.http.Header) SSLContextService(org.apache.nifi.ssl.SSLContextService)
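
Stripped of the NiFi property lookups, the HttpClient 4.x setup in Example 57 reduces to a request configuration plus a connection manager that is shut down in a finally block. Below is a minimal sketch under stated assumptions: the example URL and the hard-coded 30-second timeouts stand in for the processor's URL, CONNECTION_TIMEOUT, and DATA_TIMEOUT properties, and the plain connection manager replaces the SSL registry branch:

import java.util.concurrent.TimeUnit;

import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.conn.BasicHttpClientConnectionManager;

public class GetWithTimeoutsSketch {

    public static void main(final String[] args) throws Exception {
        final int timeoutMillis = (int) TimeUnit.SECONDS.toMillis(30);
        final RequestConfig requestConfig = RequestConfig.custom()
                .setConnectionRequestTimeout(timeoutMillis)
                .setConnectTimeout(timeoutMillis)
                .setSocketTimeout(timeoutMillis)
                .setRedirectsEnabled(true)
                .build();

        final BasicHttpClientConnectionManager conMan = new BasicHttpClientConnectionManager();
        try (CloseableHttpClient client = HttpClientBuilder.create()
                .setConnectionManager(conMan)
                .build()) {
            final HttpGet get = new HttpGet("https://example.com/");
            get.setConfig(requestConfig);
            try (CloseableHttpResponse response = client.execute(get)) {
                System.out.println(response.getStatusLine());
            }
        } finally {
            // Example 57 also shuts the connection manager down in a finally block
            conMan.shutdown();
        }
    }
}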

Example 58 with StopWatch

Use of org.apache.nifi.util.StopWatch in project nifi by apache.

From the class YandexTranslate, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final StopWatch stopWatch = new StopWatch(true);
    final String key = context.getProperty(KEY).getValue();
    final String sourceLanguage = context.getProperty(SOURCE_LANGUAGE).evaluateAttributeExpressions(flowFile).getValue();
    final String targetLanguage = context.getProperty(TARGET_LANGUAGE).evaluateAttributeExpressions(flowFile).getValue();
    final String encoding = context.getProperty(CHARACTER_SET).evaluateAttributeExpressions(flowFile).getValue();
    final List<String> attributeNames = new ArrayList<>();
    final List<String> textValues = new ArrayList<>();
    for (final PropertyDescriptor descriptor : context.getProperties().keySet()) {
        if (descriptor.isDynamic()) {
            // add to list so that we know the order when the translations come back.
            attributeNames.add(descriptor.getName());
            textValues.add(context.getProperty(descriptor).evaluateAttributeExpressions(flowFile).getValue());
        }
    }
    if (context.getProperty(TRANSLATE_CONTENT).asBoolean()) {
        final byte[] buff = new byte[(int) flowFile.getSize()];
        session.read(flowFile, new InputStreamCallback() {

            @Override
            public void process(final InputStream in) throws IOException {
                StreamUtils.fillBuffer(in, buff);
            }
        });
        final String content = new String(buff, Charset.forName(encoding));
        textValues.add(content);
    }
    final Invocation invocation = prepareResource(key, textValues, sourceLanguage, targetLanguage);
    final Response response;
    try {
        response = invocation.invoke();
    } catch (final Exception e) {
        getLogger().error("Failed to make request to Yandex to transate text for {} due to {}; routing to comms.failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_COMMS_FAILURE);
        return;
    }
    if (response.getStatus() != Response.Status.OK.getStatusCode()) {
        getLogger().error("Failed to translate text using Yandex for {}; response was {}: {}; routing to {}", new Object[] { flowFile, response.getStatus(), response.getStatusInfo().getReasonPhrase(), REL_TRANSLATION_FAILED.getName() });
        flowFile = session.putAttribute(flowFile, "yandex.translate.failure.reason", response.getStatusInfo().getReasonPhrase());
        session.transfer(flowFile, REL_TRANSLATION_FAILED);
        return;
    }
    final Map<String, String> newAttributes = new HashMap<>();
    final Translation translation = response.readEntity(Translation.class);
    final List<String> texts = translation.getText();
    for (int i = 0; i < texts.size(); i++) {
        final String text = texts.get(i);
        if (i < attributeNames.size()) {
            final String attributeName = attributeNames.get(i);
            newAttributes.put(attributeName, text);
        } else {
            flowFile = session.write(flowFile, new OutputStreamCallback() {

                @Override
                public void process(final OutputStream out) throws IOException {
                    out.write(text.getBytes(encoding));
                }
            });
            newAttributes.put("language", targetLanguage);
        }
    }
    if (!newAttributes.isEmpty()) {
        flowFile = session.putAllAttributes(flowFile, newAttributes);
    }
    stopWatch.stop();
    session.transfer(flowFile, REL_SUCCESS);
    getLogger().info("Successfully translated {} items for {} from {} to {} in {}; routing to success", new Object[] { texts.size(), flowFile, sourceLanguage, targetLanguage, stopWatch.getDuration() });
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) Translation(org.apache.nifi.processors.yandex.model.Translation) PropertyDescriptor(org.apache.nifi.components.PropertyDescriptor) Invocation(javax.ws.rs.client.Invocation) HashMap(java.util.HashMap) MultivaluedHashMap(javax.ws.rs.core.MultivaluedHashMap) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) IOException(java.io.IOException) ProcessException(org.apache.nifi.processor.exception.ProcessException) StopWatch(org.apache.nifi.util.StopWatch) Response(javax.ws.rs.core.Response) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback)
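
Both callback interfaces in this example declare a single process method, so the anonymous classes can be collapsed into lambdas, exactly as Example 59 below does for session.read. A sketch of the equivalent read/write pair, pulled out into helper methods for clarity (the class and method names are hypothetical):

import java.nio.charset.Charset;

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.stream.io.StreamUtils;

class LambdaCallbackSketch {

    // same read as Example 58, with the InputStreamCallback written as a lambda
    byte[] readContent(final ProcessSession session, final FlowFile flowFile) {
        final byte[] buff = new byte[(int) flowFile.getSize()];
        session.read(flowFile, in -> StreamUtils.fillBuffer(in, buff));
        return buff;
    }

    // same write as Example 58, with the OutputStreamCallback written as a lambda
    FlowFile writeContent(final ProcessSession session, final FlowFile flowFile,
                          final String text, final String encoding) {
        return session.write(flowFile, out -> out.write(text.getBytes(Charset.forName(encoding))));
    }
}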

Example 59 with StopWatch

Use of org.apache.nifi.util.StopWatch in project nifi by apache.

From the class SelectHiveQL, method onTrigger:

private void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile fileToProcess = (context.hasIncomingConnection() ? session.get() : null);
    FlowFile flowfile = null;
    // If we have incoming connections but no FlowFile is available (and at least one
    // incoming connection is not a self-loop), there is nothing to do on this trigger.
    if (context.hasIncomingConnection()) {
        if (fileToProcess == null && context.hasNonLoopConnection()) {
            return;
        }
    }
    final ComponentLog logger = getLogger();
    final HiveDBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE).asControllerService(HiveDBCPService.class);
    final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue());
    final boolean flowbased = !(context.getProperty(HIVEQL_SELECT_QUERY).isSet());
    // Source the SQL
    final String selectQuery;
    if (context.getProperty(HIVEQL_SELECT_QUERY).isSet()) {
        selectQuery = context.getProperty(HIVEQL_SELECT_QUERY).evaluateAttributeExpressions(fileToProcess).getValue();
    } else {
        // If the query is not set, then an incoming flow file is required, and expected to contain a valid SQL select query.
        // If there is no incoming connection, onTrigger will not be called as the processor will fail when scheduled.
        final StringBuilder queryContents = new StringBuilder();
        session.read(fileToProcess, in -> queryContents.append(IOUtils.toString(in, charset)));
        selectQuery = queryContents.toString();
    }
    final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions(fileToProcess).asInteger();
    final Integer maxRowsPerFlowFile = context.getProperty(MAX_ROWS_PER_FLOW_FILE).evaluateAttributeExpressions(fileToProcess).asInteger();
    final Integer maxFragments = context.getProperty(MAX_FRAGMENTS).isSet() ? context.getProperty(MAX_FRAGMENTS).evaluateAttributeExpressions(fileToProcess).asInteger() : 0;
    final String outputFormat = context.getProperty(HIVEQL_OUTPUT_FORMAT).getValue();
    final boolean convertNamesForAvro = context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean();
    final StopWatch stopWatch = new StopWatch(true);
    final boolean header = context.getProperty(HIVEQL_CSV_HEADER).asBoolean();
    final String altHeader = context.getProperty(HIVEQL_CSV_ALT_HEADER).evaluateAttributeExpressions(fileToProcess).getValue();
    final String delimiter = context.getProperty(HIVEQL_CSV_DELIMITER).evaluateAttributeExpressions(fileToProcess).getValue();
    final boolean quote = context.getProperty(HIVEQL_CSV_QUOTE).asBoolean();
    // Note: the upstream source reads HIVEQL_CSV_HEADER here, which looks like a
    // copy-paste slip; the escape flag presumably comes from HIVEQL_CSV_ESCAPE.
    final boolean escape = context.getProperty(HIVEQL_CSV_ESCAPE).asBoolean();
    final String fragmentIdentifier = UUID.randomUUID().toString();
    try (final Connection con = dbcpService.getConnection();
        final Statement st = (flowbased ? con.prepareStatement(selectQuery) : con.createStatement())) {
        if (fetchSize != null && fetchSize > 0) {
            try {
                st.setFetchSize(fetchSize);
            } catch (SQLException se) {
                // Not all drivers support this, just log the error (at debug level) and move on
                logger.debug("Cannot set fetch size to {} due to {}", new Object[] { fetchSize, se.getLocalizedMessage() }, se);
            }
        }
        final List<FlowFile> resultSetFlowFiles = new ArrayList<>();
        try {
            logger.debug("Executing query {}", new Object[] { selectQuery });
            if (flowbased) {
                // Hive JDBC Doesn't Support this yet:
                // ParameterMetaData pmd = ((PreparedStatement)st).getParameterMetaData();
                // int paramCount = pmd.getParameterCount();
                // Alternate way to determine number of params in SQL.
                int paramCount = StringUtils.countMatches(selectQuery, "?");
                if (paramCount > 0) {
                    setParameters(1, (PreparedStatement) st, paramCount, fileToProcess.getAttributes());
                }
            }
            final ResultSet resultSet;
            try {
                resultSet = (flowbased ? ((PreparedStatement) st).executeQuery() : st.executeQuery(selectQuery));
            } catch (SQLException se) {
                // If an error occurs during the query, a flowfile is expected to be routed to failure, so ensure one here
                flowfile = (fileToProcess == null) ? session.create() : fileToProcess;
                fileToProcess = null;
                throw se;
            }
            int fragmentIndex = 0;
            String baseFilename = (fileToProcess != null) ? fileToProcess.getAttribute(CoreAttributes.FILENAME.key()) : null;
            while (true) {
                final AtomicLong nrOfRows = new AtomicLong(0L);
                flowfile = (flowfile == null) ? session.create() : session.create(flowfile);
                if (baseFilename == null) {
                    baseFilename = flowfile.getAttribute(CoreAttributes.FILENAME.key());
                }
                try {
                    flowfile = session.write(flowfile, out -> {
                        try {
                            if (AVRO.equals(outputFormat)) {
                                nrOfRows.set(HiveJdbcCommon.convertToAvroStream(resultSet, out, maxRowsPerFlowFile, convertNamesForAvro));
                            } else if (CSV.equals(outputFormat)) {
                                CsvOutputOptions options = new CsvOutputOptions(header, altHeader, delimiter, quote, escape, maxRowsPerFlowFile);
                                nrOfRows.set(HiveJdbcCommon.convertToCsvStream(resultSet, out, options));
                            } else {
                                nrOfRows.set(0L);
                                throw new ProcessException("Unsupported output format: " + outputFormat);
                            }
                        } catch (final SQLException | RuntimeException e) {
                            throw new ProcessException("Error during database query or conversion of records.", e);
                        }
                    });
                } catch (ProcessException e) {
                    // Add flowfile to results before rethrowing so it will be removed from session in outer catch
                    resultSetFlowFiles.add(flowfile);
                    throw e;
                }
                if (nrOfRows.get() > 0 || resultSetFlowFiles.isEmpty()) {
                    final Map<String, String> attributes = new HashMap<>();
                    // Set attribute for how many rows were selected
                    attributes.put(RESULT_ROW_COUNT, String.valueOf(nrOfRows.get()));
                    try {
                        // Set input/output table names by parsing the query
                        attributes.putAll(toQueryTableAttributes(findTableNames(selectQuery)));
                    } catch (Exception e) {
                        // If failed to parse the query, just log a warning message, but continue.
                        getLogger().warn("Failed to parse query: {} due to {}", new Object[] { selectQuery, e }, e);
                    }
                    // Set MIME type on output document and add extension to filename
                    if (AVRO.equals(outputFormat)) {
                        attributes.put(CoreAttributes.MIME_TYPE.key(), MIME_TYPE_AVRO_BINARY);
                        attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".avro");
                    } else if (CSV.equals(outputFormat)) {
                        attributes.put(CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);
                        attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".csv");
                    }
                    if (maxRowsPerFlowFile > 0) {
                        attributes.put("fragment.identifier", fragmentIdentifier);
                        attributes.put("fragment.index", String.valueOf(fragmentIndex));
                    }
                    flowfile = session.putAllAttributes(flowfile, attributes);
                    logger.info("{} contains {} Avro records; transferring to 'success'", new Object[] { flowfile, nrOfRows.get() });
                    if (context.hasIncomingConnection()) {
                        // If the flow file came from an incoming connection, issue a Fetch provenance event
                        session.getProvenanceReporter().fetch(flowfile, dbcpService.getConnectionURL(), "Retrieved " + nrOfRows.get() + " rows", stopWatch.getElapsed(TimeUnit.MILLISECONDS));
                    } else {
                        // If we created a flow file from rows received from Hive, issue a Receive provenance event
                        session.getProvenanceReporter().receive(flowfile, dbcpService.getConnectionURL(), stopWatch.getElapsed(TimeUnit.MILLISECONDS));
                    }
                    resultSetFlowFiles.add(flowfile);
                } else {
                    // If there were no rows returned (and the first flow file has been sent), we're done processing, so remove the flowfile and carry on
                    session.remove(flowfile);
                    break;
                }
                fragmentIndex++;
                if (maxFragments > 0 && fragmentIndex >= maxFragments) {
                    break;
                }
            }
            for (int i = 0; i < resultSetFlowFiles.size(); i++) {
                // Set count on all FlowFiles
                if (maxRowsPerFlowFile > 0) {
                    resultSetFlowFiles.set(i, session.putAttribute(resultSetFlowFiles.get(i), "fragment.count", Integer.toString(fragmentIndex)));
                }
            }
        } catch (final SQLException e) {
            throw e;
        }
        session.transfer(resultSetFlowFiles, REL_SUCCESS);
    } catch (final ProcessException | SQLException e) {
        logger.error("Issue processing SQL {} due to {}.", new Object[] { selectQuery, e });
        if (flowfile == null) {
            // This can happen if any exceptions occur while setting up the connection, statement, etc.
            logger.error("Unable to execute HiveQL select query {} due to {}. No FlowFile to route to failure", new Object[] { selectQuery, e });
            context.yield();
        } else {
            if (context.hasIncomingConnection()) {
                logger.error("Unable to execute HiveQL select query {} for {} due to {}; routing to failure", new Object[] { selectQuery, flowfile, e });
                flowfile = session.penalize(flowfile);
            } else {
                logger.error("Unable to execute HiveQL select query {} due to {}; routing to failure", new Object[] { selectQuery, e });
                context.yield();
            }
            session.transfer(flowfile, REL_FAILURE);
        }
    } finally {
        if (fileToProcess != null) {
            session.remove(fileToProcess);
        }
    }
}
Also used : StandardValidators(org.apache.nifi.processor.util.StandardValidators) StringUtils(org.apache.commons.lang.StringUtils) Connection(java.sql.Connection) CapabilityDescription(org.apache.nifi.annotation.documentation.CapabilityDescription) NORMALIZE_NAMES_FOR_AVRO(org.apache.nifi.util.hive.HiveJdbcCommon.NORMALIZE_NAMES_FOR_AVRO) HashMap(java.util.HashMap) EventDriven(org.apache.nifi.annotation.behavior.EventDriven) CSV_MIME_TYPE(org.apache.nifi.util.hive.HiveJdbcCommon.CSV_MIME_TYPE) ComponentLog(org.apache.nifi.logging.ComponentLog) PropertyDescriptor(org.apache.nifi.components.PropertyDescriptor) ProcessException(org.apache.nifi.processor.exception.ProcessException) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) SQLException(java.sql.SQLException) Charset(java.nio.charset.Charset) WritesAttributes(org.apache.nifi.annotation.behavior.WritesAttributes) Relationship(org.apache.nifi.processor.Relationship) CSV(org.apache.nifi.util.hive.HiveJdbcCommon.CSV) ResultSet(java.sql.ResultSet) Map(java.util.Map) HiveDBCPService(org.apache.nifi.dbcp.hive.HiveDBCPService) Requirement(org.apache.nifi.annotation.behavior.InputRequirement.Requirement) MIME_TYPE_AVRO_BINARY(org.apache.nifi.util.hive.HiveJdbcCommon.MIME_TYPE_AVRO_BINARY) PartialFunctions(org.apache.nifi.processor.util.pattern.PartialFunctions) HiveJdbcCommon(org.apache.nifi.util.hive.HiveJdbcCommon) FlowFile(org.apache.nifi.flowfile.FlowFile) ProcessContext(org.apache.nifi.processor.ProcessContext) Set(java.util.Set) ProcessSession(org.apache.nifi.processor.ProcessSession) AVRO(org.apache.nifi.util.hive.HiveJdbcCommon.AVRO) UUID(java.util.UUID) WritesAttribute(org.apache.nifi.annotation.behavior.WritesAttribute) PreparedStatement(java.sql.PreparedStatement) ProcessSessionFactory(org.apache.nifi.processor.ProcessSessionFactory) TimeUnit(java.util.concurrent.TimeUnit) AtomicLong(java.util.concurrent.atomic.AtomicLong) IOUtils(org.apache.commons.io.IOUtils) List(java.util.List) InputRequirement(org.apache.nifi.annotation.behavior.InputRequirement) OnScheduled(org.apache.nifi.annotation.lifecycle.OnScheduled) CsvOutputOptions(org.apache.nifi.util.hive.CsvOutputOptions) Statement(java.sql.Statement) StopWatch(org.apache.nifi.util.StopWatch) Tags(org.apache.nifi.annotation.documentation.Tags) CoreAttributes(org.apache.nifi.flowfile.attributes.CoreAttributes) Collections(java.util.Collections)

Example 60 with StopWatch

Use of org.apache.nifi.util.StopWatch in project nifi by apache.

From the class StoreInKiteDataset, method onTrigger:

@Override
public void onTrigger(ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final View<Record> target = load(context, flowFile);
    final Schema schema = target.getDataset().getDescriptor().getSchema();
    try {
        StopWatch timer = new StopWatch(true);
        session.read(flowFile, new InputStreamCallback() {

            @Override
            public void process(InputStream in) throws IOException {
                try (DataFileStream<Record> stream = new DataFileStream<>(in, AvroUtil.newDatumReader(schema, Record.class))) {
                    IncompatibleSchemaException.check(SchemaValidationUtil.canRead(stream.getSchema(), schema), "Incompatible file schema %s, expected %s", stream.getSchema(), schema);
                    long written = 0L;
                    try (DatasetWriter<Record> writer = target.newWriter()) {
                        for (Record record : stream) {
                            writer.write(record);
                            written += 1;
                        }
                    } finally {
                        session.adjustCounter("Stored records", written, true);
                    }
                }
            }
        });
        timer.stop();
        session.getProvenanceReporter().send(flowFile, target.getUri().toString(), timer.getDuration(TimeUnit.MILLISECONDS), true);
        session.transfer(flowFile, SUCCESS);
    } catch (ProcessException | DatasetIOException e) {
        getLogger().error("Failed to read FlowFile", e);
        session.transfer(flowFile, FAILURE);
    } catch (ValidationException e) {
        getLogger().error(e.getMessage());
        getLogger().debug("Incompatible schema error", e);
        session.transfer(flowFile, INCOMPATIBLE);
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) ValidationException(org.kitesdk.data.ValidationException) InputStream(java.io.InputStream) Schema(org.apache.avro.Schema) DatasetIOException(org.kitesdk.data.DatasetIOException) IOException(java.io.IOException) DataFileStream(org.apache.avro.file.DataFileStream) DatasetWriter(org.kitesdk.data.DatasetWriter) StopWatch(org.apache.nifi.util.StopWatch) ProcessException(org.apache.nifi.processor.exception.ProcessException) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) Record(org.apache.avro.generic.GenericData.Record)

Aggregations

StopWatch (org.apache.nifi.util.StopWatch): 72
FlowFile (org.apache.nifi.flowfile.FlowFile): 59
IOException (java.io.IOException): 41
ProcessException (org.apache.nifi.processor.exception.ProcessException): 37
InputStream (java.io.InputStream): 27
ComponentLog (org.apache.nifi.logging.ComponentLog): 27
OutputStream (java.io.OutputStream): 21
HashMap (java.util.HashMap): 16
ArrayList (java.util.ArrayList): 13
Map (java.util.Map): 11
ProcessSession (org.apache.nifi.processor.ProcessSession): 11
AtomicLong (java.util.concurrent.atomic.AtomicLong): 10
InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback): 10
StreamCallback (org.apache.nifi.processor.io.StreamCallback): 10
HashSet (java.util.HashSet): 9
Path (org.apache.hadoop.fs.Path): 9
Charset (java.nio.charset.Charset): 8
AtomicReference (java.util.concurrent.atomic.AtomicReference): 8
FileSystem (org.apache.hadoop.fs.FileSystem): 8
PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor): 8
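
The counts above trace a single recurring shape: create or receive a FlowFile, time the transfer with a StopWatch, and hand the elapsed milliseconds to the provenance reporter when routing to success. A minimal sketch of that shape; REL_SUCCESS, the InputStream parameter, and the URL argument are assumptions standing in for processor-specific details:

import java.io.InputStream;
import java.util.concurrent.TimeUnit;

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.util.StopWatch;

class ProvenanceTimingSketch {

    // hypothetical relationship; real processors define their own
    static final Relationship REL_SUCCESS = new Relationship.Builder().name("success").build();

    void importAndTime(final ProcessSession session, final InputStream source, final String url) {
        FlowFile flowFile = session.create();
        final StopWatch stopWatch = new StopWatch(true);

        // import the remote content, timing only the transfer itself
        flowFile = session.importFrom(source, flowFile);

        stopWatch.stop();
        // Examples 57 and 59 pass the elapsed millis to the provenance reporter this way
        session.getProvenanceReporter().receive(flowFile, url, stopWatch.getDuration(TimeUnit.MILLISECONDS));
        session.transfer(flowFile, REL_SUCCESS);
    }
}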