Use of org.apache.nifi.util.StopWatch in project nifi by apache.
The class GetFileTransfer, method fetchListing.
// must be called while holding the listingLock
private void fetchListing(final ProcessContext context, final ProcessSession session, final FileTransfer transfer) throws IOException {
    BlockingQueue<FileInfo> queue = fileQueueRef.get();
    if (queue == null) {
        final boolean useNaturalOrdering = context.getProperty(FileTransfer.USE_NATURAL_ORDERING).asBoolean();
        queue = useNaturalOrdering ? new PriorityBlockingQueue<FileInfo>(25000) : new LinkedBlockingQueue<FileInfo>(25000);
        fileQueueRef.set(queue);
    }
    final StopWatch stopWatch = new StopWatch(true);
    final List<FileInfo> listing = transfer.getListing();
    final long millis = stopWatch.getElapsed(TimeUnit.MILLISECONDS);
    int newItems = 0;
    mutuallyExclusiveTransferLock.lock();
    try {
        for (final FileInfo file : listing) {
            if (!queue.contains(file) && !processing.contains(file)) {
                if (!queue.offer(file)) {
                    break;
                }
                newItems++;
            }
        }
    } finally {
        mutuallyExclusiveTransferLock.unlock();
    }
    getLogger().info("Obtained file listing in {} milliseconds; listing had {} items, {} of which were new", new Object[] { millis, listing.size(), newItems });
}
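The listing example above reads the elapsed time without ever stopping the watch. A minimal, standalone sketch of that pattern follows; the java.io.File listing is only a stand-in for transfer.getListing(), and the class name ListingTimer is invented for illustration.

import java.io.File;
import java.util.concurrent.TimeUnit;
import org.apache.nifi.util.StopWatch;

public class ListingTimer {
    public static void main(String[] args) {
        // true = start timing immediately; otherwise call start() explicitly
        final StopWatch stopWatch = new StopWatch(true);
        final String[] listing = new File(".").list(); // stand-in for transfer.getListing()
        // getElapsed(...) may be read while the watch is still running
        final long millis = stopWatch.getElapsed(TimeUnit.MILLISECONDS);
        System.out.printf("Obtained listing of %d items in %d ms%n",
                listing == null ? 0 : listing.length, millis);
    }
}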
Use of org.apache.nifi.util.StopWatch in project nifi by apache.
The class GetHTTP, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
    final ComponentLog logger = getLogger();
    final ProcessSession session = sessionFactory.createSession();
    final FlowFile incomingFlowFile = session.get();
    if (incomingFlowFile != null) {
        session.transfer(incomingFlowFile, REL_SUCCESS);
        logger.warn("found FlowFile {} in input queue; transferring to success", new Object[] { incomingFlowFile });
    }
    // get the URL
    final String url = context.getProperty(URL).evaluateAttributeExpressions().getValue();
    final URI uri;
    String source = url;
    try {
        uri = new URI(url);
        source = uri.getHost();
    } catch (final URISyntaxException swallow) {
        // this won't happen as the url has already been validated
    }
    // get the ssl context service
    final SSLContextService sslContextService = context.getProperty(SSL_CONTEXT_SERVICE).asControllerService(SSLContextService.class);
    // create the connection manager
    final HttpClientConnectionManager conMan;
    if (sslContextService == null) {
        conMan = new BasicHttpClientConnectionManager();
    } else {
        final SSLContext sslContext;
        try {
            sslContext = createSSLContext(sslContextService);
        } catch (final Exception e) {
            throw new ProcessException(e);
        }
        final SSLConnectionSocketFactory sslsf = new SSLConnectionSocketFactory(sslContext);
        // Also include a plain socket factory for regular http connections (especially proxies)
        final Registry<ConnectionSocketFactory> socketFactoryRegistry = RegistryBuilder.<ConnectionSocketFactory>create().register("https", sslsf).register("http", PlainConnectionSocketFactory.getSocketFactory()).build();
        conMan = new BasicHttpClientConnectionManager(socketFactoryRegistry);
    }
    try {
        // build the request configuration
        final RequestConfig.Builder requestConfigBuilder = RequestConfig.custom();
        requestConfigBuilder.setConnectionRequestTimeout(context.getProperty(DATA_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).intValue());
        requestConfigBuilder.setConnectTimeout(context.getProperty(CONNECTION_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).intValue());
        requestConfigBuilder.setSocketTimeout(context.getProperty(DATA_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).intValue());
        requestConfigBuilder.setRedirectsEnabled(context.getProperty(FOLLOW_REDIRECTS).asBoolean());
        switch (context.getProperty(REDIRECT_COOKIE_POLICY).getValue()) {
            case STANDARD_COOKIE_POLICY_STR:
                requestConfigBuilder.setCookieSpec(CookieSpecs.STANDARD);
                break;
            case STRICT_COOKIE_POLICY_STR:
                requestConfigBuilder.setCookieSpec(CookieSpecs.STANDARD_STRICT);
                break;
            case NETSCAPE_COOKIE_POLICY_STR:
                requestConfigBuilder.setCookieSpec(CookieSpecs.NETSCAPE);
                break;
            case IGNORE_COOKIE_POLICY_STR:
                requestConfigBuilder.setCookieSpec(CookieSpecs.IGNORE_COOKIES);
                break;
            case DEFAULT_COOKIE_POLICY_STR:
            default:
                requestConfigBuilder.setCookieSpec(CookieSpecs.DEFAULT);
        }
        // build the http client
        final HttpClientBuilder clientBuilder = HttpClientBuilder.create();
        clientBuilder.setConnectionManager(conMan);
        // include the user agent
        final String userAgent = context.getProperty(USER_AGENT).getValue();
        if (userAgent != null) {
            clientBuilder.setUserAgent(userAgent);
        }
        // set the ssl context if necessary
        if (sslContextService != null) {
            clientBuilder.setSslcontext(sslContextService.createSSLContext(ClientAuth.REQUIRED));
        }
        final String username = context.getProperty(USERNAME).getValue();
        final String password = context.getProperty(PASSWORD).getValue();
        // set the credentials if appropriate
        if (username != null) {
            final CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
            if (password == null) {
                credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(username));
            } else {
                credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(username, password));
            }
            clientBuilder.setDefaultCredentialsProvider(credentialsProvider);
        }
        // Set the proxy if specified
        if (context.getProperty(PROXY_HOST).isSet() && context.getProperty(PROXY_PORT).isSet()) {
            final String host = context.getProperty(PROXY_HOST).getValue();
            final int port = context.getProperty(PROXY_PORT).asInteger();
            clientBuilder.setProxy(new HttpHost(host, port));
        }
        // create request
        final HttpGet get = new HttpGet(url);
        get.setConfig(requestConfigBuilder.build());
        final StateMap beforeStateMap;
        try {
            beforeStateMap = context.getStateManager().getState(Scope.LOCAL);
            final String lastModified = beforeStateMap.get(LAST_MODIFIED + ":" + url);
            if (lastModified != null) {
                get.addHeader(HEADER_IF_MODIFIED_SINCE, parseStateValue(lastModified).getValue());
            }
            final String etag = beforeStateMap.get(ETAG + ":" + url);
            if (etag != null) {
                get.addHeader(HEADER_IF_NONE_MATCH, parseStateValue(etag).getValue());
            }
        } catch (final IOException ioe) {
            throw new ProcessException(ioe);
        }
        final String accept = context.getProperty(ACCEPT_CONTENT_TYPE).getValue();
        if (accept != null) {
            get.addHeader(HEADER_ACCEPT, accept);
        }
        // Add dynamic headers
        PropertyValue customHeaderValue;
        for (PropertyDescriptor customProperty : customHeaders) {
            customHeaderValue = context.getProperty(customProperty).evaluateAttributeExpressions();
            if (StringUtils.isNotBlank(customHeaderValue.getValue())) {
                get.addHeader(customProperty.getName(), customHeaderValue.getValue());
            }
        }
        // create the http client
        try (final CloseableHttpClient client = clientBuilder.build()) {
            // NOTE: including this inner try in order to swallow exceptions on close
            try {
                final StopWatch stopWatch = new StopWatch(true);
                final HttpResponse response = client.execute(get);
                final int statusCode = response.getStatusLine().getStatusCode();
                if (statusCode == NOT_MODIFIED) {
                    logger.info("content not retrieved because server returned HTTP Status Code {}: Not Modified", new Object[] { NOT_MODIFIED });
                    context.yield();
                    // doing a commit in case there were flow files in the input queue
                    session.commit();
                    return;
                }
                final String statusExplanation = response.getStatusLine().getReasonPhrase();
                if ((statusCode >= 300) || (statusCode == 204)) {
                    logger.error("received status code {}:{} from {}", new Object[] { statusCode, statusExplanation, url });
                    // doing a commit in case there were flow files in the input queue
                    session.commit();
                    return;
                }
                FlowFile flowFile = session.create();
                flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), context.getProperty(FILENAME).evaluateAttributeExpressions().getValue());
                flowFile = session.putAttribute(flowFile, this.getClass().getSimpleName().toLowerCase() + ".remote.source", source);
                flowFile = session.importFrom(response.getEntity().getContent(), flowFile);
                final Header contentTypeHeader = response.getFirstHeader("Content-Type");
                if (contentTypeHeader != null) {
                    final String contentType = contentTypeHeader.getValue();
                    if (!contentType.trim().isEmpty()) {
                        flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), contentType.trim());
                    }
                }
                final long flowFileSize = flowFile.getSize();
                stopWatch.stop();
                final String dataRate = stopWatch.calculateDataRate(flowFileSize);
                session.getProvenanceReporter().receive(flowFile, url, stopWatch.getDuration(TimeUnit.MILLISECONDS));
                session.transfer(flowFile, REL_SUCCESS);
                logger.info("Successfully received {} from {} at a rate of {}; transferred to success", new Object[] { flowFile, url, dataRate });
                session.commit();
                updateStateMap(context, response, beforeStateMap, url);
            } catch (final IOException e) {
                context.yield();
                session.rollback();
                logger.error("Failed to retrieve file from {} due to {}; rolling back session", new Object[] { url, e.getMessage() }, e);
                throw new ProcessException(e);
            } catch (final Throwable t) {
                context.yield();
                session.rollback();
                logger.error("Failed to process due to {}; rolling back session", new Object[] { t.getMessage() }, t);
                throw t;
            }
        } catch (final IOException e) {
            logger.debug("Error closing client due to {}, continuing.", new Object[] { e.getMessage() });
        }
    } finally {
        conMan.shutdown();
    }
}
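GetHTTP stops the watch once the response body has been imported, then derives both a numeric duration for the provenance event and a human-readable throughput string via calculateDataRate(). A small, self-contained sketch of that data-rate pattern follows; the byte array and the sleep stand in for the HTTP transfer, and the class name DataRateExample is hypothetical.

import java.util.concurrent.TimeUnit;
import org.apache.nifi.util.StopWatch;

public class DataRateExample {
    public static void main(String[] args) throws InterruptedException {
        final byte[] payload = new byte[1024 * 1024]; // stand-in for the downloaded entity
        final StopWatch stopWatch = new StopWatch(true);
        Thread.sleep(50); // simulate the transfer
        stopWatch.stop();
        // calculateDataRate(...) renders the bytes-per-second throughput as a readable string
        final String dataRate = stopWatch.calculateDataRate(payload.length);
        final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
        System.out.printf("Received %d bytes in %d ms at a rate of %s%n", payload.length, millis, dataRate);
    }
}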
Use of org.apache.nifi.util.StopWatch in project nifi by apache.
The class YandexTranslate, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final StopWatch stopWatch = new StopWatch(true);
    final String key = context.getProperty(KEY).getValue();
    final String sourceLanguage = context.getProperty(SOURCE_LANGUAGE).evaluateAttributeExpressions(flowFile).getValue();
    final String targetLanguage = context.getProperty(TARGET_LANGUAGE).evaluateAttributeExpressions(flowFile).getValue();
    final String encoding = context.getProperty(CHARACTER_SET).evaluateAttributeExpressions(flowFile).getValue();
    final List<String> attributeNames = new ArrayList<>();
    final List<String> textValues = new ArrayList<>();
    for (final PropertyDescriptor descriptor : context.getProperties().keySet()) {
        if (descriptor.isDynamic()) {
            // add to list so that we know the order when the translations come back.
            attributeNames.add(descriptor.getName());
            textValues.add(context.getProperty(descriptor).evaluateAttributeExpressions(flowFile).getValue());
        }
    }
    if (context.getProperty(TRANSLATE_CONTENT).asBoolean()) {
        final byte[] buff = new byte[(int) flowFile.getSize()];
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(final InputStream in) throws IOException {
                StreamUtils.fillBuffer(in, buff);
            }
        });
        final String content = new String(buff, Charset.forName(encoding));
        textValues.add(content);
    }
    final Invocation invocation = prepareResource(key, textValues, sourceLanguage, targetLanguage);
    final Response response;
    try {
        response = invocation.invoke();
    } catch (final Exception e) {
        getLogger().error("Failed to make request to Yandex to translate text for {} due to {}; routing to comms.failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_COMMS_FAILURE);
        return;
    }
    if (response.getStatus() != Response.Status.OK.getStatusCode()) {
        getLogger().error("Failed to translate text using Yandex for {}; response was {}: {}; routing to {}", new Object[] { flowFile, response.getStatus(), response.getStatusInfo().getReasonPhrase(), REL_TRANSLATION_FAILED.getName() });
        flowFile = session.putAttribute(flowFile, "yandex.translate.failure.reason", response.getStatusInfo().getReasonPhrase());
        session.transfer(flowFile, REL_TRANSLATION_FAILED);
        return;
    }
    final Map<String, String> newAttributes = new HashMap<>();
    final Translation translation = response.readEntity(Translation.class);
    final List<String> texts = translation.getText();
    for (int i = 0; i < texts.size(); i++) {
        final String text = texts.get(i);
        if (i < attributeNames.size()) {
            final String attributeName = attributeNames.get(i);
            newAttributes.put(attributeName, text);
        } else {
            flowFile = session.write(flowFile, new OutputStreamCallback() {
                @Override
                public void process(final OutputStream out) throws IOException {
                    out.write(text.getBytes(encoding));
                }
            });
            newAttributes.put("language", targetLanguage);
        }
    }
    if (!newAttributes.isEmpty()) {
        flowFile = session.putAllAttributes(flowFile, newAttributes);
    }
    stopWatch.stop();
    session.transfer(flowFile, REL_SUCCESS);
    getLogger().info("Successfully translated {} items for {} from {} to {} in {}; routing to success", new Object[] { texts.size(), flowFile, sourceLanguage, targetLanguage, stopWatch.getDuration() });
}
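Note that the final log statement above passes stopWatch.getDuration() with no arguments, which returns a formatted duration string rather than a raw number. A brief sketch contrasting the two accessors follows; the class name DurationFormats and the sleep are placeholders for the translation call.

import java.util.concurrent.TimeUnit;
import org.apache.nifi.util.StopWatch;

public class DurationFormats {
    public static void main(String[] args) throws InterruptedException {
        final StopWatch stopWatch = new StopWatch(true);
        Thread.sleep(120); // stand-in for the remote translation request
        stopWatch.stop();
        // numeric duration, the form used for provenance events and metrics
        System.out.println(stopWatch.getDuration(TimeUnit.MILLISECONDS) + " ms");
        // formatted duration string, the form used in the log message above
        System.out.println(stopWatch.getDuration());
    }
}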
Use of org.apache.nifi.util.StopWatch in project nifi by apache.
The class SelectHiveQL, method onTrigger.
private void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile fileToProcess = (context.hasIncomingConnection() ? session.get() : null);
    FlowFile flowfile = null;
    // If we have an incoming connection, we should run only if we have a FlowFile.
    if (context.hasIncomingConnection()) {
        if (fileToProcess == null && context.hasNonLoopConnection()) {
            return;
        }
    }
    final ComponentLog logger = getLogger();
    final HiveDBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE).asControllerService(HiveDBCPService.class);
    final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue());
    final boolean flowbased = !(context.getProperty(HIVEQL_SELECT_QUERY).isSet());
    // Source the SQL
    final String selectQuery;
    if (context.getProperty(HIVEQL_SELECT_QUERY).isSet()) {
        selectQuery = context.getProperty(HIVEQL_SELECT_QUERY).evaluateAttributeExpressions(fileToProcess).getValue();
    } else {
        // If the query is not set, then an incoming flow file is required, and expected to contain a valid SQL select query.
        // If there is no incoming connection, onTrigger will not be called as the processor will fail when scheduled.
        final StringBuilder queryContents = new StringBuilder();
        session.read(fileToProcess, in -> queryContents.append(IOUtils.toString(in, charset)));
        selectQuery = queryContents.toString();
    }
    final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions(fileToProcess).asInteger();
    final Integer maxRowsPerFlowFile = context.getProperty(MAX_ROWS_PER_FLOW_FILE).evaluateAttributeExpressions(fileToProcess).asInteger();
    final Integer maxFragments = context.getProperty(MAX_FRAGMENTS).isSet() ? context.getProperty(MAX_FRAGMENTS).evaluateAttributeExpressions(fileToProcess).asInteger() : 0;
    final String outputFormat = context.getProperty(HIVEQL_OUTPUT_FORMAT).getValue();
    final boolean convertNamesForAvro = context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean();
    final StopWatch stopWatch = new StopWatch(true);
    final boolean header = context.getProperty(HIVEQL_CSV_HEADER).asBoolean();
    final String altHeader = context.getProperty(HIVEQL_CSV_ALT_HEADER).evaluateAttributeExpressions(fileToProcess).getValue();
    final String delimiter = context.getProperty(HIVEQL_CSV_DELIMITER).evaluateAttributeExpressions(fileToProcess).getValue();
    final boolean quote = context.getProperty(HIVEQL_CSV_QUOTE).asBoolean();
    final boolean escape = context.getProperty(HIVEQL_CSV_ESCAPE).asBoolean();
    final String fragmentIdentifier = UUID.randomUUID().toString();
    try (final Connection con = dbcpService.getConnection();
         final Statement st = (flowbased ? con.prepareStatement(selectQuery) : con.createStatement())) {
        if (fetchSize != null && fetchSize > 0) {
            try {
                st.setFetchSize(fetchSize);
            } catch (SQLException se) {
                // Not all drivers support this, just log the error (at debug level) and move on
                logger.debug("Cannot set fetch size to {} due to {}", new Object[] { fetchSize, se.getLocalizedMessage() }, se);
            }
        }
        final List<FlowFile> resultSetFlowFiles = new ArrayList<>();
        try {
            logger.debug("Executing query {}", new Object[] { selectQuery });
            if (flowbased) {
                // Hive JDBC doesn't support this yet:
                // ParameterMetaData pmd = ((PreparedStatement) st).getParameterMetaData();
                // int paramCount = pmd.getParameterCount();
                // Alternate way to determine number of params in SQL.
                int paramCount = StringUtils.countMatches(selectQuery, "?");
                if (paramCount > 0) {
                    setParameters(1, (PreparedStatement) st, paramCount, fileToProcess.getAttributes());
                }
            }
            final ResultSet resultSet;
            try {
                resultSet = (flowbased ? ((PreparedStatement) st).executeQuery() : st.executeQuery(selectQuery));
            } catch (SQLException se) {
                // If an error occurs during the query, a flowfile is expected to be routed to failure, so ensure one here
                flowfile = (fileToProcess == null) ? session.create() : fileToProcess;
                fileToProcess = null;
                throw se;
            }
            int fragmentIndex = 0;
            String baseFilename = (fileToProcess != null) ? fileToProcess.getAttribute(CoreAttributes.FILENAME.key()) : null;
            while (true) {
                final AtomicLong nrOfRows = new AtomicLong(0L);
                flowfile = (flowfile == null) ? session.create() : session.create(flowfile);
                if (baseFilename == null) {
                    baseFilename = flowfile.getAttribute(CoreAttributes.FILENAME.key());
                }
                try {
                    flowfile = session.write(flowfile, out -> {
                        try {
                            if (AVRO.equals(outputFormat)) {
                                nrOfRows.set(HiveJdbcCommon.convertToAvroStream(resultSet, out, maxRowsPerFlowFile, convertNamesForAvro));
                            } else if (CSV.equals(outputFormat)) {
                                CsvOutputOptions options = new CsvOutputOptions(header, altHeader, delimiter, quote, escape, maxRowsPerFlowFile);
                                nrOfRows.set(HiveJdbcCommon.convertToCsvStream(resultSet, out, options));
                            } else {
                                nrOfRows.set(0L);
                                throw new ProcessException("Unsupported output format: " + outputFormat);
                            }
                        } catch (final SQLException | RuntimeException e) {
                            throw new ProcessException("Error during database query or conversion of records.", e);
                        }
                    });
                } catch (ProcessException e) {
                    // Add flowfile to results before rethrowing so it will be removed from session in outer catch
                    resultSetFlowFiles.add(flowfile);
                    throw e;
                }
                if (nrOfRows.get() > 0 || resultSetFlowFiles.isEmpty()) {
                    final Map<String, String> attributes = new HashMap<>();
                    // Set attribute for how many rows were selected
                    attributes.put(RESULT_ROW_COUNT, String.valueOf(nrOfRows.get()));
                    try {
                        // Set input/output table names by parsing the query
                        attributes.putAll(toQueryTableAttributes(findTableNames(selectQuery)));
                    } catch (Exception e) {
                        // If failed to parse the query, just log a warning message, but continue.
                        getLogger().warn("Failed to parse query: {} due to {}", new Object[] { selectQuery, e }, e);
                    }
                    // Set MIME type on output document and add extension to filename
                    if (AVRO.equals(outputFormat)) {
                        attributes.put(CoreAttributes.MIME_TYPE.key(), MIME_TYPE_AVRO_BINARY);
                        attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".avro");
                    } else if (CSV.equals(outputFormat)) {
                        attributes.put(CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);
                        attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".csv");
                    }
                    if (maxRowsPerFlowFile > 0) {
                        attributes.put("fragment.identifier", fragmentIdentifier);
                        attributes.put("fragment.index", String.valueOf(fragmentIndex));
                    }
                    flowfile = session.putAllAttributes(flowfile, attributes);
                    logger.info("{} contains {} Avro records; transferring to 'success'", new Object[] { flowfile, nrOfRows.get() });
                    if (context.hasIncomingConnection()) {
                        // If the flow file came from an incoming connection, issue a Fetch provenance event
                        session.getProvenanceReporter().fetch(flowfile, dbcpService.getConnectionURL(), "Retrieved " + nrOfRows.get() + " rows", stopWatch.getElapsed(TimeUnit.MILLISECONDS));
                    } else {
                        // If we created a flow file from rows received from Hive, issue a Receive provenance event
                        session.getProvenanceReporter().receive(flowfile, dbcpService.getConnectionURL(), stopWatch.getElapsed(TimeUnit.MILLISECONDS));
                    }
                    resultSetFlowFiles.add(flowfile);
                } else {
                    // If there were no rows returned and the first flow file has been sent, we're done processing, so remove the flowfile and carry on
                    session.remove(flowfile);
                    break;
                }
                fragmentIndex++;
                if (maxFragments > 0 && fragmentIndex >= maxFragments) {
                    break;
                }
            }
            for (int i = 0; i < resultSetFlowFiles.size(); i++) {
                // Set the fragment count on all FlowFiles
                if (maxRowsPerFlowFile > 0) {
                    resultSetFlowFiles.set(i, session.putAttribute(resultSetFlowFiles.get(i), "fragment.count", Integer.toString(fragmentIndex)));
                }
            }
        } catch (final SQLException e) {
            throw e;
        }
        session.transfer(resultSetFlowFiles, REL_SUCCESS);
    } catch (final ProcessException | SQLException e) {
        logger.error("Issue processing SQL {} due to {}.", new Object[] { selectQuery, e });
        if (flowfile == null) {
            // This can happen if any exceptions occur while setting up the connection, statement, etc.
            logger.error("Unable to execute HiveQL select query {} due to {}. No FlowFile to route to failure", new Object[] { selectQuery, e });
            context.yield();
        } else {
            if (context.hasIncomingConnection()) {
                logger.error("Unable to execute HiveQL select query {} for {} due to {}; routing to failure", new Object[] { selectQuery, flowfile, e });
                flowfile = session.penalize(flowfile);
            } else {
                logger.error("Unable to execute HiveQL select query {} due to {}; routing to failure", new Object[] { selectQuery, e });
                context.yield();
            }
            session.transfer(flowfile, REL_FAILURE);
        }
    } finally {
        if (fileToProcess != null) {
            session.remove(fileToProcess);
        }
    }
}
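SelectHiveQL keeps one running StopWatch for the whole query and calls getElapsed(TimeUnit.MILLISECONDS) once per result fragment, so each provenance event carries the cumulative time since execution began. A reduced sketch of that per-fragment timing loop follows; the class name FragmentTiming and the sleep are stand-ins for writing a result-set fragment.

import java.util.concurrent.TimeUnit;
import org.apache.nifi.util.StopWatch;

public class FragmentTiming {
    public static void main(String[] args) throws InterruptedException {
        final StopWatch stopWatch = new StopWatch(true);
        for (int fragmentIndex = 0; fragmentIndex < 3; fragmentIndex++) {
            Thread.sleep(30); // stand-in for writing one result-set fragment
            // cumulative elapsed time since the watch was started
            final long elapsed = stopWatch.getElapsed(TimeUnit.MILLISECONDS);
            System.out.printf("fragment %d written after %d ms%n", fragmentIndex, elapsed);
        }
    }
}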
Use of org.apache.nifi.util.StopWatch in project nifi by apache.
The class StoreInKiteDataset, method onTrigger.
@Override
public void onTrigger(ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final View<Record> target = load(context, flowFile);
    final Schema schema = target.getDataset().getDescriptor().getSchema();
    try {
        StopWatch timer = new StopWatch(true);
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(InputStream in) throws IOException {
                try (DataFileStream<Record> stream = new DataFileStream<>(in, AvroUtil.newDatumReader(schema, Record.class))) {
                    IncompatibleSchemaException.check(SchemaValidationUtil.canRead(stream.getSchema(), schema), "Incompatible file schema %s, expected %s", stream.getSchema(), schema);
                    long written = 0L;
                    try (DatasetWriter<Record> writer = target.newWriter()) {
                        for (Record record : stream) {
                            writer.write(record);
                            written += 1;
                        }
                    } finally {
                        session.adjustCounter("Stored records", written, true);
                    }
                }
            }
        });
        timer.stop();
        session.getProvenanceReporter().send(flowFile, target.getUri().toString(), timer.getDuration(TimeUnit.MILLISECONDS), true);
        session.transfer(flowFile, SUCCESS);
    } catch (ProcessException | DatasetIOException e) {
        getLogger().error("Failed to read FlowFile", e);
        session.transfer(flowFile, FAILURE);
    } catch (ValidationException e) {
        getLogger().error(e.getMessage());
        getLogger().debug("Incompatible schema error", e);
        session.transfer(flowFile, INCOMPATIBLE);
    }
}
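StoreInKiteDataset brackets the dataset write between the auto-starting constructor and an explicit stop(), then hands the numeric duration to the provenance send event. A compact sketch of the same bracket around an ordinary file write follows; the class name TimedWrite and the temp-file I/O are illustrative stand-ins for the Kite dataset writer.

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.TimeUnit;
import org.apache.nifi.util.StopWatch;

public class TimedWrite {
    public static void main(String[] args) throws Exception {
        final StopWatch timer = new StopWatch(true);
        final Path tmp = Files.createTempFile("kite-sketch", ".tmp");
        Files.write(tmp, new byte[4096]); // stand-in for writing records to the dataset
        timer.stop();
        // the numeric duration is what the processor passes to ProvenanceReporter.send(...)
        System.out.println("Stored records in " + timer.getDuration(TimeUnit.MILLISECONDS) + " ms");
        Files.deleteIfExists(tmp);
    }
}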