Use of org.apache.nifi.components.state.StateMap in project nifi by apache.
The class EnforceOrder, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
final ComponentLog logger = getLogger();
final Integer batchCount = context.getProperty(BATCH_COUNT).asInteger();
List<FlowFile> flowFiles = session.get(batchCount);
if (flowFiles == null || flowFiles.isEmpty()) {
return;
}
final StateMap stateMap;
try {
stateMap = context.getStateManager().getState(Scope.LOCAL);
} catch (final IOException e) {
logger.error("Failed to retrieve state from StateManager due to {}" + e, e);
context.yield();
return;
}
final OrderingContext oc = new OrderingContext(context, session);
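// Seed the ordering context with the group states persisted by previous onTrigger invocations.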
oc.groupStates.putAll(stateMap.toMap());
for (FlowFile flowFile : flowFiles) {
oc.setFlowFile(flowFile);
if (oc.flowFile == null) {
break;
}
if (!oc.computeGroupId() || !oc.computeOrder() || !oc.computeInitialOrder() || !oc.computeMaxOrder()) {
continue;
}
// At this point, the flow file is confirmed to be valid.
oc.markFlowFileValid();
}
oc.transferFlowFiles();
oc.cleanupInactiveStates();
try {
context.getStateManager().setState(oc.groupStates, Scope.LOCAL);
} catch (final IOException e) {
throw new RuntimeException("Failed to update state due to " + e + ". The session will be rolled back and the processor yielded for a while.", e);
}
}
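EnforceOrder follows the standard NiFi local-state pattern: read the StateMap once per onTrigger, mutate a copy, and persist it only after the session's work is done (StateMap itself is immutable). Below is a minimal sketch of that pattern in isolation; the helper name is illustrative, not part of NiFi.
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.nifi.components.state.Scope;
import org.apache.nifi.components.state.StateManager;
import org.apache.nifi.components.state.StateMap;

// Hypothetical helper showing the read-copy-write pattern EnforceOrder uses above.
void updateLocalState(final StateManager stateManager) throws IOException {
    // StateMap is immutable, so copy it into a mutable map before changing anything.
    final StateMap stateMap = stateManager.getState(Scope.LOCAL);
    final Map<String, String> mutableState = new HashMap<>(stateMap.toMap());
    // ... mutate mutableState while processing flow files ...
    // setState replaces the previously stored map in its entirety.
    stateManager.setState(mutableState, Scope.LOCAL);
}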
Use of org.apache.nifi.components.state.StateMap in project nifi by apache.
The class GenerateTableFetch, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
// Fetch the column/table info once (if the table name and max value columns are not dynamic). Otherwise do the setup later
if (!isDynamicTableName && !isDynamicMaxValues && !setupComplete.get()) {
super.setup(context);
}
ProcessSession session = sessionFactory.createSession();
FlowFile fileToProcess = null;
if (context.hasIncomingConnection()) {
fileToProcess = session.get();
if (fileToProcess == null) {
// Incoming connection with no flow file available, do no work (see capability description)
return;
}
}
final ComponentLog logger = getLogger();
final DBCPService dbcpService = context.getProperty(DBCP_SERVICE).asControllerService(DBCPService.class);
final DatabaseAdapter dbAdapter = dbAdapters.get(context.getProperty(DB_TYPE).getValue());
final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(fileToProcess).getValue();
final String columnNames = context.getProperty(COLUMN_NAMES).evaluateAttributeExpressions(fileToProcess).getValue();
final String maxValueColumnNames = context.getProperty(MAX_VALUE_COLUMN_NAMES).evaluateAttributeExpressions(fileToProcess).getValue();
final int partitionSize = context.getProperty(PARTITION_SIZE).evaluateAttributeExpressions(fileToProcess).asInteger();
final String customWhereClause = context.getProperty(WHERE_CLAUSE).evaluateAttributeExpressions(fileToProcess).getValue();
final StateManager stateManager = context.getStateManager();
final StateMap stateMap;
FlowFile finalFileToProcess = fileToProcess;
try {
stateMap = stateManager.getState(Scope.CLUSTER);
} catch (final IOException ioe) {
logger.error("Failed to retrieve observed maximum values from the State Manager. Will not perform " + "query until this is accomplished.", ioe);
context.yield();
return;
}
try {
// Make a mutable copy of the current state property map. This will be updated by the result row callback, and eventually
// set as the current state map (after the session has been committed)
final Map<String, String> statePropertyMap = new HashMap<>(stateMap.toMap());
// If an initial max value for column(s) has been specified using properties, and this column is not in the state manager, sync them to the state property map
for (final Map.Entry<String, String> maxProp : maxValueProperties.entrySet()) {
String maxPropKey = maxProp.getKey().toLowerCase();
String fullyQualifiedMaxPropKey = getStateKey(tableName, maxPropKey);
if (!statePropertyMap.containsKey(fullyQualifiedMaxPropKey)) {
String newMaxPropValue;
// If we can't find the value at the fully-qualified key name, it is possible (under a previous scheme)
// the value has been stored under a key that is only the column name. Fall back to check the column name,
// but store the new initial max value under the fully-qualified key.
if (statePropertyMap.containsKey(maxPropKey)) {
newMaxPropValue = statePropertyMap.get(maxPropKey);
} else {
newMaxPropValue = maxProp.getValue();
}
statePropertyMap.put(fullyQualifiedMaxPropKey, newMaxPropValue);
}
}
// Build a WHERE clause with maximum-value columns (if they exist), and a list of column names that will contain MAX(<column>) aliases. The
// executed SQL query will retrieve the count of all records after the filter(s) have been applied, as well as the new maximum values for the
// specified columns. This allows the processor to generate the correctly partitioned SQL statements as well as to update the state with the
// latest observed maximum values.
String whereClause = null;
List<String> maxValueColumnNameList = StringUtils.isEmpty(maxValueColumnNames) ? new ArrayList<>(0) : Arrays.asList(maxValueColumnNames.split("\\s*,\\s*"));
List<String> maxValueClauses = new ArrayList<>(maxValueColumnNameList.size());
String columnsClause = null;
List<String> maxValueSelectColumns = new ArrayList<>(maxValueColumnNameList.size() + 1);
maxValueSelectColumns.add("COUNT(*)");
// For each maximum-value column, get a WHERE filter and a MAX(column) alias
IntStream.range(0, maxValueColumnNameList.size()).forEach((index) -> {
String colName = maxValueColumnNameList.get(index);
maxValueSelectColumns.add("MAX(" + colName + ") " + colName);
String maxValue = getColumnStateMaxValue(tableName, statePropertyMap, colName);
if (!StringUtils.isEmpty(maxValue)) {
if (columnTypeMap.isEmpty() || getColumnType(tableName, colName) == null) {
// The column type cache is empty (e.g. after an instance restart), so re-cache the column types
super.setup(context, false, finalFileToProcess);
}
Integer type = getColumnType(tableName, colName);
// Add a condition for the WHERE clause
maxValueClauses.add(colName + (index == 0 ? " > " : " >= ") + getLiteralByType(type, maxValue, dbAdapter.getName()));
}
});
if (customWhereClause != null) {
// Add the custom WHERE clause (if defined) to the list of existing clauses.
maxValueClauses.add("(" + customWhereClause + ")");
}
whereClause = StringUtils.join(maxValueClauses, " AND ");
columnsClause = StringUtils.join(maxValueSelectColumns, ", ");
// Build a SELECT query with maximum-value columns (if present)
final String selectQuery = dbAdapter.getSelectStatement(tableName, columnsClause, whereClause, null, null, null);
long rowCount = 0;
try (final Connection con = dbcpService.getConnection();
final Statement st = con.createStatement()) {
final Integer queryTimeout = context.getProperty(QUERY_TIMEOUT).evaluateAttributeExpressions(fileToProcess).asTimePeriod(TimeUnit.SECONDS).intValue();
// timeout in seconds
st.setQueryTimeout(queryTimeout);
logger.debug("Executing {}", new Object[] { selectQuery });
final ResultSet resultSet = st.executeQuery(selectQuery);
if (resultSet.next()) {
// Total row count is in the first column
rowCount = resultSet.getLong(1);
// Update the state map with the newly-observed maximum values
ResultSetMetaData rsmd = resultSet.getMetaData();
for (int i = 2; i <= rsmd.getColumnCount(); i++) {
// Some JDBC drivers consider the column name and the column label to be different things.
// Since this column has been aliased, check the label first;
// if there is no label, fall back to the column name.
String resultColumnName = (StringUtils.isNotEmpty(rsmd.getColumnLabel(i)) ? rsmd.getColumnLabel(i) : rsmd.getColumnName(i)).toLowerCase();
String fullyQualifiedStateKey = getStateKey(tableName, resultColumnName);
String resultColumnCurrentMax = statePropertyMap.get(fullyQualifiedStateKey);
if (StringUtils.isEmpty(resultColumnCurrentMax) && !isDynamicTableName) {
// If we can't find the value at the fully-qualified key name and the table name is static, it is possible (under a previous scheme)
// the value has been stored under a key that is only the column name. Fall back to check the column name; either way, when a new
// maximum value is observed, it will be stored under the fully-qualified key from then on.
resultColumnCurrentMax = statePropertyMap.get(resultColumnName);
}
int type = rsmd.getColumnType(i);
if (isDynamicTableName) {
// We haven't pre-populated the column type map if the table name is dynamic, so do it here
columnTypeMap.put(fullyQualifiedStateKey, type);
}
try {
String newMaxValue = getMaxValueFromRow(resultSet, i, type, resultColumnCurrentMax, dbAdapter.getName());
if (newMaxValue != null) {
statePropertyMap.put(fullyQualifiedStateKey, newMaxValue);
}
} catch (ParseException | IOException pie) {
// Fail the whole thing here before we start creating flow files and such
throw new ProcessException(pie);
}
}
} else {
// Something is very wrong here, one row (even if count is zero) should be returned
throw new SQLException("No rows returned from metadata query: " + selectQuery);
}
// For each maximum-value column, get a right-bounding WHERE condition
IntStream.range(0, maxValueColumnNameList.size()).forEach((index) -> {
String colName = maxValueColumnNameList.get(index);
maxValueSelectColumns.add("MAX(" + colName + ") " + colName);
String maxValue = getColumnStateMaxValue(tableName, statePropertyMap, colName);
if (!StringUtils.isEmpty(maxValue)) {
if (columnTypeMap.isEmpty() || getColumnType(tableName, colName) == null) {
// The column type cache is empty (e.g. after an instance restart), so re-cache the column types
super.setup(context, false, finalFileToProcess);
}
Integer type = getColumnType(tableName, colName);
// Add a condition for the WHERE clause
maxValueClauses.add(colName + " <= " + getLiteralByType(type, maxValue, dbAdapter.getName()));
}
});
// Update the WHERE list to include the new right-hand boundaries
whereClause = StringUtils.join(maxValueClauses, " AND ");
final long numberOfFetches = (partitionSize == 0) ? 1 : (rowCount / partitionSize) + (rowCount % partitionSize == 0 ? 0 : 1);
// Generate SQL statements to read "pages" of data
for (long i = 0; i < numberOfFetches; i++) {
Long limit = partitionSize == 0 ? null : (long) partitionSize;
Long offset = partitionSize == 0 ? null : i * partitionSize;
final String maxColumnNames = StringUtils.join(maxValueColumnNameList, ", ");
final String query = dbAdapter.getSelectStatement(tableName, columnNames, whereClause, maxColumnNames, limit, offset);
FlowFile sqlFlowFile = (fileToProcess == null) ? session.create() : session.create(fileToProcess);
sqlFlowFile = session.write(sqlFlowFile, out -> out.write(query.getBytes()));
sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.tableName", tableName);
if (columnNames != null) {
sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.columnNames", columnNames);
}
if (StringUtils.isNotBlank(whereClause)) {
sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.whereClause", whereClause);
}
if (StringUtils.isNotBlank(maxColumnNames)) {
sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.maxColumnNames", maxColumnNames);
}
sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.limit", String.valueOf(limit));
if (partitionSize != 0) {
sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.offset", String.valueOf(offset));
}
session.transfer(sqlFlowFile, REL_SUCCESS);
}
if (fileToProcess != null) {
session.remove(fileToProcess);
}
} catch (SQLException e) {
if (fileToProcess != null) {
logger.error("Unable to execute SQL select query {} due to {}, routing {} to failure", new Object[] { selectQuery, e, fileToProcess });
fileToProcess = session.putAttribute(fileToProcess, "generatetablefetch.sql.error", e.getMessage());
session.transfer(fileToProcess, REL_FAILURE);
} else {
logger.error("Unable to execute SQL select query {} due to {}", new Object[] { selectQuery, e });
throw new ProcessException(e);
}
}
session.commit();
try {
// Update the state
stateManager.setState(statePropertyMap, Scope.CLUSTER);
} catch (IOException ioe) {
logger.error("{} failed to update State Manager, observed maximum values will not be recorded. " + "Also, any generated SQL statements may be duplicated.", new Object[] { this, ioe });
}
} catch (final ProcessException pe) {
// Log the cause of the ProcessException if it is available
Throwable t = (pe.getCause() == null ? pe : pe.getCause());
logger.error("Error during processing: {}", new Object[] { t.getMessage() }, t);
session.rollback();
context.yield();
}
}
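The paging arithmetic near the end (numberOfFetches) is a ceiling division over the counted rows. A standalone sketch of the same computation, with a worked case in the comment:
// Ceiling division, matching the expression above: e.g. rowCount = 25 and
// partitionSize = 10 yields 3 fetches (pages of 10, 10, and 5 rows).
// A partitionSize of 0 means one unpaged query, with limit and offset left null.
static long numberOfFetches(final long rowCount, final int partitionSize) {
    if (partitionSize == 0) {
        return 1;
    }
    return (rowCount / partitionSize) + (rowCount % partitionSize == 0 ? 0 : 1);
}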
Use of org.apache.nifi.components.state.StateMap in project nifi by apache.
The class GetHTTP, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
final ComponentLog logger = getLogger();
final ProcessSession session = sessionFactory.createSession();
final FlowFile incomingFlowFile = session.get();
if (incomingFlowFile != null) {
session.transfer(incomingFlowFile, REL_SUCCESS);
logger.warn("found FlowFile {} in input queue; transferring to success", new Object[] { incomingFlowFile });
}
// get the URL
final String url = context.getProperty(URL).evaluateAttributeExpressions().getValue();
final URI uri;
String source = url;
try {
uri = new URI(url);
source = uri.getHost();
} catch (final URISyntaxException swallow) {
// this won't happen because the URL has already been validated
}
// get the ssl context service
final SSLContextService sslContextService = context.getProperty(SSL_CONTEXT_SERVICE).asControllerService(SSLContextService.class);
// create the connection manager
final HttpClientConnectionManager conMan;
if (sslContextService == null) {
conMan = new BasicHttpClientConnectionManager();
} else {
final SSLContext sslContext;
try {
sslContext = createSSLContext(sslContextService);
} catch (final Exception e) {
throw new ProcessException(e);
}
final SSLConnectionSocketFactory sslsf = new SSLConnectionSocketFactory(sslContext);
// Also include a plain socket factory for regular http connections (especially proxies)
final Registry<ConnectionSocketFactory> socketFactoryRegistry = RegistryBuilder.<ConnectionSocketFactory>create().register("https", sslsf).register("http", PlainConnectionSocketFactory.getSocketFactory()).build();
conMan = new BasicHttpClientConnectionManager(socketFactoryRegistry);
}
try {
// build the request configuration
final RequestConfig.Builder requestConfigBuilder = RequestConfig.custom();
requestConfigBuilder.setConnectionRequestTimeout(context.getProperty(DATA_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).intValue());
requestConfigBuilder.setConnectTimeout(context.getProperty(CONNECTION_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).intValue());
requestConfigBuilder.setSocketTimeout(context.getProperty(DATA_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).intValue());
requestConfigBuilder.setRedirectsEnabled(context.getProperty(FOLLOW_REDIRECTS).asBoolean());
switch(context.getProperty(REDIRECT_COOKIE_POLICY).getValue()) {
case STANDARD_COOKIE_POLICY_STR:
requestConfigBuilder.setCookieSpec(CookieSpecs.STANDARD);
break;
case STRICT_COOKIE_POLICY_STR:
requestConfigBuilder.setCookieSpec(CookieSpecs.STANDARD_STRICT);
break;
case NETSCAPE_COOKIE_POLICY_STR:
requestConfigBuilder.setCookieSpec(CookieSpecs.NETSCAPE);
break;
case IGNORE_COOKIE_POLICY_STR:
requestConfigBuilder.setCookieSpec(CookieSpecs.IGNORE_COOKIES);
break;
case DEFAULT_COOKIE_POLICY_STR:
default:
requestConfigBuilder.setCookieSpec(CookieSpecs.DEFAULT);
}
// build the http client
final HttpClientBuilder clientBuilder = HttpClientBuilder.create();
clientBuilder.setConnectionManager(conMan);
// include the user agent
final String userAgent = context.getProperty(USER_AGENT).getValue();
if (userAgent != null) {
clientBuilder.setUserAgent(userAgent);
}
// set the ssl context if necessary
if (sslContextService != null) {
clientBuilder.setSslcontext(sslContextService.createSSLContext(ClientAuth.REQUIRED));
}
final String username = context.getProperty(USERNAME).getValue();
final String password = context.getProperty(PASSWORD).getValue();
// set the credentials if appropriate
if (username != null) {
final CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
if (password == null) {
credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(username));
} else {
credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(username, password));
}
clientBuilder.setDefaultCredentialsProvider(credentialsProvider);
}
// Set the proxy if specified
if (context.getProperty(PROXY_HOST).isSet() && context.getProperty(PROXY_PORT).isSet()) {
final String host = context.getProperty(PROXY_HOST).getValue();
final int port = context.getProperty(PROXY_PORT).asInteger();
clientBuilder.setProxy(new HttpHost(host, port));
}
// create request
final HttpGet get = new HttpGet(url);
get.setConfig(requestConfigBuilder.build());
final StateMap beforeStateMap;
try {
beforeStateMap = context.getStateManager().getState(Scope.LOCAL);
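// Conditional GET: replay the Last-Modified and ETag validators stored for this URL,
// so the server can answer 304 Not Modified when the content is unchanged.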
final String lastModified = beforeStateMap.get(LAST_MODIFIED + ":" + url);
if (lastModified != null) {
get.addHeader(HEADER_IF_MODIFIED_SINCE, parseStateValue(lastModified).getValue());
}
final String etag = beforeStateMap.get(ETAG + ":" + url);
if (etag != null) {
get.addHeader(HEADER_IF_NONE_MATCH, parseStateValue(etag).getValue());
}
} catch (final IOException ioe) {
throw new ProcessException(ioe);
}
final String accept = context.getProperty(ACCEPT_CONTENT_TYPE).getValue();
if (accept != null) {
get.addHeader(HEADER_ACCEPT, accept);
}
// Add dynamic headers
PropertyValue customHeaderValue;
for (PropertyDescriptor customProperty : customHeaders) {
customHeaderValue = context.getProperty(customProperty).evaluateAttributeExpressions();
if (StringUtils.isNotBlank(customHeaderValue.getValue())) {
get.addHeader(customProperty.getName(), customHeaderValue.getValue());
}
}
// create the http client
try (final CloseableHttpClient client = clientBuilder.build()) {
// NOTE: including this inner try in order to swallow exceptions on close
try {
final StopWatch stopWatch = new StopWatch(true);
final HttpResponse response = client.execute(get);
final int statusCode = response.getStatusLine().getStatusCode();
if (statusCode == NOT_MODIFIED) {
logger.info("content not retrieved because server returned HTTP Status Code {}: Not Modified", new Object[] { NOT_MODIFIED });
context.yield();
// doing a commit in case there were flow files in the input queue
session.commit();
return;
}
final String statusExplanation = response.getStatusLine().getReasonPhrase();
if ((statusCode >= 300) || (statusCode == 204)) {
logger.error("received status code {}:{} from {}", new Object[] { statusCode, statusExplanation, url });
// doing a commit in case there were flow files in the input queue
session.commit();
return;
}
FlowFile flowFile = session.create();
flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), context.getProperty(FILENAME).evaluateAttributeExpressions().getValue());
flowFile = session.putAttribute(flowFile, this.getClass().getSimpleName().toLowerCase() + ".remote.source", source);
flowFile = session.importFrom(response.getEntity().getContent(), flowFile);
final Header contentTypeHeader = response.getFirstHeader("Content-Type");
if (contentTypeHeader != null) {
final String contentType = contentTypeHeader.getValue();
if (!contentType.trim().isEmpty()) {
flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), contentType.trim());
}
}
final long flowFileSize = flowFile.getSize();
stopWatch.stop();
final String dataRate = stopWatch.calculateDataRate(flowFileSize);
session.getProvenanceReporter().receive(flowFile, url, stopWatch.getDuration(TimeUnit.MILLISECONDS));
session.transfer(flowFile, REL_SUCCESS);
logger.info("Successfully received {} from {} at a rate of {}; transferred to success", new Object[] { flowFile, url, dataRate });
session.commit();
updateStateMap(context, response, beforeStateMap, url);
} catch (final IOException e) {
context.yield();
session.rollback();
logger.error("Failed to retrieve file from {} due to {}; rolling back session", new Object[] { url, e.getMessage() }, e);
throw new ProcessException(e);
} catch (final Throwable t) {
context.yield();
session.rollback();
logger.error("Failed to process due to {}; rolling back session", new Object[] { t.getMessage() }, t);
throw t;
}
} catch (final IOException e) {
logger.debug("Error closing client due to {}, continuing.", new Object[] { e.getMessage() });
}
} finally {
conMan.shutdown();
}
}
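GetHTTP keys its cache validators by URL in local state (the LAST_MODIFIED + ":" + url and ETAG + ":" + url lookups above). The updateStateMap call made after the successful transfer is not shown here; the sketch below is a plausible counterpart to the read side, and it omits the timestamp tagging that parseStateValue implies the real implementation uses.
// Hypothetical sketch of the write side: persist the response's cache validators
// under the same per-URL keys that the request side reads.
private void saveCacheValidators(final ProcessContext context, final HttpResponse response, final StateMap beforeStateMap, final String url) throws IOException {
    final Map<String, String> newState = new HashMap<>(beforeStateMap.toMap());
    final Header lastModified = response.getFirstHeader("Last-Modified");
    if (lastModified != null) {
        newState.put(LAST_MODIFIED + ":" + url, lastModified.getValue());
    }
    final Header etag = response.getFirstHeader("ETag");
    if (etag != null) {
        newState.put(ETAG + ":" + url, etag.getValue());
    }
    context.getStateManager().setState(newState, Scope.LOCAL);
}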
Use of org.apache.nifi.components.state.StateMap in project nifi by apache.
The class GetSplunk, method loadState.
private TimeRange loadState(StateManager stateManager) throws IOException {
final StateMap stateMap = stateManager.getState(Scope.CLUSTER);
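// A version below zero means no state has ever been stored for this scope.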
if (stateMap.getVersion() < 0) {
getLogger().debug("No previous state found");
return null;
}
final String earliest = stateMap.get(EARLIEST_TIME_KEY);
final String latest = stateMap.get(LATEST_TIME_KEY);
getLogger().debug("Loaded state with earliestTime of {} and latestTime of {}", new Object[] { earliest, latest });
if (StringUtils.isBlank(earliest) && StringUtils.isBlank(latest)) {
return null;
} else {
return new TimeRange(earliest, latest);
}
}
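The natural counterpart to loadState writes the two keys back to cluster state. A minimal sketch follows; the TimeRange accessors are assumptions, since that class is not shown here, and GetSplunk's actual saveState may differ.
// Hypothetical counterpart to loadState above.
private void saveState(final StateManager stateManager, final TimeRange timeRange) throws IOException {
    final Map<String, String> state = new HashMap<>();
    if (timeRange.getEarliestTime() != null) {
        state.put(EARLIEST_TIME_KEY, timeRange.getEarliestTime());
    }
    if (timeRange.getLatestTime() != null) {
        state.put(LATEST_TIME_KEY, timeRange.getLatestTime());
    }
    stateManager.setState(state, Scope.CLUSTER);
}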
Use of org.apache.nifi.components.state.StateMap in project nifi by apache.
The class TestGetSplunk, method testGetWithManagedFromCurrentUsingIndexTime.
@Test
public void testGetWithManagedFromCurrentUsingIndexTime() throws IOException, ParseException {
final String query = "search tcp:7879";
final String outputMode = GetSplunk.ATOM_VALUE.getValue();
runner.setProperty(GetSplunk.QUERY, query);
runner.setProperty(GetSplunk.OUTPUT_MODE, outputMode);
runner.setProperty(GetSplunk.TIME_RANGE_STRATEGY, GetSplunk.MANAGED_CURRENT_VALUE.getValue());
runner.setProperty(GetSplunk.TIME_FIELD_STRATEGY, GetSplunk.INDEX_TIME_VALUE.getValue());
final String resultContent = "fake results";
final ByteArrayInputStream input = new ByteArrayInputStream(resultContent.getBytes(StandardCharsets.UTF_8));
when(service.export(eq(query), any(JobExportArgs.class))).thenReturn(input);
// run once and don't shut down, shouldn't produce any results first time
runner.run(1, false);
runner.assertAllFlowFilesTransferred(GetSplunk.REL_SUCCESS, 0);
// verify the service was not queried on the first run
verify(service, times(0)).export(eq(query), any(JobExportArgs.class));
final StateMap state = runner.getStateManager().getState(Scope.CLUSTER);
Assert.assertNotNull(state);
Assert.assertTrue(state.getVersion() > 0);
// save the latest time from the first run, which should become the earliest time of the next run
final String lastLatest = state.get(GetSplunk.LATEST_TIME_KEY);
final SimpleDateFormat format = new SimpleDateFormat(GetSplunk.DATE_TIME_FORMAT);
format.setTimeZone(TimeZone.getTimeZone("UTC"));
final Date lastLatestDate = format.parse(lastLatest);
final String expectedLatest = format.format(new Date(lastLatestDate.getTime() + 1));
// run again
runner.run(1, false);
runner.assertAllFlowFilesTransferred(GetSplunk.REL_SUCCESS, 1);
final ArgumentCaptor<JobExportArgs> capture = ArgumentCaptor.forClass(JobExportArgs.class);
verify(service, times(1)).export(eq(query), capture.capture());
// on the second execution, the earliest time should be the previous latest time
final JobExportArgs actualArgs = capture.getValue();
Assert.assertNotNull(actualArgs);
Assert.assertEquals(expectedLatest, actualArgs.get("index_earliest"));
Assert.assertNotNull(actualArgs.get("index_latest"));
}
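Beyond fetching state with runner.getStateManager().getState(...) as the test above does, the mock state manager in nifi-mock also exposes direct assertions that can shorten such tests. A sketch, assuming the MockStateManager assertion helpers; "expectedValue" is a placeholder for whatever the test computed:
// Sketch: assert state directly instead of fetching and inspecting the StateMap.
runner.getStateManager().assertStateSet(Scope.CLUSTER);
runner.getStateManager().assertStateEquals(GetSplunk.LATEST_TIME_KEY, expectedValue, Scope.CLUSTER);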