Use of org.apache.nifi.processor.ProcessSessionFactory in project nifi by apache.
From the class BinFiles, method binFlowFiles:
private int binFlowFiles(final ProcessContext context, final ProcessSessionFactory sessionFactory) {
    int flowFilesBinned = 0;
    while (binManager.getBinCount() <= context.getProperty(MAX_BIN_COUNT).asInteger()) {
        if (!isScheduled()) {
            break;
        }

        final ProcessSession session = sessionFactory.createSession();
        final List<FlowFile> flowFiles = session.get(1000);
        if (flowFiles.isEmpty()) {
            break;
        }

        final Map<String, List<FlowFile>> flowFileGroups = new HashMap<>();
        for (FlowFile flowFile : flowFiles) {
            flowFile = this.preprocessFlowFile(context, session, flowFile);

            try {
                final String groupingIdentifier = getGroupId(context, flowFile, session);
                flowFileGroups.computeIfAbsent(groupingIdentifier, id -> new ArrayList<>()).add(flowFile);
            } catch (final Exception e) {
                getLogger().error("Could not determine which Bin to add {} to; will route to failure", new Object[] { flowFile }, e);
                session.transfer(flowFile, REL_FAILURE);
                continue;
            }
        }

        for (final Map.Entry<String, List<FlowFile>> entry : flowFileGroups.entrySet()) {
            final Set<FlowFile> unbinned = binManager.offer(entry.getKey(), entry.getValue(), session, sessionFactory);
            for (final FlowFile flowFile : unbinned) {
                Bin bin = new Bin(sessionFactory.createSession(), 0, Long.MAX_VALUE, 0, Integer.MAX_VALUE, null);
                bin.offer(flowFile, session);
                this.readyBins.add(bin);
            }
            flowFilesBinned += entry.getValue().size();
        }
    }

    return flowFilesBinned;
}
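BinFiles asks for a ProcessSessionFactory rather than a single ProcessSession because each bin commits on its own schedule: the session that pulled a FlowFile may outlive the trigger that created it, and each overflow bin above gets a dedicated session. A minimal sketch of the same session-per-unit-of-work pattern (SessionPerBatchProcessor is a hypothetical processor written for illustration, not part of NiFi):

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.AbstractSessionFactoryProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.ProcessSessionFactory;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;

public class SessionPerBatchProcessor extends AbstractSessionFactoryProcessor {

    static final Relationship REL_SUCCESS = new Relationship.Builder()
            .name("success")
            .build();

    @Override
    public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
        // One session per FlowFile: each unit of work commits or rolls back
        // independently, which a single framework-supplied session cannot offer.
        while (isScheduled()) {
            final ProcessSession session = sessionFactory.createSession();
            final FlowFile flowFile = session.get();
            if (flowFile == null) {
                session.rollback();
                break;
            }
            session.transfer(flowFile, REL_SUCCESS);
            session.commit();
        }
    }
}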
Use of org.apache.nifi.processor.ProcessSessionFactory in project nifi by apache.
From the class QueryDatabaseTable, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
    // Fetch the column/table info once
    if (!setupComplete.get()) {
        super.setup(context);
    }
    ProcessSession session = sessionFactory.createSession();
    final List<FlowFile> resultSetFlowFiles = new ArrayList<>();
    final ComponentLog logger = getLogger();

    final DBCPService dbcpService = context.getProperty(DBCP_SERVICE).asControllerService(DBCPService.class);
    final DatabaseAdapter dbAdapter = dbAdapters.get(context.getProperty(DB_TYPE).getValue());
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions().getValue();
    final String columnNames = context.getProperty(COLUMN_NAMES).evaluateAttributeExpressions().getValue();
    final String maxValueColumnNames = context.getProperty(MAX_VALUE_COLUMN_NAMES).evaluateAttributeExpressions().getValue();
    final String customWhereClause = context.getProperty(WHERE_CLAUSE).evaluateAttributeExpressions().getValue();
    final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions().asInteger();
    final Integer maxRowsPerFlowFile = context.getProperty(MAX_ROWS_PER_FLOW_FILE).evaluateAttributeExpressions().asInteger();
    final Integer outputBatchSizeField = context.getProperty(OUTPUT_BATCH_SIZE).evaluateAttributeExpressions().asInteger();
    final int outputBatchSize = outputBatchSizeField == null ? 0 : outputBatchSizeField;
    final Integer maxFragments = context.getProperty(MAX_FRAGMENTS).isSet()
            ? context.getProperty(MAX_FRAGMENTS).evaluateAttributeExpressions().asInteger()
            : 0;

    final JdbcCommon.AvroConversionOptions options = JdbcCommon.AvroConversionOptions.builder()
            .recordName(tableName)
            .maxRows(maxRowsPerFlowFile)
            .convertNames(context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean())
            .useLogicalTypes(context.getProperty(USE_AVRO_LOGICAL_TYPES).asBoolean())
            .defaultPrecision(context.getProperty(DEFAULT_PRECISION).evaluateAttributeExpressions().asInteger())
            .defaultScale(context.getProperty(DEFAULT_SCALE).evaluateAttributeExpressions().asInteger())
            .build();

    final StateManager stateManager = context.getStateManager();
    final StateMap stateMap;
    try {
        stateMap = stateManager.getState(Scope.CLUSTER);
    } catch (final IOException ioe) {
        getLogger().error("Failed to retrieve observed maximum values from the State Manager. Will not perform query until this is accomplished.", ioe);
        context.yield();
        return;
    }

    // Make a mutable copy of the current state property map. This will be updated by the result row callback, and eventually
    // set as the current state map (after the session has been committed)
    final Map<String, String> statePropertyMap = new HashMap<>(stateMap.toMap());

    // If an initial max value for column(s) has been specified using properties, and this column is not in the state manager, sync them to the state property map
    for (final Map.Entry<String, String> maxProp : maxValueProperties.entrySet()) {
        String maxPropKey = maxProp.getKey().toLowerCase();
        String fullyQualifiedMaxPropKey = getStateKey(tableName, maxPropKey);
        if (!statePropertyMap.containsKey(fullyQualifiedMaxPropKey)) {
            String newMaxPropValue;
            // A value stored under the bare column name comes from a legacy state format; prefer it over the configured initial value,
            // but store the new initial max value under the fully-qualified key.
            if (statePropertyMap.containsKey(maxPropKey)) {
                newMaxPropValue = statePropertyMap.get(maxPropKey);
            } else {
                newMaxPropValue = maxProp.getValue();
            }
            statePropertyMap.put(fullyQualifiedMaxPropKey, newMaxPropValue);
        }
    }

    List<String> maxValueColumnNameList = StringUtils.isEmpty(maxValueColumnNames)
            ? null
            : Arrays.asList(maxValueColumnNames.split("\\s*,\\s*"));
    final String selectQuery = getQuery(dbAdapter, tableName, columnNames, maxValueColumnNameList, customWhereClause, statePropertyMap);
    final StopWatch stopWatch = new StopWatch(true);
    final String fragmentIdentifier = UUID.randomUUID().toString();

    try (final Connection con = dbcpService.getConnection();
         final Statement st = con.createStatement()) {

        if (fetchSize != null && fetchSize > 0) {
            try {
                st.setFetchSize(fetchSize);
            } catch (SQLException se) {
                // Not all drivers support this, just log the error (at debug level) and move on
                logger.debug("Cannot set fetch size to {} due to {}", new Object[] { fetchSize, se.getLocalizedMessage() }, se);
            }
        }

        String jdbcURL = "DBCPService";
        try {
            DatabaseMetaData databaseMetaData = con.getMetaData();
            if (databaseMetaData != null) {
                jdbcURL = databaseMetaData.getURL();
            }
        } catch (SQLException se) {
            // Ignore and use default JDBC URL. This shouldn't happen unless the driver doesn't implement getMetaData() properly
        }

        final Integer queryTimeout = context.getProperty(QUERY_TIMEOUT).evaluateAttributeExpressions().asTimePeriod(TimeUnit.SECONDS).intValue();
        // timeout in seconds
        st.setQueryTimeout(queryTimeout);

        try {
            logger.debug("Executing query {}", new Object[] { selectQuery });
            final ResultSet resultSet = st.executeQuery(selectQuery);
            int fragmentIndex = 0;
            while (true) {
                final AtomicLong nrOfRows = new AtomicLong(0L);

                FlowFile fileToProcess = session.create();
                try {
                    fileToProcess = session.write(fileToProcess, out -> {
                        // Max values will be updated in the state property map by the callback
                        final MaxValueResultSetRowCollector maxValCollector = new MaxValueResultSetRowCollector(tableName, statePropertyMap, dbAdapter);
                        try {
                            nrOfRows.set(JdbcCommon.convertToAvroStream(resultSet, out, options, maxValCollector));
                        } catch (SQLException | RuntimeException e) {
                            throw new ProcessException("Error during database query or conversion of records to Avro.", e);
                        }
                    });
                } catch (ProcessException e) {
                    // Add flowfile to results before rethrowing so it will be removed from session in outer catch
                    resultSetFlowFiles.add(fileToProcess);
                    throw e;
                }

                if (nrOfRows.get() > 0) {
                    // set attribute how many rows were selected
                    fileToProcess = session.putAttribute(fileToProcess, RESULT_ROW_COUNT, String.valueOf(nrOfRows.get()));
                    fileToProcess = session.putAttribute(fileToProcess, RESULT_TABLENAME, tableName);
                    fileToProcess = session.putAttribute(fileToProcess, CoreAttributes.MIME_TYPE.key(), JdbcCommon.MIME_TYPE_AVRO_BINARY);
                    if (maxRowsPerFlowFile > 0) {
                        fileToProcess = session.putAttribute(fileToProcess, "fragment.identifier", fragmentIdentifier);
                        fileToProcess = session.putAttribute(fileToProcess, "fragment.index", String.valueOf(fragmentIndex));
                    }

                    logger.info("{} contains {} Avro records; transferring to 'success'", new Object[] { fileToProcess, nrOfRows.get() });
                    session.getProvenanceReporter().receive(fileToProcess, jdbcURL, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
                    resultSetFlowFiles.add(fileToProcess);

                    // If we've reached the batch size, send out the flow files
                    if (outputBatchSize > 0 && resultSetFlowFiles.size() >= outputBatchSize) {
                        session.transfer(resultSetFlowFiles, REL_SUCCESS);
                        session.commit();
                        resultSetFlowFiles.clear();
                    }
                } else {
                    // If there were no rows returned, don't send the flowfile
                    session.remove(fileToProcess);
                    context.yield();
                    break;
                }

                fragmentIndex++;
                if (maxFragments > 0 && fragmentIndex >= maxFragments) {
                    break;
                }
            }

            // Even though the maximum value and total count are known at this point, to maintain consistent behavior if Output Batch Size is set, do not store the attributes
            if (outputBatchSize == 0) {
                for (int i = 0; i < resultSetFlowFiles.size(); i++) {
                    // Add maximum values as attributes
                    for (Map.Entry<String, String> entry : statePropertyMap.entrySet()) {
                        // Get just the column name from the key
                        String key = entry.getKey();
                        String colName = key.substring(key.lastIndexOf(NAMESPACE_DELIMITER) + NAMESPACE_DELIMITER.length());
                        resultSetFlowFiles.set(i, session.putAttribute(resultSetFlowFiles.get(i), "maxvalue." + colName, entry.getValue()));
                    }

                    // set count on all FlowFiles
                    if (maxRowsPerFlowFile > 0) {
                        resultSetFlowFiles.set(i, session.putAttribute(resultSetFlowFiles.get(i), "fragment.count", Integer.toString(fragmentIndex)));
                    }
                }
            }
        } catch (final SQLException e) {
            // rethrow so the outer catch can remove the partial FlowFiles from the session
            throw e;
        }

        session.transfer(resultSetFlowFiles, REL_SUCCESS);
    } catch (final ProcessException | SQLException e) {
        logger.error("Unable to execute SQL select query {} due to {}", new Object[] { selectQuery, e });
        if (!resultSetFlowFiles.isEmpty()) {
            session.remove(resultSetFlowFiles);
        }
        context.yield();
    } finally {
        session.commit();
        try {
            // Update the state
            stateManager.setState(statePropertyMap, Scope.CLUSTER);
        } catch (IOException ioe) {
            getLogger().error("{} failed to update State Manager, maximum observed values will not be recorded", new Object[] { this, ioe });
        }
    }
}
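Note how the Output Batch Size branch commits the session in the middle of onTrigger and keeps reading the result set. That early release is only possible because the processor owns its session via the factory instead of being handed one by the framework. A trimmed sketch of just that pattern (maybeFlushBatch is a hypothetical helper written for illustration; REL_SUCCESS as in the processor above):

// Once the accumulated FlowFiles reach the configured batch size, transfer and
// commit them so downstream processors can start before the full result set is read.
private void maybeFlushBatch(final ProcessSession session, final List<FlowFile> batch, final int outputBatchSize) {
    if (outputBatchSize > 0 && batch.size() >= outputBatchSize) {
        session.transfer(batch, REL_SUCCESS);
        session.commit();
        // Committed FlowFiles can no longer be modified, which is why the processor
        // above skips the maxvalue.* and fragment.count attributes when batching.
        batch.clear();
    }
}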
Use of org.apache.nifi.processor.ProcessSessionFactory in project nifi by apache.
From the class MergeRecord, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
    RecordBinManager manager = binManager.get();
    while (manager == null) {
        manager = new RecordBinManager(context, sessionFactory, getLogger());
        manager.setMaxBinAge(context.getProperty(MAX_BIN_AGE).asTimePeriod(TimeUnit.NANOSECONDS), TimeUnit.NANOSECONDS);

        final boolean updated = binManager.compareAndSet(null, manager);
        if (!updated) {
            manager = binManager.get();
        }
    }

    final ProcessSession session = sessionFactory.createSession();
    final List<FlowFile> flowFiles = session.get(FlowFileFilters.newSizeBasedFilter(250, DataUnit.KB, 250));
    if (getLogger().isDebugEnabled()) {
        final List<String> ids = flowFiles.stream().map(ff -> "id=" + ff.getId()).collect(Collectors.toList());
        getLogger().debug("Pulled {} FlowFiles from queue: {}", new Object[] { ids.size(), ids });
    }

    final String mergeStrategy = context.getProperty(MERGE_STRATEGY).getValue();
    final boolean block;
    if (MERGE_STRATEGY_DEFRAGMENT.equals(mergeStrategy)) {
        block = true;
    } else if (context.getProperty(CORRELATION_ATTRIBUTE_NAME).isSet()) {
        block = true;
    } else {
        block = false;
    }

    try {
        for (final FlowFile flowFile : flowFiles) {
            try {
                binFlowFile(context, flowFile, session, manager, block);
            } catch (final Exception e) {
                getLogger().error("Failed to bin {} due to {}", new Object[] { flowFile, e });
                session.transfer(flowFile, REL_FAILURE);
            }
        }
    } finally {
        session.commit();
    }

    try {
        manager.completeExpiredBins();
    } catch (final Exception e) {
        getLogger().error("Failed to merge FlowFiles to create new bin due to " + e, e);
    }

    if (flowFiles.isEmpty()) {
        getLogger().debug("No FlowFiles to bin; will yield");
        context.yield();
    }
}
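The while loop at the top is a lock-free lazy initialization: concurrent onTrigger threads may each construct a RecordBinManager, but compareAndSet guarantees exactly one instance is installed, and the losers discard theirs and adopt the winner's. The same idiom in isolation (a generic sketch using only the JDK, not NiFi code):

import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Supplier;

final class LazyRef<T> {
    private final AtomicReference<T> ref = new AtomicReference<>();

    T getOrCreate(final Supplier<T> factory) {
        T value = ref.get();
        while (value == null) {
            final T candidate = factory.get();
            // Only one thread's candidate is installed; every other thread re-reads it.
            if (ref.compareAndSet(null, candidate)) {
                value = candidate;
            } else {
                value = ref.get();
            }
        }
        return value;
    }
}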
Use of org.apache.nifi.processor.ProcessSessionFactory in project nifi by apache.
From the class ListenHTTPServlet, method doPost:
@Override
protected void doPost(final HttpServletRequest request, final HttpServletResponse response) throws ServletException, IOException {
    final ProcessContext context = processContext;

    ProcessSessionFactory sessionFactory;
    do {
        sessionFactory = sessionFactoryHolder.get();
        if (sessionFactory == null) {
            try {
                Thread.sleep(10);
            } catch (final InterruptedException e) {
            }
        }
    } while (sessionFactory == null);

    final ProcessSession session = sessionFactory.createSession();
    FlowFile flowFile = null;
    String holdUuid = null;
    String foundSubject = null;
    try {
        final long n = filesReceived.getAndIncrement() % FILES_BEFORE_CHECKING_DESTINATION_SPACE;
        if (n == 0 || !spaceAvailable.get()) {
            if (context.getAvailableRelationships().isEmpty()) {
                spaceAvailable.set(false);
                if (logger.isDebugEnabled()) {
                    logger.debug("Received request from " + request.getRemoteHost() + " but no space available; Indicating Service Unavailable");
                }
                response.sendError(HttpServletResponse.SC_SERVICE_UNAVAILABLE);
                return;
            } else {
                spaceAvailable.set(true);
            }
        }

        response.setHeader("Content-Type", MediaType.TEXT_PLAIN);

        final boolean contentGzipped = Boolean.parseBoolean(request.getHeader(GZIPPED_HEADER));
        final X509Certificate[] certs = (X509Certificate[]) request.getAttribute("javax.servlet.request.X509Certificate");
        foundSubject = DEFAULT_FOUND_SUBJECT;
        if (certs != null && certs.length > 0) {
            for (final X509Certificate cert : certs) {
                foundSubject = cert.getSubjectDN().getName();
                if (authorizedPattern.matcher(foundSubject).matches()) {
                    break;
                } else {
                    logger.warn("Rejecting transfer attempt from " + foundSubject + " because the DN is not authorized, host=" + request.getRemoteHost());
                    response.sendError(HttpServletResponse.SC_FORBIDDEN, "not allowed based on dn");
                    return;
                }
            }
        }

        final String destinationVersion = request.getHeader(PROTOCOL_VERSION_HEADER);
        Integer protocolVersion = null;
        if (destinationVersion != null) {
            try {
                protocolVersion = Integer.valueOf(destinationVersion);
            } catch (final NumberFormatException e) {
                // Value was invalid. Treat as if the header were missing.
            }
        }

        final boolean destinationIsLegacyNiFi = (protocolVersion == null);
        final boolean createHold = Boolean.parseBoolean(request.getHeader(FLOWFILE_CONFIRMATION_HEADER));
        final String contentType = request.getContentType();

        final InputStream unthrottled = contentGzipped ? new GZIPInputStream(request.getInputStream()) : request.getInputStream();
        final InputStream in = (streamThrottler == null) ? unthrottled : streamThrottler.newThrottledInputStream(unthrottled);

        if (logger.isDebugEnabled()) {
            logger.debug("Received request from " + request.getRemoteHost() + ", createHold=" + createHold + ", content-type=" + contentType + ", gzip=" + contentGzipped);
        }

        final AtomicBoolean hasMoreData = new AtomicBoolean(false);
        final FlowFileUnpackager unpackager;
        if (APPLICATION_FLOW_FILE_V3.equals(contentType)) {
            unpackager = new FlowFileUnpackagerV3();
        } else if (APPLICATION_FLOW_FILE_V2.equals(contentType)) {
            unpackager = new FlowFileUnpackagerV2();
        } else if (APPLICATION_FLOW_FILE_V1.equals(contentType)) {
            unpackager = new FlowFileUnpackagerV1();
        } else {
            unpackager = null;
        }

        final Set<FlowFile> flowFileSet = new HashSet<>();
        do {
            final long startNanos = System.nanoTime();
            final Map<String, String> attributes = new HashMap<>();
            flowFile = session.create();
            flowFile = session.write(flowFile, new OutputStreamCallback() {
                @Override
                public void process(final OutputStream rawOut) throws IOException {
                    try (final BufferedOutputStream bos = new BufferedOutputStream(rawOut, 65536)) {
                        if (unpackager == null) {
                            IOUtils.copy(in, bos);
                            hasMoreData.set(false);
                        } else {
                            attributes.putAll(unpackager.unpackageFlowFile(in, bos));
                            if (destinationIsLegacyNiFi) {
                                if (attributes.containsKey("nf.file.name")) {
                                    // for backward compatibility with old nifi...
                                    attributes.put(CoreAttributes.FILENAME.key(), attributes.remove("nf.file.name"));
                                }
                                if (attributes.containsKey("nf.file.path")) {
                                    attributes.put(CoreAttributes.PATH.key(), attributes.remove("nf.file.path"));
                                }
                            }
                            hasMoreData.set(unpackager.hasMoreData());
                        }
                    }
                }
            });

            final long transferNanos = System.nanoTime() - startNanos;
            final long transferMillis = TimeUnit.MILLISECONDS.convert(transferNanos, TimeUnit.NANOSECONDS);

            // put metadata on flowfile
            final String nameVal = request.getHeader(CoreAttributes.FILENAME.key());
            if (StringUtils.isNotBlank(nameVal)) {
                attributes.put(CoreAttributes.FILENAME.key(), nameVal);
            }

            // put arbitrary headers on flow file
            for (Enumeration<String> headerEnum = request.getHeaderNames(); headerEnum.hasMoreElements(); ) {
                String headerName = headerEnum.nextElement();
                if (headerPattern != null && headerPattern.matcher(headerName).matches()) {
                    String headerValue = request.getHeader(headerName);
                    attributes.put(headerName, headerValue);
                }
            }

            String sourceSystemFlowFileIdentifier = attributes.get(CoreAttributes.UUID.key());
            if (sourceSystemFlowFileIdentifier != null) {
                sourceSystemFlowFileIdentifier = "urn:nifi:" + sourceSystemFlowFileIdentifier;
                // If we received a UUID, we want to give the FlowFile a new UUID and register the sending system's
                // identifier as the SourceSystemFlowFileIdentifier field in the Provenance RECEIVE event
                attributes.put(CoreAttributes.UUID.key(), UUID.randomUUID().toString());
            }

            flowFile = session.putAllAttributes(flowFile, attributes);
            session.getProvenanceReporter().receive(flowFile, request.getRequestURL().toString(), sourceSystemFlowFileIdentifier, "Remote DN=" + foundSubject, transferMillis);
            flowFile = session.putAttribute(flowFile, "restlistener.remote.source.host", request.getRemoteHost());
            flowFile = session.putAttribute(flowFile, "restlistener.request.uri", request.getRequestURI());
            flowFile = session.putAttribute(flowFile, "restlistener.remote.user.dn", foundSubject);
            flowFileSet.add(flowFile);

            if (holdUuid == null) {
                holdUuid = flowFile.getAttribute(CoreAttributes.UUID.key());
            }
        } while (hasMoreData.get());

        if (createHold) {
            String uuid = (holdUuid == null) ? UUID.randomUUID().toString() : holdUuid;
            if (flowFileMap.containsKey(uuid)) {
                uuid = UUID.randomUUID().toString();
            }

            final FlowFileEntryTimeWrapper wrapper = new FlowFileEntryTimeWrapper(session, flowFileSet, System.currentTimeMillis(), request.getRemoteHost());
            FlowFileEntryTimeWrapper previousWrapper;
            do {
                previousWrapper = flowFileMap.putIfAbsent(uuid, wrapper);
                if (previousWrapper != null) {
                    uuid = UUID.randomUUID().toString();
                }
            } while (previousWrapper != null);

            response.setStatus(HttpServletResponse.SC_SEE_OTHER);
            final String ackUri = "/" + basePath + "/holds/" + uuid;
            response.addHeader(LOCATION_HEADER_NAME, ackUri);
            response.addHeader(LOCATION_URI_INTENT_NAME, LOCATION_URI_INTENT_VALUE);
            response.getOutputStream().write(ackUri.getBytes("UTF-8"));
            if (logger.isDebugEnabled()) {
                logger.debug("Ingested {} from Remote Host: [{}] Port [{}] SubjectDN [{}]; placed hold on these {} files with ID {}",
                        new Object[] { flowFileSet, request.getRemoteHost(), request.getRemotePort(), foundSubject, flowFileSet.size(), uuid });
            }
        } else {
            response.setStatus(this.returnCode);
            logger.info("Received from Remote Host: [{}] Port [{}] SubjectDN [{}]; transferring to 'success' {}",
                    new Object[] { request.getRemoteHost(), request.getRemotePort(), foundSubject, flowFile });
            session.transfer(flowFileSet, ListenHTTP.RELATIONSHIP_SUCCESS);
            session.commit();
        }
    } catch (final Throwable t) {
        session.rollback();
        if (flowFile == null) {
            logger.error("Unable to receive file from Remote Host: [{}] SubjectDN [{}] due to {}", new Object[] { request.getRemoteHost(), foundSubject, t });
        } else {
            logger.error("Unable to receive file {} from Remote Host: [{}] SubjectDN [{}] due to {}", new Object[] { flowFile, request.getRemoteHost(), foundSubject, t });
        }
        response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR, t.toString());
    }
}
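The spin loop at the start of doPost exists because the servlet can receive requests before the processor has ever been triggered; the ProcessSessionFactory only becomes available once onTrigger runs and publishes it into the shared holder. A hedged sketch of the publishing side (ListenHTTP's actual onTrigger does more, but the handoff has this shape; sessionFactoryHolder is assumed to be the same AtomicReference the servlet reads):

// Processor side of the handoff: the first onTrigger call stores the
// framework-provided factory in the AtomicReference that doPost spins on.
@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
    sessionFactoryHolder.compareAndSet(null, sessionFactory);
    context.yield(); // the servlet threads do the real work
}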
Use of org.apache.nifi.processor.ProcessSessionFactory in project nifi by apache.
From the class ListenHTTPServlet, method init:
@SuppressWarnings("unchecked")
@Override
public void init(final ServletConfig config) throws ServletException {
    final ServletContext context = config.getServletContext();
    this.logger = (ComponentLog) context.getAttribute(ListenHTTP.CONTEXT_ATTRIBUTE_LOGGER);
    this.sessionFactoryHolder = (AtomicReference<ProcessSessionFactory>) context.getAttribute(ListenHTTP.CONTEXT_ATTRIBUTE_SESSION_FACTORY_HOLDER);
    this.processContext = (ProcessContext) context.getAttribute(ListenHTTP.CONTEXT_ATTRIBUTE_PROCESS_CONTEXT_HOLDER);
    this.authorizedPattern = (Pattern) context.getAttribute(ListenHTTP.CONTEXT_ATTRIBUTE_AUTHORITY_PATTERN);
    this.headerPattern = (Pattern) context.getAttribute(ListenHTTP.CONTEXT_ATTRIBUTE_HEADER_PATTERN);
    this.flowFileMap = (ConcurrentMap<String, FlowFileEntryTimeWrapper>) context.getAttribute(ListenHTTP.CONTEXT_ATTRIBUTE_FLOWFILE_MAP);
    this.streamThrottler = (StreamThrottler) context.getAttribute(ListenHTTP.CONTEXT_ATTRIBUTE_STREAM_THROTTLER);
    this.basePath = (String) context.getAttribute(ListenHTTP.CONTEXT_ATTRIBUTE_BASE_PATH);
    this.returnCode = (int) context.getAttribute(ListenHTTP.CONTEXT_ATTRIBUTE_RETURN_CODE);
}
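init is the receiving end of a handoff: before starting its embedded web server, the processor registers each collaborator on the ServletContext under the keys read above. A hedged sketch of that wiring (the surrounding server setup is elided; contextHandler is a hypothetical Jetty ServletContextHandler, and sessionFactoryReference the holder shown earlier):

// Each object the servlet needs is registered under a well-known key
// before the server starts, so init(...) can pick it up by the same key.
final ServletContext servletContext = contextHandler.getServletContext();
servletContext.setAttribute(ListenHTTP.CONTEXT_ATTRIBUTE_LOGGER, getLogger());
servletContext.setAttribute(ListenHTTP.CONTEXT_ATTRIBUTE_SESSION_FACTORY_HOLDER, sessionFactoryReference);
servletContext.setAttribute(ListenHTTP.CONTEXT_ATTRIBUTE_PROCESS_CONTEXT_HOLDER, context);
servletContext.setAttribute(ListenHTTP.CONTEXT_ATTRIBUTE_BASE_PATH, basePath);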