Search in sources :

Example 16 with StateManager

use of org.apache.nifi.components.state.StateManager in project nifi by apache.

The method consumeEvents of the class ProvenanceEventConsumer.

/**
 * Reads provenance events from the repository in batches and passes each non-empty, filtered batch to
 * the supplied callback.
 * <p>
 * On the first call (while {@code firstEventId} is negative) the starting position is resolved:
 * <ul>
 *   <li>if local state contains {@code LAST_EVENT_ID_KEY}, reading resumes at the stored id + 1;</li>
 *   <li>otherwise, if the configured start position equals {@code END_OF_STREAM}, reading starts at the
 *       repository's current max event id;</li>
 *   <li>otherwise {@code firstEventId} is left unchanged.</li>
 * </ul>
 * If the repository's max id is lower than the stored last id, the repository is assumed to have restarted
 * its ids and the position is reset according to the configured start position.
 * <p>
 * Batches of up to {@code batchSize} events are then fetched for as long as the repository returns events
 * and {@code isScheduled()} is true. After each batch {@code firstEventId} is advanced to the value returned
 * by {@code updateLastEventId} (presumably that helper also persists the id via the state manager; verify
 * against its implementation).
 *
 * @param context  reporting context giving access to events and state; when {@code null} the method logs and returns
 * @param consumer callback invoked with the component map and each non-empty filtered batch
 * @throws ProcessException declared by the signature; not thrown directly by this method body
 */
public void consumeEvents(final ReportingContext context, final BiConsumer<ComponentMapHolder, List<ProvenanceEventRecord>> consumer) throws ProcessException {
    if (context == null) {
        logger.debug("No ReportingContext available.");
        return;
    }
    final EventAccess eventAccess = context.getEventAccess();
    final ProcessGroupStatus procGroupStatus = eventAccess.getControllerStatus();
    final ComponentMapHolder componentMapHolder = ComponentMapHolder.createComponentMap(procGroupStatus);
    final StateManager stateManager = context.getStateManager();
    final Long currMaxId = eventAccess.getProvenanceRepository().getMaxEventId();
    if (currMaxId == null) {
        logger.debug("No events to send because no events have been created yet.");
        return;
    }
    // A negative firstEventId means the starting position has not been resolved yet.
    if (firstEventId < 0) {
        Map<String, String> state;
        try {
            state = stateManager.getState(Scope.LOCAL).toMap();
        } catch (IOException e) {
            logger.error("Failed to get state at start up due to:" + e.getMessage(), e);
            return;
        }
        if (state.containsKey(LAST_EVENT_ID_KEY)) {
            // Resume right after the last event that was recorded in state.
            firstEventId = Long.parseLong(state.get(LAST_EVENT_ID_KEY)) + 1;
        } else if (END_OF_STREAM.getValue().equals(startPositionValue)) {
            firstEventId = currMaxId;
        }
        if (currMaxId < (firstEventId - 1)) {
            // firstEventId is "last queried id + 1", so the id that was actually stored in state is
            // firstEventId - 1; log that value so the message matches its wording.
            if (BEGINNING_OF_STREAM.getValue().equals(startPositionValue)) {
                logger.warn("Current provenance max id is {} which is less than what was stored in state as the last queried event, which was {}. This means the provenance restarted its " + "ids. Restarting querying from the beginning.", new Object[] { currMaxId, firstEventId - 1 });
                firstEventId = -1;
            } else {
                logger.warn("Current provenance max id is {} which is less than what was stored in state as the last queried event, which was {}. This means the provenance restarted its " + "ids. Restarting querying from the latest event in the Provenance Repository.", new Object[] { currMaxId, firstEventId - 1 });
                firstEventId = currMaxId;
            }
        }
    }
    if (currMaxId == (firstEventId - 1)) {
        logger.debug("No events to send due to the current max id being equal to the last id that was queried.");
        return;
    }
    List<ProvenanceEventRecord> rawEvents;
    List<ProvenanceEventRecord> filteredEvents;
    try {
        rawEvents = eventAccess.getProvenanceEvents(firstEventId, batchSize);
        filteredEvents = filterEvents(componentMapHolder, rawEvents);
    } catch (final IOException ioe) {
        logger.error("Failed to retrieve Provenance Events from repository due to: " + ioe.getMessage(), ioe);
        return;
    }
    if (rawEvents == null || rawEvents.isEmpty()) {
        logger.debug("No events to send due to 'events' being null or empty.");
        return;
    }
    // Consume while there are more events and not stopped.
    while (rawEvents != null && !rawEvents.isEmpty() && isScheduled()) {
        if (!filteredEvents.isEmpty()) {
            // Executes callback.
            consumer.accept(componentMapHolder, filteredEvents);
        }
        // Advance past the raw batch (not just the filtered one) so filtered-out events are not re-read.
        firstEventId = updateLastEventId(rawEvents, stateManager);
        // Retrieve the next batch
        try {
            rawEvents = eventAccess.getProvenanceEvents(firstEventId, batchSize);
            filteredEvents = filterEvents(componentMapHolder, rawEvents);
        } catch (final IOException ioe) {
            logger.error("Failed to retrieve Provenance Events from repository due to: " + ioe.getMessage(), ioe);
            return;
        }
    }
}
Also used : EventAccess(org.apache.nifi.reporting.EventAccess) ProcessGroupStatus(org.apache.nifi.controller.status.ProcessGroupStatus) StateManager(org.apache.nifi.components.state.StateManager) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) IOException(java.io.IOException)

Example 17 with StateManager

use of org.apache.nifi.components.state.StateManager in project nifi by apache.

The method onTrigger of the class MonitorActivity.

/**
 * Passes incoming FlowFiles through to 'success' and emits marker FlowFiles when activity stops or resumes.
 * <p>
 * When no FlowFiles are available, the time since the last recorded success transfer is compared with the
 * THRESHOLD property. If the threshold has elapsed (and, in cluster scope, the cluster state does not show
 * more recent activity) an "inactive" marker FlowFile may be created and sent to REL_INACTIVE. When activity
 * is seen again after an inactive period, an "activity restored" marker is sent to REL_ACTIVITY_RESTORED.
 * <p>
 * In cluster scope the latest success timestamp (and optionally the first FlowFile's attributes) is written to
 * cluster state, at most once per third of the threshold.
 *
 * @param context provides property values and the state manager
 * @param session session used to fetch, create and transfer FlowFiles
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final long thresholdMillis = context.getProperty(THRESHOLD).asTimePeriod(TimeUnit.MILLISECONDS);
    final long now = System.currentTimeMillis();
    final ComponentLog logger = getLogger();
    final boolean copyAttributes = context.getProperty(COPY_ATTRIBUTES).asBoolean();
    final boolean isClusterScope = isClusterScope(context, false);
    final boolean shouldReportOnlyOnPrimary = shouldReportOnlyOnPrimary(isClusterScope, context);
    // Fetch a batch of FlowFiles (the argument 50 is the requested batch size).
    final List<FlowFile> flowFiles = session.get(50);
    boolean isInactive = false;
    // -1 means "no new success timestamp to record" (see the check near the end of the method).
    long updatedLatestSuccessTransfer = -1;
    StateMap clusterState = null;
    if (flowFiles.isEmpty()) {
        final long previousSuccessMillis = latestSuccessTransfer.get();
        boolean sendInactiveMarker = false;
        isInactive = (now >= previousSuccessMillis + thresholdMillis);
        logger.debug("isInactive={}, previousSuccessMillis={}, now={}", new Object[] { isInactive, previousSuccessMillis, now });
        if (isInactive && isClusterScope) {
            // This node looks inactive, so consult the cluster state in case another node has reported more
            // recent activity. However, if this node is active, we don't have to look at cluster state.
            try {
                clusterState = context.getStateManager().getState(Scope.CLUSTER);
                if (clusterState != null && !StringUtils.isEmpty(clusterState.get(STATE_KEY_LATEST_SUCCESS_TRANSFER))) {
                    // NOTE(review): a non-numeric value here raises NumberFormatException, which the
                    // IOException catch below does not handle — confirm that is acceptable.
                    final long latestReportedClusterActivity = Long.valueOf(clusterState.get(STATE_KEY_LATEST_SUCCESS_TRANSFER));
                    isInactive = (now >= latestReportedClusterActivity + thresholdMillis);
                    if (!isInactive) {
                        // This node has been inactive, but other node has more recent activity.
                        updatedLatestSuccessTransfer = latestReportedClusterActivity;
                    }
                    logger.debug("isInactive={}, latestReportedClusterActivity={}", new Object[] { isInactive, latestReportedClusterActivity });
                }
            } catch (IOException e) {
                logger.error("Failed to access cluster state. Activity will not be monitored properly until this is addressed.", e);
            }
        }
        if (isInactive) {
            final boolean continual = context.getProperty(CONTINUALLY_SEND_MESSAGES).asBoolean();
            // Send a marker on the transition into the inactive state (getAndSet returns the previous flag
            // and sets it to true), or again once another threshold has passed since the last marker when
            // continual messages are enabled.
            sendInactiveMarker = !inactive.getAndSet(true) || (continual && (now > lastInactiveMessage.get() + thresholdMillis));
        }
        if (sendInactiveMarker && shouldThisNodeReport(isClusterScope, shouldReportOnlyOnPrimary)) {
            // Note: this records a fresh clock reading rather than the 'now' captured at the top.
            lastInactiveMessage.set(System.currentTimeMillis());
            FlowFile inactiveFlowFile = session.create();
            inactiveFlowFile = session.putAttribute(inactiveFlowFile, "inactivityStartMillis", String.valueOf(previousSuccessMillis));
            inactiveFlowFile = session.putAttribute(inactiveFlowFile, "inactivityDurationMillis", String.valueOf(now - previousSuccessMillis));
            // The message is evaluated against the marker FlowFile, so the two attributes set above are
            // available to the expression.
            final byte[] outBytes = context.getProperty(INACTIVITY_MESSAGE).evaluateAttributeExpressions(inactiveFlowFile).getValue().getBytes(UTF8);
            inactiveFlowFile = session.write(inactiveFlowFile, new OutputStreamCallback() {

                @Override
                public void process(final OutputStream out) throws IOException {
                    out.write(outBytes);
                }
            });
            session.getProvenanceReporter().create(inactiveFlowFile);
            session.transfer(inactiveFlowFile, REL_INACTIVE);
            logger.info("Transferred {} to 'inactive'", new Object[] { inactiveFlowFile });
        } else {
            // no need to dominate CPU checking times; let other processors run for a bit.
            context.yield();
        }
    } else {
        // Activity seen: pass everything through and remember when it happened.
        session.transfer(flowFiles, REL_SUCCESS);
        updatedLatestSuccessTransfer = now;
        logger.info("Transferred {} FlowFiles to 'success'", new Object[] { flowFiles.size() });
        final long latestStateReportTimestamp = latestReportedNodeState.get();
        if (isClusterScope && (now - latestStateReportTimestamp) > (thresholdMillis / 3)) {
            // We don't want to hit the state manager every onTrigger(), but often enough to detect activeness.
            try {
                final StateManager stateManager = context.getStateManager();
                final StateMap state = stateManager.getState(Scope.CLUSTER);
                final Map<String, String> newValues = new HashMap<>();
                // Persist attributes so that other nodes can copy it
                if (copyAttributes) {
                    newValues.putAll(flowFiles.get(0).getAttributes());
                }
                newValues.put(STATE_KEY_LATEST_SUCCESS_TRANSFER, String.valueOf(now));
                // presumably a version of -1 means no cluster state has been stored yet — TODO confirm
                if (state == null || state.getVersion() == -1) {
                    stateManager.setState(newValues, Scope.CLUSTER);
                } else {
                    final String existingTimestamp = state.get(STATE_KEY_LATEST_SUCCESS_TRANSFER);
                    // Only overwrite when our timestamp is newer than the one already stored.
                    if (StringUtils.isEmpty(existingTimestamp) || Long.parseLong(existingTimestamp) < now) {
                        // If this returns false due to race condition, it's not a problem since we just need
                        // the latest active timestamp.
                        stateManager.replace(state, newValues, Scope.CLUSTER);
                    } else {
                        logger.debug("Existing state has more recent timestamp, didn't update state.");
                    }
                }
                latestReportedNodeState.set(now);
            } catch (IOException e) {
                logger.error("Failed to access cluster state. Activity will not be monitored properly until this is addressed.", e);
            }
        }
    }
    if (!isInactive) {
        // Read the previous success time before it is overwritten: it marks the start of the inactive period.
        final long inactivityStartMillis = latestSuccessTransfer.get();
        if (updatedLatestSuccessTransfer > -1) {
            latestSuccessTransfer.set(updatedLatestSuccessTransfer);
        }
        // getAndSet(false) clears the inactive flag and returns true only if it was previously set,
        // so the restored marker is produced only on the inactive -> active transition.
        if (inactive.getAndSet(false) && shouldThisNodeReport(isClusterScope, shouldReportOnlyOnPrimary)) {
            FlowFile activityRestoredFlowFile = session.create();
            if (copyAttributes) {
                final Map<String, String> attributes = new HashMap<>();
                if (flowFiles.size() > 0) {
                    // copy attributes from the first flow file in the list
                    attributes.putAll(flowFiles.get(0).getAttributes());
                } else if (clusterState != null) {
                    // No local FlowFile: fall back to the entries read from cluster state,
                    // minus the bookkeeping timestamp entry.
                    attributes.putAll(clusterState.toMap());
                    attributes.remove(STATE_KEY_LATEST_SUCCESS_TRANSFER);
                }
                // don't copy the UUID
                attributes.remove(CoreAttributes.UUID.key());
                activityRestoredFlowFile = session.putAllAttributes(activityRestoredFlowFile, attributes);
            }
            activityRestoredFlowFile = session.putAttribute(activityRestoredFlowFile, "inactivityStartMillis", String.valueOf(inactivityStartMillis));
            activityRestoredFlowFile = session.putAttribute(activityRestoredFlowFile, "inactivityDurationMillis", String.valueOf(now - inactivityStartMillis));
            final byte[] outBytes = context.getProperty(ACTIVITY_RESTORED_MESSAGE).evaluateAttributeExpressions(activityRestoredFlowFile).getValue().getBytes(UTF8);
            activityRestoredFlowFile = session.write(activityRestoredFlowFile, out -> out.write(outBytes));
            session.getProvenanceReporter().create(activityRestoredFlowFile);
            session.transfer(activityRestoredFlowFile, REL_ACTIVITY_RESTORED);
            logger.info("Transferred {} to 'activity.restored'", new Object[] { activityRestoredFlowFile });
        }
    }
}
Also used : OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback) StandardValidators(org.apache.nifi.processor.util.StandardValidators) CapabilityDescription(org.apache.nifi.annotation.documentation.CapabilityDescription) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) ComponentLog(org.apache.nifi.logging.ComponentLog) SideEffectFree(org.apache.nifi.annotation.behavior.SideEffectFree) PropertyDescriptor(org.apache.nifi.components.PropertyDescriptor) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Charset(java.nio.charset.Charset) WritesAttributes(org.apache.nifi.annotation.behavior.WritesAttributes) Scope(org.apache.nifi.components.state.Scope) Relationship(org.apache.nifi.processor.Relationship) Map(java.util.Map) Requirement(org.apache.nifi.annotation.behavior.InputRequirement.Requirement) TriggerSerially(org.apache.nifi.annotation.behavior.TriggerSerially) ValidationResult(org.apache.nifi.components.ValidationResult) OutputStream(java.io.OutputStream) TriggerWhenEmpty(org.apache.nifi.annotation.behavior.TriggerWhenEmpty) FlowFile(org.apache.nifi.flowfile.FlowFile) StateManager(org.apache.nifi.components.state.StateManager) ProcessContext(org.apache.nifi.processor.ProcessContext) Set(java.util.Set) IOException(java.io.IOException) ProcessSession(org.apache.nifi.processor.ProcessSession) WritesAttribute(org.apache.nifi.annotation.behavior.WritesAttribute) StringUtils(org.apache.nifi.util.StringUtils) AllowableValue(org.apache.nifi.components.AllowableValue) StateMap(org.apache.nifi.components.state.StateMap) TimeUnit(java.util.concurrent.TimeUnit) AtomicLong(java.util.concurrent.atomic.AtomicLong) List(java.util.List) InputRequirement(org.apache.nifi.annotation.behavior.InputRequirement) Stateful(org.apache.nifi.annotation.behavior.Stateful) OnScheduled(org.apache.nifi.annotation.lifecycle.OnScheduled) AbstractProcessor(org.apache.nifi.processor.AbstractProcessor) 
Tags(org.apache.nifi.annotation.documentation.Tags) CoreAttributes(org.apache.nifi.flowfile.attributes.CoreAttributes) Collections(java.util.Collections) OnStopped(org.apache.nifi.annotation.lifecycle.OnStopped) ProcessorInitializationContext(org.apache.nifi.processor.ProcessorInitializationContext) FlowFile(org.apache.nifi.flowfile.FlowFile) HashMap(java.util.HashMap) StateMap(org.apache.nifi.components.state.StateMap) OutputStream(java.io.OutputStream) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) StateManager(org.apache.nifi.components.state.StateManager) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback)

Example 18 with StateManager

use of org.apache.nifi.components.state.StateManager in project nifi by apache.

The method onTrigger of the class GenerateTableFetch.

/**
 * Generates one FlowFile per "page" of a table, each containing a SELECT statement for that page.
 * <p>
 * Steps, in order:
 * <ol>
 *   <li>Read the stored maximum values from cluster state and merge in any configured initial maximums.</li>
 *   <li>Run a {@code COUNT(*)} / {@code MAX(col)} query, restricted to rows beyond the stored maximums and
 *       by the optional custom WHERE clause, to obtain the row count and the new maximum values.</li>
 *   <li>Add upper-bound conditions for the new maximums, then emit one FlowFile per partition to
 *       REL_SUCCESS with the paged query as content and {@code generatetablefetch.*} attributes.</li>
 *   <li>Commit the session, then write the updated maximums back to cluster state.</li>
 * </ol>
 * An SQLException routes the incoming FlowFile (if any) to REL_FAILURE, or is rethrown as a ProcessException
 * when there is none. A ProcessException raised inside the main try block is logged and the session is
 * rolled back.
 *
 * @param context        provides property values, the state manager and connection information
 * @param sessionFactory used to create the session this invocation works with
 * @throws ProcessException declared by the signature; ProcessExceptions raised inside the main try block are
 *                          caught and logged here
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
    // Fetch the column/table info once (if the table name and max value columns are not dynamic). Otherwise do the setup later
    if (!isDynamicTableName && !isDynamicMaxValues && !setupComplete.get()) {
        super.setup(context);
    }
    ProcessSession session = sessionFactory.createSession();
    FlowFile fileToProcess = null;
    if (context.hasIncomingConnection()) {
        fileToProcess = session.get();
        if (fileToProcess == null) {
            // Incoming connection with no flow file available, do no work (see capability description)
            // NOTE(review): the session created above is neither committed nor rolled back on this path — confirm that is fine.
            return;
        }
    }
    final ComponentLog logger = getLogger();
    final DBCPService dbcpService = context.getProperty(DBCP_SERVICE).asControllerService(DBCPService.class);
    final DatabaseAdapter dbAdapter = dbAdapters.get(context.getProperty(DB_TYPE).getValue());
    // These properties are evaluated against the incoming FlowFile, which is null when there is no incoming connection.
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(fileToProcess).getValue();
    final String columnNames = context.getProperty(COLUMN_NAMES).evaluateAttributeExpressions(fileToProcess).getValue();
    final String maxValueColumnNames = context.getProperty(MAX_VALUE_COLUMN_NAMES).evaluateAttributeExpressions(fileToProcess).getValue();
    final int partitionSize = context.getProperty(PARTITION_SIZE).evaluateAttributeExpressions(fileToProcess).asInteger();
    final String customWhereClause = context.getProperty(WHERE_CLAUSE).evaluateAttributeExpressions(fileToProcess).getValue();
    final StateManager stateManager = context.getStateManager();
    final StateMap stateMap;
    // Effectively-final copy so the lambdas below can capture it (fileToProcess is reassigned later).
    FlowFile finalFileToProcess = fileToProcess;
    try {
        stateMap = stateManager.getState(Scope.CLUSTER);
    } catch (final IOException ioe) {
        logger.error("Failed to retrieve observed maximum values from the State Manager. Will not perform " + "query until this is accomplished.", ioe);
        context.yield();
        return;
    }
    try {
        // Make a mutable copy of the current state property map. This will be updated by the result row callback, and eventually
        // set as the current state map (after the session has been committed)
        final Map<String, String> statePropertyMap = new HashMap<>(stateMap.toMap());
        // If an initial max value for column(s) has been specified using properties, and this column is not in the state manager, sync them to the state property map
        for (final Map.Entry<String, String> maxProp : maxValueProperties.entrySet()) {
            String maxPropKey = maxProp.getKey().toLowerCase();
            String fullyQualifiedMaxPropKey = getStateKey(tableName, maxPropKey);
            if (!statePropertyMap.containsKey(fullyQualifiedMaxPropKey)) {
                String newMaxPropValue;
                // If state already holds a value under the bare column-name key, reuse that value; otherwise use
                // the configured initial value. Either way, store it under the fully-qualified key.
                if (statePropertyMap.containsKey(maxPropKey)) {
                    newMaxPropValue = statePropertyMap.get(maxPropKey);
                } else {
                    newMaxPropValue = maxProp.getValue();
                }
                statePropertyMap.put(fullyQualifiedMaxPropKey, newMaxPropValue);
            }
        }
        // Build a WHERE clause with maximum-value columns (if they exist), and a list of column names that will contain MAX(<column>) aliases. The
        // executed SQL query will retrieve the count of all records after the filter(s) have been applied, as well as the new maximum values for the
        // specified columns. This allows the processor to generate the correctly partitioned SQL statements as well as to update the state with the
        // latest observed maximum values.
        String whereClause = null;
        List<String> maxValueColumnNameList = StringUtils.isEmpty(maxValueColumnNames) ? new ArrayList<>(0) : Arrays.asList(maxValueColumnNames.split("\\s*,\\s*"));
        List<String> maxValueClauses = new ArrayList<>(maxValueColumnNameList.size());
        String columnsClause = null;
        List<String> maxValueSelectColumns = new ArrayList<>(maxValueColumnNameList.size() + 1);
        // COUNT(*) is always the first selected column; the result handling below reads it from column 1.
        maxValueSelectColumns.add("COUNT(*)");
        // For each maximum-value column, get a WHERE filter and a MAX(column) alias
        IntStream.range(0, maxValueColumnNameList.size()).forEach((index) -> {
            String colName = maxValueColumnNameList.get(index);
            maxValueSelectColumns.add("MAX(" + colName + ") " + colName);
            String maxValue = getColumnStateMaxValue(tableName, statePropertyMap, colName);
            if (!StringUtils.isEmpty(maxValue)) {
                if (columnTypeMap.isEmpty() || getColumnType(tableName, colName) == null) {
                    // This means column type cache is clean after instance reboot. We should re-cache column type
                    super.setup(context, false, finalFileToProcess);
                }
                Integer type = getColumnType(tableName, colName);
                // Add a condition for the WHERE clause.
                // The first max-value column uses a strict ">" lower bound; the remaining columns use ">=".
                maxValueClauses.add(colName + (index == 0 ? " > " : " >= ") + getLiteralByType(type, maxValue, dbAdapter.getName()));
            }
        });
        if (customWhereClause != null) {
            // adding the custom WHERE clause (if defined) to the list of existing clauses.
            maxValueClauses.add("(" + customWhereClause + ")");
        }
        whereClause = StringUtils.join(maxValueClauses, " AND ");
        columnsClause = StringUtils.join(maxValueSelectColumns, ", ");
        // Build a SELECT query with maximum-value columns (if present)
        final String selectQuery = dbAdapter.getSelectStatement(tableName, columnsClause, whereClause, null, null, null);
        long rowCount = 0;
        try (final Connection con = dbcpService.getConnection();
            final Statement st = con.createStatement()) {
            final Integer queryTimeout = context.getProperty(QUERY_TIMEOUT).evaluateAttributeExpressions(fileToProcess).asTimePeriod(TimeUnit.SECONDS).intValue();
            // timeout in seconds
            st.setQueryTimeout(queryTimeout);
            logger.debug("Executing {}", new Object[] { selectQuery });
            // The ResultSet is not closed explicitly; it is left to the try-with-resources closing of the Statement.
            ResultSet resultSet;
            resultSet = st.executeQuery(selectQuery);
            if (resultSet.next()) {
                // Total row count is in the first column
                rowCount = resultSet.getLong(1);
                // Update the state map with the newly-observed maximum values
                ResultSetMetaData rsmd = resultSet.getMetaData();
                // Columns 2..n are the MAX(col) aliases, in the order they were added to the select list.
                for (int i = 2; i <= rsmd.getColumnCount(); i++) {
                    // Some JDBC drivers consider the columns name and label to be very different things.
                    // Since this column has been aliased lets check the label first,
                    // if there is no label we'll use the column name.
                    String resultColumnName = (StringUtils.isNotEmpty(rsmd.getColumnLabel(i)) ? rsmd.getColumnLabel(i) : rsmd.getColumnName(i)).toLowerCase();
                    String fullyQualifiedStateKey = getStateKey(tableName, resultColumnName);
                    String resultColumnCurrentMax = statePropertyMap.get(fullyQualifiedStateKey);
                    if (StringUtils.isEmpty(resultColumnCurrentMax) && !isDynamicTableName) {
                        // If we can't find the value at the fully-qualified key name and the table name is static, it is possible (under a previous scheme)
                        // the value has been stored under a key that is only the column name. Fall back to check the column name; either way, when a new
                        // maximum value is observed, it will be stored under the fully-qualified key from then on.
                        resultColumnCurrentMax = statePropertyMap.get(resultColumnName);
                    }
                    int type = rsmd.getColumnType(i);
                    if (isDynamicTableName) {
                        // We haven't pre-populated the column type map if the table name is dynamic, so do it here
                        columnTypeMap.put(fullyQualifiedStateKey, type);
                    }
                    try {
                        String newMaxValue = getMaxValueFromRow(resultSet, i, type, resultColumnCurrentMax, dbAdapter.getName());
                        if (newMaxValue != null) {
                            statePropertyMap.put(fullyQualifiedStateKey, newMaxValue);
                        }
                    } catch (ParseException | IOException pie) {
                        // Fail the whole thing here before we start creating flow files and such
                        throw new ProcessException(pie);
                    }
                }
            } else {
                // Something is very wrong here, one row (even if count is zero) should be returned
                throw new SQLException("No rows returned from metadata query: " + selectQuery);
            }
            // for each maximum-value column get a right bounding WHERE condition
            IntStream.range(0, maxValueColumnNameList.size()).forEach((index) -> {
                String colName = maxValueColumnNameList.get(index);
                // NOTE(review): columnsClause was already built above and maxValueSelectColumns is not read
                // again in this method, so this add looks like a leftover from the first loop — confirm.
                maxValueSelectColumns.add("MAX(" + colName + ") " + colName);
                String maxValue = getColumnStateMaxValue(tableName, statePropertyMap, colName);
                if (!StringUtils.isEmpty(maxValue)) {
                    if (columnTypeMap.isEmpty() || getColumnType(tableName, colName) == null) {
                        // This means column type cache is clean after instance reboot. We should re-cache column type
                        super.setup(context, false, finalFileToProcess);
                    }
                    Integer type = getColumnType(tableName, colName);
                    // Add a condition for the WHERE clause (upper bound at the newly observed maximum)
                    maxValueClauses.add(colName + " <= " + getLiteralByType(type, maxValue, dbAdapter.getName()));
                }
            });
            // Update WHERE list to include new right hand boundaries
            whereClause = StringUtils.join(maxValueClauses, " AND ");
            // Ceiling of rowCount / partitionSize; a partition size of 0 means a single, unpaged fetch.
            final long numberOfFetches = (partitionSize == 0) ? 1 : (rowCount / partitionSize) + (rowCount % partitionSize == 0 ? 0 : 1);
            // Generate SQL statements to read "pages" of data
            for (long i = 0; i < numberOfFetches; i++) {
                Long limit = partitionSize == 0 ? null : (long) partitionSize;
                Long offset = partitionSize == 0 ? null : i * partitionSize;
                final String maxColumnNames = StringUtils.join(maxValueColumnNameList, ", ");
                final String query = dbAdapter.getSelectStatement(tableName, columnNames, whereClause, maxColumnNames, limit, offset);
                // Create the output as a child of the incoming FlowFile when there is one.
                FlowFile sqlFlowFile = (fileToProcess == null) ? session.create() : session.create(fileToProcess);
                // NOTE(review): getBytes() without a charset uses the platform default encoding.
                sqlFlowFile = session.write(sqlFlowFile, out -> out.write(query.getBytes()));
                sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.tableName", tableName);
                if (columnNames != null) {
                    sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.columnNames", columnNames);
                }
                if (StringUtils.isNotBlank(whereClause)) {
                    sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.whereClause", whereClause);
                }
                if (StringUtils.isNotBlank(maxColumnNames)) {
                    sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.maxColumnNames", maxColumnNames);
                }
                // NOTE(review): unlike the offset below, limit is written unconditionally, so with a partition
                // size of 0 the attribute value is the string "null" — confirm this is intended.
                sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.limit", String.valueOf(limit));
                if (partitionSize != 0) {
                    sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.offset", String.valueOf(offset));
                }
                session.transfer(sqlFlowFile, REL_SUCCESS);
            }
            // The incoming FlowFile has served its purpose once all page statements are generated.
            if (fileToProcess != null) {
                session.remove(fileToProcess);
            }
        } catch (SQLException e) {
            if (fileToProcess != null) {
                logger.error("Unable to execute SQL select query {} due to {}, routing {} to failure", new Object[] { selectQuery, e, fileToProcess });
                fileToProcess = session.putAttribute(fileToProcess, "generatetablefetch.sql.error", e.getMessage());
                session.transfer(fileToProcess, REL_FAILURE);
            } else {
                logger.error("Unable to execute SQL select query {} due to {}", new Object[] { selectQuery, e });
                // Handled by the outer ProcessException catch, which rolls back the session and yields.
                throw new ProcessException(e);
            }
        }
        // Commit first, then persist state: a failed state update is only logged (see the message below).
        session.commit();
        try {
            // Update the state
            stateManager.setState(statePropertyMap, Scope.CLUSTER);
        } catch (IOException ioe) {
            logger.error("{} failed to update State Manager, observed maximum values will not be recorded. " + "Also, any generated SQL statements may be duplicated.", new Object[] { this, ioe });
        }
    } catch (final ProcessException pe) {
        // Log the cause of the ProcessException if it is available
        Throwable t = (pe.getCause() == null ? pe : pe.getCause());
        logger.error("Error during processing: {}", new Object[] { t.getMessage() }, t);
        session.rollback();
        context.yield();
    }
}
Also used : ProcessSession(org.apache.nifi.processor.ProcessSession) StandardValidators(org.apache.nifi.processor.util.StandardValidators) IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) Connection(java.sql.Connection) CapabilityDescription(org.apache.nifi.annotation.documentation.CapabilityDescription) ValidationContext(org.apache.nifi.components.ValidationContext) HashMap(java.util.HashMap) ComponentLog(org.apache.nifi.logging.ComponentLog) StringUtils(org.apache.commons.lang3.StringUtils) PropertyDescriptor(org.apache.nifi.components.PropertyDescriptor) ProcessException(org.apache.nifi.processor.exception.ProcessException) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) SQLException(java.sql.SQLException) WritesAttributes(org.apache.nifi.annotation.behavior.WritesAttributes) Scope(org.apache.nifi.components.state.Scope) Relationship(org.apache.nifi.processor.Relationship) ResultSet(java.sql.ResultSet) Map(java.util.Map) Requirement(org.apache.nifi.annotation.behavior.InputRequirement.Requirement) ParseException(java.text.ParseException) TriggerSerially(org.apache.nifi.annotation.behavior.TriggerSerially) ValidationResult(org.apache.nifi.components.ValidationResult) DatabaseAdapter(org.apache.nifi.processors.standard.db.DatabaseAdapter) FlowFile(org.apache.nifi.flowfile.FlowFile) StateManager(org.apache.nifi.components.state.StateManager) Collection(java.util.Collection) ProcessContext(org.apache.nifi.processor.ProcessContext) Set(java.util.Set) ProcessSession(org.apache.nifi.processor.ProcessSession) IOException(java.io.IOException) WritesAttribute(org.apache.nifi.annotation.behavior.WritesAttribute) SeeAlso(org.apache.nifi.annotation.documentation.SeeAlso) ProcessSessionFactory(org.apache.nifi.processor.ProcessSessionFactory) StateMap(org.apache.nifi.components.state.StateMap) TimeUnit(java.util.concurrent.TimeUnit) InputRequirement(org.apache.nifi.annotation.behavior.InputRequirement) 
Stateful(org.apache.nifi.annotation.behavior.Stateful) OnScheduled(org.apache.nifi.annotation.lifecycle.OnScheduled) List(java.util.List) DynamicProperty(org.apache.nifi.annotation.behavior.DynamicProperty) Statement(java.sql.Statement) Tags(org.apache.nifi.annotation.documentation.Tags) DBCPService(org.apache.nifi.dbcp.DBCPService) Collections(java.util.Collections) ResultSetMetaData(java.sql.ResultSetMetaData) HashMap(java.util.HashMap) SQLException(java.sql.SQLException) StateMap(org.apache.nifi.components.state.StateMap) ArrayList(java.util.ArrayList) ResultSetMetaData(java.sql.ResultSetMetaData) StateManager(org.apache.nifi.components.state.StateManager) ResultSet(java.sql.ResultSet) FlowFile(org.apache.nifi.flowfile.FlowFile) Statement(java.sql.Statement) Connection(java.sql.Connection) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) DatabaseAdapter(org.apache.nifi.processors.standard.db.DatabaseAdapter) ProcessException(org.apache.nifi.processor.exception.ProcessException) DBCPService(org.apache.nifi.dbcp.DBCPService) ParseException(java.text.ParseException) HashMap(java.util.HashMap) Map(java.util.Map) StateMap(org.apache.nifi.components.state.StateMap)

Example 19 with StateManager

use of org.apache.nifi.components.state.StateManager in project nifi by apache.

The method testBackwardsCompatibilityStateKeyDynamicTableStaticMaxValues of the class TestGenerateTableFetch.

/**
 * Checks how GenerateTableFetch treats a pre-existing, non-fully-qualified state key ("id")
 * when the table name is supplied through Expression Language and the max-value column is
 * the static value "id".
 *
 * The first run is expected to produce a query with only an upper bound (the old state entry
 * contributes no lower bound); the second run, after a new row is inserted, is expected to
 * be bounded below by the maximum seen in the first run.
 *
 * @throws Exception if the database set-up or the processor run fails
 */
@Test
public void testBackwardsCompatibilityStateKeyDynamicTableStaticMaxValues() throws Exception {
    // load test data to database; try-with-resources releases the connection and statement
    // even when one of the assertions below fails
    try (final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
         final Statement stmt = con.createStatement()) {
        try {
            stmt.execute("drop table TEST_QUERY_DB_TABLE");
        } catch (final SQLException ignored) {
            // Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842]
        }
        stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, bucket integer not null)");
        stmt.execute("insert into TEST_QUERY_DB_TABLE (id, bucket) VALUES (0, 0)");
        stmt.execute("insert into TEST_QUERY_DB_TABLE (id, bucket) VALUES (1, 0)");

        runner.setProperty(GenerateTableFetch.TABLE_NAME, "${tableName}");
        runner.setIncomingConnection(true);
        runner.setProperty(GenerateTableFetch.MAX_VALUE_COLUMN_NAMES, "id");

        final HashMap<String, String> firstAttributes = new HashMap<>();
        firstAttributes.put("tableName", "TEST_QUERY_DB_TABLE");
        runner.enqueue("".getBytes(), firstAttributes);

        // Pre-populate the state with a key for column name (not fully-qualified)
        final StateManager stateManager = runner.getStateManager();
        final HashMap<String, String> legacyState = new HashMap<>();
        legacyState.put("id", "0");
        stateManager.setState(legacyState, Scope.CLUSTER);

        // Pre-populate the column type map with an entry for id (not fully-qualified); 4 is java.sql.Types.INTEGER
        processor.columnTypeMap.put("id", 4);

        runner.run();
        runner.assertAllFlowFilesTransferred(REL_SUCCESS, 1);
        MockFlowFile flowFile = runner.getFlowFilesForRelationship(REL_SUCCESS).get(0);
        // Note the WHERE clause has no lower bound ("id > 0") here. Because we are using dynamic tables,
        // the old state key/value is not retrieved; only the upper bound on the current maximum is present.
        assertEquals("SELECT * FROM TEST_QUERY_DB_TABLE WHERE id <= 1 ORDER BY id FETCH NEXT 10000 ROWS ONLY", new String(flowFile.toByteArray()));
        runner.clearTransferState();

        stmt.execute("insert into TEST_QUERY_DB_TABLE (id, bucket) VALUES (2, 0)");
        final HashMap<String, String> secondAttributes = new HashMap<>();
        secondAttributes.put("tableName", "TEST_QUERY_DB_TABLE");
        // NOTE(review): no property set in this test references ${maxValueCol}; kept to preserve the original input
        secondAttributes.put("maxValueCol", "id");
        runner.enqueue("".getBytes(), secondAttributes);

        runner.run();
        runner.assertAllFlowFilesTransferred(REL_SUCCESS, 1);
        flowFile = runner.getFlowFilesForRelationship(REL_SUCCESS).get(0);
        // The second query is bounded below by the maximum observed in the first run (1)
        assertEquals("SELECT * FROM TEST_QUERY_DB_TABLE WHERE id > 1 AND id <= 2 ORDER BY id FETCH NEXT 10000 ROWS ONLY", new String(flowFile.toByteArray()));
    }
}
Also used : MockFlowFile(org.apache.nifi.util.MockFlowFile) StateManager(org.apache.nifi.components.state.StateManager) SQLException(java.sql.SQLException) Statement(java.sql.Statement) Connection(java.sql.Connection) DBCPService(org.apache.nifi.dbcp.DBCPService) Matchers.anyString(org.mockito.Matchers.anyString) Test(org.junit.Test)

Example 20 with StateManager

use of org.apache.nifi.components.state.StateManager in project nifi by apache.

the class TestGenerateTableFetch method testBackwardsCompatibilityStateKeyDynamicTableDynamicMaxValues.

/**
 * Checks how GenerateTableFetch treats a pre-existing, non-fully-qualified state key ("id")
 * when both the table name and the max-value column are supplied through Expression Language
 * (flow file attributes "tableName" and "maxValueCol").
 *
 * Besides the generated SQL, the generatetablefetch.* attributes written to the outgoing
 * flow file are verified for both runs.
 *
 * @throws Exception if the database set-up or the processor run fails
 */
@Test
public void testBackwardsCompatibilityStateKeyDynamicTableDynamicMaxValues() throws Exception {
    // load test data to database; try-with-resources releases the connection and statement
    // even when one of the assertions below fails
    try (final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
         final Statement stmt = con.createStatement()) {
        try {
            stmt.execute("drop table TEST_QUERY_DB_TABLE");
        } catch (final SQLException ignored) {
            // Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842]
        }
        stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, bucket integer not null)");
        stmt.execute("insert into TEST_QUERY_DB_TABLE (id, bucket) VALUES (0, 0)");
        stmt.execute("insert into TEST_QUERY_DB_TABLE (id, bucket) VALUES (1, 0)");

        runner.setProperty(GenerateTableFetch.TABLE_NAME, "${tableName}");
        runner.setIncomingConnection(true);
        runner.setProperty(GenerateTableFetch.MAX_VALUE_COLUMN_NAMES, "${maxValueCol}");

        final HashMap<String, String> firstAttributes = new HashMap<>();
        firstAttributes.put("tableName", "TEST_QUERY_DB_TABLE");
        firstAttributes.put("maxValueCol", "id");
        runner.enqueue("".getBytes(), firstAttributes);

        // Pre-populate the state with a key for column name (not fully-qualified)
        final StateManager stateManager = runner.getStateManager();
        final HashMap<String, String> legacyState = new HashMap<>();
        legacyState.put("id", "0");
        stateManager.setState(legacyState, Scope.CLUSTER);

        // Pre-populate the column type map with an entry for id (not fully-qualified); 4 is java.sql.Types.INTEGER
        processor.columnTypeMap.put("id", 4);

        runner.run();
        runner.assertAllFlowFilesTransferred(REL_SUCCESS, 1);
        MockFlowFile flowFile = runner.getFlowFilesForRelationship(REL_SUCCESS).get(0);
        // Note the WHERE clause has no lower bound ("id > 0") here. Because we are using dynamic tables,
        // the old state key/value is not retrieved; only the upper bound on the current maximum is present.
        assertEquals("SELECT * FROM TEST_QUERY_DB_TABLE WHERE id <= 1 ORDER BY id FETCH NEXT 10000 ROWS ONLY", new String(flowFile.toByteArray()));
        assertEquals("TEST_QUERY_DB_TABLE", flowFile.getAttribute("generatetablefetch.tableName"));
        assertEquals(null, flowFile.getAttribute("generatetablefetch.columnNames"));
        assertEquals("id <= 1", flowFile.getAttribute("generatetablefetch.whereClause"));
        assertEquals("id", flowFile.getAttribute("generatetablefetch.maxColumnNames"));
        assertEquals("10000", flowFile.getAttribute("generatetablefetch.limit"));
        assertEquals("0", flowFile.getAttribute("generatetablefetch.offset"));
        runner.clearTransferState();

        stmt.execute("insert into TEST_QUERY_DB_TABLE (id, bucket) VALUES (2, 0)");
        final HashMap<String, String> secondAttributes = new HashMap<>();
        secondAttributes.put("tableName", "TEST_QUERY_DB_TABLE");
        secondAttributes.put("maxValueCol", "id");
        runner.enqueue("".getBytes(), secondAttributes);

        runner.run();
        runner.assertAllFlowFilesTransferred(REL_SUCCESS, 1);
        flowFile = runner.getFlowFilesForRelationship(REL_SUCCESS).get(0);
        // The second query is bounded below by the maximum observed in the first run (1)
        assertEquals("SELECT * FROM TEST_QUERY_DB_TABLE WHERE id > 1 AND id <= 2 ORDER BY id FETCH NEXT 10000 ROWS ONLY", new String(flowFile.toByteArray()));
        assertEquals("TEST_QUERY_DB_TABLE", flowFile.getAttribute("generatetablefetch.tableName"));
        assertEquals(null, flowFile.getAttribute("generatetablefetch.columnNames"));
        assertEquals("id > 1 AND id <= 2", flowFile.getAttribute("generatetablefetch.whereClause"));
        assertEquals("id", flowFile.getAttribute("generatetablefetch.maxColumnNames"));
        assertEquals("10000", flowFile.getAttribute("generatetablefetch.limit"));
        assertEquals("0", flowFile.getAttribute("generatetablefetch.offset"));
    }
}
Also used : MockFlowFile(org.apache.nifi.util.MockFlowFile) StateManager(org.apache.nifi.components.state.StateManager) SQLException(java.sql.SQLException) Statement(java.sql.Statement) Connection(java.sql.Connection) DBCPService(org.apache.nifi.dbcp.DBCPService) Matchers.anyString(org.mockito.Matchers.anyString) Test(org.junit.Test)

Aggregations

StateManager (org.apache.nifi.components.state.StateManager): 26, IOException (java.io.IOException): 13, StateMap (org.apache.nifi.components.state.StateMap): 12, HashMap (java.util.HashMap): 11, SQLException (java.sql.SQLException): 8, ComponentLog (org.apache.nifi.logging.ComponentLog): 8, Connection (java.sql.Connection): 7, Statement (java.sql.Statement): 7, HashSet (java.util.HashSet): 7, DBCPService (org.apache.nifi.dbcp.DBCPService): 7, ProcessException (org.apache.nifi.processor.exception.ProcessException): 7, Map (java.util.Map): 6, ArrayList (java.util.ArrayList): 5, TimeUnit (java.util.concurrent.TimeUnit): 5, OnScheduled (org.apache.nifi.annotation.lifecycle.OnScheduled): 5, PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor): 5, ValidationResult (org.apache.nifi.components.ValidationResult): 5, FlowFile (org.apache.nifi.flowfile.FlowFile): 5, Test (org.junit.Test): 5, Collections (java.util.Collections): 4