Search in sources :

Example 1 with DatabaseAdapter

use of org.apache.nifi.processors.standard.db.DatabaseAdapter in project nifi by apache.

the class QueryDatabaseTable method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
    // Fetch the column/table info once
    if (!setupComplete.get()) {
        super.setup(context);
    }
    ProcessSession session = sessionFactory.createSession();
    final List<FlowFile> resultSetFlowFiles = new ArrayList<>();
    final ComponentLog logger = getLogger();
    final DBCPService dbcpService = context.getProperty(DBCP_SERVICE).asControllerService(DBCPService.class);
    final DatabaseAdapter dbAdapter = dbAdapters.get(context.getProperty(DB_TYPE).getValue());
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions().getValue();
    final String columnNames = context.getProperty(COLUMN_NAMES).evaluateAttributeExpressions().getValue();
    final String maxValueColumnNames = context.getProperty(MAX_VALUE_COLUMN_NAMES).evaluateAttributeExpressions().getValue();
    final String customWhereClause = context.getProperty(WHERE_CLAUSE).evaluateAttributeExpressions().getValue();
    final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions().asInteger();
    final Integer maxRowsPerFlowFile = context.getProperty(MAX_ROWS_PER_FLOW_FILE).evaluateAttributeExpressions().asInteger();
    final Integer outputBatchSizeField = context.getProperty(OUTPUT_BATCH_SIZE).evaluateAttributeExpressions().asInteger();
    final int outputBatchSize = outputBatchSizeField == null ? 0 : outputBatchSizeField;
    final Integer maxFragments = context.getProperty(MAX_FRAGMENTS).isSet() ? context.getProperty(MAX_FRAGMENTS).evaluateAttributeExpressions().asInteger() : 0;
    final JdbcCommon.AvroConversionOptions options = JdbcCommon.AvroConversionOptions.builder().recordName(tableName).maxRows(maxRowsPerFlowFile).convertNames(context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean()).useLogicalTypes(context.getProperty(USE_AVRO_LOGICAL_TYPES).asBoolean()).defaultPrecision(context.getProperty(DEFAULT_PRECISION).evaluateAttributeExpressions().asInteger()).defaultScale(context.getProperty(DEFAULT_SCALE).evaluateAttributeExpressions().asInteger()).build();
    final StateManager stateManager = context.getStateManager();
    final StateMap stateMap;
    try {
        stateMap = stateManager.getState(Scope.CLUSTER);
    } catch (final IOException ioe) {
        getLogger().error("Failed to retrieve observed maximum values from the State Manager. Will not perform " + "query until this is accomplished.", ioe);
        context.yield();
        return;
    }
    // Make a mutable copy of the current state property map. This will be updated by the result row callback, and eventually
    // set as the current state map (after the session has been committed)
    final Map<String, String> statePropertyMap = new HashMap<>(stateMap.toMap());
    // If an initial max value for column(s) has been specified using properties, and this column is not in the state manager, sync them to the state property map
    for (final Map.Entry<String, String> maxProp : maxValueProperties.entrySet()) {
        String maxPropKey = maxProp.getKey().toLowerCase();
        String fullyQualifiedMaxPropKey = getStateKey(tableName, maxPropKey);
        if (!statePropertyMap.containsKey(fullyQualifiedMaxPropKey)) {
            String newMaxPropValue;
            // but store the new initial max value under the fully-qualified key.
            if (statePropertyMap.containsKey(maxPropKey)) {
                newMaxPropValue = statePropertyMap.get(maxPropKey);
            } else {
                newMaxPropValue = maxProp.getValue();
            }
            statePropertyMap.put(fullyQualifiedMaxPropKey, newMaxPropValue);
        }
    }
    List<String> maxValueColumnNameList = StringUtils.isEmpty(maxValueColumnNames) ? null : Arrays.asList(maxValueColumnNames.split("\\s*,\\s*"));
    final String selectQuery = getQuery(dbAdapter, tableName, columnNames, maxValueColumnNameList, customWhereClause, statePropertyMap);
    final StopWatch stopWatch = new StopWatch(true);
    final String fragmentIdentifier = UUID.randomUUID().toString();
    try (final Connection con = dbcpService.getConnection();
        final Statement st = con.createStatement()) {
        if (fetchSize != null && fetchSize > 0) {
            try {
                st.setFetchSize(fetchSize);
            } catch (SQLException se) {
                // Not all drivers support this, just log the error (at debug level) and move on
                logger.debug("Cannot set fetch size to {} due to {}", new Object[] { fetchSize, se.getLocalizedMessage() }, se);
            }
        }
        String jdbcURL = "DBCPService";
        try {
            DatabaseMetaData databaseMetaData = con.getMetaData();
            if (databaseMetaData != null) {
                jdbcURL = databaseMetaData.getURL();
            }
        } catch (SQLException se) {
        // Ignore and use default JDBC URL. This shouldn't happen unless the driver doesn't implement getMetaData() properly
        }
        final Integer queryTimeout = context.getProperty(QUERY_TIMEOUT).evaluateAttributeExpressions().asTimePeriod(TimeUnit.SECONDS).intValue();
        // timeout in seconds
        st.setQueryTimeout(queryTimeout);
        try {
            logger.debug("Executing query {}", new Object[] { selectQuery });
            final ResultSet resultSet = st.executeQuery(selectQuery);
            int fragmentIndex = 0;
            while (true) {
                final AtomicLong nrOfRows = new AtomicLong(0L);
                FlowFile fileToProcess = session.create();
                try {
                    fileToProcess = session.write(fileToProcess, out -> {
                        // Max values will be updated in the state property map by the callback
                        final MaxValueResultSetRowCollector maxValCollector = new MaxValueResultSetRowCollector(tableName, statePropertyMap, dbAdapter);
                        try {
                            nrOfRows.set(JdbcCommon.convertToAvroStream(resultSet, out, options, maxValCollector));
                        } catch (SQLException | RuntimeException e) {
                            throw new ProcessException("Error during database query or conversion of records to Avro.", e);
                        }
                    });
                } catch (ProcessException e) {
                    // Add flowfile to results before rethrowing so it will be removed from session in outer catch
                    resultSetFlowFiles.add(fileToProcess);
                    throw e;
                }
                if (nrOfRows.get() > 0) {
                    // set attribute how many rows were selected
                    fileToProcess = session.putAttribute(fileToProcess, RESULT_ROW_COUNT, String.valueOf(nrOfRows.get()));
                    fileToProcess = session.putAttribute(fileToProcess, RESULT_TABLENAME, tableName);
                    fileToProcess = session.putAttribute(fileToProcess, CoreAttributes.MIME_TYPE.key(), JdbcCommon.MIME_TYPE_AVRO_BINARY);
                    if (maxRowsPerFlowFile > 0) {
                        fileToProcess = session.putAttribute(fileToProcess, "fragment.identifier", fragmentIdentifier);
                        fileToProcess = session.putAttribute(fileToProcess, "fragment.index", String.valueOf(fragmentIndex));
                    }
                    logger.info("{} contains {} Avro records; transferring to 'success'", new Object[] { fileToProcess, nrOfRows.get() });
                    session.getProvenanceReporter().receive(fileToProcess, jdbcURL, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
                    resultSetFlowFiles.add(fileToProcess);
                    // If we've reached the batch size, send out the flow files
                    if (outputBatchSize > 0 && resultSetFlowFiles.size() >= outputBatchSize) {
                        session.transfer(resultSetFlowFiles, REL_SUCCESS);
                        session.commit();
                        resultSetFlowFiles.clear();
                    }
                } else {
                    // If there were no rows returned, don't send the flowfile
                    session.remove(fileToProcess);
                    context.yield();
                    break;
                }
                fragmentIndex++;
                if (maxFragments > 0 && fragmentIndex >= maxFragments) {
                    break;
                }
            }
            // Even though the maximum value and total count are known at this point, to maintain consistent behavior if Output Batch Size is set, do not store the attributes
            if (outputBatchSize == 0) {
                for (int i = 0; i < resultSetFlowFiles.size(); i++) {
                    // Add maximum values as attributes
                    for (Map.Entry<String, String> entry : statePropertyMap.entrySet()) {
                        // Get just the column name from the key
                        String key = entry.getKey();
                        String colName = key.substring(key.lastIndexOf(NAMESPACE_DELIMITER) + NAMESPACE_DELIMITER.length());
                        resultSetFlowFiles.set(i, session.putAttribute(resultSetFlowFiles.get(i), "maxvalue." + colName, entry.getValue()));
                    }
                    // set count on all FlowFiles
                    if (maxRowsPerFlowFile > 0) {
                        resultSetFlowFiles.set(i, session.putAttribute(resultSetFlowFiles.get(i), "fragment.count", Integer.toString(fragmentIndex)));
                    }
                }
            }
        } catch (final SQLException e) {
            throw e;
        }
        session.transfer(resultSetFlowFiles, REL_SUCCESS);
    } catch (final ProcessException | SQLException e) {
        logger.error("Unable to execute SQL select query {} due to {}", new Object[] { selectQuery, e });
        if (!resultSetFlowFiles.isEmpty()) {
            session.remove(resultSetFlowFiles);
        }
        context.yield();
    } finally {
        session.commit();
        try {
            // Update the state
            stateManager.setState(statePropertyMap, Scope.CLUSTER);
        } catch (IOException ioe) {
            getLogger().error("{} failed to update State Manager, maximum observed values will not be recorded", new Object[] { this, ioe });
        }
    }
}
Also used : ProcessSession(org.apache.nifi.processor.ProcessSession) StandardValidators(org.apache.nifi.processor.util.StandardValidators) Arrays(java.util.Arrays) Connection(java.sql.Connection) StringUtils(org.apache.commons.lang3.StringUtils) PropertyDescriptor(org.apache.nifi.components.PropertyDescriptor) WritesAttributes(org.apache.nifi.annotation.behavior.WritesAttributes) Scope(org.apache.nifi.components.state.Scope) ResultSet(java.sql.ResultSet) Map(java.util.Map) ParseException(java.text.ParseException) TriggerSerially(org.apache.nifi.annotation.behavior.TriggerSerially) FlowFile(org.apache.nifi.flowfile.FlowFile) NORMALIZE_NAMES_FOR_AVRO(org.apache.nifi.processors.standard.util.JdbcCommon.NORMALIZE_NAMES_FOR_AVRO) Set(java.util.Set) WritesAttribute(org.apache.nifi.annotation.behavior.WritesAttribute) UUID(java.util.UUID) StateMap(org.apache.nifi.components.state.StateMap) InputRequirement(org.apache.nifi.annotation.behavior.InputRequirement) Stateful(org.apache.nifi.annotation.behavior.Stateful) List(java.util.List) DynamicProperty(org.apache.nifi.annotation.behavior.DynamicProperty) JdbcCommon(org.apache.nifi.processors.standard.util.JdbcCommon) StopWatch(org.apache.nifi.util.StopWatch) Tags(org.apache.nifi.annotation.documentation.Tags) DBCPService(org.apache.nifi.dbcp.DBCPService) ResultSetMetaData(java.sql.ResultSetMetaData) IntStream(java.util.stream.IntStream) CapabilityDescription(org.apache.nifi.annotation.documentation.CapabilityDescription) USE_AVRO_LOGICAL_TYPES(org.apache.nifi.processors.standard.util.JdbcCommon.USE_AVRO_LOGICAL_TYPES) DatabaseMetaData(java.sql.DatabaseMetaData) HashMap(java.util.HashMap) ComponentLog(org.apache.nifi.logging.ComponentLog) ProcessException(org.apache.nifi.processor.exception.ProcessException) ArrayList(java.util.ArrayList) DEFAULT_SCALE(org.apache.nifi.processors.standard.util.JdbcCommon.DEFAULT_SCALE) HashSet(java.util.HashSet) SQLException(java.sql.SQLException) Relationship(org.apache.nifi.processor.Relationship) DEFAULT_PRECISION(org.apache.nifi.processors.standard.util.JdbcCommon.DEFAULT_PRECISION) Requirement(org.apache.nifi.annotation.behavior.InputRequirement.Requirement) DatabaseAdapter(org.apache.nifi.processors.standard.db.DatabaseAdapter) StateManager(org.apache.nifi.components.state.StateManager) ProcessContext(org.apache.nifi.processor.ProcessContext) ProcessSession(org.apache.nifi.processor.ProcessSession) IOException(java.io.IOException) SeeAlso(org.apache.nifi.annotation.documentation.SeeAlso) ProcessSessionFactory(org.apache.nifi.processor.ProcessSessionFactory) TimeUnit(java.util.concurrent.TimeUnit) AtomicLong(java.util.concurrent.atomic.AtomicLong) OnScheduled(org.apache.nifi.annotation.lifecycle.OnScheduled) Statement(java.sql.Statement) CoreAttributes(org.apache.nifi.flowfile.attributes.CoreAttributes) OnStopped(org.apache.nifi.annotation.lifecycle.OnStopped) Collections(java.util.Collections) HashMap(java.util.HashMap) SQLException(java.sql.SQLException) StateMap(org.apache.nifi.components.state.StateMap) ArrayList(java.util.ArrayList) StateManager(org.apache.nifi.components.state.StateManager) ResultSet(java.sql.ResultSet) FlowFile(org.apache.nifi.flowfile.FlowFile) Statement(java.sql.Statement) Connection(java.sql.Connection) IOException(java.io.IOException) DatabaseMetaData(java.sql.DatabaseMetaData) ComponentLog(org.apache.nifi.logging.ComponentLog) DatabaseAdapter(org.apache.nifi.processors.standard.db.DatabaseAdapter) StopWatch(org.apache.nifi.util.StopWatch) JdbcCommon(org.apache.nifi.processors.standard.util.JdbcCommon) AtomicLong(java.util.concurrent.atomic.AtomicLong) ProcessException(org.apache.nifi.processor.exception.ProcessException) DBCPService(org.apache.nifi.dbcp.DBCPService) Map(java.util.Map) StateMap(org.apache.nifi.components.state.StateMap) HashMap(java.util.HashMap)

Example 2 with DatabaseAdapter

use of org.apache.nifi.processors.standard.db.DatabaseAdapter in project nifi by apache.

the class AbstractDatabaseFetchProcessor method setup.

public void setup(final ProcessContext context, boolean shouldCleanCache, FlowFile flowFile) {
    synchronized (setupComplete) {
        setupComplete.set(false);
        final String maxValueColumnNames = context.getProperty(MAX_VALUE_COLUMN_NAMES).evaluateAttributeExpressions(flowFile).getValue();
        // If there are no max-value column names specified, we don't need to perform this processing
        if (StringUtils.isEmpty(maxValueColumnNames)) {
            setupComplete.set(true);
            return;
        }
        // Try to fill the columnTypeMap with the types of the desired max-value columns
        final DBCPService dbcpService = context.getProperty(DBCP_SERVICE).asControllerService(DBCPService.class);
        final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
        final DatabaseAdapter dbAdapter = dbAdapters.get(context.getProperty(DB_TYPE).getValue());
        try (final Connection con = dbcpService.getConnection();
            final Statement st = con.createStatement()) {
            // Try a query that returns no rows, for the purposes of getting metadata about the columns. It is possible
            // to use DatabaseMetaData.getColumns(), but not all drivers support this, notably the schema-on-read
            // approach as in Apache Drill
            String query = dbAdapter.getSelectStatement(tableName, maxValueColumnNames, "1 = 0", null, null, null);
            ResultSet resultSet = st.executeQuery(query);
            ResultSetMetaData resultSetMetaData = resultSet.getMetaData();
            int numCols = resultSetMetaData.getColumnCount();
            if (numCols > 0) {
                if (shouldCleanCache) {
                    columnTypeMap.clear();
                }
                for (int i = 1; i <= numCols; i++) {
                    String colName = resultSetMetaData.getColumnName(i).toLowerCase();
                    String colKey = getStateKey(tableName, colName);
                    int colType = resultSetMetaData.getColumnType(i);
                    columnTypeMap.putIfAbsent(colKey, colType);
                }
            } else {
                throw new ProcessException("No columns found in table from those specified: " + maxValueColumnNames);
            }
        } catch (SQLException e) {
            throw new ProcessException("Unable to communicate with database in order to determine column types", e);
        }
        setupComplete.set(true);
    }
}
Also used : ResultSetMetaData(java.sql.ResultSetMetaData) ProcessException(org.apache.nifi.processor.exception.ProcessException) SQLException(java.sql.SQLException) Statement(java.sql.Statement) DBCPService(org.apache.nifi.dbcp.DBCPService) Connection(java.sql.Connection) ResultSet(java.sql.ResultSet) DatabaseAdapter(org.apache.nifi.processors.standard.db.DatabaseAdapter)

Example 3 with DatabaseAdapter

use of org.apache.nifi.processors.standard.db.DatabaseAdapter in project nifi by apache.

the class GenerateTableFetch method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
    // Fetch the column/table info once (if the table name and max value columns are not dynamic). Otherwise do the setup later
    if (!isDynamicTableName && !isDynamicMaxValues && !setupComplete.get()) {
        super.setup(context);
    }
    ProcessSession session = sessionFactory.createSession();
    FlowFile fileToProcess = null;
    if (context.hasIncomingConnection()) {
        fileToProcess = session.get();
        if (fileToProcess == null) {
            // Incoming connection with no flow file available, do no work (see capability description)
            return;
        }
    }
    final ComponentLog logger = getLogger();
    final DBCPService dbcpService = context.getProperty(DBCP_SERVICE).asControllerService(DBCPService.class);
    final DatabaseAdapter dbAdapter = dbAdapters.get(context.getProperty(DB_TYPE).getValue());
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(fileToProcess).getValue();
    final String columnNames = context.getProperty(COLUMN_NAMES).evaluateAttributeExpressions(fileToProcess).getValue();
    final String maxValueColumnNames = context.getProperty(MAX_VALUE_COLUMN_NAMES).evaluateAttributeExpressions(fileToProcess).getValue();
    final int partitionSize = context.getProperty(PARTITION_SIZE).evaluateAttributeExpressions(fileToProcess).asInteger();
    final String customWhereClause = context.getProperty(WHERE_CLAUSE).evaluateAttributeExpressions(fileToProcess).getValue();
    final StateManager stateManager = context.getStateManager();
    final StateMap stateMap;
    FlowFile finalFileToProcess = fileToProcess;
    try {
        stateMap = stateManager.getState(Scope.CLUSTER);
    } catch (final IOException ioe) {
        logger.error("Failed to retrieve observed maximum values from the State Manager. Will not perform " + "query until this is accomplished.", ioe);
        context.yield();
        return;
    }
    try {
        // Make a mutable copy of the current state property map. This will be updated by the result row callback, and eventually
        // set as the current state map (after the session has been committed)
        final Map<String, String> statePropertyMap = new HashMap<>(stateMap.toMap());
        // If an initial max value for column(s) has been specified using properties, and this column is not in the state manager, sync them to the state property map
        for (final Map.Entry<String, String> maxProp : maxValueProperties.entrySet()) {
            String maxPropKey = maxProp.getKey().toLowerCase();
            String fullyQualifiedMaxPropKey = getStateKey(tableName, maxPropKey);
            if (!statePropertyMap.containsKey(fullyQualifiedMaxPropKey)) {
                String newMaxPropValue;
                // but store the new initial max value under the fully-qualified key.
                if (statePropertyMap.containsKey(maxPropKey)) {
                    newMaxPropValue = statePropertyMap.get(maxPropKey);
                } else {
                    newMaxPropValue = maxProp.getValue();
                }
                statePropertyMap.put(fullyQualifiedMaxPropKey, newMaxPropValue);
            }
        }
        // Build a WHERE clause with maximum-value columns (if they exist), and a list of column names that will contain MAX(<column>) aliases. The
        // executed SQL query will retrieve the count of all records after the filter(s) have been applied, as well as the new maximum values for the
        // specified columns. This allows the processor to generate the correctly partitioned SQL statements as well as to update the state with the
        // latest observed maximum values.
        String whereClause = null;
        List<String> maxValueColumnNameList = StringUtils.isEmpty(maxValueColumnNames) ? new ArrayList<>(0) : Arrays.asList(maxValueColumnNames.split("\\s*,\\s*"));
        List<String> maxValueClauses = new ArrayList<>(maxValueColumnNameList.size());
        String columnsClause = null;
        List<String> maxValueSelectColumns = new ArrayList<>(maxValueColumnNameList.size() + 1);
        maxValueSelectColumns.add("COUNT(*)");
        // For each maximum-value column, get a WHERE filter and a MAX(column) alias
        IntStream.range(0, maxValueColumnNameList.size()).forEach((index) -> {
            String colName = maxValueColumnNameList.get(index);
            maxValueSelectColumns.add("MAX(" + colName + ") " + colName);
            String maxValue = getColumnStateMaxValue(tableName, statePropertyMap, colName);
            if (!StringUtils.isEmpty(maxValue)) {
                if (columnTypeMap.isEmpty() || getColumnType(tableName, colName) == null) {
                    // This means column type cache is clean after instance reboot. We should re-cache column type
                    super.setup(context, false, finalFileToProcess);
                }
                Integer type = getColumnType(tableName, colName);
                // Add a condition for the WHERE clause
                maxValueClauses.add(colName + (index == 0 ? " > " : " >= ") + getLiteralByType(type, maxValue, dbAdapter.getName()));
            }
        });
        if (customWhereClause != null) {
            // adding the custom WHERE clause (if defined) to the list of existing clauses.
            maxValueClauses.add("(" + customWhereClause + ")");
        }
        whereClause = StringUtils.join(maxValueClauses, " AND ");
        columnsClause = StringUtils.join(maxValueSelectColumns, ", ");
        // Build a SELECT query with maximum-value columns (if present)
        final String selectQuery = dbAdapter.getSelectStatement(tableName, columnsClause, whereClause, null, null, null);
        long rowCount = 0;
        try (final Connection con = dbcpService.getConnection();
            final Statement st = con.createStatement()) {
            final Integer queryTimeout = context.getProperty(QUERY_TIMEOUT).evaluateAttributeExpressions(fileToProcess).asTimePeriod(TimeUnit.SECONDS).intValue();
            // timeout in seconds
            st.setQueryTimeout(queryTimeout);
            logger.debug("Executing {}", new Object[] { selectQuery });
            ResultSet resultSet;
            resultSet = st.executeQuery(selectQuery);
            if (resultSet.next()) {
                // Total row count is in the first column
                rowCount = resultSet.getLong(1);
                // Update the state map with the newly-observed maximum values
                ResultSetMetaData rsmd = resultSet.getMetaData();
                for (int i = 2; i <= rsmd.getColumnCount(); i++) {
                    // Some JDBC drivers consider the columns name and label to be very different things.
                    // Since this column has been aliased lets check the label first,
                    // if there is no label we'll use the column name.
                    String resultColumnName = (StringUtils.isNotEmpty(rsmd.getColumnLabel(i)) ? rsmd.getColumnLabel(i) : rsmd.getColumnName(i)).toLowerCase();
                    String fullyQualifiedStateKey = getStateKey(tableName, resultColumnName);
                    String resultColumnCurrentMax = statePropertyMap.get(fullyQualifiedStateKey);
                    if (StringUtils.isEmpty(resultColumnCurrentMax) && !isDynamicTableName) {
                        // If we can't find the value at the fully-qualified key name and the table name is static, it is possible (under a previous scheme)
                        // the value has been stored under a key that is only the column name. Fall back to check the column name; either way, when a new
                        // maximum value is observed, it will be stored under the fully-qualified key from then on.
                        resultColumnCurrentMax = statePropertyMap.get(resultColumnName);
                    }
                    int type = rsmd.getColumnType(i);
                    if (isDynamicTableName) {
                        // We haven't pre-populated the column type map if the table name is dynamic, so do it here
                        columnTypeMap.put(fullyQualifiedStateKey, type);
                    }
                    try {
                        String newMaxValue = getMaxValueFromRow(resultSet, i, type, resultColumnCurrentMax, dbAdapter.getName());
                        if (newMaxValue != null) {
                            statePropertyMap.put(fullyQualifiedStateKey, newMaxValue);
                        }
                    } catch (ParseException | IOException pie) {
                        // Fail the whole thing here before we start creating flow files and such
                        throw new ProcessException(pie);
                    }
                }
            } else {
                // Something is very wrong here, one row (even if count is zero) should be returned
                throw new SQLException("No rows returned from metadata query: " + selectQuery);
            }
            // for each maximum-value column get a right bounding WHERE condition
            IntStream.range(0, maxValueColumnNameList.size()).forEach((index) -> {
                String colName = maxValueColumnNameList.get(index);
                maxValueSelectColumns.add("MAX(" + colName + ") " + colName);
                String maxValue = getColumnStateMaxValue(tableName, statePropertyMap, colName);
                if (!StringUtils.isEmpty(maxValue)) {
                    if (columnTypeMap.isEmpty() || getColumnType(tableName, colName) == null) {
                        // This means column type cache is clean after instance reboot. We should re-cache column type
                        super.setup(context, false, finalFileToProcess);
                    }
                    Integer type = getColumnType(tableName, colName);
                    // Add a condition for the WHERE clause
                    maxValueClauses.add(colName + " <= " + getLiteralByType(type, maxValue, dbAdapter.getName()));
                }
            });
            // Update WHERE list to include new right hand boundaries
            whereClause = StringUtils.join(maxValueClauses, " AND ");
            final long numberOfFetches = (partitionSize == 0) ? 1 : (rowCount / partitionSize) + (rowCount % partitionSize == 0 ? 0 : 1);
            // Generate SQL statements to read "pages" of data
            for (long i = 0; i < numberOfFetches; i++) {
                Long limit = partitionSize == 0 ? null : (long) partitionSize;
                Long offset = partitionSize == 0 ? null : i * partitionSize;
                final String maxColumnNames = StringUtils.join(maxValueColumnNameList, ", ");
                final String query = dbAdapter.getSelectStatement(tableName, columnNames, whereClause, maxColumnNames, limit, offset);
                FlowFile sqlFlowFile = (fileToProcess == null) ? session.create() : session.create(fileToProcess);
                sqlFlowFile = session.write(sqlFlowFile, out -> out.write(query.getBytes()));
                sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.tableName", tableName);
                if (columnNames != null) {
                    sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.columnNames", columnNames);
                }
                if (StringUtils.isNotBlank(whereClause)) {
                    sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.whereClause", whereClause);
                }
                if (StringUtils.isNotBlank(maxColumnNames)) {
                    sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.maxColumnNames", maxColumnNames);
                }
                sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.limit", String.valueOf(limit));
                if (partitionSize != 0) {
                    sqlFlowFile = session.putAttribute(sqlFlowFile, "generatetablefetch.offset", String.valueOf(offset));
                }
                session.transfer(sqlFlowFile, REL_SUCCESS);
            }
            if (fileToProcess != null) {
                session.remove(fileToProcess);
            }
        } catch (SQLException e) {
            if (fileToProcess != null) {
                logger.error("Unable to execute SQL select query {} due to {}, routing {} to failure", new Object[] { selectQuery, e, fileToProcess });
                fileToProcess = session.putAttribute(fileToProcess, "generatetablefetch.sql.error", e.getMessage());
                session.transfer(fileToProcess, REL_FAILURE);
            } else {
                logger.error("Unable to execute SQL select query {} due to {}", new Object[] { selectQuery, e });
                throw new ProcessException(e);
            }
        }
        session.commit();
        try {
            // Update the state
            stateManager.setState(statePropertyMap, Scope.CLUSTER);
        } catch (IOException ioe) {
            logger.error("{} failed to update State Manager, observed maximum values will not be recorded. " + "Also, any generated SQL statements may be duplicated.", new Object[] { this, ioe });
        }
    } catch (final ProcessException pe) {
        // Log the cause of the ProcessException if it is available
        Throwable t = (pe.getCause() == null ? pe : pe.getCause());
        logger.error("Error during processing: {}", new Object[] { t.getMessage() }, t);
        session.rollback();
        context.yield();
    }
}
Also used : ProcessSession(org.apache.nifi.processor.ProcessSession) StandardValidators(org.apache.nifi.processor.util.StandardValidators) IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) Connection(java.sql.Connection) CapabilityDescription(org.apache.nifi.annotation.documentation.CapabilityDescription) ValidationContext(org.apache.nifi.components.ValidationContext) HashMap(java.util.HashMap) ComponentLog(org.apache.nifi.logging.ComponentLog) StringUtils(org.apache.commons.lang3.StringUtils) PropertyDescriptor(org.apache.nifi.components.PropertyDescriptor) ProcessException(org.apache.nifi.processor.exception.ProcessException) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) SQLException(java.sql.SQLException) WritesAttributes(org.apache.nifi.annotation.behavior.WritesAttributes) Scope(org.apache.nifi.components.state.Scope) Relationship(org.apache.nifi.processor.Relationship) ResultSet(java.sql.ResultSet) Map(java.util.Map) Requirement(org.apache.nifi.annotation.behavior.InputRequirement.Requirement) ParseException(java.text.ParseException) TriggerSerially(org.apache.nifi.annotation.behavior.TriggerSerially) ValidationResult(org.apache.nifi.components.ValidationResult) DatabaseAdapter(org.apache.nifi.processors.standard.db.DatabaseAdapter) FlowFile(org.apache.nifi.flowfile.FlowFile) StateManager(org.apache.nifi.components.state.StateManager) Collection(java.util.Collection) ProcessContext(org.apache.nifi.processor.ProcessContext) Set(java.util.Set) ProcessSession(org.apache.nifi.processor.ProcessSession) IOException(java.io.IOException) WritesAttribute(org.apache.nifi.annotation.behavior.WritesAttribute) SeeAlso(org.apache.nifi.annotation.documentation.SeeAlso) ProcessSessionFactory(org.apache.nifi.processor.ProcessSessionFactory) StateMap(org.apache.nifi.components.state.StateMap) TimeUnit(java.util.concurrent.TimeUnit) InputRequirement(org.apache.nifi.annotation.behavior.InputRequirement) Stateful(org.apache.nifi.annotation.behavior.Stateful) OnScheduled(org.apache.nifi.annotation.lifecycle.OnScheduled) List(java.util.List) DynamicProperty(org.apache.nifi.annotation.behavior.DynamicProperty) Statement(java.sql.Statement) Tags(org.apache.nifi.annotation.documentation.Tags) DBCPService(org.apache.nifi.dbcp.DBCPService) Collections(java.util.Collections) ResultSetMetaData(java.sql.ResultSetMetaData) HashMap(java.util.HashMap) SQLException(java.sql.SQLException) StateMap(org.apache.nifi.components.state.StateMap) ArrayList(java.util.ArrayList) ResultSetMetaData(java.sql.ResultSetMetaData) StateManager(org.apache.nifi.components.state.StateManager) ResultSet(java.sql.ResultSet) FlowFile(org.apache.nifi.flowfile.FlowFile) Statement(java.sql.Statement) Connection(java.sql.Connection) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) DatabaseAdapter(org.apache.nifi.processors.standard.db.DatabaseAdapter) ProcessException(org.apache.nifi.processor.exception.ProcessException) DBCPService(org.apache.nifi.dbcp.DBCPService) ParseException(java.text.ParseException) HashMap(java.util.HashMap) Map(java.util.Map) StateMap(org.apache.nifi.components.state.StateMap)

Aggregations

Connection (java.sql.Connection)3 ResultSet (java.sql.ResultSet)3 ResultSetMetaData (java.sql.ResultSetMetaData)3 SQLException (java.sql.SQLException)3 Statement (java.sql.Statement)3 DBCPService (org.apache.nifi.dbcp.DBCPService)3 IOException (java.io.IOException)2 ParseException (java.text.ParseException)2 ArrayList (java.util.ArrayList)2 Arrays (java.util.Arrays)2 Collections (java.util.Collections)2 HashMap (java.util.HashMap)2 HashSet (java.util.HashSet)2 List (java.util.List)2 Map (java.util.Map)2 Set (java.util.Set)2 TimeUnit (java.util.concurrent.TimeUnit)2 IntStream (java.util.stream.IntStream)2 StringUtils (org.apache.commons.lang3.StringUtils)2 DynamicProperty (org.apache.nifi.annotation.behavior.DynamicProperty)2