Search in sources:

Example 1 with DBCPService

Use of org.apache.nifi.dbcp.DBCPService in project kylo by Teradata.

From the class GetTableDataTest, method setUp:

/**
 * Initialize instance variables
 */
@Before
public void setUp() throws Exception {
    // Note: `runner` is a TestRunner field assumed to be initialized elsewhere in this test class
    // (e.g. via TestRunners.newTestRunner(GetTableData.class)).
    // Setup services
    final DBCPService jdbcService = new MockDBCPService();
    final MetadataProviderService metadataService = new MockMetadataService();
    // Setup test runner
    runner.addControllerService(JDBC_SERVICE_IDENTIFIER, jdbcService);
    runner.addControllerService(METADATA_SERVICE_IDENTIFIER, metadataService);
    runner.enableControllerService(jdbcService);
    runner.enableControllerService(metadataService);
    runner.setProperty(GetTableData.JDBC_SERVICE, JDBC_SERVICE_IDENTIFIER);
    runner.setProperty(CommonProperties.METADATA_SERVICE, METADATA_SERVICE_IDENTIFIER);
    runner.setProperty(GetTableData.TABLE_NAME, "mytable");
    runner.setProperty(GetTableData.TABLE_SPECS, "id\nfirst_name\nlast_name\nemail  \n last_updated  \n\n");
}
Also used : DBCPService(org.apache.nifi.dbcp.DBCPService) MetadataProviderService(com.thinkbiganalytics.nifi.core.api.metadata.MetadataProviderService) Before(org.junit.Before)
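
For context, MockDBCPService comes from the kylo test sources and is not shown here. A minimal test stub of a DBCPService typically extends AbstractControllerService and hands out connections to an embedded database; the sketch below is a hypothetical illustration (class name, identifier, and JDBC URL are invented), not the actual kylo implementation.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

import org.apache.nifi.controller.AbstractControllerService;
import org.apache.nifi.dbcp.DBCPService;
import org.apache.nifi.processor.exception.ProcessException;

// Hypothetical test-only DBCPService backed by an in-memory HSQLDB database.
public class SimpleTestDBCPService extends AbstractControllerService implements DBCPService {

    @Override
    public String getIdentifier() {
        // Return whatever identifier the test registers the service under.
        return "jdbc-service";
    }

    @Override
    public Connection getConnection() throws ProcessException {
        try {
            // Assumes an embedded database driver (here HSQLDB) is on the test classpath.
            return DriverManager.getConnection("jdbc:hsqldb:mem:testdb");
        } catch (final SQLException e) {
            throw new ProcessException("Unable to open test database connection", e);
        }
    }
}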

Example 2 with DBCPService

Use of org.apache.nifi.dbcp.DBCPService in project nifi by apache.

From the class QueryDatabaseTable, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) throws ProcessException {
    // Fetch the column/table info once
    if (!setupComplete.get()) {
        super.setup(context);
    }
    ProcessSession session = sessionFactory.createSession();
    final List<FlowFile> resultSetFlowFiles = new ArrayList<>();
    final ComponentLog logger = getLogger();
    final DBCPService dbcpService = context.getProperty(DBCP_SERVICE).asControllerService(DBCPService.class);
    final DatabaseAdapter dbAdapter = dbAdapters.get(context.getProperty(DB_TYPE).getValue());
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions().getValue();
    final String columnNames = context.getProperty(COLUMN_NAMES).evaluateAttributeExpressions().getValue();
    final String maxValueColumnNames = context.getProperty(MAX_VALUE_COLUMN_NAMES).evaluateAttributeExpressions().getValue();
    final String customWhereClause = context.getProperty(WHERE_CLAUSE).evaluateAttributeExpressions().getValue();
    final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions().asInteger();
    final Integer maxRowsPerFlowFile = context.getProperty(MAX_ROWS_PER_FLOW_FILE).evaluateAttributeExpressions().asInteger();
    final Integer outputBatchSizeField = context.getProperty(OUTPUT_BATCH_SIZE).evaluateAttributeExpressions().asInteger();
    final int outputBatchSize = outputBatchSizeField == null ? 0 : outputBatchSizeField;
    final Integer maxFragments = context.getProperty(MAX_FRAGMENTS).isSet() ? context.getProperty(MAX_FRAGMENTS).evaluateAttributeExpressions().asInteger() : 0;
    final JdbcCommon.AvroConversionOptions options = JdbcCommon.AvroConversionOptions.builder().recordName(tableName).maxRows(maxRowsPerFlowFile).convertNames(context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean()).useLogicalTypes(context.getProperty(USE_AVRO_LOGICAL_TYPES).asBoolean()).defaultPrecision(context.getProperty(DEFAULT_PRECISION).evaluateAttributeExpressions().asInteger()).defaultScale(context.getProperty(DEFAULT_SCALE).evaluateAttributeExpressions().asInteger()).build();
    final StateManager stateManager = context.getStateManager();
    final StateMap stateMap;
    try {
        stateMap = stateManager.getState(Scope.CLUSTER);
    } catch (final IOException ioe) {
        getLogger().error("Failed to retrieve observed maximum values from the State Manager. Will not perform " + "query until this is accomplished.", ioe);
        context.yield();
        return;
    }
    // Make a mutable copy of the current state property map. This will be updated by the result row callback, and eventually
    // set as the current state map (after the session has been committed)
    final Map<String, String> statePropertyMap = new HashMap<>(stateMap.toMap());
    // If an initial max value for column(s) has been specified using properties, and this column is not in the state manager, sync them to the state property map
    for (final Map.Entry<String, String> maxProp : maxValueProperties.entrySet()) {
        String maxPropKey = maxProp.getKey().toLowerCase();
        String fullyQualifiedMaxPropKey = getStateKey(tableName, maxPropKey);
        if (!statePropertyMap.containsKey(fullyQualifiedMaxPropKey)) {
            String newMaxPropValue;
            // If an unqualified (column-name-only) key is already present in the state map, reuse its value,
            // but store the new initial max value under the fully-qualified key.
            if (statePropertyMap.containsKey(maxPropKey)) {
                newMaxPropValue = statePropertyMap.get(maxPropKey);
            } else {
                newMaxPropValue = maxProp.getValue();
            }
            statePropertyMap.put(fullyQualifiedMaxPropKey, newMaxPropValue);
        }
    }
    List<String> maxValueColumnNameList = StringUtils.isEmpty(maxValueColumnNames) ? null : Arrays.asList(maxValueColumnNames.split("\\s*,\\s*"));
    final String selectQuery = getQuery(dbAdapter, tableName, columnNames, maxValueColumnNameList, customWhereClause, statePropertyMap);
    final StopWatch stopWatch = new StopWatch(true);
    final String fragmentIdentifier = UUID.randomUUID().toString();
    try (final Connection con = dbcpService.getConnection();
        final Statement st = con.createStatement()) {
        if (fetchSize != null && fetchSize > 0) {
            try {
                st.setFetchSize(fetchSize);
            } catch (SQLException se) {
                // Not all drivers support this, just log the error (at debug level) and move on
                logger.debug("Cannot set fetch size to {} due to {}", new Object[] { fetchSize, se.getLocalizedMessage() }, se);
            }
        }
        String jdbcURL = "DBCPService";
        try {
            DatabaseMetaData databaseMetaData = con.getMetaData();
            if (databaseMetaData != null) {
                jdbcURL = databaseMetaData.getURL();
            }
        } catch (SQLException se) {
            // Ignore and use default JDBC URL. This shouldn't happen unless the driver doesn't implement getMetaData() properly
        }
        final Integer queryTimeout = context.getProperty(QUERY_TIMEOUT).evaluateAttributeExpressions().asTimePeriod(TimeUnit.SECONDS).intValue();
        // timeout in seconds
        st.setQueryTimeout(queryTimeout);
        try {
            logger.debug("Executing query {}", new Object[] { selectQuery });
            final ResultSet resultSet = st.executeQuery(selectQuery);
            int fragmentIndex = 0;
            while (true) {
                final AtomicLong nrOfRows = new AtomicLong(0L);
                FlowFile fileToProcess = session.create();
                try {
                    fileToProcess = session.write(fileToProcess, out -> {
                        // Max values will be updated in the state property map by the callback
                        final MaxValueResultSetRowCollector maxValCollector = new MaxValueResultSetRowCollector(tableName, statePropertyMap, dbAdapter);
                        try {
                            nrOfRows.set(JdbcCommon.convertToAvroStream(resultSet, out, options, maxValCollector));
                        } catch (SQLException | RuntimeException e) {
                            throw new ProcessException("Error during database query or conversion of records to Avro.", e);
                        }
                    });
                } catch (ProcessException e) {
                    // Add flowfile to results before rethrowing so it will be removed from session in outer catch
                    resultSetFlowFiles.add(fileToProcess);
                    throw e;
                }
                if (nrOfRows.get() > 0) {
                    // set attribute how many rows were selected
                    fileToProcess = session.putAttribute(fileToProcess, RESULT_ROW_COUNT, String.valueOf(nrOfRows.get()));
                    fileToProcess = session.putAttribute(fileToProcess, RESULT_TABLENAME, tableName);
                    fileToProcess = session.putAttribute(fileToProcess, CoreAttributes.MIME_TYPE.key(), JdbcCommon.MIME_TYPE_AVRO_BINARY);
                    if (maxRowsPerFlowFile > 0) {
                        fileToProcess = session.putAttribute(fileToProcess, "fragment.identifier", fragmentIdentifier);
                        fileToProcess = session.putAttribute(fileToProcess, "fragment.index", String.valueOf(fragmentIndex));
                    }
                    logger.info("{} contains {} Avro records; transferring to 'success'", new Object[] { fileToProcess, nrOfRows.get() });
                    session.getProvenanceReporter().receive(fileToProcess, jdbcURL, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
                    resultSetFlowFiles.add(fileToProcess);
                    // If we've reached the batch size, send out the flow files
                    if (outputBatchSize > 0 && resultSetFlowFiles.size() >= outputBatchSize) {
                        session.transfer(resultSetFlowFiles, REL_SUCCESS);
                        session.commit();
                        resultSetFlowFiles.clear();
                    }
                } else {
                    // If there were no rows returned, don't send the flowfile
                    session.remove(fileToProcess);
                    context.yield();
                    break;
                }
                fragmentIndex++;
                if (maxFragments > 0 && fragmentIndex >= maxFragments) {
                    break;
                }
            }
            // The maximum values and fragment count are known at this point, but if Output Batch Size is set,
            // earlier flow files have already been transferred without these attributes; to keep behavior consistent,
            // only add the attributes when no output batching was used.
            if (outputBatchSize == 0) {
                for (int i = 0; i < resultSetFlowFiles.size(); i++) {
                    // Add maximum values as attributes
                    for (Map.Entry<String, String> entry : statePropertyMap.entrySet()) {
                        // Get just the column name from the key
                        String key = entry.getKey();
                        String colName = key.substring(key.lastIndexOf(NAMESPACE_DELIMITER) + NAMESPACE_DELIMITER.length());
                        resultSetFlowFiles.set(i, session.putAttribute(resultSetFlowFiles.get(i), "maxvalue." + colName, entry.getValue()));
                    }
                    // set count on all FlowFiles
                    if (maxRowsPerFlowFile > 0) {
                        resultSetFlowFiles.set(i, session.putAttribute(resultSetFlowFiles.get(i), "fragment.count", Integer.toString(fragmentIndex)));
                    }
                }
            }
        } catch (final SQLException e) {
            throw e;
        }
        session.transfer(resultSetFlowFiles, REL_SUCCESS);
    } catch (final ProcessException | SQLException e) {
        logger.error("Unable to execute SQL select query {} due to {}", new Object[] { selectQuery, e });
        if (!resultSetFlowFiles.isEmpty()) {
            session.remove(resultSetFlowFiles);
        }
        context.yield();
    } finally {
        session.commit();
        try {
            // Update the state
            stateManager.setState(statePropertyMap, Scope.CLUSTER);
        } catch (IOException ioe) {
            getLogger().error("{} failed to update State Manager, maximum observed values will not be recorded", new Object[] { this, ioe });
        }
    }
}
Also used : ProcessSession(org.apache.nifi.processor.ProcessSession) StandardValidators(org.apache.nifi.processor.util.StandardValidators) Arrays(java.util.Arrays) Connection(java.sql.Connection) StringUtils(org.apache.commons.lang3.StringUtils) PropertyDescriptor(org.apache.nifi.components.PropertyDescriptor) WritesAttributes(org.apache.nifi.annotation.behavior.WritesAttributes) Scope(org.apache.nifi.components.state.Scope) ResultSet(java.sql.ResultSet) Map(java.util.Map) ParseException(java.text.ParseException) TriggerSerially(org.apache.nifi.annotation.behavior.TriggerSerially) FlowFile(org.apache.nifi.flowfile.FlowFile) NORMALIZE_NAMES_FOR_AVRO(org.apache.nifi.processors.standard.util.JdbcCommon.NORMALIZE_NAMES_FOR_AVRO) Set(java.util.Set) WritesAttribute(org.apache.nifi.annotation.behavior.WritesAttribute) UUID(java.util.UUID) StateMap(org.apache.nifi.components.state.StateMap) InputRequirement(org.apache.nifi.annotation.behavior.InputRequirement) Stateful(org.apache.nifi.annotation.behavior.Stateful) List(java.util.List) DynamicProperty(org.apache.nifi.annotation.behavior.DynamicProperty) JdbcCommon(org.apache.nifi.processors.standard.util.JdbcCommon) StopWatch(org.apache.nifi.util.StopWatch) Tags(org.apache.nifi.annotation.documentation.Tags) DBCPService(org.apache.nifi.dbcp.DBCPService) ResultSetMetaData(java.sql.ResultSetMetaData) IntStream(java.util.stream.IntStream) CapabilityDescription(org.apache.nifi.annotation.documentation.CapabilityDescription) USE_AVRO_LOGICAL_TYPES(org.apache.nifi.processors.standard.util.JdbcCommon.USE_AVRO_LOGICAL_TYPES) DatabaseMetaData(java.sql.DatabaseMetaData) HashMap(java.util.HashMap) ComponentLog(org.apache.nifi.logging.ComponentLog) ProcessException(org.apache.nifi.processor.exception.ProcessException) ArrayList(java.util.ArrayList) DEFAULT_SCALE(org.apache.nifi.processors.standard.util.JdbcCommon.DEFAULT_SCALE) HashSet(java.util.HashSet) SQLException(java.sql.SQLException) Relationship(org.apache.nifi.processor.Relationship) DEFAULT_PRECISION(org.apache.nifi.processors.standard.util.JdbcCommon.DEFAULT_PRECISION) Requirement(org.apache.nifi.annotation.behavior.InputRequirement.Requirement) DatabaseAdapter(org.apache.nifi.processors.standard.db.DatabaseAdapter) StateManager(org.apache.nifi.components.state.StateManager) ProcessContext(org.apache.nifi.processor.ProcessContext) IOException(java.io.IOException) SeeAlso(org.apache.nifi.annotation.documentation.SeeAlso) ProcessSessionFactory(org.apache.nifi.processor.ProcessSessionFactory) TimeUnit(java.util.concurrent.TimeUnit) AtomicLong(java.util.concurrent.atomic.AtomicLong) OnScheduled(org.apache.nifi.annotation.lifecycle.OnScheduled) Statement(java.sql.Statement) CoreAttributes(org.apache.nifi.flowfile.attributes.CoreAttributes) OnStopped(org.apache.nifi.annotation.lifecycle.OnStopped) Collections(java.util.Collections)
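
Stripped of the incremental-fetch and batching logic, the DBCPService interaction in the processor above reduces to a small pattern: look the service up from a property, borrow a Connection in a try-with-resources block (closing it returns it to the pool rather than closing the physical connection), and run the statement. The helper below is a minimal sketch of that pattern under invented names (DbcpQueryExample, countRows); it is not NiFi code.

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

import org.apache.nifi.dbcp.DBCPService;
import org.apache.nifi.processor.exception.ProcessException;

public final class DbcpQueryExample {

    // Borrows a Connection from the DBCPService, runs the query, and returns the number of rows.
    public static long countRows(final DBCPService dbcpService, final String query) {
        try (final Connection con = dbcpService.getConnection();
             final Statement st = con.createStatement();
             final ResultSet rs = st.executeQuery(query)) {
            long rows = 0;
            while (rs.next()) {
                rows++;
            }
            return rows;
        } catch (final SQLException e) {
            throw new ProcessException("Unable to execute query: " + query, e);
        }
    }
}

In a processor, the service itself is obtained exactly as the example shows, via context.getProperty(DBCP_SERVICE).asControllerService(DBCPService.class).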

Example 3 with DBCPService

Use of org.apache.nifi.dbcp.DBCPService in project nifi by apache.

From the class ListDatabaseTables, method onTrigger:

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLogger();
    final DBCPService dbcpService = context.getProperty(DBCP_SERVICE).asControllerService(DBCPService.class);
    final String catalog = context.getProperty(CATALOG).getValue();
    final String schemaPattern = context.getProperty(SCHEMA_PATTERN).getValue();
    final String tableNamePattern = context.getProperty(TABLE_NAME_PATTERN).getValue();
    final String[] tableTypes = context.getProperty(TABLE_TYPES).isSet() ? context.getProperty(TABLE_TYPES).getValue().split("\\s*,\\s*") : null;
    final boolean includeCount = context.getProperty(INCLUDE_COUNT).asBoolean();
    final long refreshInterval = context.getProperty(REFRESH_INTERVAL).asTimePeriod(TimeUnit.MILLISECONDS);
    final StateManager stateManager = context.getStateManager();
    final StateMap stateMap;
    final Map<String, String> stateMapProperties;
    try {
        stateMap = stateManager.getState(Scope.CLUSTER);
        stateMapProperties = new HashMap<>(stateMap.toMap());
    } catch (IOException ioe) {
        throw new ProcessException(ioe);
    }
    try (final Connection con = dbcpService.getConnection()) {
        DatabaseMetaData dbMetaData = con.getMetaData();
        ResultSet rs = dbMetaData.getTables(catalog, schemaPattern, tableNamePattern, tableTypes);
        while (rs.next()) {
            final String tableCatalog = rs.getString(1);
            final String tableSchema = rs.getString(2);
            final String tableName = rs.getString(3);
            final String tableType = rs.getString(4);
            final String tableRemarks = rs.getString(5);
            // Build fully-qualified name
            String fqn = Stream.of(tableCatalog, tableSchema, tableName).filter(segment -> !StringUtils.isEmpty(segment)).collect(Collectors.joining("."));
            String lastTimestampForTable = stateMapProperties.get(fqn);
            boolean refreshTable = true;
            try {
                // Refresh state if the interval has elapsed
                long lastRefreshed = -1;
                final long currentTime = System.currentTimeMillis();
                if (!StringUtils.isEmpty(lastTimestampForTable)) {
                    lastRefreshed = Long.parseLong(lastTimestampForTable);
                }
                if (lastRefreshed == -1 || (refreshInterval > 0 && currentTime >= (lastRefreshed + refreshInterval))) {
                    // Drop the stale entry for this table (the map is keyed by the fully-qualified name)
                    stateMapProperties.remove(fqn);
                } else {
                    refreshTable = false;
                }
            } catch (final NumberFormatException nfe) {
                getLogger().error("Failed to retrieve observed last table fetches from the State Manager. Will not perform " + "query until this is accomplished.", nfe);
                context.yield();
                return;
            }
            if (refreshTable) {
                FlowFile flowFile = session.create();
                logger.info("Found {}: {}", new Object[] { tableType, fqn });
                if (includeCount) {
                    try (Statement st = con.createStatement()) {
                        final String countQuery = "SELECT COUNT(1) FROM " + fqn;
                        logger.debug("Executing query: {}", new Object[] { countQuery });
                        ResultSet countResult = st.executeQuery(countQuery);
                        if (countResult.next()) {
                            flowFile = session.putAttribute(flowFile, DB_TABLE_COUNT, Long.toString(countResult.getLong(1)));
                        }
                    } catch (SQLException se) {
                        logger.error("Couldn't get row count for {}", new Object[] { fqn });
                        session.remove(flowFile);
                        continue;
                    }
                }
                if (tableCatalog != null) {
                    flowFile = session.putAttribute(flowFile, DB_TABLE_CATALOG, tableCatalog);
                }
                if (tableSchema != null) {
                    flowFile = session.putAttribute(flowFile, DB_TABLE_SCHEMA, tableSchema);
                }
                flowFile = session.putAttribute(flowFile, DB_TABLE_NAME, tableName);
                flowFile = session.putAttribute(flowFile, DB_TABLE_FULLNAME, fqn);
                flowFile = session.putAttribute(flowFile, DB_TABLE_TYPE, tableType);
                if (tableRemarks != null) {
                    flowFile = session.putAttribute(flowFile, DB_TABLE_REMARKS, tableRemarks);
                }
                String transitUri;
                try {
                    transitUri = dbMetaData.getURL();
                } catch (SQLException sqle) {
                    transitUri = "<unknown>";
                }
                session.getProvenanceReporter().receive(flowFile, transitUri);
                session.transfer(flowFile, REL_SUCCESS);
                stateMapProperties.put(fqn, Long.toString(System.currentTimeMillis()));
            }
        }
        // Update the timestamps for listed tables
        if (stateMap.getVersion() == -1) {
            stateManager.setState(stateMapProperties, Scope.CLUSTER);
        } else {
            stateManager.replace(stateMap, stateMapProperties, Scope.CLUSTER);
        }
    } catch (final SQLException | IOException e) {
        throw new ProcessException(e);
    }
}
Also used : StandardValidators(org.apache.nifi.processor.util.StandardValidators) Connection(java.sql.Connection) CapabilityDescription(org.apache.nifi.annotation.documentation.CapabilityDescription) DatabaseMetaData(java.sql.DatabaseMetaData) HashMap(java.util.HashMap) ComponentLog(org.apache.nifi.logging.ComponentLog) PropertyDescriptor(org.apache.nifi.components.PropertyDescriptor) ProcessException(org.apache.nifi.processor.exception.ProcessException) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) SQLException(java.sql.SQLException) WritesAttributes(org.apache.nifi.annotation.behavior.WritesAttributes) Scope(org.apache.nifi.components.state.Scope) Relationship(org.apache.nifi.processor.Relationship) ResultSet(java.sql.ResultSet) Map(java.util.Map) TriggerSerially(org.apache.nifi.annotation.behavior.TriggerSerially) Validator(org.apache.nifi.components.Validator) FlowFile(org.apache.nifi.flowfile.FlowFile) StateManager(org.apache.nifi.components.state.StateManager) ProcessContext(org.apache.nifi.processor.ProcessContext) Set(java.util.Set) ProcessSession(org.apache.nifi.processor.ProcessSession) IOException(java.io.IOException) WritesAttribute(org.apache.nifi.annotation.behavior.WritesAttribute) StringUtils(org.apache.nifi.util.StringUtils) Collectors(java.util.stream.Collectors) StateMap(org.apache.nifi.components.state.StateMap) TimeUnit(java.util.concurrent.TimeUnit) InputRequirement(org.apache.nifi.annotation.behavior.InputRequirement) Stateful(org.apache.nifi.annotation.behavior.Stateful) List(java.util.List) Stream(java.util.stream.Stream) Statement(java.sql.Statement) AbstractProcessor(org.apache.nifi.processor.AbstractProcessor) Tags(org.apache.nifi.annotation.documentation.Tags) DBCPService(org.apache.nifi.dbcp.DBCPService) Collections(java.util.Collections)
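
ListDatabaseTables keeps the per-table "last listed" timestamps in cluster-scoped state. The read-modify-write pattern it applies at the end of onTrigger can be sketched in isolation as below; the class and method names are invented for illustration, but the StateManager calls (getState, setState, replace) are the ones used in the example.

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.nifi.components.state.Scope;
import org.apache.nifi.components.state.StateManager;
import org.apache.nifi.components.state.StateMap;

public final class ClusterStateExample {

    // Records "now" as the last-listed timestamp for the given table key.
    public static void markTableListed(final StateManager stateManager, final String tableKey) throws IOException {
        final StateMap stateMap = stateManager.getState(Scope.CLUSTER);
        final Map<String, String> updated = new HashMap<>(stateMap.toMap());
        updated.put(tableKey, Long.toString(System.currentTimeMillis()));

        if (stateMap.getVersion() == -1) {
            // No state has ever been stored for this component, so create it.
            stateManager.setState(updated, Scope.CLUSTER);
        } else {
            // Optimistic update against the version that was read; the boolean result
            // (whether the replace succeeded) is ignored here for brevity.
            stateManager.replace(stateMap, updated, Scope.CLUSTER);
        }
    }
}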

Example 4 with DBCPService

Use of org.apache.nifi.dbcp.DBCPService in project nifi by apache.

From the class QueryDatabaseTableTest, method testAddedRowsCustomWhereClause:

@Test
public void testAddedRowsCustomWhereClause() throws ClassNotFoundException, SQLException, InitializationException, IOException {
    // load test data to database
    final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
    Statement stmt = con.createStatement();
    try {
        stmt.execute("drop table TEST_QUERY_DB_TABLE");
    } catch (final SQLException sqle) {
        // Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842]
    }
    stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, type varchar(20), name varchar(100), scale float, created_on timestamp, bignum bigint default 0)");
    stmt.execute("insert into TEST_QUERY_DB_TABLE (id, type, name, scale, created_on) VALUES (0, 'male', 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')");
    stmt.execute("insert into TEST_QUERY_DB_TABLE (id, type, name, scale, created_on) VALUES (1, 'female', 'Carrie Jones', 5.0, '2000-01-01 03:23:34.234')");
    stmt.execute("insert into TEST_QUERY_DB_TABLE (id, type, name, scale, created_on) VALUES (2, NULL, NULL, 2.0, '2010-01-01 00:00:00')");
    runner.setProperty(QueryDatabaseTable.TABLE_NAME, "TEST_QUERY_DB_TABLE");
    runner.setProperty(QueryDatabaseTable.WHERE_CLAUSE, "type = 'male'");
    runner.setIncomingConnection(false);
    runner.setProperty(QueryDatabaseTable.MAX_VALUE_COLUMN_NAMES, "ID");
    runner.setProperty(QueryDatabaseTable.MAX_ROWS_PER_FLOW_FILE, "2");
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 1);
    MockFlowFile flowFile = runner.getFlowFilesForRelationship(QueryDatabaseTable.REL_SUCCESS).get(0);
    assertEquals("TEST_QUERY_DB_TABLE", flowFile.getAttribute(QueryDatabaseTable.RESULT_TABLENAME));
    assertEquals(flowFile.getAttribute("maxvalue.id"), "0");
    InputStream in = new ByteArrayInputStream(flowFile.toByteArray());
    runner.setProperty(QueryDatabaseTable.FETCH_SIZE, "2");
    assertEquals(1, getNumberOfRecordsFromStream(in));
    runner.clearTransferState();
    // Run again, this time no flowfiles/rows should be transferred
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 0);
    runner.clearTransferState();
    // Remove Max Rows Per Flow File
    runner.setProperty(QueryDatabaseTable.MAX_ROWS_PER_FLOW_FILE, "0");
    // Add a new row with a higher ID and run; the new row is 'female', so the custom WHERE clause filters it out and no flow files should be transferred
    stmt.execute("insert into TEST_QUERY_DB_TABLE (id, type, name, scale, created_on) VALUES (3, 'female', 'Mary West', 15.0, '2000-01-01 03:23:34.234')");
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 0);
    runner.clearTransferState();
    // Sanity check - run again, this time no flowfiles/rows should be transferred
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 0);
    runner.clearTransferState();
    // Add timestamp as a max value column name
    runner.setProperty(QueryDatabaseTable.MAX_VALUE_COLUMN_NAMES, "id, created_on");
    // Add a new row with a higher ID and run, one flow file will be transferred because no max value for the timestamp has been stored
    stmt.execute("insert into TEST_QUERY_DB_TABLE (id, type, name, scale, created_on) VALUES (4, 'male', 'Marty Johnson', 15.0, '2011-01-01 03:23:34.234')");
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 1);
    flowFile = runner.getFlowFilesForRelationship(QueryDatabaseTable.REL_SUCCESS).get(0);
    assertEquals(flowFile.getAttribute("maxvalue.id"), "4");
    assertEquals(flowFile.getAttribute("maxvalue.created_on"), "2011-01-01 03:23:34.234");
    in = new ByteArrayInputStream(flowFile.toByteArray());
    assertEquals(1, getNumberOfRecordsFromStream(in));
    runner.clearTransferState();
    // Add a new row with a higher ID but lower timestamp and run, no flow file will be transferred
    stmt.execute("insert into TEST_QUERY_DB_TABLE (id, type, name, scale, created_on) VALUES (5, 'male', 'NO NAME', 15.0, '2001-01-01 03:23:34.234')");
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 0);
    runner.clearTransferState();
    // Add a new row with a higher ID and run, one flow file will be transferred
    stmt.execute("insert into TEST_QUERY_DB_TABLE (id, type, name, scale, created_on) VALUES (6, 'male', 'Mr. NiFi', 1.0, '2012-01-01 03:23:34.234')");
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 1);
    in = new ByteArrayInputStream(runner.getFlowFilesForRelationship(QueryDatabaseTable.REL_SUCCESS).get(0).toByteArray());
    assertEquals(1, getNumberOfRecordsFromStream(in));
    runner.clearTransferState();
    // Set name as the max value column name (and clear the state); all rows matching the WHERE clause should be returned since no max value for name has been stored
    runner.getStateManager().clear(Scope.CLUSTER);
    runner.setProperty(QueryDatabaseTable.MAX_VALUE_COLUMN_NAMES, "name");
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 1);
    in = new ByteArrayInputStream(runner.getFlowFilesForRelationship(QueryDatabaseTable.REL_SUCCESS).get(0).toByteArray());
    assertEquals(4, getNumberOfRecordsFromStream(in));
    runner.clearTransferState();
    // Add a new row with a "higher" name than the max but lower than "NULL" (to test that null values are skipped), one flow file will be transferred
    stmt.execute("insert into TEST_QUERY_DB_TABLE (id, type, name, scale, created_on) VALUES (7, 'male', 'NULK', 1.0, '2012-01-01 03:23:34.234')");
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 1);
    in = new ByteArrayInputStream(runner.getFlowFilesForRelationship(QueryDatabaseTable.REL_SUCCESS).get(0).toByteArray());
    assertEquals(1, getNumberOfRecordsFromStream(in));
    runner.clearTransferState();
    // Set scale as the max value column name (and clear the state); all rows matching the WHERE clause should be returned since no max value for scale has been stored
    runner.getStateManager().clear(Scope.CLUSTER);
    runner.setProperty(QueryDatabaseTable.MAX_VALUE_COLUMN_NAMES, "scale");
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 1);
    in = new ByteArrayInputStream(runner.getFlowFilesForRelationship(QueryDatabaseTable.REL_SUCCESS).get(0).toByteArray());
    assertEquals(5, getNumberOfRecordsFromStream(in));
    runner.clearTransferState();
    // Add a new row with a higher value for scale than the max, one flow file will be transferred
    stmt.execute("insert into TEST_QUERY_DB_TABLE (id, type, name, scale, created_on) VALUES (8, 'male', 'NULK', 100.0, '2012-01-01 03:23:34.234')");
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 1);
    in = new ByteArrayInputStream(runner.getFlowFilesForRelationship(QueryDatabaseTable.REL_SUCCESS).get(0).toByteArray());
    assertEquals(1, getNumberOfRecordsFromStream(in));
    runner.clearTransferState();
    // Set bignum as the max value column name (and clear the state); all rows matching the WHERE clause should be returned since no max value for bignum has been stored
    runner.getStateManager().clear(Scope.CLUSTER);
    runner.setProperty(QueryDatabaseTable.MAX_VALUE_COLUMN_NAMES, "bignum");
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 1);
    in = new ByteArrayInputStream(runner.getFlowFilesForRelationship(QueryDatabaseTable.REL_SUCCESS).get(0).toByteArray());
    assertEquals(6, getNumberOfRecordsFromStream(in));
    runner.clearTransferState();
    // Add a new row with a higher bignum than the max, but it is 'female' and filtered out by the custom WHERE clause, so no flow files will be transferred
    stmt.execute("insert into TEST_QUERY_DB_TABLE (id, type, name, scale, created_on, bignum) VALUES (9, 'female', 'Alice Bob', 100.0, '2012-01-01 03:23:34.234', 1)");
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 0);
    runner.clearTransferState();
}
Also used : MockFlowFile(org.apache.nifi.util.MockFlowFile) SQLException(java.sql.SQLException) ByteArrayInputStream(org.fusesource.hawtbuf.ByteArrayInputStream) Statement(java.sql.Statement) InputStream(java.io.InputStream) Connection(java.sql.Connection) DBCPService(org.apache.nifi.dbcp.DBCPService) Test(org.junit.Test)
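
The test above relies on a helper, getNumberOfRecordsFromStream(in), whose body is not shown. Because QueryDatabaseTable writes binary Avro, the helper presumably just iterates an Avro data file stream and counts records; the sketch below is an assumption about that helper, written against the standard Avro API rather than copied from the test class.

import java.io.IOException;
import java.io.InputStream;

import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public final class AvroRecordCounter {

    // Counts the records in a binary Avro stream such as the content of a QueryDatabaseTable flow file.
    public static long getNumberOfRecordsFromStream(final InputStream in) throws IOException {
        final GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
        long count = 0;
        try (final DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(in, datumReader)) {
            GenericRecord record = null;
            while (dataFileStream.hasNext()) {
                record = dataFileStream.next(record);
                count++;
            }
        }
        return count;
    }
}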

Example 5 with DBCPService

Use of org.apache.nifi.dbcp.DBCPService in project nifi by apache.

From the class AbstractDatabaseFetchProcessor, method setup:

public void setup(final ProcessContext context, boolean shouldCleanCache, FlowFile flowFile) {
    synchronized (setupComplete) {
        setupComplete.set(false);
        final String maxValueColumnNames = context.getProperty(MAX_VALUE_COLUMN_NAMES).evaluateAttributeExpressions(flowFile).getValue();
        // If there are no max-value column names specified, we don't need to perform this processing
        if (StringUtils.isEmpty(maxValueColumnNames)) {
            setupComplete.set(true);
            return;
        }
        // Try to fill the columnTypeMap with the types of the desired max-value columns
        final DBCPService dbcpService = context.getProperty(DBCP_SERVICE).asControllerService(DBCPService.class);
        final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
        final DatabaseAdapter dbAdapter = dbAdapters.get(context.getProperty(DB_TYPE).getValue());
        try (final Connection con = dbcpService.getConnection();
            final Statement st = con.createStatement()) {
            // Run a query that returns no rows, purely to obtain metadata about the columns. DatabaseMetaData.getColumns()
            // could be used instead, but not all drivers support it, notably schema-on-read sources such as Apache Drill.
            String query = dbAdapter.getSelectStatement(tableName, maxValueColumnNames, "1 = 0", null, null, null);
            ResultSet resultSet = st.executeQuery(query);
            ResultSetMetaData resultSetMetaData = resultSet.getMetaData();
            int numCols = resultSetMetaData.getColumnCount();
            if (numCols > 0) {
                if (shouldCleanCache) {
                    columnTypeMap.clear();
                }
                for (int i = 1; i <= numCols; i++) {
                    String colName = resultSetMetaData.getColumnName(i).toLowerCase();
                    String colKey = getStateKey(tableName, colName);
                    int colType = resultSetMetaData.getColumnType(i);
                    columnTypeMap.putIfAbsent(colKey, colType);
                }
            } else {
                throw new ProcessException("No columns found in table from those specified: " + maxValueColumnNames);
            }
        } catch (SQLException e) {
            throw new ProcessException("Unable to communicate with database in order to determine column types", e);
        }
        setupComplete.set(true);
    }
}
Also used : ResultSetMetaData(java.sql.ResultSetMetaData) ProcessException(org.apache.nifi.processor.exception.ProcessException) SQLException(java.sql.SQLException) Statement(java.sql.Statement) DBCPService(org.apache.nifi.dbcp.DBCPService) Connection(java.sql.Connection) ResultSet(java.sql.ResultSet) DatabaseAdapter(org.apache.nifi.processors.standard.db.DatabaseAdapter)
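
The setup method above issues a query with an always-false predicate purely to obtain ResultSetMetaData without fetching any rows. The same trick works with plain JDBC, independent of the DatabaseAdapter abstraction; the sketch below is a hypothetical standalone helper (class and table names are placeholders) showing the "1 = 0" probe on its own.

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.HashMap;
import java.util.Map;

public final class ColumnTypeProbe {

    // Returns a map of lower-cased column name to java.sql.Types constant, without reading any rows.
    public static Map<String, Integer> probeColumnTypes(final Connection con, final String tableName) throws SQLException {
        final Map<String, Integer> types = new HashMap<>();
        // The "1 = 0" predicate makes the query return zero rows while still exposing the column metadata.
        final String query = "SELECT * FROM " + tableName + " WHERE 1 = 0";
        try (final Statement st = con.createStatement();
             final ResultSet rs = st.executeQuery(query)) {
            final ResultSetMetaData md = rs.getMetaData();
            for (int i = 1; i <= md.getColumnCount(); i++) {
                types.put(md.getColumnName(i).toLowerCase(), md.getColumnType(i));
            }
        }
        return types;
    }
}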

Aggregations

DBCPService (org.apache.nifi.dbcp.DBCPService): 73 usages
Connection (java.sql.Connection): 61 usages
Statement (java.sql.Statement): 57 usages
Test (org.junit.Test): 57 usages
SQLException (java.sql.SQLException): 46 usages
MockFlowFile (org.apache.nifi.util.MockFlowFile): 28 usages
HashMap (java.util.HashMap): 25 usages
ResultSet (java.sql.ResultSet): 22 usages
HiveDBCPService (org.apache.nifi.dbcp.hive.HiveDBCPService): 21 usages
File (java.io.File): 18 usages
TestRunner (org.apache.nifi.util.TestRunner): 18 usages
Matchers.anyString (org.mockito.Matchers.anyString): 14 usages
InputStream (java.io.InputStream): 13 usages
ProcessException (org.apache.nifi.processor.exception.ProcessException): 10 usages
ByteArrayInputStream (org.fusesource.hawtbuf.ByteArrayInputStream): 9 usages
StateManager (org.apache.nifi.components.state.StateManager): 7 usages
HashSet (java.util.HashSet): 6 usages
Map (java.util.Map): 6 usages
FlowFile (org.apache.nifi.flowfile.FlowFile): 6 usages
IOException (java.io.IOException): 5 usages