Example 6 with DBCPService

Use of org.apache.nifi.dbcp.DBCPService in project nifi by apache: class ConvertJSONToSQL, method onTrigger().

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final boolean translateFieldNames = context.getProperty(TRANSLATE_FIELD_NAMES).asBoolean();
    final boolean ignoreUnmappedFields = IGNORE_UNMATCHED_FIELD.getValue().equalsIgnoreCase(context.getProperty(UNMATCHED_FIELD_BEHAVIOR).getValue());
    final String statementType = context.getProperty(STATEMENT_TYPE).getValue();
    final String updateKeys = context.getProperty(UPDATE_KEY).evaluateAttributeExpressions(flowFile).getValue();
    final String catalog = context.getProperty(CATALOG_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String schemaName = context.getProperty(SCHEMA_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final SchemaKey schemaKey = new SchemaKey(catalog, tableName);
    final boolean includePrimaryKeys = UPDATE_TYPE.equals(statementType) && updateKeys == null;
    // Is the unmatched column behaviour fail or warning?
    final boolean failUnmappedColumns = FAIL_UNMATCHED_COLUMN.getValue().equalsIgnoreCase(context.getProperty(UNMATCHED_COLUMN_BEHAVIOR).getValue());
    final boolean warningUnmappedColumns = WARNING_UNMATCHED_COLUMN.getValue().equalsIgnoreCase(context.getProperty(UNMATCHED_COLUMN_BEHAVIOR).getValue());
    // Escape column names?
    final boolean escapeColumnNames = context.getProperty(QUOTED_IDENTIFIERS).asBoolean();
    // Quote table name?
    final boolean quoteTableName = context.getProperty(QUOTED_TABLE_IDENTIFIER).asBoolean();
    // Attribute prefix
    final String attributePrefix = context.getProperty(SQL_PARAM_ATTR_PREFIX).evaluateAttributeExpressions(flowFile).getValue();
    // get the database schema from the cache, if one exists. We do this in a synchronized block, rather than
    // using a ConcurrentMap because the Map that we are using is a LinkedHashMap with a capacity such that if
    // the Map grows beyond this capacity, old elements are evicted. We do this in order to avoid filling the
    // Java Heap if there are a lot of different SQL statements being generated that reference different tables.
    TableSchema schema;
    synchronized (this) {
        schema = schemaCache.get(schemaKey);
        if (schema == null) {
            // No schema exists for this table yet. Query the database to determine the schema and put it into the cache.
            final DBCPService dbcpService = context.getProperty(CONNECTION_POOL).asControllerService(DBCPService.class);
            try (final Connection conn = dbcpService.getConnection()) {
                schema = TableSchema.from(conn, catalog, schemaName, tableName, translateFieldNames, includePrimaryKeys);
                schemaCache.put(schemaKey, schema);
            } catch (final SQLException e) {
                getLogger().error("Failed to convert {} into a SQL statement due to {}; routing to failure", new Object[] { flowFile, e.toString() }, e);
                session.transfer(flowFile, REL_FAILURE);
                return;
            }
        }
    }
    // Parse the JSON document
    final ObjectMapper mapper = new ObjectMapper();
    final AtomicReference<JsonNode> rootNodeRef = new AtomicReference<>(null);
    try {
        session.read(flowFile, new InputStreamCallback() {

            @Override
            public void process(final InputStream in) throws IOException {
                try (final InputStream bufferedIn = new BufferedInputStream(in)) {
                    rootNodeRef.set(mapper.readTree(bufferedIn));
                }
            }
        });
    } catch (final ProcessException pe) {
        getLogger().error("Failed to parse {} as JSON due to {}; routing to failure", new Object[] { flowFile, pe.toString() }, pe);
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    final JsonNode rootNode = rootNodeRef.get();
    // The node may or may not be a Json Array. If it isn't, we will create an
    // ArrayNode and add just the root node to it. We do this so that we can easily iterate
    // over the array node, rather than duplicating the logic or creating another function that takes many variables
    // in order to implement the logic.
    final ArrayNode arrayNode;
    if (rootNode.isArray()) {
        arrayNode = (ArrayNode) rootNode;
    } else {
        final JsonNodeFactory nodeFactory = JsonNodeFactory.instance;
        arrayNode = new ArrayNode(nodeFactory);
        arrayNode.add(rootNode);
    }
    final String fragmentIdentifier = UUID.randomUUID().toString();
    final Set<FlowFile> created = new HashSet<>();
    for (int i = 0; i < arrayNode.size(); i++) {
        final JsonNode jsonNode = arrayNode.get(i);
        final String sql;
        final Map<String, String> attributes = new HashMap<>();
        try {
            // build the fully qualified table name
            final StringBuilder tableNameBuilder = new StringBuilder();
            if (catalog != null) {
                tableNameBuilder.append(catalog).append(".");
            }
            if (schemaName != null) {
                tableNameBuilder.append(schemaName).append(".");
            }
            tableNameBuilder.append(tableName);
            final String fqTableName = tableNameBuilder.toString();
            if (INSERT_TYPE.equals(statementType)) {
                sql = generateInsert(jsonNode, attributes, fqTableName, schema, translateFieldNames, ignoreUnmappedFields, failUnmappedColumns, warningUnmappedColumns, escapeColumnNames, quoteTableName, attributePrefix);
            } else if (UPDATE_TYPE.equals(statementType)) {
                sql = generateUpdate(jsonNode, attributes, fqTableName, updateKeys, schema, translateFieldNames, ignoreUnmappedFields, failUnmappedColumns, warningUnmappedColumns, escapeColumnNames, quoteTableName, attributePrefix);
            } else {
                sql = generateDelete(jsonNode, attributes, fqTableName, schema, translateFieldNames, ignoreUnmappedFields, failUnmappedColumns, warningUnmappedColumns, escapeColumnNames, quoteTableName, attributePrefix);
            }
        } catch (final ProcessException pe) {
            getLogger().error("Failed to convert {} to a SQL {} statement due to {}; routing to failure", new Object[] { flowFile, statementType, pe.toString() }, pe);
            session.remove(created);
            session.transfer(flowFile, REL_FAILURE);
            return;
        }
        FlowFile sqlFlowFile = session.create(flowFile);
        created.add(sqlFlowFile);
        sqlFlowFile = session.write(sqlFlowFile, new OutputStreamCallback() {

            @Override
            public void process(final OutputStream out) throws IOException {
                out.write(sql.getBytes(StandardCharsets.UTF_8));
            }
        });
        attributes.put(CoreAttributes.MIME_TYPE.key(), "text/plain");
        attributes.put(attributePrefix + ".table", tableName);
        attributes.put(FRAGMENT_ID.key(), fragmentIdentifier);
        attributes.put(FRAGMENT_COUNT.key(), String.valueOf(arrayNode.size()));
        attributes.put(FRAGMENT_INDEX.key(), String.valueOf(i));
        if (catalog != null) {
            attributes.put(attributePrefix + ".catalog", catalog);
        }
        sqlFlowFile = session.putAllAttributes(sqlFlowFile, attributes);
        session.transfer(sqlFlowFile, REL_SQL);
    }
    flowFile = copyAttributesToOriginal(session, flowFile, fragmentIdentifier, arrayNode.size());
    session.transfer(flowFile, REL_ORIGINAL);
}
Also used : SQLException(java.sql.SQLException) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) OutputStream(java.io.OutputStream) JsonNode(org.codehaus.jackson.JsonNode) BufferedInputStream(java.io.BufferedInputStream) ArrayNode(org.codehaus.jackson.node.ArrayNode) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback) ObjectMapper(org.codehaus.jackson.map.ObjectMapper) HashSet(java.util.HashSet) FlowFile(org.apache.nifi.flowfile.FlowFile) BufferedInputStream(java.io.BufferedInputStream) InputStream(java.io.InputStream) Connection(java.sql.Connection) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) JsonNodeFactory(org.codehaus.jackson.node.JsonNodeFactory) ProcessException(org.apache.nifi.processor.exception.ProcessException) DBCPService(org.apache.nifi.dbcp.DBCPService) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback)
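
The schema lookup above resolves the CONNECTION_POOL property to a DBCPService controller service and borrows a plain java.sql.Connection from it. For orientation, here is a hedged sketch of what a minimal service implementation and its test wiring could look like; the class name SimpleTestDBCPService, the Derby URL, the PERSONS table, and the property values are illustrative assumptions rather than the NiFi project's actual fixtures.

import java.nio.charset.StandardCharsets;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;
import org.apache.nifi.controller.AbstractControllerService;
import org.apache.nifi.dbcp.DBCPService;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.Test;

// Hypothetical minimal DBCPService backed by embedded Derby. Sketch only; the NiFi
// codebase ships its own DBCP implementations and test stubs.
public class SimpleTestDBCPService extends AbstractControllerService implements DBCPService {

    @Override
    public Connection getConnection() throws ProcessException {
        try {
            // ";create=true" creates the Derby database on first use (requires Derby on the classpath)
            return DriverManager.getConnection("jdbc:derby:target/test-db;create=true");
        } catch (final SQLException e) {
            throw new ProcessException("Could not obtain a Derby connection", e);
        }
    }
}

Registered with a TestRunner, the processor's asControllerService(DBCPService.class) call resolves to this instance:

// Sketch of wiring the service into a test of ConvertJSONToSQL; the statement type,
// table name, and JSON payload are illustrative assumptions.
@Test
public void sketchConvertJsonToSqlWiring() throws Exception {
    final TestRunner runner = TestRunners.newTestRunner(ConvertJSONToSQL.class);
    final DBCPService dbcp = new SimpleTestDBCPService();
    runner.addControllerService("dbcp", dbcp);
    runner.enableControllerService(dbcp);
    runner.setProperty(ConvertJSONToSQL.CONNECTION_POOL, "dbcp");
    runner.setProperty(ConvertJSONToSQL.STATEMENT_TYPE, "INSERT");
    runner.setProperty(ConvertJSONToSQL.TABLE_NAME, "PERSONS");
    // Create the target table so the processor can look up its schema
    try (final Connection conn = dbcp.getConnection();
        final Statement stmt = conn.createStatement()) {
        stmt.execute("create table PERSONS (id integer not null, name varchar(100))");
    }
    runner.enqueue("{\"id\": 1, \"name\": \"Joe\"}".getBytes(StandardCharsets.UTF_8));
    runner.run();
    runner.assertTransferCount(ConvertJSONToSQL.REL_SQL, 1);
    runner.assertTransferCount(ConvertJSONToSQL.REL_ORIGINAL, 1);
}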

Example 7 with DBCPService

Use of org.apache.nifi.dbcp.DBCPService in project nifi by apache: class ExecuteSQL, method onTrigger().

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile fileToProcess = null;
    if (context.hasIncomingConnection()) {
        fileToProcess = session.get();
        // If all incoming connections are self-loops we can run without a FlowFile; otherwise we should run only when a FlowFile is available.
        if (fileToProcess == null && context.hasNonLoopConnection()) {
            return;
        }
    }
    final ComponentLog logger = getLogger();
    final DBCPService dbcpService = context.getProperty(DBCP_SERVICE).asControllerService(DBCPService.class);
    final Integer queryTimeout = context.getProperty(QUERY_TIMEOUT).asTimePeriod(TimeUnit.SECONDS).intValue();
    final boolean convertNamesForAvro = context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean();
    final Boolean useAvroLogicalTypes = context.getProperty(USE_AVRO_LOGICAL_TYPES).asBoolean();
    final Integer defaultPrecision = context.getProperty(DEFAULT_PRECISION).evaluateAttributeExpressions().asInteger();
    final Integer defaultScale = context.getProperty(DEFAULT_SCALE).evaluateAttributeExpressions().asInteger();
    final StopWatch stopWatch = new StopWatch(true);
    final String selectQuery;
    if (context.getProperty(SQL_SELECT_QUERY).isSet()) {
        selectQuery = context.getProperty(SQL_SELECT_QUERY).evaluateAttributeExpressions(fileToProcess).getValue();
    } else {
        // If the query is not set, then an incoming flow file is required, and expected to contain a valid SQL select query.
        // If there is no incoming connection, onTrigger will not be called as the processor will fail when scheduled.
        final StringBuilder queryContents = new StringBuilder();
        session.read(fileToProcess, in -> queryContents.append(IOUtils.toString(in, Charset.defaultCharset())));
        selectQuery = queryContents.toString();
    }
    int resultCount = 0;
    try (final Connection con = dbcpService.getConnection();
        final PreparedStatement st = con.prepareStatement(selectQuery)) {
        // timeout in seconds
        st.setQueryTimeout(queryTimeout);
        if (fileToProcess != null) {
            JdbcCommon.setParameters(st, fileToProcess.getAttributes());
        }
        logger.debug("Executing query {}", new Object[] { selectQuery });
        boolean results = st.execute();
        while (results) {
            FlowFile resultSetFF;
            if (fileToProcess == null) {
                resultSetFF = session.create();
            } else {
                resultSetFF = session.create(fileToProcess);
                resultSetFF = session.putAllAttributes(resultSetFF, fileToProcess.getAttributes());
            }
            final AtomicLong nrOfRows = new AtomicLong(0L);
            resultSetFF = session.write(resultSetFF, out -> {
                try {
                    final ResultSet resultSet = st.getResultSet();
                    final JdbcCommon.AvroConversionOptions options = JdbcCommon.AvroConversionOptions.builder().convertNames(convertNamesForAvro).useLogicalTypes(useAvroLogicalTypes).defaultPrecision(defaultPrecision).defaultScale(defaultScale).build();
                    nrOfRows.set(JdbcCommon.convertToAvroStream(resultSet, out, options, null));
                } catch (final SQLException e) {
                    throw new ProcessException(e);
                }
            });
            long duration = stopWatch.getElapsed(TimeUnit.MILLISECONDS);
            // set an attribute recording how many rows were selected
            resultSetFF = session.putAttribute(resultSetFF, RESULT_ROW_COUNT, String.valueOf(nrOfRows.get()));
            resultSetFF = session.putAttribute(resultSetFF, RESULT_QUERY_DURATION, String.valueOf(duration));
            resultSetFF = session.putAttribute(resultSetFF, CoreAttributes.MIME_TYPE.key(), JdbcCommon.MIME_TYPE_AVRO_BINARY);
            logger.info("{} contains {} Avro records; transferring to 'success'", new Object[] { resultSetFF, nrOfRows.get() });
            session.getProvenanceReporter().modifyContent(resultSetFF, "Retrieved " + nrOfRows.get() + " rows", duration);
            session.transfer(resultSetFF, REL_SUCCESS);
            resultCount++;
            // are there any more result sets?
            try {
                results = st.getMoreResults();
            } catch (SQLException ex) {
                results = false;
            }
        }
        // pass the original flow file down the line to trigger downstream processors
        if (fileToProcess != null) {
            if (resultCount > 0) {
                session.remove(fileToProcess);
            } else {
                fileToProcess = session.write(fileToProcess, JdbcCommon::createEmptyAvroStream);
                session.transfer(fileToProcess, REL_SUCCESS);
            }
        }
    } catch (final ProcessException | SQLException e) {
        // pass the original flow file down the line to trigger downstream processors
        if (fileToProcess == null) {
            // This can happen if any exceptions occur while setting up the connection, statement, etc.
            logger.error("Unable to execute SQL select query {} due to {}. No FlowFile to route to failure", new Object[] { selectQuery, e });
            context.yield();
        } else {
            if (context.hasIncomingConnection()) {
                logger.error("Unable to execute SQL select query {} for {} due to {}; routing to failure", new Object[] { selectQuery, fileToProcess, e });
                fileToProcess = session.penalize(fileToProcess);
            } else {
                logger.error("Unable to execute SQL select query {} due to {}; routing to failure", new Object[] { selectQuery, e });
                context.yield();
            }
            session.transfer(fileToProcess, REL_FAILURE);
        }
    }
}
Also used : StandardValidators(org.apache.nifi.processor.util.StandardValidators) Connection(java.sql.Connection) CapabilityDescription(org.apache.nifi.annotation.documentation.CapabilityDescription) USE_AVRO_LOGICAL_TYPES(org.apache.nifi.processors.standard.util.JdbcCommon.USE_AVRO_LOGICAL_TYPES) EventDriven(org.apache.nifi.annotation.behavior.EventDriven) ComponentLog(org.apache.nifi.logging.ComponentLog) PropertyDescriptor(org.apache.nifi.components.PropertyDescriptor) ProcessException(org.apache.nifi.processor.exception.ProcessException) ArrayList(java.util.ArrayList) DEFAULT_SCALE(org.apache.nifi.processors.standard.util.JdbcCommon.DEFAULT_SCALE) HashSet(java.util.HashSet) SQLException(java.sql.SQLException) Charset(java.nio.charset.Charset) WritesAttributes(org.apache.nifi.annotation.behavior.WritesAttributes) Relationship(org.apache.nifi.processor.Relationship) ResultSet(java.sql.ResultSet) DEFAULT_PRECISION(org.apache.nifi.processors.standard.util.JdbcCommon.DEFAULT_PRECISION) Requirement(org.apache.nifi.annotation.behavior.InputRequirement.Requirement) ReadsAttributes(org.apache.nifi.annotation.behavior.ReadsAttributes) FlowFile(org.apache.nifi.flowfile.FlowFile) NORMALIZE_NAMES_FOR_AVRO(org.apache.nifi.processors.standard.util.JdbcCommon.NORMALIZE_NAMES_FOR_AVRO) ProcessContext(org.apache.nifi.processor.ProcessContext) Set(java.util.Set) ProcessSession(org.apache.nifi.processor.ProcessSession) WritesAttribute(org.apache.nifi.annotation.behavior.WritesAttribute) PreparedStatement(java.sql.PreparedStatement) TimeUnit(java.util.concurrent.TimeUnit) AtomicLong(java.util.concurrent.atomic.AtomicLong) IOUtils(org.apache.commons.io.IOUtils) List(java.util.List) InputRequirement(org.apache.nifi.annotation.behavior.InputRequirement) OnScheduled(org.apache.nifi.annotation.lifecycle.OnScheduled) JdbcCommon(org.apache.nifi.processors.standard.util.JdbcCommon) StopWatch(org.apache.nifi.util.StopWatch) AbstractProcessor(org.apache.nifi.processor.AbstractProcessor) Tags(org.apache.nifi.annotation.documentation.Tags) DBCPService(org.apache.nifi.dbcp.DBCPService) CoreAttributes(org.apache.nifi.flowfile.attributes.CoreAttributes) Collections(java.util.Collections) ReadsAttribute(org.apache.nifi.annotation.behavior.ReadsAttribute) FlowFile(org.apache.nifi.flowfile.FlowFile) SQLException(java.sql.SQLException) Connection(java.sql.Connection) PreparedStatement(java.sql.PreparedStatement) ComponentLog(org.apache.nifi.logging.ComponentLog) StopWatch(org.apache.nifi.util.StopWatch) AtomicLong(java.util.concurrent.atomic.AtomicLong) ProcessException(org.apache.nifi.processor.exception.ProcessException) DBCPService(org.apache.nifi.dbcp.DBCPService) ResultSet(java.sql.ResultSet)
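
Both onTrigger methods above interact with the pool only through getConnection(). Reduced to what these processors rely on, the DBCPService contract is roughly the following; this is a simplified sketch, and the real interface in org.apache.nifi.dbcp carries documentation annotations and, in newer NiFi versions, additional overloads.

import java.sql.Connection;
import org.apache.nifi.controller.ControllerService;
import org.apache.nifi.processor.exception.ProcessException;

// Simplified view of the controller-service contract used by the processors above
public interface DBCPService extends ControllerService {

    // Borrow a connection from the pool; implementations wrap checked SQLExceptions in ProcessException
    Connection getConnection() throws ProcessException;
}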

Example 8 with DBCPService

Use of org.apache.nifi.dbcp.DBCPService in project nifi by apache: class QueryDatabaseTableTest, method testWithRuntimeException().

@Test
public void testWithRuntimeException() throws SQLException {
    // load test data to database
    final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
    Statement stmt = con.createStatement();
    try {
        stmt.execute("drop table TEST_NULL_INT");
    } catch (final SQLException sqle) {
    // Ignore, usually due to Derby not having DROP TABLE IF EXISTS
    }
    stmt.execute("create table TEST_NULL_INT (id integer not null, val1 integer, val2 integer, constraint my_pk primary key (id))");
    stmt.execute("insert into TEST_NULL_INT (id, val1, val2) VALUES (0, NULL, 1)");
    stmt.execute("insert into TEST_NULL_INT (id, val1, val2) VALUES (1, 1, 1)");
    runner.setIncomingConnection(false);
    runner.setProperty(QueryDatabaseTable.TABLE_NAME, "TEST_NULL_INT");
    runner.setProperty(AbstractDatabaseFetchProcessor.MAX_VALUE_COLUMN_NAMES, "id");
    QueryDatabaseTable.dbAdapters.put(dbAdapter.getName(), new GenericDatabaseAdapter() {

        @Override
        public String getName() {
            throw new DataFileWriter.AppendWriteException(null);
        }
    });
    runner.run();
    assertTrue(runner.getFlowFilesForRelationship(QueryDatabaseTable.REL_SUCCESS).isEmpty());
}
Also used : SQLException(java.sql.SQLException) GenericDatabaseAdapter(org.apache.nifi.processors.standard.db.impl.GenericDatabaseAdapter) Statement(java.sql.Statement) DataFileWriter(org.apache.avro.file.DataFileWriter) Connection(java.sql.Connection) DBCPService(org.apache.nifi.dbcp.DBCPService) Test(org.junit.Test)
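
The QueryDatabaseTableTest methods in Examples 8 through 10 all obtain the pool with runner.getControllerService("dbcp"), so the test class must have registered a DBCPService under that id beforehand. That setup is not part of the snippets; a minimal sketch, reusing the hypothetical SimpleTestDBCPService from the Example 6 sketch and omitting the database-adapter and other properties the real test class also configures, might look like this (imports for TestRunners, DBCPService, org.junit.Before, and InitializationException assumed):

// Hypothetical @Before setup assumed by the QueryDatabaseTableTest snippets above and below
@Before
public void setup() throws InitializationException {
    runner = TestRunners.newTestRunner(QueryDatabaseTable.class);
    final DBCPService dbcp = new SimpleTestDBCPService(); // illustrative service from the Example 6 sketch
    runner.addControllerService("dbcp", dbcp);
    runner.enableControllerService(dbcp);
    runner.setProperty(QueryDatabaseTable.DBCP_SERVICE, "dbcp");
}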

Example 9 with DBCPService

Use of org.apache.nifi.dbcp.DBCPService in project nifi by apache: class QueryDatabaseTableTest, method testAddedRows().

@Test
public void testAddedRows() throws ClassNotFoundException, SQLException, InitializationException, IOException {
    // load test data to database
    final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
    Statement stmt = con.createStatement();
    try {
        stmt.execute("drop table TEST_QUERY_DB_TABLE");
    } catch (final SQLException sqle) {
    // Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842]
    }
    stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)");
    stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (0, 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')");
    stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (1, 'Carrie Jones', 5.0, '2000-01-01 03:23:34.234')");
    stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (2, NULL, 2.0, '2010-01-01 00:00:00')");
    runner.setProperty(QueryDatabaseTable.TABLE_NAME, "TEST_QUERY_DB_TABLE");
    runner.setIncomingConnection(false);
    runner.setProperty(QueryDatabaseTable.MAX_VALUE_COLUMN_NAMES, "ID");
    runner.setProperty(QueryDatabaseTable.MAX_ROWS_PER_FLOW_FILE, "2");
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 2);
    MockFlowFile flowFile = runner.getFlowFilesForRelationship(QueryDatabaseTable.REL_SUCCESS).get(0);
    assertEquals("TEST_QUERY_DB_TABLE", flowFile.getAttribute(QueryDatabaseTable.RESULT_TABLENAME));
    assertEquals(flowFile.getAttribute("maxvalue.id"), "2");
    InputStream in = new ByteArrayInputStream(flowFile.toByteArray());
    runner.setProperty(QueryDatabaseTable.FETCH_SIZE, "2");
    assertEquals(2, getNumberOfRecordsFromStream(in));
    flowFile = runner.getFlowFilesForRelationship(QueryDatabaseTable.REL_SUCCESS).get(1);
    assertEquals(flowFile.getAttribute("maxvalue.id"), "2");
    in = new ByteArrayInputStream(flowFile.toByteArray());
    assertEquals(1, getNumberOfRecordsFromStream(in));
    runner.clearTransferState();
    // Run again, this time no flowfiles/rows should be transferred
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 0);
    runner.clearTransferState();
    // Remove Max Rows Per Flow File
    runner.setProperty(QueryDatabaseTable.MAX_ROWS_PER_FLOW_FILE, "0");
    // Add a new row with a higher ID and run, one flowfile with one new row should be transferred
    stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (3, 'Mary West', 15.0, '2000-01-01 03:23:34.234')");
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 1);
    flowFile = runner.getFlowFilesForRelationship(QueryDatabaseTable.REL_SUCCESS).get(0);
    assertEquals(flowFile.getAttribute("maxvalue.id"), "3");
    in = new ByteArrayInputStream(flowFile.toByteArray());
    assertEquals(1, getNumberOfRecordsFromStream(in));
    // Sanity check - run again, this time no flowfiles/rows should be transferred
    runner.clearTransferState();
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 0);
    runner.clearTransferState();
    // Add timestamp as a max value column name
    runner.setProperty(QueryDatabaseTable.MAX_VALUE_COLUMN_NAMES, "id, created_on");
    // Add a new row with a higher ID and run, one flow file will be transferred because no max value for the timestamp has been stored
    stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (4, 'Marty Johnson', 15.0, '2011-01-01 03:23:34.234')");
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 1);
    flowFile = runner.getFlowFilesForRelationship(QueryDatabaseTable.REL_SUCCESS).get(0);
    assertEquals(flowFile.getAttribute("maxvalue.id"), "4");
    assertEquals(flowFile.getAttribute("maxvalue.created_on"), "2011-01-01 03:23:34.234");
    in = new ByteArrayInputStream(flowFile.toByteArray());
    assertEquals(1, getNumberOfRecordsFromStream(in));
    runner.clearTransferState();
    // Add a new row with a higher ID but lower timestamp and run, no flow file will be transferred
    stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (5, 'NO NAME', 15.0, '2001-01-01 03:23:34.234')");
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 0);
    runner.clearTransferState();
    // Add a new row with a higher ID and run, one flow file will be transferred
    stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (6, 'Mr. NiFi', 1.0, '2012-01-01 03:23:34.234')");
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 1);
    in = new ByteArrayInputStream(runner.getFlowFilesForRelationship(QueryDatabaseTable.REL_SUCCESS).get(0).toByteArray());
    assertEquals(1, getNumberOfRecordsFromStream(in));
    runner.clearTransferState();
    // Set name as the max value column name (and clear the state), all rows should be returned since the max value for name has not been set
    runner.getStateManager().clear(Scope.CLUSTER);
    runner.setProperty(QueryDatabaseTable.MAX_VALUE_COLUMN_NAMES, "name");
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 1);
    in = new ByteArrayInputStream(runner.getFlowFilesForRelationship(QueryDatabaseTable.REL_SUCCESS).get(0).toByteArray());
    assertEquals(7, getNumberOfRecordsFromStream(in));
    runner.clearTransferState();
    // Add a new row with a "higher" name than the max but lower than "NULL" (to test that null values are skipped), one flow file will be transferred
    stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (7, 'NULK', 1.0, '2012-01-01 03:23:34.234')");
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 1);
    in = new ByteArrayInputStream(runner.getFlowFilesForRelationship(QueryDatabaseTable.REL_SUCCESS).get(0).toByteArray());
    assertEquals(1, getNumberOfRecordsFromStream(in));
    runner.clearTransferState();
    // Set scale as the max value column name (and clear the state); all rows should be returned since no max value for scale has been stored
    runner.getStateManager().clear(Scope.CLUSTER);
    runner.setProperty(QueryDatabaseTable.MAX_VALUE_COLUMN_NAMES, "scale");
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 1);
    in = new ByteArrayInputStream(runner.getFlowFilesForRelationship(QueryDatabaseTable.REL_SUCCESS).get(0).toByteArray());
    assertEquals(8, getNumberOfRecordsFromStream(in));
    runner.clearTransferState();
    // Add a new row with a higher value for scale than the max, one flow file will be transferred
    stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (8, 'NULK', 100.0, '2012-01-01 03:23:34.234')");
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 1);
    in = new ByteArrayInputStream(runner.getFlowFilesForRelationship(QueryDatabaseTable.REL_SUCCESS).get(0).toByteArray());
    assertEquals(1, getNumberOfRecordsFromStream(in));
    runner.clearTransferState();
    // Set bignum as the max value column name (and clear the state); all rows should be returned since no max value for bignum has been stored
    runner.getStateManager().clear(Scope.CLUSTER);
    runner.setProperty(QueryDatabaseTable.MAX_VALUE_COLUMN_NAMES, "bignum");
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 1);
    in = new ByteArrayInputStream(runner.getFlowFilesForRelationship(QueryDatabaseTable.REL_SUCCESS).get(0).toByteArray());
    assertEquals(9, getNumberOfRecordsFromStream(in));
    runner.clearTransferState();
    // Add a new row with a higher value for bignum than the max, one flow file will be transferred
    stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on, bignum) VALUES (9, 'Alice Bob', 100.0, '2012-01-01 03:23:34.234', 1)");
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 1);
    in = new ByteArrayInputStream(runner.getFlowFilesForRelationship(QueryDatabaseTable.REL_SUCCESS).get(0).toByteArray());
    assertEquals(1, getNumberOfRecordsFromStream(in));
    runner.clearTransferState();
}
Also used : MockFlowFile(org.apache.nifi.util.MockFlowFile) SQLException(java.sql.SQLException) ByteArrayInputStream(org.fusesource.hawtbuf.ByteArrayInputStream) Statement(java.sql.Statement) ByteArrayInputStream(org.fusesource.hawtbuf.ByteArrayInputStream) InputStream(java.io.InputStream) Connection(java.sql.Connection) DBCPService(org.apache.nifi.dbcp.DBCPService) Test(org.junit.Test)
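
getNumberOfRecordsFromStream is a helper defined elsewhere in the test class and is not shown in these snippets. Since QueryDatabaseTable emits Avro binary content, a plausible equivalent simply counts the records in the Avro container stream; this is a sketch assuming the Avro DataFileStream API (org.apache.avro.file.DataFileStream, org.apache.avro.generic.GenericDatumReader, org.apache.avro.generic.GenericRecord, org.apache.avro.io.DatumReader).

// Plausible sketch of the record-counting helper used above
private long getNumberOfRecordsFromStream(final InputStream in) throws IOException {
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    long count = 0;
    try (final DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(in, datumReader)) {
        while (dataFileStream.hasNext()) {
            // records are only counted, not inspected
            dataFileStream.next();
            count++;
        }
    }
    return count;
}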

Example 10 with DBCPService

Use of org.apache.nifi.dbcp.DBCPService in project nifi by apache: class QueryDatabaseTableTest, method testInitialMaxValue().

@Test
public void testInitialMaxValue() throws ClassNotFoundException, SQLException, InitializationException, IOException {
    // load test data to database
    final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
    Statement stmt = con.createStatement();
    InputStream in;
    try {
        stmt.execute("drop table TEST_QUERY_DB_TABLE");
    } catch (final SQLException sqle) {
    // Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842]
    }
    stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)");
    Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
    cal.setTimeInMillis(0);
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
    dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
    int rowCount = 0;
    // create larger row set
    for (int batch = 0; batch < 10; batch++) {
        stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '" + dateFormat.format(cal.getTime().getTime()) + "')");
        rowCount++;
        cal.add(Calendar.MINUTE, 1);
    }
    runner.setProperty(QueryDatabaseTable.TABLE_NAME, "${" + TABLE_NAME_KEY + "}");
    runner.setVariable(TABLE_NAME_KEY, "TEST_QUERY_DB_TABLE");
    runner.setIncomingConnection(false);
    runner.setProperty(QueryDatabaseTable.MAX_VALUE_COLUMN_NAMES, "created_on");
    cal.setTimeInMillis(0);
    cal.add(Calendar.MINUTE, 5);
    runner.setProperty("initial.maxvalue.CREATED_ON", dateFormat.format(cal.getTime().getTime()));
    // Initial run with no previous state. Should get only last 4 records
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 1);
    in = new ByteArrayInputStream(runner.getFlowFilesForRelationship(QueryDatabaseTable.REL_SUCCESS).get(0).toByteArray());
    assertEquals(4, getNumberOfRecordsFromStream(in));
    runner.getStateManager().assertStateEquals("test_query_db_table" + AbstractDatabaseFetchProcessor.NAMESPACE_DELIMITER + "created_on", "1970-01-01 00:09:00.0", Scope.CLUSTER);
    runner.clearTransferState();
    // Run again, this time no flowfiles/rows should be transferred
    // Validate Max Value doesn't change also
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 0);
    runner.getStateManager().assertStateEquals("test_query_db_table" + AbstractDatabaseFetchProcessor.NAMESPACE_DELIMITER + "created_on", "1970-01-01 00:09:00.0", Scope.CLUSTER);
    runner.clearTransferState();
    // Append a new row; expect one flowfile containing one row
    cal.setTimeInMillis(0);
    cal.add(Calendar.MINUTE, rowCount);
    stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '" + dateFormat.format(cal.getTime().getTime()) + "')");
    rowCount++;
    runner.run();
    runner.assertAllFlowFilesTransferred(QueryDatabaseTable.REL_SUCCESS, 1);
    in = new ByteArrayInputStream(runner.getFlowFilesForRelationship(QueryDatabaseTable.REL_SUCCESS).get(0).toByteArray());
    assertEquals(1, getNumberOfRecordsFromStream(in));
    runner.getStateManager().assertStateEquals("test_query_db_table" + AbstractDatabaseFetchProcessor.NAMESPACE_DELIMITER + "created_on", "1970-01-01 00:10:00.0", Scope.CLUSTER);
    runner.clearTransferState();
}
Also used : SQLException(java.sql.SQLException) ByteArrayInputStream(org.fusesource.hawtbuf.ByteArrayInputStream) Statement(java.sql.Statement) ByteArrayInputStream(org.fusesource.hawtbuf.ByteArrayInputStream) InputStream(java.io.InputStream) Calendar(java.util.Calendar) Connection(java.sql.Connection) DBCPService(org.apache.nifi.dbcp.DBCPService) SimpleDateFormat(java.text.SimpleDateFormat) Test(org.junit.Test)

Aggregations

DBCPService (org.apache.nifi.dbcp.DBCPService): 73 usages
Connection (java.sql.Connection): 61 usages
Statement (java.sql.Statement): 57 usages
Test (org.junit.Test): 57 usages
SQLException (java.sql.SQLException): 46 usages
MockFlowFile (org.apache.nifi.util.MockFlowFile): 28 usages
HashMap (java.util.HashMap): 25 usages
ResultSet (java.sql.ResultSet): 22 usages
HiveDBCPService (org.apache.nifi.dbcp.hive.HiveDBCPService): 21 usages
File (java.io.File): 18 usages
TestRunner (org.apache.nifi.util.TestRunner): 18 usages
Matchers.anyString (org.mockito.Matchers.anyString): 14 usages
InputStream (java.io.InputStream): 13 usages
ProcessException (org.apache.nifi.processor.exception.ProcessException): 10 usages
ByteArrayInputStream (org.fusesource.hawtbuf.ByteArrayInputStream): 9 usages
StateManager (org.apache.nifi.components.state.StateManager): 7 usages
HashSet (java.util.HashSet): 6 usages
Map (java.util.Map): 6 usages
FlowFile (org.apache.nifi.flowfile.FlowFile): 6 usages
IOException (java.io.IOException): 5 usages