Example 36 with DBCPService

Use of org.apache.nifi.dbcp.DBCPService in project nifi by apache.

From class TestSelectHiveQL, method setup:

@Before
public void setup() throws InitializationException {
    // Register a simple DBCPService implementation under the identifier "dbcp"
    final DBCPService dbcp = new DBCPServiceSimpleImpl();
    final Map<String, String> dbcpProperties = new HashMap<>();
    runner = TestRunners.newTestRunner(SelectHiveQL.class);
    runner.addControllerService("dbcp", dbcp, dbcpProperties);
    runner.enableControllerService(dbcp);
    // Point the processor at the registered service by its identifier
    runner.setProperty(SelectHiveQL.HIVE_DBCP_SERVICE, "dbcp");
}
Also used : HashMap(java.util.HashMap) HiveDBCPService(org.apache.nifi.dbcp.hive.HiveDBCPService) DBCPService(org.apache.nifi.dbcp.DBCPService) Before(org.junit.Before)
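
The DBCPServiceSimpleImpl referenced above is defined elsewhere in the test class. A minimal sketch of such a stub might look like the following; this assumes an embedded Derby database and is not the actual implementation from the NiFi sources:

// Assumed imports: java.sql.Connection, java.sql.DriverManager,
// org.apache.nifi.controller.AbstractControllerService, org.apache.nifi.processor.exception.ProcessException
private static class DBCPServiceSimpleImpl extends AbstractControllerService implements DBCPService {

    @Override
    public String getIdentifier() {
        return "dbcp";
    }

    @Override
    public Connection getConnection() throws ProcessException {
        try {
            // Hand out connections to an embedded Derby database, created on demand
            Class.forName("org.apache.derby.jdbc.EmbeddedDriver");
            return DriverManager.getConnection("jdbc:derby:target/testdb;create=true");
        } catch (final Exception e) {
            throw new ProcessException("getConnection failed: " + e, e);
        }
    }
}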

Example 37 with DBCPService

Use of org.apache.nifi.dbcp.DBCPService in project nifi by apache.

From class TestSelectHiveQL, method testMaxRowsPerFlowFileCSV:

@Test
public void testMaxRowsPerFlowFileCSV() throws ClassNotFoundException, SQLException, InitializationException, IOException {
    // load test data to database
    final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
    Statement stmt = con.createStatement();
    InputStream in;
    MockFlowFile mff;
    try {
        stmt.execute("drop table TEST_QUERY_DB_TABLE");
    } catch (final SQLException sqle) {
    // Ignore this error; it is most likely "table does not exist", since Derby does not yet support DROP TABLE IF EXISTS [DERBY-4842]
    }
    stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)");
    int rowCount = 0;
    // create larger row set
    for (int batch = 0; batch < 100; batch++) {
        stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')");
        rowCount++;
    }
    runner.setIncomingConnection(true);
    runner.setProperty(SelectHiveQL.MAX_ROWS_PER_FLOW_FILE, "${" + MAX_ROWS_KEY + "}");
    runner.setProperty(SelectHiveQL.HIVEQL_OUTPUT_FORMAT, HiveJdbcCommon.CSV);
    runner.enqueue("SELECT * FROM TEST_QUERY_DB_TABLE", new HashMap<String, String>() {

        {
            put(MAX_ROWS_KEY, "9");
        }
    });
    runner.run();
    runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_SUCCESS, 12);
    // 100 rows at 9 rows per flow file yields ceil(100/9) = 12 flow files;
    // all but the last should have 9 records each (10 lines = 9 records + CSV header)
    for (int ff = 0; ff < 11; ff++) {
        mff = runner.getFlowFilesForRelationship(SelectHiveQL.REL_SUCCESS).get(ff);
        in = new ByteArrayInputStream(mff.toByteArray());
        BufferedReader br = new BufferedReader(new InputStreamReader(in));
        assertEquals(10, br.lines().count());
        mff.assertAttributeExists("fragment.identifier");
        assertEquals(Integer.toString(ff), mff.getAttribute("fragment.index"));
        assertEquals("12", mff.getAttribute("fragment.count"));
    }
    // last file should have 1 record (2 lines = 1 record + header)
    mff = runner.getFlowFilesForRelationship(SelectHiveQL.REL_SUCCESS).get(11);
    in = new ByteArrayInputStream(mff.toByteArray());
    BufferedReader br = new BufferedReader(new InputStreamReader(in));
    assertEquals(2, br.lines().count());
    mff.assertAttributeExists("fragment.identifier");
    assertEquals(Integer.toString(11), mff.getAttribute("fragment.index"));
    assertEquals("12", mff.getAttribute("fragment.count"));
    runner.clearTransferState();
}
Also used : MockFlowFile(org.apache.nifi.util.MockFlowFile) InputStreamReader(java.io.InputStreamReader) SQLException(java.sql.SQLException) ByteArrayInputStream(java.io.ByteArrayInputStream) Statement(java.sql.Statement) InputStream(java.io.InputStream) Connection(java.sql.Connection) HiveDBCPService(org.apache.nifi.dbcp.hive.HiveDBCPService) DBCPService(org.apache.nifi.dbcp.DBCPService) BufferedReader(java.io.BufferedReader) Test(org.junit.Test)
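
The fragment attribute assertions above repeat for each flow file; a small helper like the following could factor them out (a hypothetical refactoring, not part of the original test):

// Verify the standard fragment.* attributes written by SelectHiveQL when splitting output
private static void assertFragmentAttributes(final MockFlowFile mff, final int index, final int count) {
    mff.assertAttributeExists("fragment.identifier");
    assertEquals(Integer.toString(index), mff.getAttribute("fragment.index"));
    assertEquals(Integer.toString(count), mff.getAttribute("fragment.count"));
}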

Example 38 with DBCPService

Use of org.apache.nifi.dbcp.DBCPService in project nifi by apache.

From class ExecuteGroovyScript, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession _session) throws ProcessException {
    boolean toFailureOnError = VALID_FAIL_STRATEGY[1].equals(context.getProperty(FAIL_STRATEGY).getValue());
    // Wrap the session so that flow files created by or pulled from it are tracked,
    // making it possible to transfer the original input to failure on error.
    GroovyProcessSessionWrap session = new GroovyProcessSessionWrap(_session, toFailureOnError);
    HashMap CTL = new AccessMap("CTL");
    HashMap SQL = new AccessMap("SQL");
    try {
        // compilation must be moved to validation
        Script script = getGroovyScript();
        Map bindings = script.getBinding().getVariables();
        bindings.clear();
        // Find the user-added properties and bind them for the script
        for (Map.Entry<PropertyDescriptor, String> property : context.getProperties().entrySet()) {
            if (property.getKey().isDynamic()) {
                if (property.getKey().getName().startsWith("CTL.")) {
                    // get controller service
                    ControllerService ctl = context.getProperty(property.getKey()).asControllerService(ControllerService.class);
                    CTL.put(property.getKey().getName().substring(4), ctl);
                } else if (property.getKey().getName().startsWith("SQL.")) {
                    DBCPService dbcp = context.getProperty(property.getKey()).asControllerService(DBCPService.class);
                    SQL.put(property.getKey().getName().substring(4), dbcp);
                } else {
                    // Add the dynamic property bound to its full PropertyValue to the script engine
                    if (property.getValue() != null) {
                        bindings.put(property.getKey().getName(), context.getProperty(property.getKey()));
                    }
                }
            }
        }
        onInitSQL(SQL);
        bindings.put("session", session);
        bindings.put("context", context);
        bindings.put("log", getLogger());
        bindings.put("REL_SUCCESS", REL_SUCCESS);
        bindings.put("REL_FAILURE", REL_FAILURE);
        bindings.put("CTL", CTL);
        bindings.put("SQL", SQL);
        script.run();
        bindings.clear();
        onCommitSQL(SQL);
        session.commit();
    } catch (Throwable t) {
        getLogger().error(t.toString(), t);
        onFailSQL(SQL);
        if (toFailureOnError) {
            // transfer all received to failure with two new attributes: ERROR_MESSAGE and ERROR_STACKTRACE.
            session.revertReceivedTo(REL_FAILURE, StackTraceUtils.deepSanitize(t));
        } else {
            session.rollback(true);
        }
    } finally {
        onFinitSQL(SQL);
    }
}
Also used : Script(groovy.lang.Script) PropertyDescriptor(org.apache.nifi.components.PropertyDescriptor) HashMap(java.util.HashMap) GroovyProcessSessionWrap(org.apache.nifi.processors.groovyx.flow.GroovyProcessSessionWrap) DBCPService(org.apache.nifi.dbcp.DBCPService) Map(java.util.Map) ControllerService(org.apache.nifi.controller.ControllerService)
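
The CTL. and SQL. prefixes work because the processor maps dynamic property names onto controller-service references. A simplified sketch of how that mapping can be declared follows (the real ExecuteGroovyScript implementation differs in its details; Validator is org.apache.nifi.components.Validator):

@Override
protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String name) {
    if (name.startsWith("CTL.")) {
        // CTL.* dynamic properties reference an arbitrary controller service
        return new PropertyDescriptor.Builder().name(name)
                .identifiesControllerService(ControllerService.class).dynamic(true).build();
    }
    if (name.startsWith("SQL.")) {
        // SQL.* dynamic properties must reference a DBCPService
        return new PropertyDescriptor.Builder().name(name)
                .identifiesControllerService(DBCPService.class).dynamic(true).build();
    }
    // Everything else becomes a plain dynamic property bound into the script
    return new PropertyDescriptor.Builder().name(name)
            .addValidator(Validator.VALID).dynamic(true).build();
}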

Example 39 with DBCPService

Use of org.apache.nifi.dbcp.DBCPService in project kylo by Teradata.

From class MergeTable, method getConnection:

@Override
public Connection getConnection(ProcessContext context) {
    ThriftService thriftService = context.getProperty(THRIFT_SERVICE).asControllerService(ThriftService.class);
    DBCPService hiveConnectionPool = context.getProperty(HIVE_CONNECTION_POOL).asControllerService(DBCPService.class);
    if (hiveConnectionPool != null) {
        getLogger().info("Returning Connection from HiveConnectionPool");
        return hiveConnectionPool.getConnection();
    } else {
        getLogger().info("Returning Connection from ThriftConnectionPool");
        return thriftService.getConnection();
    }
}
Also used : ThriftService(com.thinkbiganalytics.nifi.v2.thrift.ThriftService) DBCPService(org.apache.nifi.dbcp.DBCPService)
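
For context, a processor exposes a DBCPService through a property descriptor built with identifiesControllerService. A minimal sketch follows; the name and description are assumptions, not copied from the Kylo sources:

// Optional DBCP-backed connection pool; when set, it is preferred over the Thrift service
static final PropertyDescriptor HIVE_CONNECTION_POOL = new PropertyDescriptor.Builder()
        .name("Hive Connection Pool")
        .description("Optional Hive connection pool; takes precedence over the Thrift service when set")
        .required(false)
        .identifiesControllerService(DBCPService.class)
        .build();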

Example 40 with DBCPService

Use of org.apache.nifi.dbcp.DBCPService in project kylo by Teradata.

From class GetTableData, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = null;
    if (context.hasIncomingConnection()) {
        flowFile = session.get();
        // If there is no flow file and at least one incoming non-loop connection exists,
        // this processor should run only when a flow file arrives, so return.
        if (flowFile == null && context.hasNonLoopConnection()) {
            return;
        }
    }
    final FlowFile incoming = flowFile;
    final ComponentLog logger = getLog();
    final DBCPService dbcpService = context.getProperty(JDBC_SERVICE).asControllerService(DBCPService.class);
    final MetadataProviderService metadataService = context.getProperty(METADATA_SERVICE).asControllerService(MetadataProviderService.class);
    final String loadStrategy = context.getProperty(LOAD_STRATEGY).getValue();
    final String categoryName = context.getProperty(FEED_CATEGORY).evaluateAttributeExpressions(incoming).getValue();
    final String feedName = context.getProperty(FEED_NAME).evaluateAttributeExpressions(incoming).getValue();
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(incoming).getValue();
    final String fieldSpecs = context.getProperty(TABLE_SPECS).evaluateAttributeExpressions(incoming).getValue();
    final String dateField = context.getProperty(DATE_FIELD).evaluateAttributeExpressions(incoming).getValue();
    final Integer queryTimeout = context.getProperty(QUERY_TIMEOUT).asTimePeriod(TimeUnit.SECONDS).intValue();
    final Integer overlapTime = context.getProperty(OVERLAP_TIME).evaluateAttributeExpressions(incoming).asTimePeriod(TimeUnit.SECONDS).intValue();
    final Integer backoffTime = context.getProperty(BACKOFF_PERIOD).asTimePeriod(TimeUnit.SECONDS).intValue();
    final String unitSize = context.getProperty(UNIT_SIZE).getValue();
    final String outputType = context.getProperty(OUTPUT_TYPE).getValue();
    String outputDelimiter = context.getProperty(OUTPUT_DELIMITER).evaluateAttributeExpressions(incoming).getValue();
    final String delimiter = StringUtils.isBlank(outputDelimiter) ? "," : outputDelimiter;
    final PropertyValue waterMarkPropName = context.getProperty(HIGH_WATER_MARK_PROP).evaluateAttributeExpressions(incoming);
    final String[] selectFields = parseFields(fieldSpecs);
    final LoadStrategy strategy = LoadStrategy.valueOf(loadStrategy);
    final StopWatch stopWatch = new StopWatch(true);
    try (final Connection conn = dbcpService.getConnection()) {
        FlowFile outgoing = (incoming == null ? session.create() : incoming);
        final AtomicLong nrOfRows = new AtomicLong(0L);
        final LastFieldVisitor visitor = new LastFieldVisitor(dateField, null);
        final FlowFile current = outgoing;
        outgoing = session.write(outgoing, new OutputStreamCallback() {

            @Override
            public void process(final OutputStream out) throws IOException {
                ResultSet rs = null;
                try {
                    GetTableDataSupport support = new GetTableDataSupport(conn, queryTimeout);
                    if (strategy == LoadStrategy.FULL_LOAD) {
                        rs = support.selectFullLoad(tableName, selectFields);
                    } else if (strategy == LoadStrategy.INCREMENTAL) {
                        String waterMarkValue = getIncrementalWaterMarkValue(current, waterMarkPropName);
                        LocalDateTime waterMarkTime = LocalDateTime.parse(waterMarkValue, DATE_TIME_FORMAT);
                        Date lastLoadDate = toDate(waterMarkTime);
                        visitor.setLastModifyDate(lastLoadDate);
                        rs = support.selectIncremental(tableName, selectFields, dateField, overlapTime, lastLoadDate, backoffTime, GetTableDataSupport.UnitSizes.valueOf(unitSize));
                    } else {
                        throw new RuntimeException("Unsupported loadStrategy [" + loadStrategy + "]");
                    }
                    if (GetTableDataSupport.OutputType.DELIMITED.equals(GetTableDataSupport.OutputType.valueOf(outputType))) {
                        nrOfRows.set(JdbcCommon.convertToDelimitedStream(rs, out, (strategy == LoadStrategy.INCREMENTAL ? visitor : null), delimiter));
                    } else if (GetTableDataSupport.OutputType.AVRO.equals(GetTableDataSupport.OutputType.valueOf(outputType))) {
                        avroSchema = JdbcCommon.createSchema(rs);
                        nrOfRows.set(JdbcCommon.convertToAvroStream(rs, out, (strategy == LoadStrategy.INCREMENTAL ? visitor : null), avroSchema));
                    } else {
                        throw new RuntimeException("Unsupported output format type [" + outputType + "]");
                    }
                } catch (final SQLException e) {
                    throw new IOException("SQL execution failure", e);
                } finally {
                    if (rs != null) {
                        try {
                            if (rs.getStatement() != null) {
                                rs.getStatement().close();
                            }
                            rs.close();
                        } catch (SQLException e) {
                            getLog().error("Error closing SQL statement and result set", e);
                        }
                    }
                }
            }
        });
        // set an attribute recording how many rows were selected
        outgoing = session.putAttribute(outgoing, RESULT_ROW_COUNT, Long.toString(nrOfRows.get()));
        // set output format type and avro schema for feed setup, if available
        outgoing = session.putAttribute(outgoing, "db.table.output.format", outputType);
        String avroSchemaForFeedSetup = (avroSchema != null) ? JdbcCommon.getAvroSchemaForFeedSetup(avroSchema) : EMPTY_STRING;
        outgoing = session.putAttribute(outgoing, "db.table.avro.schema", avroSchemaForFeedSetup);
        session.getProvenanceReporter().modifyContent(outgoing, "Retrieved " + nrOfRows.get() + " rows", stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        // Terminate flow file if no work
        Long rowcount = nrOfRows.get();
        outgoing = session.putAttribute(outgoing, ComponentAttributes.NUM_SOURCE_RECORDS.key(), String.valueOf(rowcount));
        if (nrOfRows.get() == 0L) {
            logger.info("{} contains no data; transferring to 'nodata'", new Object[] { outgoing });
            session.transfer(outgoing, REL_NO_DATA);
        } else {
            logger.info("{} contains {} records; transferring to 'success'", new Object[] { outgoing, nrOfRows.get() });
            if (strategy == LoadStrategy.INCREMENTAL) {
                String newWaterMarkStr = format(visitor.getLastModifyDate());
                outgoing = setIncrementalWaterMarkValue(session, outgoing, waterMarkPropName, newWaterMarkStr);
                logger.info("Recorded load status feed {} date {}", new Object[] { feedName, newWaterMarkStr });
            }
            session.transfer(outgoing, REL_SUCCESS);
        }
    } catch (final Exception e) {
        if (incoming == null) {
            logger.error("Unable to execute SQL select from table due to {}. No incoming flow file to route to failure", new Object[] { e });
        } else {
            logger.error("Unable to execute SQL select from table due to {}; routing to failure", new Object[] { incoming, e });
            session.transfer(incoming, REL_FAILURE);
        }
    }
}
Also used : LocalDateTime(java.time.LocalDateTime) SQLException(java.sql.SQLException) OutputStream(java.io.OutputStream) ResultSet(java.sql.ResultSet) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback) MetadataProviderService(com.thinkbiganalytics.nifi.core.api.metadata.MetadataProviderService) FlowFile(org.apache.nifi.flowfile.FlowFile) Connection(java.sql.Connection) PropertyValue(org.apache.nifi.components.PropertyValue) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) Date(java.util.Date) ProcessException(org.apache.nifi.processor.exception.ProcessException) StopWatch(org.apache.nifi.util.StopWatch) AtomicLong(java.util.concurrent.atomic.AtomicLong) GetTableDataSupport(com.thinkbiganalytics.ingest.GetTableDataSupport) DBCPService(org.apache.nifi.dbcp.DBCPService)
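
Distilled from the examples above, the recurring runtime pattern is: resolve the DBCPService from a processor property, borrow a Connection in try-with-resources, and stream the results into the flow file. A minimal sketch, where the property name DBCP_SERVICE and the query are assumptions:

final DBCPService dbcp = context.getProperty(DBCP_SERVICE).asControllerService(DBCPService.class);
try (final Connection conn = dbcp.getConnection();
     final Statement stmt = conn.createStatement();
     final ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM TEST_QUERY_DB_TABLE")) {
    // stream the ResultSet into the outgoing flow file here
} catch (final SQLException e) {
    throw new ProcessException("SQL execution failure", e);
}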

Aggregations

DBCPService (org.apache.nifi.dbcp.DBCPService): 73 usages
Connection (java.sql.Connection): 61 usages
Statement (java.sql.Statement): 57 usages
Test (org.junit.Test): 57 usages
SQLException (java.sql.SQLException): 46 usages
MockFlowFile (org.apache.nifi.util.MockFlowFile): 28 usages
HashMap (java.util.HashMap): 25 usages
ResultSet (java.sql.ResultSet): 22 usages
HiveDBCPService (org.apache.nifi.dbcp.hive.HiveDBCPService): 21 usages
File (java.io.File): 18 usages
TestRunner (org.apache.nifi.util.TestRunner): 18 usages
Matchers.anyString (org.mockito.Matchers.anyString): 14 usages
InputStream (java.io.InputStream): 13 usages
ProcessException (org.apache.nifi.processor.exception.ProcessException): 10 usages
ByteArrayInputStream (org.fusesource.hawtbuf.ByteArrayInputStream): 9 usages
StateManager (org.apache.nifi.components.state.StateManager): 7 usages
HashSet (java.util.HashSet): 6 usages
Map (java.util.Map): 6 usages
FlowFile (org.apache.nifi.flowfile.FlowFile): 6 usages
IOException (java.io.IOException): 5 usages