
Example 1 with TableResult

Use of org.apache.flink.table.api.TableResult in project flink by apache.

From class HBaseConnectorITCase, method testTableSourceSinkWithDDL.

@Test
public void testTableSourceSinkWithDDL() throws Exception {
    StreamExecutionEnvironment execEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    StreamTableEnvironment tEnv = StreamTableEnvironment.create(execEnv, streamSettings);
    // register HBase table testTable1 which contains test data
    String table1DDL = createHBaseTableDDL(TEST_TABLE_1, true);
    tEnv.executeSql(table1DDL);
    // register HBase table which is empty
    String table3DDL = createHBaseTableDDL(TEST_TABLE_3, true);
    tEnv.executeSql(table3DDL);
    String insertStatement =
            "INSERT INTO " + TEST_TABLE_3
                    + " SELECT rowkey, family1, family2, family3, family4 FROM " + TEST_TABLE_1;
    TableResult tableResult = tEnv.executeSql(insertStatement);
    // wait to finish
    tableResult.await();
    assertEquals("Expected INSERT rowKind", RowKind.INSERT, tableResult.collect().next().getKind());
    // start a batch scan job to verify contents in HBase table
    TableEnvironment batchEnv = TableEnvironment.create(batchSettings);
    batchEnv.executeSql(table3DDL);
    List<String> expected = new ArrayList<>();
    expected.add("+I[1, 10, Hello-1, 100, 1.01, false, Welt-1, 2019-08-18T19:00, 2019-08-18, 19:00, 12345678.0001]");
    expected.add("+I[2, 20, Hello-2, 200, 2.02, true, Welt-2, 2019-08-18T19:01, 2019-08-18, 19:01, 12345678.0002]");
    expected.add("+I[3, 30, Hello-3, 300, 3.03, false, Welt-3, 2019-08-18T19:02, 2019-08-18, 19:02, 12345678.0003]");
    expected.add("+I[4, 40, null, 400, 4.04, true, Welt-4, 2019-08-18T19:03, 2019-08-18, 19:03, 12345678.0004]");
    expected.add("+I[5, 50, Hello-5, 500, 5.05, false, Welt-5, 2019-08-19T19:10, 2019-08-19, 19:10, 12345678.0005]");
    expected.add("+I[6, 60, Hello-6, 600, 6.06, true, Welt-6, 2019-08-19T19:20, 2019-08-19, 19:20, 12345678.0006]");
    expected.add("+I[7, 70, Hello-7, 700, 7.07, false, Welt-7, 2019-08-19T19:30, 2019-08-19, 19:30, 12345678.0007]");
    expected.add("+I[8, 80, null, 800, 8.08, true, Welt-8, 2019-08-19T19:40, 2019-08-19, 19:40, 12345678.0008]");
    Table countTable = batchEnv.sqlQuery("SELECT COUNT(h.rowkey) FROM " + TEST_TABLE_3 + " AS h");
    assertEquals(Long.valueOf(expected.size()), countTable.execute().collect().next().getField(0));
    String query =
            "SELECT h.rowkey,"
                    + " h.family1.col1,"
                    + " h.family2.col1, h.family2.col2,"
                    + " h.family3.col1, h.family3.col2, h.family3.col3,"
                    + " h.family4.col1, h.family4.col2, h.family4.col3, h.family4.col4"
                    + " FROM " + TEST_TABLE_3 + " AS h";
    TableResult tableResult3 = batchEnv.executeSql(query);
    List<String> result =
            Lists.newArrayList(tableResult3.collect()).stream()
                    .map(Row::toString)
                    .sorted()
                    .collect(Collectors.toList());
    assertEquals(expected, result);
}
Also used : TableResult(org.apache.flink.table.api.TableResult) Table(org.apache.flink.table.api.Table) ArrayList(java.util.ArrayList) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) TableEnvironment(org.apache.flink.table.api.TableEnvironment) Row(org.apache.flink.types.Row) Test(org.junit.Test)
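
The test above is the core TableResult workflow: executeSql() submits a job, await() blocks until an INSERT finishes, and collect() hands back a CloseableIterator over the results. A minimal standalone sketch of the same flow, against a hypothetical datagen table t (the table name and options are placeholders, not from the test):

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.TableResult;
import org.apache.flink.types.Row;
import org.apache.flink.util.CloseableIterator;

public class TableResultSketch {
    public static void main(String[] args) throws Exception {
        TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.inBatchMode());
        tEnv.executeSql(
                "CREATE TABLE t (id INT, name STRING) WITH ("
                        + "'connector' = 'datagen', 'number-of-rows' = '3')");
        // executeSql submits the query as a job and returns a TableResult immediately
        TableResult result = tEnv.executeSql("SELECT id, name FROM t");
        // collect() returns a CloseableIterator; closing it releases the job's
        // resources, which the test above skips but production code should not
        try (CloseableIterator<Row> rows = result.collect()) {
            while (rows.hasNext()) {
                System.out.println(rows.next());
            }
        }
    }
}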

Example 2 with TableResult

Use of org.apache.flink.table.api.TableResult in project flink by apache.

From class HiveTableSourceITCase, method testNonPartitionStreamingSource.

private void testNonPartitionStreamingSource(Boolean useMapredReader, String tblName) throws Exception {
    final String catalogName = "hive";
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    StreamTableEnvironment tEnv = HiveTestUtils.createTableEnvInStreamingMode(env, SqlDialect.HIVE);
    tEnv.getConfig().getConfiguration().setBoolean(HiveOptions.TABLE_EXEC_HIVE_FALLBACK_MAPRED_READER, useMapredReader);
    tEnv.registerCatalog(catalogName, hiveCatalog);
    tEnv.useCatalog(catalogName);
    tEnv.executeSql("CREATE TABLE source_db." + tblName + " (" + "  a INT," + "  b CHAR(1) " + ") stored as parquet TBLPROPERTIES (" + "  'streaming-source.enable'='true'," + "  'streaming-source.partition-order'='create-time'," + "  'streaming-source.monitor-interval'='100ms'" + ")");
    TableResult result = tEnv.executeSql("select * from hive.source_db." + tblName);
    CloseableIterator<Row> iter = result.collect();
    for (int i = 1; i < 3; i++) {
        try {
            Thread.sleep(1_000);
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
        batchTableEnv.executeSql("insert into table source_db." + tblName + " values (1,'a'), (2,'b')").await();
        Assert.assertEquals(Arrays.asList(Row.of(1, "a").toString(), Row.of(2, "b").toString()), fetchRows(iter, 2));
    }
    result.getJobClient().get().cancel();
}
Also used : TableResult(org.apache.flink.table.api.TableResult) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) Row(org.apache.flink.types.Row)
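
Examples 2 through 5 all call a fetchRows helper that this page does not show. Below is a plausible reconstruction, inferred from how the assertions use it (blocking reads from the collect() iterator, results returned as sorted strings); treat the exact signature and behavior as an assumption:

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.flink.types.Row;

// Hypothetical reconstruction of the fetchRows helper used by these tests:
// pull exactly `size` rows from the (blocking) collect() iterator and return
// them as sorted strings so the assertions are order-insensitive.
static List<String> fetchRows(Iterator<Row> iter, int size) {
    List<String> rows = new ArrayList<>(size);
    for (int i = 0; i < size; i++) {
        // hasNext()/next() block until the streaming source produces a row
        if (!iter.hasNext()) {
            throw new IllegalStateException("expected " + size + " rows, got " + i);
        }
        rows.add(iter.next().toString());
    }
    rows.sort(String::compareTo);
    return rows;
}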

Example 3 with TableResult

Use of org.apache.flink.table.api.TableResult in project flink by apache.

From class HiveTableSourceITCase, method testStreamPartitionReadByCreateTime.

@Test(timeout = 120000)
public void testStreamPartitionReadByCreateTime() throws Exception {
    final String catalogName = "hive";
    final String dbName = "source_db";
    final String tblName = "stream_create_time_test";
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(100);
    StreamTableEnvironment tEnv = HiveTestUtils.createTableEnvInStreamingMode(env, SqlDialect.HIVE);
    tEnv.registerCatalog(catalogName, hiveCatalog);
    tEnv.useCatalog(catalogName);
    tEnv.executeSql("CREATE TABLE source_db.stream_create_time_test (x int, y string, z int)" + " PARTITIONED BY (" + " p1 string, p2 string, p3 string) TBLPROPERTIES(" + "'streaming-source.enable'='true'," + "'streaming-source.partition-include'='all'," + "'streaming-source.consume-order'='create-time'," + "'streaming-source.monitor-interval'='1s'," + "'streaming-source.consume-start-offset'='2020-10-02 00:00:00'" + ")");
    // the partitions' create-time is the current timestamp, which is later than the
    // configured consume-start-offset '2020-10-02 00:00:00', so every partition is consumed
    HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName).addRow(new Object[] { 0, "a", 11 }).commit("p1='A1',p2='B1',p3='C1'");
    TableResult result = tEnv.executeSql("select * from hive.source_db.stream_create_time_test");
    CloseableIterator<Row> iter = result.collect();
    Assert.assertEquals(Row.of(0, "a", "11", "A1", "B1", "C1").toString(), fetchRows(iter, 1).get(0));
    for (int i = 1; i < 6; i++) {
        try {
            Thread.sleep(1_000);
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
        HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName).addRow(new Object[] { i, "new_add", 11 + i }).addRow(new Object[] { i, "new_add_1", 11 + i }).commit("p1='A',p2='B',p3='" + i + "'");
        Assert.assertEquals(Arrays.asList(Row.of(i, "new_add", 11 + i, "A", "B", i).toString(), Row.of(i, "new_add_1", 11 + i, "A", "B", i).toString()), fetchRows(iter, 2));
    }
    result.getJobClient().get().cancel();
}
Also used : TableResult(org.apache.flink.table.api.TableResult) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) Row(org.apache.flink.types.Row) Test(org.junit.Test)
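
These streaming tests all end with result.getJobClient().get().cancel(), which fires an asynchronous cancellation and leaves both the returned future and the open iterator dangling. A slightly more careful shutdown, sketched as a helper of our own (the method name is not from the Flink tests):

import org.apache.flink.table.api.TableResult;
import org.apache.flink.types.Row;
import org.apache.flink.util.CloseableIterator;

// Hypothetical helper: stop an unbounded streaming query and wait for the
// cancellation to actually take effect before returning.
static void stopStreamingQuery(TableResult result, CloseableIterator<Row> iter) throws Exception {
    // closing the iterator releases the collect() sink's resources
    iter.close();
    // cancel() only returns a future; join() waits until the job is cancelled
    result.getJobClient().ifPresent(client -> client.cancel().join());
}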

Example 4 with TableResult

Use of org.apache.flink.table.api.TableResult in project flink by apache.

From class HiveTableSourceITCase, method testStreamReadWithProjectPushDown.

@Test(timeout = 120000)
public void testStreamReadWithProjectPushDown() throws Exception {
    final String catalogName = "hive";
    final String dbName = "source_db";
    final String tblName = "stream_project_pushdown_test";
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(100);
    StreamTableEnvironment tEnv = HiveTestUtils.createTableEnvInStreamingMode(env, SqlDialect.HIVE);
    tEnv.registerCatalog(catalogName, hiveCatalog);
    tEnv.useCatalog(catalogName);
    tEnv.executeSql("CREATE TABLE source_db.stream_project_pushdown_test (x int, y string, z int)" + " PARTITIONED BY (" + " pt_year int, pt_mon string, pt_day string) TBLPROPERTIES(" + "'streaming-source.enable'='true'," + "'streaming-source.monitor-interval'='1s'," + "'streaming-source.consume-start-offset'='pt_year=2019/pt_month=09/pt_day=02'" + ")");
    HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName).addRow(new Object[] { 0, "a", 11 }).commit("pt_year='2019',pt_mon='09',pt_day='01'");
    HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName).addRow(new Object[] { 1, "b", 12 }).commit("pt_year='2020',pt_mon='09',pt_day='03'");
    TableResult result = tEnv.executeSql("select x, y from hive.source_db.stream_project_pushdown_test where pt_year = '2020'");
    CloseableIterator<Row> iter = result.collect();
    Assert.assertEquals(Row.of(1, "b").toString(), fetchRows(iter, 1).get(0));
    for (int i = 2; i < 6; i++) {
        try {
            Thread.sleep(1_000);
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
        HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName).addRow(new Object[] { i, "new_add", 11 + i }).addRow(new Object[] { i, "new_add_1", 11 + i }).commit("pt_year='2020',pt_mon='10',pt_day='0" + i + "'");
        Assert.assertEquals(Arrays.asList(Row.of(i, "new_add").toString(), Row.of(i, "new_add_1").toString()), fetchRows(iter, 2));
    }
    result.getJobClient().get().cancel();
}
Also used : TableResult(org.apache.flink.table.api.TableResult) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) Row(org.apache.flink.types.Row) Test(org.junit.Test)
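
The test verifies projection push-down only indirectly, through the rows that come back. On recent Flink versions, an EXPLAIN over the same query is a quick way to see the pushed-down projection and partition filter in the plan itself (the exact plan rendering varies by version):

// Sketch: print the optimized plan for the query used above. With push-down
// applied, the Hive source should list only the selected fields (x, y) plus
// what the partition filter needs.
tEnv.executeSql(
        "EXPLAIN SELECT x, y FROM hive.source_db.stream_project_pushdown_test"
                + " WHERE pt_year = '2020'")
    .print();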

Example 5 with TableResult

Use of org.apache.flink.table.api.TableResult in project flink by apache.

From class HiveTableSourceITCase, method testStreamPartitionReadByPartitionTime.

@Test(timeout = 120000)
public void testStreamPartitionReadByPartitionTime() throws Exception {
    final String catalogName = "hive";
    final String dbName = "source_db";
    final String tblName = "stream_test";
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(100);
    StreamTableEnvironment tEnv = HiveTestUtils.createTableEnvInStreamingMode(env, SqlDialect.HIVE);
    tEnv.registerCatalog(catalogName, hiveCatalog);
    tEnv.useCatalog(catalogName);
    tEnv.executeSql("CREATE TABLE source_db.stream_test (" + " a INT," + " b STRING" + ") PARTITIONED BY (ts STRING) TBLPROPERTIES (" + "'streaming-source.enable'='true'," + "'streaming-source.monitor-interval'='1s'," + "'streaming-source.consume-order'='partition-time'" + ")");
    HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName).addRow(new Object[] { 0, "0" }).commit("ts='2020-05-06 00:00:00'");
    TableResult result = tEnv.executeSql("select * from hive.source_db.stream_test");
    CloseableIterator<Row> iter = result.collect();
    Assert.assertEquals(Row.of(0, "0", "2020-05-06 00:00:00").toString(), fetchRows(iter, 1).get(0));
    for (int i = 1; i < 6; i++) {
        try {
            Thread.sleep(1_000);
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
        HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName)
                .addRow(new Object[] { i, String.valueOf(i) })
                .addRow(new Object[] { i, i + "_copy" })
                .commit("ts='2020-05-06 00:" + i + "0:00'");
        Assert.assertEquals(
                Arrays.asList(
                        Row.of(i, String.valueOf(i), "2020-05-06 00:" + i + "0:00").toString(),
                        Row.of(i, i + "_copy", "2020-05-06 00:" + i + "0:00").toString()),
                fetchRows(iter, 2));
    }
    result.getJobClient().get().cancel();
}
Also used : TableResult(org.apache.flink.table.api.TableResult) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) Row(org.apache.flink.types.Row) Test(org.junit.Test)
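
All of the streaming examples share the same wiring: a checkpointed streaming environment, a registered Hive catalog, the Hive dialect for DDL, and streaming-source table properties. HiveTestUtils is test-only; outside the harness, the equivalent setup looks roughly like this (the HiveCatalog constructor arguments are placeholders for your own configuration):

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.SqlDialect;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.catalog.hive.HiveCatalog;

public class HiveStreamingSetupSketch {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(100);
        StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
        // placeholder catalog config: name, default database, hive-conf directory
        HiveCatalog hiveCatalog = new HiveCatalog("hive", "source_db", "/path/to/hive-conf");
        tEnv.registerCatalog("hive", hiveCatalog);
        tEnv.useCatalog("hive");
        // Hive dialect so the TBLPROPERTIES-style DDL in the examples parses as-is
        tEnv.getConfig().setSqlDialect(SqlDialect.HIVE);
    }
}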

Aggregations

TableResult (org.apache.flink.table.api.TableResult): 39 usages
Test (org.junit.Test): 26
Row (org.apache.flink.types.Row): 20
StreamTableEnvironment (org.apache.flink.table.api.bridge.java.StreamTableEnvironment): 15
ArrayList (java.util.ArrayList): 7
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 7
Table (org.apache.flink.table.api.Table): 7
TableEnvironment (org.apache.flink.table.api.TableEnvironment): 7
JobClient (org.apache.flink.core.execution.JobClient): 4
Configuration (org.apache.flink.configuration.Configuration): 3
ParameterTool (org.apache.flink.api.java.utils.ParameterTool): 2
TableDescriptor (org.apache.flink.table.api.TableDescriptor): 2
TableEnvironmentInternal (org.apache.flink.table.api.internal.TableEnvironmentInternal): 2
CsvTableSink (org.apache.flink.table.sinks.CsvTableSink): 2
DataType (org.apache.flink.table.types.DataType): 2
File (java.io.File): 1
IOException (java.io.IOException): 1
BigDecimal (java.math.BigDecimal): 1
Timestamp (java.sql.Timestamp): 1
Random (java.util.Random): 1