Search in sources :

Example 6 with StreamTableEnvironment

use of org.apache.flink.table.api.bridge.java.StreamTableEnvironment in project flink by apache.

the class AvroTypesITCase method testAvroToRow.

/**
 * Converts the Avro {@code User} test stream into a {@link Table}, selects every column,
 * turns the result back into an append stream of {@link Row}s and compares the textual
 * form of the collected rows with the expected output.
 */
@Test
public void testAvroToRow() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);

    // Source stream -> table -> "select *" -> append stream of rows.
    DataStream<User> users = testData(env);
    Table selected = tEnv.fromDataStream(users, selectFields(users)).select($("*"));
    DataStream<Row> rowStream = tEnv.toAppendStream(selected, Row.class);
    List<Row> results = CollectionUtil.iteratorToList(DataStreamUtils.collect(rowStream));

    // TODO we should get an Avro record here instead of a nested row.
    // This should be fixed with FLIP-136
    String blackRow =
            "+I[black, null, Whatever, [true], [hello], true, java.nio.HeapByteBuffer[pos=0 lim=10 cap=10], "
                    + "2014-03-01, java.nio.HeapByteBuffer[pos=0 lim=2 cap=2], [7, -48], 0.0, GREEN, "
                    + "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 42, {}, null, null, null, 00:00:00.123456, "
                    + "12:12:12, 1970-01-01T00:00:00.123456Z, 2014-03-01T12:12:12.321Z, null]";
    String blueRow =
            "+I[blue, null, Charlie, [], [], false, java.nio.HeapByteBuffer[pos=0 lim=10 cap=10], 2014-03-01, "
                    + "java.nio.HeapByteBuffer[pos=0 lim=2 cap=2], [7, -48], 1.337, RED, null, 1337, {}, "
                    + "+I[Berlin, 42, Berlin, Bakerstreet, 12049], null, null, 00:00:00.123456, 12:12:12, 1970-01-01T00:00:00.123456Z, "
                    + "2014-03-01T12:12:12.321Z, null]";
    String yellowRow =
            "+I[yellow, null, Terminator, [false], [world], false, "
                    + "java.nio.HeapByteBuffer[pos=0 lim=10 cap=10], 2014-03-01, "
                    + "java.nio.HeapByteBuffer[pos=0 lim=2 cap=2], [7, -48], 0.0, GREEN, "
                    + "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 1, {}, null, null, null, 00:00:00.123456, "
                    + "12:12:12, 1970-01-01T00:00:00.123456Z, 2014-03-01T12:12:12.321Z, null]";

    // One expected row per line, no trailing newline.
    String expected = String.join("\n", blackRow, blueRow, yellowRow);
    TestBaseUtils.compareResultAsText(results, expected);
}
Also used : User(org.apache.flink.formats.avro.generated.User) Table(org.apache.flink.table.api.Table) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) Row(org.apache.flink.types.Row) Test(org.junit.Test)

Example 7 with StreamTableEnvironment

use of org.apache.flink.table.api.bridge.java.StreamTableEnvironment in project flink by apache.

the class HiveTableSourceITCase method testNonPartitionStreamingSource.

/**
 * Creates a non-partitioned, parquet-backed table in {@code source_db} with
 * {@code streaming-source.enable} turned on, starts a streaming {@code select *} over it and
 * verifies that rows inserted afterwards through {@code batchTableEnv} are emitted by the
 * running query.
 *
 * @param useMapredReader value written to
 *     {@code HiveOptions.TABLE_EXEC_HIVE_FALLBACK_MAPRED_READER} before the table is read
 * @param tblName name of the table to create and query inside {@code source_db}
 * @throws Exception if table creation, insertion or result fetching fails, or if the thread
 *     is interrupted while waiting between rounds
 */
private void testNonPartitionStreamingSource(Boolean useMapredReader, String tblName) throws Exception {
    final String catalogName = "hive";
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    StreamTableEnvironment tEnv = HiveTestUtils.createTableEnvInStreamingMode(env, SqlDialect.HIVE);
    tEnv.getConfig().getConfiguration().setBoolean(HiveOptions.TABLE_EXEC_HIVE_FALLBACK_MAPRED_READER, useMapredReader);
    tEnv.registerCatalog(catalogName, hiveCatalog);
    tEnv.useCatalog(catalogName);
    tEnv.executeSql("CREATE TABLE source_db." + tblName + " (" + "  a INT," + "  b CHAR(1) " + ") stored as parquet TBLPROPERTIES (" + "  'streaming-source.enable'='true'," + "  'streaming-source.partition-order'='create-time'," + "  'streaming-source.monitor-interval'='100ms'" + ")");
    TableResult result = tEnv.executeSql("select * from hive.source_db." + tblName);
    try {
        CloseableIterator<Row> iter = result.collect();
        // Two rounds: each inserts the same two rows and expects exactly those two rows back.
        for (int round = 1; round < 3; round++) {
            // The method already declares "throws Exception", so an interruption is simply
            // propagated instead of being wrapped in a RuntimeException.
            Thread.sleep(1_000);
            batchTableEnv.executeSql("insert into table source_db." + tblName + " values (1,'a'), (2,'b')").await();
            Assert.assertEquals(Arrays.asList(Row.of(1, "a").toString(), Row.of(2, "b").toString()), fetchRows(iter, 2));
        }
    } finally {
        // Cancel the streaming job even when an assertion or insert above fails, so a failed
        // run does not leave the job behind.
        result.getJobClient().get().cancel();
    }
}
Also used : TableResult(org.apache.flink.table.api.TableResult) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) Row(org.apache.flink.types.Row)

Example 8 with StreamTableEnvironment

use of org.apache.flink.table.api.bridge.java.StreamTableEnvironment in project flink by apache.

the class HiveTableSourceITCase method testStreamPartitionReadByCreateTime.

/**
 * Streams from a partitioned Hive table configured with
 * {@code 'streaming-source.consume-order'='create-time'} and checks that the initially
 * committed partition as well as five partitions committed while the query is running are
 * each read by the streaming query.
 *
 * @throws Exception if table setup, partition commits or result fetching fails, or if the
 *     thread is interrupted while waiting between commits
 */
@Test(timeout = 120000)
public void testStreamPartitionReadByCreateTime() throws Exception {
    final String catalogName = "hive";
    final String dbName = "source_db";
    final String tblName = "stream_create_time_test";
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(100);
    StreamTableEnvironment tEnv = HiveTestUtils.createTableEnvInStreamingMode(env, SqlDialect.HIVE);
    tEnv.registerCatalog(catalogName, hiveCatalog);
    tEnv.useCatalog(catalogName);
    tEnv.executeSql("CREATE TABLE source_db.stream_create_time_test (x int, y string, z int)" + " PARTITIONED BY (" + " p1 string, p2 string, p3 string) TBLPROPERTIES(" + "'streaming-source.enable'='true'," + "'streaming-source.partition-include'='all'," + "'streaming-source.consume-order'='create-time'," + "'streaming-source.monitor-interval'='1s'," + "'streaming-source.consume-start-offset'='2020-10-02 00:00:00'" + ")");
    // The partition is created when this test runs, so its create-time is close to the
    // current timestamp and therefore later than the '2020-10-02 00:00:00' start offset.
    HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName).addRow(new Object[] { 0, "a", 11 }).commit("p1='A1',p2='B1',p3='C1'");
    TableResult result = tEnv.executeSql("select * from hive.source_db.stream_create_time_test");
    try {
        CloseableIterator<Row> iter = result.collect();
        // Rows are compared through their string form, so "11" matches the int column value 11.
        Assert.assertEquals(Row.of(0, "a", "11", "A1", "B1", "C1").toString(), fetchRows(iter, 1).get(0));
        for (int i = 1; i < 6; i++) {
            // The method already declares "throws Exception", so an interruption is simply
            // propagated instead of being wrapped in a RuntimeException.
            Thread.sleep(1_000);
            // Commit a new partition (p3 = i) holding two rows and expect exactly those rows next.
            HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName).addRow(new Object[] { i, "new_add", 11 + i }).addRow(new Object[] { i, "new_add_1", 11 + i }).commit("p1='A',p2='B',p3='" + i + "'");
            Assert.assertEquals(Arrays.asList(Row.of(i, "new_add", 11 + i, "A", "B", i).toString(), Row.of(i, "new_add_1", 11 + i, "A", "B", i).toString()), fetchRows(iter, 2));
        }
    } finally {
        // Cancel the streaming job even when an assertion or commit above fails, so a failed
        // run does not leave the job behind.
        result.getJobClient().get().cancel();
    }
}
Also used : TableResult(org.apache.flink.table.api.TableResult) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) Row(org.apache.flink.types.Row) Test(org.junit.Test)

Example 9 with StreamTableEnvironment

use of org.apache.flink.table.api.bridge.java.StreamTableEnvironment in project flink by apache.

the class HiveTableSourceITCase method testStreamReadWithProjectPushDown.

/**
 * Streams a two-column projection ({@code x, y}) filtered on {@code pt_year = '2020'} from a
 * partitioned Hive table and checks that only rows from the 2020 partitions are returned,
 * both for the partition committed up front and for four partitions committed while the
 * query is running.
 *
 * @throws Exception if table setup, partition commits or result fetching fails, or if the
 *     thread is interrupted while waiting between commits
 */
@Test(timeout = 120000)
public void testStreamReadWithProjectPushDown() throws Exception {
    final String catalogName = "hive";
    final String dbName = "source_db";
    final String tblName = "stream_project_pushdown_test";
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(100);
    StreamTableEnvironment tEnv = HiveTestUtils.createTableEnvInStreamingMode(env, SqlDialect.HIVE);
    tEnv.registerCatalog(catalogName, hiveCatalog);
    tEnv.useCatalog(catalogName);
    // NOTE(review): the start offset spells the second key "pt_month" while the partition
    // column is declared as "pt_mon" -- confirm whether this mismatch is intentional.
    tEnv.executeSql("CREATE TABLE source_db.stream_project_pushdown_test (x int, y string, z int)" + " PARTITIONED BY (" + " pt_year int, pt_mon string, pt_day string) TBLPROPERTIES(" + "'streaming-source.enable'='true'," + "'streaming-source.monitor-interval'='1s'," + "'streaming-source.consume-start-offset'='pt_year=2019/pt_month=09/pt_day=02'" + ")");
    // One partition for 2019 and one for 2020; the query below filters on pt_year = '2020'.
    HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName).addRow(new Object[] { 0, "a", 11 }).commit("pt_year='2019',pt_mon='09',pt_day='01'");
    HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName).addRow(new Object[] { 1, "b", 12 }).commit("pt_year='2020',pt_mon='09',pt_day='03'");
    TableResult result = tEnv.executeSql("select x, y from hive.source_db.stream_project_pushdown_test where pt_year = '2020'");
    try {
        CloseableIterator<Row> iter = result.collect();
        Assert.assertEquals(Row.of(1, "b").toString(), fetchRows(iter, 1).get(0));
        for (int i = 2; i < 6; i++) {
            // The method already declares "throws Exception", so an interruption is simply
            // propagated instead of being wrapped in a RuntimeException.
            Thread.sleep(1_000);
            // Commit a new 2020 partition (pt_day = "0" + i) with two rows; only x and y are projected.
            HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName).addRow(new Object[] { i, "new_add", 11 + i }).addRow(new Object[] { i, "new_add_1", 11 + i }).commit("pt_year='2020',pt_mon='10',pt_day='0" + i + "'");
            Assert.assertEquals(Arrays.asList(Row.of(i, "new_add").toString(), Row.of(i, "new_add_1").toString()), fetchRows(iter, 2));
        }
    } finally {
        // Cancel the streaming job even when an assertion or commit above fails, so a failed
        // run does not leave the job behind.
        result.getJobClient().get().cancel();
    }
}
Also used : TableResult(org.apache.flink.table.api.TableResult) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) Row(org.apache.flink.types.Row) Test(org.junit.Test)

Example 10 with StreamTableEnvironment

use of org.apache.flink.table.api.bridge.java.StreamTableEnvironment in project flink by apache.

the class HiveTableSourceITCase method testStreamPartitionReadByPartitionTime.

/**
 * Streams from a Hive table partitioned by a {@code ts} string column and configured with
 * {@code 'streaming-source.consume-order'='partition-time'}, and checks that the initially
 * committed partition as well as five partitions committed while the query is running are
 * each read by the streaming query.
 *
 * @throws Exception if table setup, partition commits or result fetching fails, or if the
 *     thread is interrupted while waiting between commits
 */
@Test(timeout = 120000)
public void testStreamPartitionReadByPartitionTime() throws Exception {
    final String catalogName = "hive";
    final String dbName = "source_db";
    final String tblName = "stream_test";
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(100);
    StreamTableEnvironment tEnv = HiveTestUtils.createTableEnvInStreamingMode(env, SqlDialect.HIVE);
    tEnv.registerCatalog(catalogName, hiveCatalog);
    tEnv.useCatalog(catalogName);
    tEnv.executeSql("CREATE TABLE source_db.stream_test (" + " a INT," + " b STRING" + ") PARTITIONED BY (ts STRING) TBLPROPERTIES (" + "'streaming-source.enable'='true'," + "'streaming-source.monitor-interval'='1s'," + "'streaming-source.consume-order'='partition-time'" + ")");
    HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName).addRow(new Object[] { 0, "0" }).commit("ts='2020-05-06 00:00:00'");
    TableResult result = tEnv.executeSql("select * from hive.source_db.stream_test");
    try {
        CloseableIterator<Row> iter = result.collect();
        Assert.assertEquals(Row.of(0, "0", "2020-05-06 00:00:00").toString(), fetchRows(iter, 1).get(0));
        for (int i = 1; i < 6; i++) {
            // The method already declares "throws Exception", so an interruption is simply
            // propagated instead of being wrapped in a RuntimeException.
            Thread.sleep(1_000);
            // Partition value for this round: minute field is i followed by 0 (00:10:00 .. 00:50:00).
            String partitionTime = "2020-05-06 00:" + i + "0:00";
            HiveTestUtils.createTextTableInserter(hiveCatalog, dbName, tblName).addRow(new Object[] { i, String.valueOf(i) }).addRow(new Object[] { i, i + "_copy" }).commit("ts='" + partitionTime + "'");
            Assert.assertEquals(Arrays.asList(Row.of(i, String.valueOf(i), partitionTime).toString(), Row.of(i, i + "_copy", partitionTime).toString()), fetchRows(iter, 2));
        }
    } finally {
        // Cancel the streaming job even when an assertion or commit above fails, so a failed
        // run does not leave the job behind.
        result.getJobClient().get().cancel();
    }
}
Also used : TableResult(org.apache.flink.table.api.TableResult) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) Row(org.apache.flink.types.Row) Test(org.junit.Test)

Aggregations

StreamTableEnvironment (org.apache.flink.table.api.bridge.java.StreamTableEnvironment)64 Test (org.junit.Test)53 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)41 Row (org.apache.flink.types.Row)38 Table (org.apache.flink.table.api.Table)36 ArrayList (java.util.ArrayList)19 TableResult (org.apache.flink.table.api.TableResult)18 List (java.util.List)10 TableDescriptor (org.apache.flink.table.api.TableDescriptor)10 Arrays (java.util.Arrays)6 Collections (java.util.Collections)6 AbstractTestBase (org.apache.flink.test.util.AbstractTestBase)6 IOException (java.io.IOException)5 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)5 ResolvedSchema (org.apache.flink.table.catalog.ResolvedSchema)5 Either (org.apache.flink.types.Either)5 LocalDateTime (java.time.LocalDateTime)4 ZoneId (java.time.ZoneId)4 TypeHint (org.apache.flink.api.common.typeinfo.TypeHint)4 TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)4