Search in sources:

Example 31 with StreamTableEnvironment

use of org.apache.flink.table.api.bridge.java.StreamTableEnvironment in project flink by apache.

the class DataStreamJavaITCase method testFromChangelogStreamUpsert.

/**
 * Registers an upsert changelog stream (primary key {@code f0}) as view {@code t} and compares
 * the result of {@code SELECT f0, SUM(f1) ... GROUP BY f0} with the expected rows of the script.
 */
@Test
public void testFromChangelogStreamUpsert() {
    final StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

    // One script holds both sides: every input(...) entry is followed by the output(...)
    // entries listed for it. Presumably getInput/getOutput pick out the respective side.
    final List<Either<Row, Row>> inputOrOutput =
            Arrays.asList(
                    // --
                    input(RowKind.INSERT, "bob", 0),
                    output(RowKind.INSERT, "bob", 0),
                    // --
                    input(RowKind.UPDATE_AFTER, "bob", 1),
                    output(RowKind.UPDATE_BEFORE, "bob", 0),
                    output(RowKind.UPDATE_AFTER, "bob", 1),
                    // --
                    input(RowKind.INSERT, "alice", 1),
                    output(RowKind.INSERT, "alice", 1),
                    // no impact: the repeated insert has no output entry of its own
                    input(RowKind.INSERT, "alice", 1),
                    // --
                    input(RowKind.UPDATE_AFTER, "alice", 2),
                    output(RowKind.UPDATE_BEFORE, "alice", 1),
                    output(RowKind.UPDATE_AFTER, "alice", 2),
                    // --
                    input(RowKind.UPDATE_AFTER, "alice", 100),
                    output(RowKind.UPDATE_BEFORE, "alice", 2),
                    output(RowKind.UPDATE_AFTER, "alice", 100));

    final DataStream<Row> changelogStream = env.fromElements(getInput(inputOrOutput));

    // The primary key is declared explicitly and the stream is read in upsert mode.
    final Schema keyedSchema = Schema.newBuilder().primaryKey("f0").build();
    final Table upsertTable =
            tableEnv.fromChangelogStream(changelogStream, keyedSchema, ChangelogMode.upsert());
    tableEnv.createTemporaryView("t", upsertTable);

    final Table sumPerKey = tableEnv.sqlQuery("SELECT f0, SUM(f1) FROM t GROUP BY f0");
    testResult(sumPerKey.execute(), getOutput(inputOrOutput));
}
Also used : Table(org.apache.flink.table.api.Table) Either(org.apache.flink.types.Either) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) Row(org.apache.flink.types.Row) Test(org.junit.Test)

Example 32 with StreamTableEnvironment

use of org.apache.flink.table.api.bridge.java.StreamTableEnvironment in project flink by apache.

the class DataStreamJavaITCase method testFromAndToDataStreamWithRaw.

/**
 * Converts a stream of {@code Tuple2<DayOfWeek, ZoneOffset>} into a table and back, asserting
 * the stream's type information, the logical types of the table columns, and the records
 * produced in both directions.
 */
@Test
public void testFromAndToDataStreamWithRaw() throws Exception {
    final StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

    final List<Tuple2<DayOfWeek, ZoneOffset>> rawRecords =
            Arrays.asList(
                    Tuple2.of(DayOfWeek.MONDAY, ZoneOffset.UTC),
                    Tuple2.of(DayOfWeek.FRIDAY, ZoneOffset.ofHours(5)));
    final DataStream<Tuple2<DayOfWeek, ZoneOffset>> dataStream = env.fromCollection(rawRecords);

    // verify incoming type information: a tuple type with an enum field and a generic field
    assertThat(dataStream.getType(), instanceOf(TupleTypeInfo.class));
    final TupleTypeInfo<?> tupleInfo = (TupleTypeInfo<?>) dataStream.getType();
    assertThat(tupleInfo.getFieldTypes()[0], instanceOf(EnumTypeInfo.class));
    assertThat(tupleInfo.getFieldTypes()[1], instanceOf(GenericTypeInfo.class));

    final Table table = tableEnv.fromDataStream(dataStream);

    // verify schema conversion: both columns carry a RawType as logical type
    final List<DataType> columnDataTypes = table.getResolvedSchema().getColumnDataTypes();
    final DataType firstColumn = columnDataTypes.get(0);
    final DataType secondColumn = columnDataTypes.get(1);
    assertThat(firstColumn.getLogicalType(), instanceOf(RawType.class));
    assertThat(secondColumn.getLogicalType(), instanceOf(RawType.class));

    // test reverse operation: first as rows of the executed table ...
    final Row mondayRow = Row.of(DayOfWeek.MONDAY, ZoneOffset.UTC);
    final Row fridayRow = Row.of(DayOfWeek.FRIDAY, ZoneOffset.ofHours(5));
    testResult(table.execute(), mondayRow, fridayRow);

    // ... then as tuples again, converting with the data type derived from the original stream
    final Tuple2[] expectedTuples = rawRecords.toArray(new Tuple2[0]);
    testResult(tableEnv.toDataStream(table, DataTypes.of(dataStream.getType())), expectedTuples);
}
Also used : Table(org.apache.flink.table.api.Table) Tuple2(org.apache.flink.api.java.tuple.Tuple2) EnumTypeInfo(org.apache.flink.api.java.typeutils.EnumTypeInfo) DataType(org.apache.flink.table.types.DataType) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) RawType(org.apache.flink.table.types.logical.RawType) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) GenericTypeInfo(org.apache.flink.api.java.typeutils.GenericTypeInfo) Test(org.junit.Test)

Example 33 with StreamTableEnvironment

use of org.apache.flink.table.api.bridge.java.StreamTableEnvironment in project flink by apache.

the class DataStreamJavaITCase method testFromAndToDataStreamEventTime.

/**
 * Converts a watermarked stream into a table with a {@code rowtime} metadata column, checks the
 * resolved schema, and then compares a 5 ms tumbling-window sum computed once in SQL and once
 * with the DataStream window API.
 */
@Test
public void testFromAndToDataStreamEventTime() throws Exception {
    final StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
    final DataStream<Tuple3<Long, Integer, String>> dataStream = getWatermarkedDataStream();

    // Declare only the rowtime metadata column and its watermark; the expected schema below
    // additionally lists the physical columns f0, f1 and f2.
    final Schema rowtimeSchema =
            Schema.newBuilder()
                    .columnByMetadata("rowtime", "TIMESTAMP_LTZ(3)")
                    .watermark("rowtime", "SOURCE_WATERMARK()")
                    .build();
    final Table table = tableEnv.fromDataStream(dataStream, rowtimeSchema);

    final ResolvedSchema expectedSchema =
            new ResolvedSchema(
                    Arrays.asList(
                            Column.physical("f0", BIGINT().notNull()),
                            Column.physical("f1", INT().notNull()),
                            Column.physical("f2", STRING()),
                            Column.metadata("rowtime", TIMESTAMP_LTZ(3), null, false)),
                    Collections.singletonList(
                            WatermarkSpec.of(
                                    "rowtime",
                                    ResolvedExpressionMock.of(
                                            TIMESTAMP_LTZ(3), "`SOURCE_WATERMARK`()"))),
                    null);
    testSchema(table, expectedSchema);

    // SQL variant: sum f1 per f2 within tumbling windows of 0.005 seconds
    tableEnv.createTemporaryView("t", table);
    final TableResult sqlWindowSums =
            tableEnv.executeSql(
                    "SELECT f2, SUM(f1) FROM t GROUP BY f2, TUMBLE(rowtime, INTERVAL '0.005' SECOND)");
    testResult(sqlWindowSums, Row.of("a", 47), Row.of("c", 1000), Row.of("c", 1000));

    // DataStream variant: same key and a tumbling event-time window of 5 milliseconds
    final DataStream<Row> apiWindowSums =
            tableEnv.toDataStream(table)
                    .keyBy(k -> k.getField("f2"))
                    .window(TumblingEventTimeWindows.of(Time.milliseconds(5)))
                    .<Row>apply(
                            (key, window, input, out) -> {
                                int total = 0;
                                for (Row element : input) {
                                    total += element.<Integer>getFieldAs("f1");
                                }
                                out.collect(Row.of(key, total));
                            })
                    .returns(Types.ROW(Types.STRING, Types.INT));
    testResult(apiWindowSums, Row.of("a", 47), Row.of("c", 1000), Row.of("c", 1000));
}
Also used : Table(org.apache.flink.table.api.Table) TableResult(org.apache.flink.table.api.TableResult) Tuple3(org.apache.flink.api.java.tuple.Tuple3) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) Row(org.apache.flink.types.Row) ResolvedSchema(org.apache.flink.table.catalog.ResolvedSchema) TypeHint(org.apache.flink.api.common.typeinfo.TypeHint) Test(org.junit.Test)

Example 34 with StreamTableEnvironment

use of org.apache.flink.table.api.bridge.java.StreamTableEnvironment in project flink by apache.

the class DataStreamJavaITCase method getComplexUnifiedPipeline.

// --------------------------------------------------------------------------------------------
// Helper methods
// --------------------------------------------------------------------------------------------
/**
 * Builds a pipeline that switches between the DataStream API and the Table API several times.
 *
 * <p>Steps: a stream of allowed names is registered as a view; a SQL query counts the names
 * of an inline VALUES list that appear in that view; the query result is turned into a
 * changelog stream; a keyed process function counts the changelog rows seen per name; the
 * resulting tuples are registered as a view and queried with {@code SELECT DISTINCT}.
 *
 * @param env the execution environment the streams and the table environment are created on
 * @return the table for the final {@code SELECT DISTINCT f0, f1} query
 */
private Table getComplexUnifiedPipeline(StreamExecutionEnvironment env) {
    final DataStream<String> allowedNamesStream = env.fromElements("Bob", "Alice");
    final StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

    final Table allowedNames = tableEnv.fromDataStream(allowedNamesStream).as("allowedName");
    tableEnv.createTemporaryView("AllowedNamesTable", allowedNames);

    final String nameCountQuery =
            "SELECT name, COUNT(*) AS c "
                    + "FROM (VALUES ('Bob'), ('Alice'), ('Greg'), ('Bob')) AS NameTable(name) "
                    + "WHERE name IN (SELECT allowedName FROM AllowedNamesTable)"
                    + "GROUP BY name";
    final Table nameCountTable = tableEnv.sqlQuery(nameCountQuery);
    final DataStream<Row> nameCountStream = tableEnv.toChangelogStream(nameCountTable);

    // Emits (name, n) for the n-th changelog row received for that name.
    final KeyedProcessFunction<String, Row, Tuple2<String, Long>> rowCounter =
            new KeyedProcessFunction<String, Row, Tuple2<String, Long>>() {

                ValueState<Long> seen;

                @Override
                public void open(Configuration parameters) {
                    seen =
                            getRuntimeContext()
                                    .getState(new ValueStateDescriptor<>("count", Long.class));
                }

                @Override
                public void processElement(
                        Row r, Context ctx, Collector<Tuple2<String, Long>> out)
                        throws IOException {
                    // No value stored yet for this key: start counting from zero.
                    final Long stored = seen.value();
                    final long updatedCount = (stored == null ? 0L : stored) + 1;
                    seen.update(updatedCount);
                    out.collect(Tuple2.of(ctx.getCurrentKey(), updatedCount));
                }
            };

    final DataStream<Tuple2<String, Long>> updatesPerNameStream =
            nameCountStream.keyBy(r -> r.<String>getFieldAs("name")).process(rowCounter);
    tableEnv.createTemporaryView("UpdatesPerName", updatesPerNameStream);

    return tableEnv.sqlQuery("SELECT DISTINCT f0, f1 FROM UpdatesPerName");
}
Also used : DataType(org.apache.flink.table.types.DataType) BIGINT(org.apache.flink.table.api.DataTypes.BIGINT) STRING(org.apache.flink.table.api.DataTypes.STRING) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) Arrays(java.util.Arrays) Schema(org.apache.flink.table.api.Schema) Tuple3(org.apache.flink.api.java.tuple.Tuple3) TableDescriptor(org.apache.flink.table.api.TableDescriptor) Tuple2(org.apache.flink.api.java.tuple.Tuple2) ResolvedSchema(org.apache.flink.table.catalog.ResolvedSchema) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) TypeHint(org.apache.flink.api.common.typeinfo.TypeHint) TIMESTAMP_LTZ(org.apache.flink.table.api.DataTypes.TIMESTAMP_LTZ) RawType(org.apache.flink.table.types.logical.RawType) ZoneOffset(java.time.ZoneOffset) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) FIELD(org.apache.flink.table.api.DataTypes.FIELD) Parameterized(org.junit.runners.Parameterized) AbstractTestBase(org.apache.flink.test.util.AbstractTestBase) DOUBLE(org.apache.flink.table.api.DataTypes.DOUBLE) TableConfig(org.apache.flink.table.api.TableConfig) Expressions.$(org.apache.flink.table.api.Expressions.$) TestValuesTableFactory(org.apache.flink.table.planner.factories.TestValuesTableFactory) WatermarkStrategy(org.apache.flink.api.common.eventtime.WatermarkStrategy) Table(org.apache.flink.table.api.Table) ResolvedExpressionMock(org.apache.flink.table.expressions.utils.ResolvedExpressionMock) ZoneId(java.time.ZoneId) Objects(java.util.Objects) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) CloseableIterator(org.apache.flink.util.CloseableIterator) List(java.util.List) ValueState(org.apache.flink.api.common.state.ValueState) TumblingEventTimeWindows(org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) STRUCTURED(org.apache.flink.table.api.DataTypes.STRUCTURED) 
TableResult(org.apache.flink.table.api.TableResult) Row(org.apache.flink.types.Row) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) MAP(org.apache.flink.table.api.DataTypes.MAP) BOOLEAN(org.apache.flink.table.api.DataTypes.BOOLEAN) Either(org.apache.flink.types.Either) ChangelogMode(org.apache.flink.table.connector.ChangelogMode) ROW(org.apache.flink.table.api.DataTypes.ROW) Column(org.apache.flink.table.catalog.Column) RunWith(org.junit.runner.RunWith) Parameters(org.junit.runners.Parameterized.Parameters) LocalDateTime(java.time.LocalDateTime) Expressions.sourceWatermark(org.apache.flink.table.api.Expressions.sourceWatermark) DataStreamSource(org.apache.flink.streaming.api.datastream.DataStreamSource) KeyedProcessFunction(org.apache.flink.streaming.api.functions.KeyedProcessFunction) ArrayList(java.util.ArrayList) Collector(org.apache.flink.util.Collector) ProcessFunction(org.apache.flink.streaming.api.functions.ProcessFunction) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) INT(org.apache.flink.table.api.DataTypes.INT) Before(org.junit.Before) Types(org.apache.flink.api.common.typeinfo.Types) Time(org.apache.flink.streaming.api.windowing.time.Time) WatermarkSpec(org.apache.flink.table.catalog.WatermarkSpec) GenericTypeInfo(org.apache.flink.api.java.typeutils.GenericTypeInfo) Parameter(org.junit.runners.Parameterized.Parameter) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) Configuration(org.apache.flink.configuration.Configuration) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) DataTypes(org.apache.flink.table.api.DataTypes) Test(org.junit.Test) IOException(java.io.IOException) CollectionUtil(org.apache.flink.util.CollectionUtil) DataStream(org.apache.flink.streaming.api.datastream.DataStream) RowKind(org.apache.flink.types.RowKind) DayOfWeek(java.time.DayOfWeek) 
TIMESTAMP(org.apache.flink.table.api.DataTypes.TIMESTAMP) EnumTypeInfo(org.apache.flink.api.java.typeutils.EnumTypeInfo) RuntimeExecutionMode(org.apache.flink.api.common.RuntimeExecutionMode) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) Table(org.apache.flink.table.api.Table) Configuration(org.apache.flink.configuration.Configuration) IOException(java.io.IOException) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) Row(org.apache.flink.types.Row)

Example 35 with StreamTableEnvironment

use of org.apache.flink.table.api.bridge.java.StreamTableEnvironment in project flink by apache.

the class DataStreamJavaITCase method testToDataStreamAtomic.

/**
 * Creates a table from the values 1 to 5 and checks the stream obtained by converting it with
 * {@code Integer.class} as the target class against the same five values.
 */
@Test
public void testToDataStreamAtomic() throws Exception {
    final StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
    final Table table = tableEnv.fromValues(1, 2, 3, 4, 5);

    // The expected records mirror the values the table was created from.
    final Integer[] expected = {1, 2, 3, 4, 5};
    testResult(tableEnv.toDataStream(table, Integer.class), expected);
}
Also used : Table(org.apache.flink.table.api.Table) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) Test(org.junit.Test)

Aggregations

StreamTableEnvironment (org.apache.flink.table.api.bridge.java.StreamTableEnvironment): 64, Test (org.junit.Test): 53, StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 41, Row (org.apache.flink.types.Row): 38, Table (org.apache.flink.table.api.Table): 36, ArrayList (java.util.ArrayList): 19, TableResult (org.apache.flink.table.api.TableResult): 18, List (java.util.List): 10, TableDescriptor (org.apache.flink.table.api.TableDescriptor): 10, Arrays (java.util.Arrays): 6, Collections (java.util.Collections): 6, AbstractTestBase (org.apache.flink.test.util.AbstractTestBase): 6, IOException (java.io.IOException): 5, Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 5, ResolvedSchema (org.apache.flink.table.catalog.ResolvedSchema): 5, Either (org.apache.flink.types.Either): 5, LocalDateTime (java.time.LocalDateTime): 4, ZoneId (java.time.ZoneId): 4, TypeHint (org.apache.flink.api.common.typeinfo.TypeHint): 4, TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 4