Search in sources :

Example 16 with StreamTableEnvironment

use of org.apache.flink.table.api.bridge.java.StreamTableEnvironment in project flink by apache.

the class StreamWindowSQLExample method main.

/**
 * Runs a 5-second tumbling-window aggregation over a small, file-backed {@code orders} table
 * and prints the result to the console.
 *
 * @param args command-line arguments (not read by this method)
 * @throws Exception if creating the temporary file or executing a statement fails
 */
public static void main(String[] args) throws Exception {
    // the table environment is layered on top of the default streaming environment
    final StreamTableEnvironment tableEnv =
            StreamTableEnvironment.create(StreamExecutionEnvironment.getExecutionEnvironment());

    // six CSV rows: user_id, product, amount, ts (note that the timestamps are not sorted)
    final String csvRows =
            String.join(
                    "\n",
                    "1,beer,3,2019-12-12 00:00:01",
                    "1,diaper,4,2019-12-12 00:00:02",
                    "2,pen,3,2019-12-12 00:00:04",
                    "2,rubber,3,2019-12-12 00:00:06",
                    "3,rubber,2,2019-12-12 00:00:05",
                    "4,beer,1,2019-12-12 00:00:08");
    // createTempFile is defined elsewhere; it is expected to persist the rows and return the
    // path of the file it wrote
    final String csvPath = createTempFile(csvRows);

    // declare the table through DDL; because the events arrive out of order, the watermark
    // trails the event time by 3 seconds so that late events are still accepted
    final String createOrders =
            String.join(
                    "\n",
                    "CREATE TABLE orders (",
                    "  user_id INT,",
                    "  product STRING,",
                    "  amount INT,",
                    "  ts TIMESTAMP(3),",
                    "  WATERMARK FOR ts AS ts - INTERVAL '3' SECOND",
                    ") WITH (",
                    "  'connector.type' = 'filesystem',",
                    "  'connector.path' = '" + csvPath + "',",
                    "  'format.type' = 'csv'",
                    ")");
    tableEnv.executeSql(createOrders);

    // per 5-second window: number of orders, summed amount and number of distinct products
    final String windowedStats =
            String.join(
                    "\n",
                    "SELECT",
                    "  CAST(TUMBLE_START(ts, INTERVAL '5' SECOND) AS STRING) window_start,",
                    "  COUNT(*) order_num,",
                    "  SUM(amount) total_amount,",
                    "  COUNT(DISTINCT product) unique_products",
                    "FROM orders",
                    "GROUP BY TUMBLE(ts, INTERVAL '5' SECOND)");
    tableEnv.executeSql(windowedStats).print();
    // should output:
    // +----+--------------------------------+--------------+--------------+-----------------+
    // | op |                   window_start |    order_num | total_amount | unique_products |
    // +----+--------------------------------+--------------+--------------+-----------------+
    // | +I |        2019-12-12 00:00:00.000 |            3 |           10 |               3 |
    // | +I |        2019-12-12 00:00:05.000 |            3 |            6 |               2 |
    // +----+--------------------------------+--------------+--------------+-----------------+
}
Also used: StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), StreamTableEnvironment (org.apache.flink.table.api.bridge.java.StreamTableEnvironment)

Example 17 with StreamTableEnvironment

use of org.apache.flink.table.api.bridge.java.StreamTableEnvironment in project flink by apache.

the class ChangelogSocketExample method main.

/**
 * Reads changelog rows from a socket-backed table, sums the scores per name and prints the
 * resulting changelog stream.
 *
 * @param args command-line arguments; {@code hostname} (default {@code localhost}) and
 *     {@code port} (default {@code 9999}) are read from them
 * @throws Exception if registering the table or executing the job fails
 */
public static void main(String[] args) throws Exception {
    final ParameterTool cliArgs = ParameterTool.fromArgs(args);
    final String socketHost = cliArgs.get("hostname", "localhost");
    final String socketPort = cliArgs.get("port", "9999");

    final StreamExecutionEnvironment streamEnv =
            StreamExecutionEnvironment.getExecutionEnvironment();
    // the socket source can only run with a parallelism of 1
    streamEnv.setParallelism(1);
    final StreamTableEnvironment tableEnv = StreamTableEnvironment.create(streamEnv);

    // DDL that registers the socket-backed table in the catalog
    final String createUserScores =
            String.join(
                    "\n",
                    "CREATE TABLE UserScores (name STRING, score INT)",
                    "WITH (",
                    "  'connector' = 'socket',",
                    "  'hostname' = '" + socketHost + "',",
                    "  'port' = '" + socketPort + "',",
                    "  'byte-delimiter' = '10',",
                    "  'format' = 'changelog-csv',",
                    "  'changelog-csv.column-delimiter' = '|'",
                    ")");
    tableEnv.executeSql(createUserScores);

    // continuously updated aggregation: total score per name
    final Table scorePerName =
            tableEnv.sqlQuery("SELECT name, SUM(score) FROM UserScores GROUP BY name");

    // emit every change of the aggregate to the console, then start the job
    tableEnv.toChangelogStream(scorePerName).print();
    streamEnv.execute();
}
Also used: ParameterTool (org.apache.flink.api.java.utils.ParameterTool), Table (org.apache.flink.table.api.Table), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), StreamTableEnvironment (org.apache.flink.table.api.bridge.java.StreamTableEnvironment)

Example 18 with StreamTableEnvironment

use of org.apache.flink.table.api.bridge.java.StreamTableEnvironment in project flink by apache.

the class StreamSQLTestProgram method main.

/**
 * Builds a nested SQL pipeline (OVER window, tumbling window, time-bounded join, final
 * tumbling aggregation) over two generated tables and writes the resulting rows to files
 * under the given output path.
 *
 * @param args command-line arguments; {@code outputPath} is required
 * @throws Exception if the job cannot be set up or executed
 */
public static void main(String[] args) throws Exception {
    final ParameterTool cliArgs = ParameterTool.fromArgs(args);
    final String sinkDirectory = cliArgs.getRequired("outputPath");

    // streaming environment: up to 3 restarts with a 10 s delay, checkpoints every 4000 ms,
    // auto-watermark interval of 1000 ms
    final StreamExecutionEnvironment streamEnv =
            StreamExecutionEnvironment.getExecutionEnvironment();
    streamEnv.setRestartStrategy(
            RestartStrategies.fixedDelayRestart(3, Time.of(10, TimeUnit.SECONDS)));
    streamEnv.enableCheckpointing(4000);
    streamEnv.getConfig().setAutoWatermarkInterval(1000);

    final StreamTableEnvironment tableEnv = StreamTableEnvironment.create(streamEnv);
    final TableEnvironmentInternal internalEnv = (TableEnvironmentInternal) tableEnv;
    // NOTE(review): the meaning of the GeneratorTableSource arguments is not visible here
    internalEnv.registerTableSourceInternal("table1", new GeneratorTableSource(10, 100, 60, 0));
    internalEnv.registerTableSourceInternal("table2", new GeneratorTableSource(5, 0.2f, 60, 5));

    final int overSpanSeconds = 1;
    final int tumbleSpanSeconds = 10;

    // step 1: per-key row count over the preceding time range on table1
    final String overSql =
            String.format(
                    "SELECT "
                            + "  key, "
                            + "  rowtime, "
                            + "  COUNT(*) OVER (PARTITION BY key ORDER BY rowtime RANGE BETWEEN INTERVAL '%d' SECOND PRECEDING AND CURRENT ROW) AS cnt "
                            + "FROM table1",
                    overSpanSeconds);

    // step 2: tumbling window over step 1, mapping the average count to 1 (equals 101) or 99
    final String tumbleSql =
            String.format(
                    "SELECT "
                            + "  key, "
                            + "  CASE SUM(cnt) / COUNT(*) WHEN 101 THEN 1 ELSE 99 END AS correct, "
                            + "  TUMBLE_START(rowtime, INTERVAL '%d' SECOND) AS wStart, "
                            + "  TUMBLE_ROWTIME(rowtime, INTERVAL '%d' SECOND) AS rowtime "
                            + "FROM (%s) "
                            + "WHERE rowtime > TIMESTAMP '1970-01-01 00:00:01' "
                            + "GROUP BY key, TUMBLE(rowtime, INTERVAL '%d' SECOND)",
                    tumbleSpanSeconds,
                    tumbleSpanSeconds,
                    overSql,
                    tumbleSpanSeconds);

    // step 3: join table2 with step 2 on key within a bounded rowtime range
    final String joinSql =
            String.format(
                    "SELECT "
                            + "  t1.key, "
                            + "  t2.rowtime AS rowtime, "
                            + "  t2.correct,"
                            + "  t2.wStart "
                            + "FROM table2 t1, (%s) t2 "
                            + "WHERE "
                            + "  t1.key = t2.key AND "
                            + "  t1.rowtime BETWEEN t2.rowtime AND t2.rowtime + INTERVAL '%d' SECOND",
                    tumbleSql,
                    tumbleSpanSeconds);

    // step 4: sum the "correct" markers in 20-second tumbling windows
    final String finalSql =
            String.format(
                    "SELECT "
                            + "  SUM(correct) AS correct, "
                            + "  TUMBLE_START(rowtime, INTERVAL '20' SECOND) AS rowtime "
                            + "FROM (%s) "
                            + "GROUP BY TUMBLE(rowtime, INTERVAL '20' SECOND)",
                    joinSql);

    // translate the SQL into a Table and then into an append-only stream of (INT, TIMESTAMP) rows
    final Table aggregated = tableEnv.sqlQuery(finalSql);
    final DataStream<Row> rows =
            tableEnv.toAppendStream(aggregated, Types.ROW(Types.INT, Types.SQL_TIMESTAMP));

    // each row is written as one text line using Row#toString
    final Encoder<Row> lineEncoder =
            (element, stream) -> new PrintStream(stream).println(element.toString());
    final StreamingFileSink<Row> fileSink =
            StreamingFileSink.forRowFormat(new Path(sinkDirectory), lineEncoder)
                    .withBucketAssigner(new KeyBucketAssigner())
                    .withRollingPolicy(OnCheckpointRollingPolicy.build())
                    .build();

    // NOTE(review): KillMapper is defined elsewhere; its behavior is not visible here
    rows.map(new KillMapper()).setParallelism(1).addSink(fileSink).setParallelism(1);
    streamEnv.execute();
}
Also used: ParameterTool (org.apache.flink.api.java.utils.ParameterTool), Path (org.apache.flink.core.fs.Path), PrintStream (java.io.PrintStream), Table (org.apache.flink.table.api.Table), TableEnvironmentInternal (org.apache.flink.table.api.internal.TableEnvironmentInternal), Encoder (org.apache.flink.api.common.serialization.Encoder), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), StreamTableEnvironment (org.apache.flink.table.api.bridge.java.StreamTableEnvironment), Row (org.apache.flink.types.Row)

Example 19 with StreamTableEnvironment

use of org.apache.flink.table.api.bridge.java.StreamTableEnvironment in project flink by apache.

the class StreamPythonUdfSqlJob method main.

/**
 * Registers the Python function {@code add_one}, applies it to the values 1, 2 and 3 through
 * SQL, and fails with an {@link AssertionError} unless the collected result is [2, 3, 4].
 *
 * @param args command-line arguments (not read by this method)
 */
public static void main(String[] args) {
    final StreamExecutionEnvironment streamEnv =
            StreamExecutionEnvironment.getExecutionEnvironment();
    streamEnv.setParallelism(1);
    final StreamTableEnvironment tableEnv = StreamTableEnvironment.create(streamEnv);

    // make the Python UDF and the three-row input view available to SQL
    tableEnv.executeSql(
            "create temporary system function add_one as 'add_one.add_one' language python");
    tableEnv.createTemporaryView("source", tableEnv.fromValues(1L, 2L, 3L).as("a"));

    // run the query and gather the first column of every result row, in arrival order
    final List<Long> observed = new ArrayList<>();
    final Iterator<Row> rows =
            tableEnv.executeSql("select add_one(a) as a from source").collect();
    rows.forEachRemaining(row -> observed.add((Long) row.getField(0)));

    final List<Long> wanted = Arrays.asList(2L, 3L, 4L);
    if (observed.equals(wanted)) {
        return;
    }
    throw new AssertionError(
            String.format("The output result: %s is not as expected: %s!", observed, wanted));
}
Also used: ArrayList (java.util.ArrayList), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), StreamTableEnvironment (org.apache.flink.table.api.bridge.java.StreamTableEnvironment), Row (org.apache.flink.types.Row)

Example 20 with StreamTableEnvironment

use of org.apache.flink.table.api.bridge.java.StreamTableEnvironment in project flink by apache.

the class CassandraConnectorITCase method testCassandraTableSink.

/**
 * Inserts {@code rowCollection} into Cassandra through a {@code CassandraAppendTableSink}
 * and verifies that the rows read back are exactly the rows that were sent.
 */
@Test
public void testCassandraTableSink() throws Exception {
    final StreamExecutionEnvironment streamEnv =
            StreamExecutionEnvironment.getExecutionEnvironment();
    streamEnv.setParallelism(4);
    final StreamTableEnvironment tableEnv = StreamTableEnvironment.create(streamEnv);

    // expose the in-memory rows as a view that SQL can select from
    final DataStreamSource<Row> rowSource = streamEnv.fromCollection(rowCollection);
    tableEnv.createTemporaryView("testFlinkTable", rowSource);

    // register the Cassandra sink with three fields: one string followed by two ints
    final String[] sinkFieldNames = { "f0", "f1", "f2" };
    final TypeInformation[] sinkFieldTypes = { Types.STRING, Types.INT, Types.INT };
    ((TableEnvironmentInternal) tEnvInternal(tableEnv))
            .registerTableSinkInternal(
                    "cassandraTable",
                    new CassandraAppendTableSink(
                                    builderForWriting, injectTableName(INSERT_DATA_QUERY))
                            .configure(sinkFieldNames, sinkFieldTypes));

    // run the insert and block until it has finished
    tableEnv.sqlQuery("select * from testFlinkTable").executeInsert("cassandraTable").await();

    final ResultSet queried = session.execute(injectTableName(SELECT_DATA_QUERY));

    // validate that all input was correctly written to Cassandra: every stored row must
    // cancel out exactly one not-yet-matched input row
    final List<Row> unmatched = new ArrayList<>(rowCollection);
    final List<com.datastax.driver.core.Row> stored = queried.all();
    for (com.datastax.driver.core.Row storedRow : stored) {
        // NOTE(review): result columns 2 and 1 are read in swapped order, presumably because
        // the SELECT returns them in a different order than the input fields; confirm
        // against SELECT_DATA_QUERY
        final Row cmp =
                Row.of(storedRow.getString(0), storedRow.getInt(2), storedRow.getInt(1));
        Assert.assertTrue(
                "Row " + cmp + " was written to Cassandra but not in input.",
                unmatched.remove(cmp));
    }
    Assert.assertTrue(
            "The input data was not completely written to Cassandra", unmatched.isEmpty());
}

/** Returns the given table environment unchanged; it only keeps the cast above readable. */
private static StreamTableEnvironment tEnvInternal(StreamTableEnvironment tableEnv) {
    return tableEnv;
}
Also used: ArrayList (java.util.ArrayList), TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation), TableEnvironmentInternal (org.apache.flink.table.api.internal.TableEnvironmentInternal), ResultSet (com.datastax.driver.core.ResultSet), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), StreamTableEnvironment (org.apache.flink.table.api.bridge.java.StreamTableEnvironment), Row (org.apache.flink.types.Row), Test (org.junit.Test)

Aggregations

StreamTableEnvironment (org.apache.flink.table.api.bridge.java.StreamTableEnvironment): 64; Test (org.junit.Test): 53; StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 41; Row (org.apache.flink.types.Row): 38; Table (org.apache.flink.table.api.Table): 36; ArrayList (java.util.ArrayList): 19; TableResult (org.apache.flink.table.api.TableResult): 18; List (java.util.List): 10; TableDescriptor (org.apache.flink.table.api.TableDescriptor): 10; Arrays (java.util.Arrays): 6; Collections (java.util.Collections): 6; AbstractTestBase (org.apache.flink.test.util.AbstractTestBase): 6; IOException (java.io.IOException): 5; Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 5; ResolvedSchema (org.apache.flink.table.catalog.ResolvedSchema): 5; Either (org.apache.flink.types.Either): 5; LocalDateTime (java.time.LocalDateTime): 4; ZoneId (java.time.ZoneId): 4; TypeHint (org.apache.flink.api.common.typeinfo.TypeHint): 4; TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 4