Use of org.apache.flink.table.api.bridge.java.StreamTableEnvironment in project flink by apache.
The class StreamWindowSQLExample, method main.
public static void main(String[] args) throws Exception {
// set up execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
// write the source data into a temporary file and get the absolute path
String contents =
    "1,beer,3,2019-12-12 00:00:01\n"
    + "1,diaper,4,2019-12-12 00:00:02\n"
    + "2,pen,3,2019-12-12 00:00:04\n"
    + "2,rubber,3,2019-12-12 00:00:06\n"
    + "3,rubber,2,2019-12-12 00:00:05\n"
    + "4,beer,1,2019-12-12 00:00:08";
String path = createTempFile(contents);
// register a table via DDL with a watermark;
// the events are out of order, so we use a 3-second delay to wait for late events
String ddl =
    "CREATE TABLE orders (\n"
    + " user_id INT,\n"
    + " product STRING,\n"
    + " amount INT,\n"
    + " ts TIMESTAMP(3),\n"
    + " WATERMARK FOR ts AS ts - INTERVAL '3' SECOND\n"
    + ") WITH (\n"
    + " 'connector.type' = 'filesystem',\n"
    + " 'connector.path' = '" + path + "',\n"
    + " 'format.type' = 'csv'\n"
    + ")";
tEnv.executeSql(ddl);
// run a SQL query on the table and retrieve the result as a new Table
String query =
    "SELECT\n"
    + " CAST(TUMBLE_START(ts, INTERVAL '5' SECOND) AS STRING) window_start,\n"
    + " COUNT(*) order_num,\n"
    + " SUM(amount) total_amount,\n"
    + " COUNT(DISTINCT product) unique_products\n"
    + "FROM orders\n"
    + "GROUP BY TUMBLE(ts, INTERVAL '5' SECOND)";
tEnv.executeSql(query).print();
// should output:
// +----+--------------------------------+--------------+--------------+-----------------+
// | op | window_start | order_num | total_amount | unique_products |
// +----+--------------------------------+--------------+--------------+-----------------+
// | +I | 2019-12-12 00:00:00.000 | 3 | 10 | 3 |
// | +I | 2019-12-12 00:00:05.000 | 3 | 6 | 2 |
// +----+--------------------------------+--------------+--------------+-----------------+
}
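The createTempFile helper used above is a private method of the example class and is not part of this listing. A minimal sketch, assuming it only needs to write the CSV contents to a temporary file and return a location the filesystem connector can read (requires java.io.File, java.io.IOException, java.nio.file.Files, java.nio.charset.StandardCharsets):
// minimal sketch of the createTempFile helper; the real helper may differ
private static String createTempFile(String contents) throws IOException {
    // write the CSV rows to a throwaway file that is cleaned up on JVM exit
    File tempFile = File.createTempFile("orders", ".csv");
    tempFile.deleteOnExit();
    Files.write(tempFile.toPath(), contents.getBytes(StandardCharsets.UTF_8));
    return tempFile.toURI().toString();
}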
Use of org.apache.flink.table.api.bridge.java.StreamTableEnvironment in project flink by apache.
The class ChangelogSocketExample, method main.
public static void main(String[] args) throws Exception {
final ParameterTool params = ParameterTool.fromArgs(args);
final String hostname = params.get("hostname", "localhost");
final String port = params.get("port", "9999");
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// source only supports parallelism of 1
env.setParallelism(1);
final StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
// register a table in the catalog
tEnv.executeSql("CREATE TABLE UserScores (name STRING, score INT)\n" + "WITH (\n" + " 'connector' = 'socket',\n" + " 'hostname' = '" + hostname + "',\n" + " 'port' = '" + port + "',\n" + " 'byte-delimiter' = '10',\n" + " 'format' = 'changelog-csv',\n" + " 'changelog-csv.column-delimiter' = '|'\n" + ")");
// define a dynamic aggregating query
final Table result = tEnv.sqlQuery("SELECT name, SUM(score) FROM UserScores GROUP BY name");
// print the result to the console
tEnv.toChangelogStream(result).print();
env.execute();
}
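Nothing in this listing produces data: the socket connector connects to hostname:port as a client, so something must be listening there and serving changelog-csv rows. A hypothetical feeder (names and rows invented for illustration), assuming each row has the form <op>|<name>|<score> and is terminated by the byte delimiter 10, i.e. '\n' (requires java.net.ServerSocket, java.net.Socket, java.io.OutputStreamWriter, java.nio.charset.StandardCharsets):
// hypothetical feeder, not part of the example: accepts one connection on
// port 9999 and serves a few changelog-csv rows to the job above
try (ServerSocket server = new ServerSocket(9999);
        Socket client = server.accept();
        OutputStreamWriter out =
            new OutputStreamWriter(client.getOutputStream(), StandardCharsets.UTF_8)) {
    out.write("INSERT|Alice|12\n");
    out.write("INSERT|Bob|5\n");
    out.write("DELETE|Alice|12\n");
    out.write("INSERT|Alice|18\n");
}
With the DELETE row retracting Alice's first score, the aggregated changelog should end with Alice at 18 and Bob at 5.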
Use of org.apache.flink.table.api.bridge.java.StreamTableEnvironment in project flink by apache.
The class StreamSQLTestProgram, method main.
public static void main(String[] args) throws Exception {
ParameterTool params = ParameterTool.fromArgs(args);
String outputPath = params.getRequired("outputPath");
final StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
sEnv.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, Time.of(10, TimeUnit.SECONDS)));
sEnv.enableCheckpointing(4000);
sEnv.getConfig().setAutoWatermarkInterval(1000);
final StreamTableEnvironment tEnv = StreamTableEnvironment.create(sEnv);
((TableEnvironmentInternal) tEnv).registerTableSourceInternal("table1", new GeneratorTableSource(10, 100, 60, 0));
((TableEnvironmentInternal) tEnv).registerTableSourceInternal("table2", new GeneratorTableSource(5, 0.2f, 60, 5));
int overWindowSizeSeconds = 1;
int tumbleWindowSizeSeconds = 10;
String overQuery = String.format(
    "SELECT "
    + " key, "
    + " rowtime, "
    + " COUNT(*) OVER (PARTITION BY key ORDER BY rowtime RANGE BETWEEN INTERVAL '%d' SECOND PRECEDING AND CURRENT ROW) AS cnt "
    + "FROM table1",
    overWindowSizeSeconds);
String tumbleQuery = String.format(
    "SELECT "
    + " key, "
    + " CASE SUM(cnt) / COUNT(*) WHEN 101 THEN 1 ELSE 99 END AS correct, "
    + " TUMBLE_START(rowtime, INTERVAL '%d' SECOND) AS wStart, "
    + " TUMBLE_ROWTIME(rowtime, INTERVAL '%d' SECOND) AS rowtime "
    + "FROM (%s) "
    + "WHERE rowtime > TIMESTAMP '1970-01-01 00:00:01' "
    + "GROUP BY key, TUMBLE(rowtime, INTERVAL '%d' SECOND)",
    tumbleWindowSizeSeconds,
    tumbleWindowSizeSeconds,
    overQuery,
    tumbleWindowSizeSeconds);
String joinQuery = String.format(
    "SELECT "
    + " t1.key, "
    + " t2.rowtime AS rowtime, "
    + " t2.correct,"
    + " t2.wStart "
    + "FROM table2 t1, (%s) t2 "
    + "WHERE "
    + " t1.key = t2.key AND "
    + " t1.rowtime BETWEEN t2.rowtime AND t2.rowtime + INTERVAL '%d' SECOND",
    tumbleQuery,
    tumbleWindowSizeSeconds);
String finalAgg = String.format(
    "SELECT "
    + " SUM(correct) AS correct, "
    + " TUMBLE_START(rowtime, INTERVAL '20' SECOND) AS rowtime "
    + "FROM (%s) "
    + "GROUP BY TUMBLE(rowtime, INTERVAL '20' SECOND)",
    joinQuery);
// get Table for SQL query
Table result = tEnv.sqlQuery(finalAgg);
// convert Table into append-only DataStream
DataStream<Row> resultStream = tEnv.toAppendStream(result, Types.ROW(Types.INT, Types.SQL_TIMESTAMP));
final StreamingFileSink<Row> sink =
    StreamingFileSink.forRowFormat(
            new Path(outputPath),
            (Encoder<Row>) (element, stream) -> {
                PrintStream out = new PrintStream(stream);
                out.println(element.toString());
            })
        .withBucketAssigner(new KeyBucketAssigner())
        .withRollingPolicy(OnCheckpointRollingPolicy.build())
        .build();
resultStream.map(new KillMapper()).setParallelism(1).addSink(sink).setParallelism(1);
sEnv.execute();
}
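KeyBucketAssigner and KillMapper are inner classes of the test program that are not shown in this listing. A plausible sketch of the bucket assigner, assuming it buckets each output row by its first field, the key (requires org.apache.flink.core.io.SimpleVersionedSerializer, org.apache.flink.streaming.api.functions.sink.filesystem.BucketAssigner, org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.SimpleVersionedStringSerializer):
// plausible sketch of KeyBucketAssigner; the real inner class may differ
public static final class KeyBucketAssigner implements BucketAssigner<Row, String> {

    // use the first row field as the bucket id
    @Override
    public String getBucketId(Row element, Context context) {
        return String.valueOf(element.getField(0));
    }

    @Override
    public SimpleVersionedSerializer<String> getSerializer() {
        return SimpleVersionedStringSerializer.INSTANCE;
    }
}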
Use of org.apache.flink.table.api.bridge.java.StreamTableEnvironment in project flink by apache.
The class StreamPythonUdfSqlJob, method main.
public static void main(String[] args) {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
tEnv.executeSql("create temporary system function add_one as 'add_one.add_one' language python");
tEnv.createTemporaryView("source", tEnv.fromValues(1L, 2L, 3L).as("a"));
Iterator<Row> result = tEnv.executeSql("select add_one(a) as a from source").collect();
List<Long> actual = new ArrayList<>();
while (result.hasNext()) {
Row r = result.next();
actual.add((Long) r.getField(0));
}
List<Long> expected = Arrays.asList(2L, 3L, 4L);
if (!actual.equals(expected)) {
throw new AssertionError(String.format("The output result: %s is not as expected: %s!", actual, expected));
}
}
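The DDL registers 'add_one.add_one', i.e. a function add_one in a Python module add_one, which must be available to the Python workers at runtime. A hedged sketch of one way to ship it via the python.files option; the file path is a placeholder, not from the original job:
// hypothetical setup, not shown above: ship the Python module defining
// add_one so that 'add_one.add_one' resolves at runtime
tEnv.getConfig().getConfiguration().setString("python.files", "/path/to/add_one.py");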
Use of org.apache.flink.table.api.bridge.java.StreamTableEnvironment in project flink by apache.
The class CassandraConnectorITCase, method testCassandraTableSink.
@Test
public void testCassandraTableSink() throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(4);
StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);
DataStreamSource<Row> source = env.fromCollection(rowCollection);
tEnv.createTemporaryView("testFlinkTable", source);
((TableEnvironmentInternal) tEnv).registerTableSinkInternal(
    "cassandraTable",
    new CassandraAppendTableSink(builderForWriting, injectTableName(INSERT_DATA_QUERY))
        .configure(
            new String[] { "f0", "f1", "f2" },
            new TypeInformation[] { Types.STRING, Types.INT, Types.INT }));
tEnv.sqlQuery("select * from testFlinkTable").executeInsert("cassandraTable").await();
ResultSet rs = session.execute(injectTableName(SELECT_DATA_QUERY));
// validate that all input was correctly written to Cassandra
List<Row> input = new ArrayList<>(rowCollection);
List<com.datastax.driver.core.Row> output = rs.all();
for (com.datastax.driver.core.Row o : output) {
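// the SELECT returns the columns in a different order than the
// (f0, f1, f2) insert order, hence the swapped indexes below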
Row cmp = new Row(3);
cmp.setField(0, o.getString(0));
cmp.setField(1, o.getInt(2));
cmp.setField(2, o.getInt(1));
Assert.assertTrue("Row " + cmp + " was written to Cassandra but not in input.", input.remove(cmp));
}
Assert.assertTrue("The input data was not completely written to Cassandra", input.isEmpty());
}
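rowCollection, builderForWriting, and the injected query constants are fixtures defined elsewhere in the test class. A plausible shape for the input rows, matching the sink's (f0 STRING, f1 INT, f2 INT) schema; the values are invented for illustration:
// hypothetical fixture; the real test data is defined elsewhere in the class
List<Row> rowCollection = new ArrayList<>(20);
for (int i = 0; i < 20; i++) {
    rowCollection.add(Row.of("id-" + i, i, 0));
}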