Use of org.apache.flink.api.common.serialization.Encoder in project flink by apache.
From the class FileSystemTableSink, the method createStreamingSink:
private DataStreamSink<?> createStreamingSink(
        ProviderContext providerContext,
        DataStream<RowData> dataStream,
        Context sinkContext,
        final int parallelism) {
    FileSystemFactory fsFactory = FileSystem::get;
    RowDataPartitionComputer computer = partitionComputer();
    boolean autoCompaction = tableOptions.getBoolean(AUTO_COMPACTION);
    Object writer = createWriter(sinkContext);
    boolean isEncoder = writer instanceof Encoder;
    TableBucketAssigner assigner = new TableBucketAssigner(computer);
    TableRollingPolicy rollingPolicy = new TableRollingPolicy(
            !isEncoder || autoCompaction,
            tableOptions.get(SINK_ROLLING_POLICY_FILE_SIZE).getBytes(),
            tableOptions.get(SINK_ROLLING_POLICY_ROLLOVER_INTERVAL).toMillis(),
            tableOptions.get(SINK_ROLLING_POLICY_INACTIVITY_INTERVAL).toMillis());
    String randomPrefix = "part-" + UUID.randomUUID().toString();
    OutputFileConfig.OutputFileConfigBuilder fileNamingBuilder = OutputFileConfig.builder();
    fileNamingBuilder = autoCompaction
            ? fileNamingBuilder.withPartPrefix(convertToUncompacted(randomPrefix))
            : fileNamingBuilder.withPartPrefix(randomPrefix);
    OutputFileConfig fileNamingConfig = fileNamingBuilder.build();
    BucketsBuilder<RowData, String, ? extends BucketsBuilder<RowData, ?, ?>> bucketsBuilder;
    if (isEncoder) {
        // noinspection unchecked
        bucketsBuilder = StreamingFileSink
                .forRowFormat(path, new ProjectionEncoder((Encoder<RowData>) writer, computer))
                .withBucketAssigner(assigner)
                .withOutputFileConfig(fileNamingConfig)
                .withRollingPolicy(rollingPolicy);
    } else {
        // noinspection unchecked
        bucketsBuilder = StreamingFileSink
                .forBulkFormat(path, new ProjectionBulkFactory((BulkWriter.Factory<RowData>) writer, computer))
                .withBucketAssigner(assigner)
                .withOutputFileConfig(fileNamingConfig)
                .withRollingPolicy(rollingPolicy);
    }
    long bucketCheckInterval = tableOptions.get(SINK_ROLLING_POLICY_CHECK_INTERVAL).toMillis();
    DataStream<PartitionCommitInfo> writerStream;
    if (autoCompaction) {
        long compactionSize = tableOptions.getOptional(COMPACTION_FILE_SIZE)
                .orElse(tableOptions.get(SINK_ROLLING_POLICY_FILE_SIZE))
                .getBytes();
        CompactReader.Factory<RowData> reader = createCompactReaderFactory(sinkContext)
                .orElseThrow(() -> new TableException(
                        "Please implement available reader for compaction:" + " BulkFormat, FileInputFormat."));
        writerStream = StreamingSink.compactionWriter(
                providerContext, dataStream, bucketCheckInterval, bucketsBuilder,
                fsFactory, path, reader, compactionSize, parallelism);
    } else {
        writerStream = StreamingSink.writer(
                providerContext, dataStream, bucketCheckInterval, bucketsBuilder,
                parallelism, partitionKeys, tableOptions);
    }
    return StreamingSink.sink(
            providerContext, writerStream, path, tableIdentifier, partitionKeys,
            new EmptyMetaStoreFactory(path), fsFactory, tableOptions);
}
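In this method, the writer returned by createWriter(sinkContext) determines the branch: if it implements Encoder, the sink is built with StreamingFileSink.forRowFormat and the encoder is wrapped in a ProjectionEncoder; otherwise the writer is treated as a BulkWriter.Factory and passed through forBulkFormat. As a minimal sketch of what a row-format Encoder<RowData> looks like, the class below writes one UTF-8 line per record. SingleStringColumnEncoder is a hypothetical name and it assumes a single non-null STRING column at position 0; the real writer in Flink comes from the configured format factory (e.g. CSV or JSON) rather than this class.

import org.apache.flink.api.common.serialization.Encoder;
import org.apache.flink.table.data.RowData;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

// Hypothetical sketch, not the format writer Flink actually creates:
// serializes each RowData to a UTF-8 text line, assuming one non-null STRING column.
public class SingleStringColumnEncoder implements Encoder<RowData> {

    private static final long serialVersionUID = 1L;

    @Override
    public void encode(RowData element, OutputStream stream) throws IOException {
        // getString(0) assumes the first (and only) column is a non-null STRING field.
        stream.write(element.getString(0).toString().getBytes(StandardCharsets.UTF_8));
        stream.write('\n');
    }
}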
Use of org.apache.flink.api.common.serialization.Encoder in project flink by apache.
From the class StreamSQLTestProgram, the method main:
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    String outputPath = params.getRequired("outputPath");
    final StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    sEnv.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, Time.of(10, TimeUnit.SECONDS)));
    sEnv.enableCheckpointing(4000);
    sEnv.getConfig().setAutoWatermarkInterval(1000);
    final StreamTableEnvironment tEnv = StreamTableEnvironment.create(sEnv);
    ((TableEnvironmentInternal) tEnv)
            .registerTableSourceInternal("table1", new GeneratorTableSource(10, 100, 60, 0));
    ((TableEnvironmentInternal) tEnv)
            .registerTableSourceInternal("table2", new GeneratorTableSource(5, 0.2f, 60, 5));
    int overWindowSizeSeconds = 1;
    int tumbleWindowSizeSeconds = 10;
    String overQuery = String.format(
            "SELECT " + " key, " + " rowtime, "
                    + " COUNT(*) OVER (PARTITION BY key ORDER BY rowtime RANGE BETWEEN INTERVAL '%d' SECOND PRECEDING AND CURRENT ROW) AS cnt "
                    + "FROM table1",
            overWindowSizeSeconds);
    String tumbleQuery = String.format(
            "SELECT " + " key, "
                    + " CASE SUM(cnt) / COUNT(*) WHEN 101 THEN 1 ELSE 99 END AS correct, "
                    + " TUMBLE_START(rowtime, INTERVAL '%d' SECOND) AS wStart, "
                    + " TUMBLE_ROWTIME(rowtime, INTERVAL '%d' SECOND) AS rowtime "
                    + "FROM (%s) " + "WHERE rowtime > TIMESTAMP '1970-01-01 00:00:01' "
                    + "GROUP BY key, TUMBLE(rowtime, INTERVAL '%d' SECOND)",
            tumbleWindowSizeSeconds, tumbleWindowSizeSeconds, overQuery, tumbleWindowSizeSeconds);
    String joinQuery = String.format(
            "SELECT " + " t1.key, " + " t2.rowtime AS rowtime, " + " t2.correct," + " t2.wStart "
                    + "FROM table2 t1, (%s) t2 "
                    + "WHERE " + " t1.key = t2.key AND "
                    + " t1.rowtime BETWEEN t2.rowtime AND t2.rowtime + INTERVAL '%d' SECOND",
            tumbleQuery, tumbleWindowSizeSeconds);
    String finalAgg = String.format(
            "SELECT " + " SUM(correct) AS correct, "
                    + " TUMBLE_START(rowtime, INTERVAL '20' SECOND) AS rowtime "
                    + "FROM (%s) " + "GROUP BY TUMBLE(rowtime, INTERVAL '20' SECOND)",
            joinQuery);
    // get Table for SQL query
    Table result = tEnv.sqlQuery(finalAgg);
    // convert Table into append-only DataStream
    DataStream<Row> resultStream =
            tEnv.toAppendStream(result, Types.ROW(Types.INT, Types.SQL_TIMESTAMP));
    final StreamingFileSink<Row> sink = StreamingFileSink
            .forRowFormat(new Path(outputPath), (Encoder<Row>) (element, stream) -> {
                PrintStream out = new PrintStream(stream);
                out.println(element.toString());
            })
            .withBucketAssigner(new KeyBucketAssigner())
            .withRollingPolicy(OnCheckpointRollingPolicy.build())
            .build();
    resultStream.map(new KillMapper()).setParallelism(1).addSink(sink).setParallelism(1);
    sEnv.execute();
}
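Here the Encoder is supplied inline as a lambda that prints each Row as a line of text. Roughly the same behavior can be had with Flink's built-in SimpleStringEncoder, which also writes element.toString() followed by a newline. The fragment below is a sketch under that assumption and reuses outputPath and KeyBucketAssigner from the program above:

// Sketch: same row-format sink built with SimpleStringEncoder instead of the inline lambda.
// SimpleStringEncoder writes element.toString() plus '\n' in the given charset; the lambda
// above goes through a PrintStream, so the output is only roughly equivalent
// (platform line separator vs. '\n').
final StreamingFileSink<Row> sink = StreamingFileSink
        .forRowFormat(new Path(outputPath), new SimpleStringEncoder<Row>("UTF-8"))
        .withBucketAssigner(new KeyBucketAssigner())
        .withRollingPolicy(OnCheckpointRollingPolicy.build())
        .build();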