
Example 1 with Encoder

Use of org.apache.flink.api.common.serialization.Encoder in project flink by apache.

The class FileSystemTableSink, method createStreamingSink:

private DataStreamSink<?> createStreamingSink(ProviderContext providerContext, DataStream<RowData> dataStream, Context sinkContext, final int parallelism) {
    FileSystemFactory fsFactory = FileSystem::get;
    RowDataPartitionComputer computer = partitionComputer();
    boolean autoCompaction = tableOptions.getBoolean(AUTO_COMPACTION);
    Object writer = createWriter(sinkContext);
    boolean isEncoder = writer instanceof Encoder;
    TableBucketAssigner assigner = new TableBucketAssigner(computer);
    TableRollingPolicy rollingPolicy = new TableRollingPolicy(
            !isEncoder || autoCompaction,
            tableOptions.get(SINK_ROLLING_POLICY_FILE_SIZE).getBytes(),
            tableOptions.get(SINK_ROLLING_POLICY_ROLLOVER_INTERVAL).toMillis(),
            tableOptions.get(SINK_ROLLING_POLICY_INACTIVITY_INTERVAL).toMillis());
    String randomPrefix = "part-" + UUID.randomUUID().toString();
    OutputFileConfig.OutputFileConfigBuilder fileNamingBuilder = OutputFileConfig.builder();
    fileNamingBuilder = autoCompaction
            ? fileNamingBuilder.withPartPrefix(convertToUncompacted(randomPrefix))
            : fileNamingBuilder.withPartPrefix(randomPrefix);
    OutputFileConfig fileNamingConfig = fileNamingBuilder.build();
    BucketsBuilder<RowData, String, ? extends BucketsBuilder<RowData, ?, ?>> bucketsBuilder;
    if (isEncoder) {
        // noinspection unchecked
        bucketsBuilder = StreamingFileSink
                .forRowFormat(path, new ProjectionEncoder((Encoder<RowData>) writer, computer))
                .withBucketAssigner(assigner)
                .withOutputFileConfig(fileNamingConfig)
                .withRollingPolicy(rollingPolicy);
    } else {
        // noinspection unchecked
        bucketsBuilder = StreamingFileSink
                .forBulkFormat(path, new ProjectionBulkFactory((BulkWriter.Factory<RowData>) writer, computer))
                .withBucketAssigner(assigner)
                .withOutputFileConfig(fileNamingConfig)
                .withRollingPolicy(rollingPolicy);
    }
    long bucketCheckInterval = tableOptions.get(SINK_ROLLING_POLICY_CHECK_INTERVAL).toMillis();
    DataStream<PartitionCommitInfo> writerStream;
    if (autoCompaction) {
        long compactionSize = tableOptions.getOptional(COMPACTION_FILE_SIZE)
                .orElse(tableOptions.get(SINK_ROLLING_POLICY_FILE_SIZE))
                .getBytes();
        CompactReader.Factory<RowData> reader = createCompactReaderFactory(sinkContext)
                .orElseThrow(() -> new TableException(
                        "Please implement available reader for compaction: BulkFormat, FileInputFormat."));
        writerStream = StreamingSink.compactionWriter(
                providerContext, dataStream, bucketCheckInterval, bucketsBuilder,
                fsFactory, path, reader, compactionSize, parallelism);
    } else {
        writerStream = StreamingSink.writer(
                providerContext, dataStream, bucketCheckInterval, bucketsBuilder,
                parallelism, partitionKeys, tableOptions);
    }
    return StreamingSink.sink(
            providerContext, writerStream, path, tableIdentifier, partitionKeys,
            new EmptyMetaStoreFactory(path), fsFactory, tableOptions);
}
Also used: TableException(org.apache.flink.table.api.TableException) PartitionCommitInfo(org.apache.flink.connector.file.table.stream.PartitionCommitInfo) OutputFileConfig(org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig) RowData(org.apache.flink.table.data.RowData) CompactReader(org.apache.flink.connector.file.table.stream.compact.CompactReader) Encoder(org.apache.flink.api.common.serialization.Encoder)
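
The snippet below is a minimal, self-contained sketch (not taken from the Flink sources) of the row-format path that createStreamingSink takes when the table writer implements Encoder: a hand-written Encoder is passed to StreamingFileSink.forRowFormat together with a rolling policy and an OutputFileConfig. The class name EncoderSinkSketch, the LineEncoder implementation, and the output path are placeholders for illustration only.

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.flink.api.common.serialization.Encoder;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;
import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy;

public class EncoderSinkSketch {

    // Writes each record as one UTF-8 line; plays the role that ProjectionEncoder plays above.
    static final class LineEncoder implements Encoder<String> {
        @Override
        public void encode(String element, OutputStream stream) throws IOException {
            stream.write(element.getBytes(StandardCharsets.UTF_8));
            stream.write('\n');
        }
    }

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // StreamingFileSink finalizes in-progress part files on checkpoints.
        env.enableCheckpointing(10_000);

        StreamingFileSink<String> sink = StreamingFileSink
                .forRowFormat(new Path("/tmp/encoder-sketch"), new LineEncoder())
                // The default policy can also be tuned (part size, rollover, inactivity) via its builder.
                .withRollingPolicy(DefaultRollingPolicy.builder().build())
                .withOutputFileConfig(OutputFileConfig.builder()
                        .withPartPrefix("part")
                        .withPartSuffix(".txt")
                        .build())
                .build();

        env.fromElements("a", "b", "c").addSink(sink);
        env.execute("encoder-sketch");
    }
}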

Example 2 with Encoder

Use of org.apache.flink.api.common.serialization.Encoder in project flink by apache.

The class StreamSQLTestProgram, method main:

public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    String outputPath = params.getRequired("outputPath");
    final StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    sEnv.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, Time.of(10, TimeUnit.SECONDS)));
    sEnv.enableCheckpointing(4000);
    sEnv.getConfig().setAutoWatermarkInterval(1000);
    final StreamTableEnvironment tEnv = StreamTableEnvironment.create(sEnv);
    ((TableEnvironmentInternal) tEnv).registerTableSourceInternal("table1", new GeneratorTableSource(10, 100, 60, 0));
    ((TableEnvironmentInternal) tEnv).registerTableSourceInternal("table2", new GeneratorTableSource(5, 0.2f, 60, 5));
    int overWindowSizeSeconds = 1;
    int tumbleWindowSizeSeconds = 10;
    String overQuery = String.format(
            "SELECT " + "  key, " + "  rowtime, "
                    + "  COUNT(*) OVER (PARTITION BY key ORDER BY rowtime RANGE BETWEEN INTERVAL '%d' SECOND PRECEDING AND CURRENT ROW) AS cnt "
                    + "FROM table1",
            overWindowSizeSeconds);
    String tumbleQuery = String.format(
            "SELECT " + "  key, "
                    + "  CASE SUM(cnt) / COUNT(*) WHEN 101 THEN 1 ELSE 99 END AS correct, "
                    + "  TUMBLE_START(rowtime, INTERVAL '%d' SECOND) AS wStart, "
                    + "  TUMBLE_ROWTIME(rowtime, INTERVAL '%d' SECOND) AS rowtime "
                    + "FROM (%s) "
                    + "WHERE rowtime > TIMESTAMP '1970-01-01 00:00:01' "
                    + "GROUP BY key, TUMBLE(rowtime, INTERVAL '%d' SECOND)",
            tumbleWindowSizeSeconds, tumbleWindowSizeSeconds, overQuery, tumbleWindowSizeSeconds);
    String joinQuery = String.format(
            "SELECT " + "  t1.key, " + "  t2.rowtime AS rowtime, " + "  t2.correct," + "  t2.wStart "
                    + "FROM table2 t1, (%s) t2 "
                    + "WHERE " + "  t1.key = t2.key AND "
                    + "  t1.rowtime BETWEEN t2.rowtime AND t2.rowtime + INTERVAL '%d' SECOND",
            tumbleQuery, tumbleWindowSizeSeconds);
    String finalAgg = String.format(
            "SELECT " + "  SUM(correct) AS correct, "
                    + "  TUMBLE_START(rowtime, INTERVAL '20' SECOND) AS rowtime "
                    + "FROM (%s) "
                    + "GROUP BY TUMBLE(rowtime, INTERVAL '20' SECOND)",
            joinQuery);
    // get Table for SQL query
    Table result = tEnv.sqlQuery(finalAgg);
    // convert Table into append-only DataStream
    DataStream<Row> resultStream = tEnv.toAppendStream(result, Types.ROW(Types.INT, Types.SQL_TIMESTAMP));
    final StreamingFileSink<Row> sink = StreamingFileSink.forRowFormat(new Path(outputPath), (Encoder<Row>) (element, stream) -> {
        PrintStream out = new PrintStream(stream);
        out.println(element.toString());
    }).withBucketAssigner(new KeyBucketAssigner()).withRollingPolicy(OnCheckpointRollingPolicy.build()).build();
    resultStream.map(new KillMapper()).setParallelism(1).addSink(sink).setParallelism(1);
    sEnv.execute();
}
Also used: ParameterTool(org.apache.flink.api.java.utils.ParameterTool) Path(org.apache.flink.core.fs.Path) PrintStream(java.io.PrintStream) Table(org.apache.flink.table.api.Table) TableEnvironmentInternal(org.apache.flink.table.api.internal.TableEnvironmentInternal) Encoder(org.apache.flink.api.common.serialization.Encoder) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamTableEnvironment(org.apache.flink.table.api.bridge.java.StreamTableEnvironment) Row(org.apache.flink.types.Row)
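
As a follow-up sketch (assumed, not part of the test program above): the inline Encoder<Row> lambda that wraps a PrintStream can be replaced by Flink's built-in SimpleStringEncoder, which writes element.toString() followed by a newline. The helper class RowSinkSketch and its method are hypothetical names, and the custom KeyBucketAssigner from the example is omitted.

import org.apache.flink.api.common.serialization.SimpleStringEncoder;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;
import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy;
import org.apache.flink.types.Row;

final class RowSinkSketch {

    // Builds the same kind of row-format sink as StreamSQLTestProgram.main, with the
    // built-in SimpleStringEncoder standing in for the hand-written PrintStream lambda.
    static StreamingFileSink<Row> rowSink(String outputPath) {
        return StreamingFileSink
                .<Row>forRowFormat(new Path(outputPath), new SimpleStringEncoder<>("UTF-8"))
                .withRollingPolicy(OnCheckpointRollingPolicy.build())
                .build();
    }
}

The resulting sink would then be attached exactly as in the example, e.g. resultStream.addSink(RowSinkSketch.rowSink(outputPath)).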

Aggregations

Encoder (org.apache.flink.api.common.serialization.Encoder) 2
PrintStream (java.io.PrintStream) 1
ParameterTool (org.apache.flink.api.java.utils.ParameterTool) 1
PartitionCommitInfo (org.apache.flink.connector.file.table.stream.PartitionCommitInfo) 1
CompactReader (org.apache.flink.connector.file.table.stream.compact.CompactReader) 1
Path (org.apache.flink.core.fs.Path) 1
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) 1
OutputFileConfig (org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig) 1
Table (org.apache.flink.table.api.Table) 1
TableException (org.apache.flink.table.api.TableException) 1
StreamTableEnvironment (org.apache.flink.table.api.bridge.java.StreamTableEnvironment) 1
TableEnvironmentInternal (org.apache.flink.table.api.internal.TableEnvironmentInternal) 1
RowData (org.apache.flink.table.data.RowData) 1
Row (org.apache.flink.types.Row) 1