Use of org.apache.flink.connector.file.table.stream.compact.CompactFileWriter in project flink by apache.
The snippet below is the compactionWriter method of the class StreamingSink.
/**
 * Creates a file writer with compaction operators from the input stream. In addition, it emits
 * {@link PartitionCommitInfo} downstream.
 */
public static <T> DataStream<PartitionCommitInfo> compactionWriter(
        ProviderContext providerContext,
        DataStream<T> inputStream,
        long bucketCheckInterval,
        StreamingFileSink.BucketsBuilder<T, String, ? extends StreamingFileSink.BucketsBuilder<T, String, ?>> bucketsBuilder,
        FileSystemFactory fsFactory,
        Path path,
        CompactReader.Factory<T> readFactory,
        long targetFileSize,
        int parallelism) {

    // Writes incoming records into per-bucket files and reports each written
    // file to the coordinator as a CoordinatorInput element.
    CompactFileWriter<T> writer = new CompactFileWriter<>(bucketCheckInterval, bucketsBuilder);

    SupplierWithException<FileSystem, IOException> fsSupplier =
            (SupplierWithException<FileSystem, IOException> & Serializable)
                    () -> fsFactory.create(path.toUri());

    // Groups written files into compaction units based on the target file size.
    CompactCoordinator coordinator = new CompactCoordinator(fsSupplier, targetFileSize);

    // The coordinator runs as a single instance (parallelism and max parallelism 1)
    // so it sees every written file and can plan compaction globally.
    SingleOutputStreamOperator<CoordinatorOutput> coordinatorOp =
            inputStream
                    .transform("streaming-writer", TypeInformation.of(CoordinatorInput.class), writer)
                    .uid(providerContext.generateUid("streaming-writer").get())
                    .setParallelism(parallelism)
                    .transform("compact-coordinator", TypeInformation.of(CoordinatorOutput.class), coordinator)
                    .uid(providerContext.generateUid("compact-coordinator").get())
                    .setParallelism(1)
                    .setMaxParallelism(1);

    CompactWriter.Factory<T> writerFactory =
            CompactBucketWriter.factory(
                    (SupplierWithException<BucketWriter<T, String>, IOException> & Serializable)
                            bucketsBuilder::createBucketWriter);

    // Re-reads the small files of each compaction unit and rewrites them as one file.
    CompactOperator<T> compacter = new CompactOperator<>(fsSupplier, readFactory, writerFactory);

    // The compaction plan is broadcast so every CompactOperator instance sees the
    // full plan, executes the units assigned to it, and emits PartitionCommitInfo.
    return coordinatorOp
            .broadcast()
            .transform("compact-operator", TypeInformation.of(PartitionCommitInfo.class), compacter)
            .uid(providerContext.generateUid("compact-operator").get())
            .setParallelism(parallelism);
}
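For context, here is a minimal sketch of how this method might be wired up in application code. The output path, parallelism, checkpoint interval, sample data, and the lambda standing in for the planner-provided ProviderContext are all assumptions for illustration. The TextInputFormat-based reader factory (FileInputFormatCompactReader) is what FileSystemTableSink uses for non-bulk formats, but verify it against your Flink version.

import java.util.Optional;

import org.apache.flink.api.common.serialization.SimpleStringEncoder;
import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.connector.file.table.FileSystemFactory;
import org.apache.flink.connector.file.table.stream.PartitionCommitInfo;
import org.apache.flink.connector.file.table.stream.StreamingSink;
import org.apache.flink.connector.file.table.stream.compact.CompactReader;
import org.apache.flink.connector.file.table.stream.compact.FileInputFormatCompactReader;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.DiscardingSink;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;
import org.apache.flink.table.connector.ProviderContext;

public class CompactionWriterSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Compaction is checkpoint-driven: written files become eligible for
        // compaction once the checkpoint that covers them completes.
        env.enableCheckpointing(60_000);

        DataStream<String> input = env.fromElements("a", "b", "c");
        Path outputDir = new Path("/tmp/compacted-output"); // hypothetical output location

        // Any BucketsBuilder<T, String, ?> works; a plain row-format builder is used here.
        StreamingFileSink.BucketsBuilder<String, String, ? extends StreamingFileSink.BucketsBuilder<String, String, ?>> bucketsBuilder =
                StreamingFileSink.forRowFormat(outputDir, new SimpleStringEncoder<String>());

        // Reader the CompactOperator uses to re-read the small files it merges
        // (assumed helper; FileSystemTableSink builds its reader factory this way).
        CompactReader.Factory<String> readerFactory =
                FileInputFormatCompactReader.factory(new TextInputFormat(outputDir));

        // ProviderContext normally comes from the table planner; a lambda stands in here.
        ProviderContext providerContext = name -> Optional.of("compaction-demo-" + name);
        FileSystemFactory fsFactory = FileSystem::get;

        DataStream<PartitionCommitInfo> committable =
                StreamingSink.compactionWriter(
                        providerContext,
                        input,
                        60_000L,            // bucket check interval (ms)
                        bucketsBuilder,
                        fsFactory,
                        outputDir,
                        readerFactory,
                        128L * 1024 * 1024, // target compacted file size: 128 MiB
                        2);                 // writer/compactor parallelism

        // In FileSystemTableSink the returned stream feeds StreamingSink.sink(...)
        // so partitions are committed only after compaction; discard it in this sketch.
        committable.addSink(new DiscardingSink<>());

        env.execute("compaction-writer-sketch");
    }
}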