Use of org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.UniqueBucketAssigner in project flink by apache.
The class CompressWriterFactoryTest, method prepareCompressedFile:
private File prepareCompressedFile(CompressWriterFactory<String> writer, List<String> lines) throws Exception {
    final File outDir = TEMPORARY_FOLDER.newFolder();

    StreamingFileSink<String> sink =
            StreamingFileSink.forBulkFormat(new Path(outDir.toURI()), writer)
                    .withBucketAssigner(new UniqueBucketAssigner<>("test"))
                    .build();

    try (OneInputStreamOperatorTestHarness<String, Object> testHarness =
            new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink), 1, 1, 0)) {

        testHarness.setup();
        testHarness.open();

        int time = 0;
        for (String line : lines) {
            testHarness.processElement(new StreamRecord<>(line, ++time));
        }

        testHarness.snapshot(1, ++time);
        testHarness.notifyOfCompletedCheckpoint(1);
    }

    return outDir;
}
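For orientation, a caller builds the CompressWriterFactory first and then inspects the directory the helper returns. The fragment below is only a sketch; the codec name, the sample lines, and the bucket-layout comment are assumptions, not code from CompressWriterFactoryTest.

// Illustrative usage of prepareCompressedFile (hypothetical values, not from the original test).
CompressWriterFactory<String> writer =
        CompressWriters.forExtractor(new DefaultExtractor<String>())
                .withHadoopCompression("Gzip"); // "Gzip" is just an example codec name

File outDir = prepareCompressedFile(writer, Arrays.asList("line1", "line2"));
// The UniqueBucketAssigner above routes every record to the single bucket id "test",
// so the compressed part files are expected under a "test" subdirectory of outDir.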
Use of org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.UniqueBucketAssigner in project flink by apache.
The class OrcBulkWriterITCase, method testOrcBulkWriter:
@Test
public void testOrcBulkWriter() throws Exception {
    final File outDir = TEMPORARY_FOLDER.newFolder();
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final Properties writerProps = new Properties();
    writerProps.setProperty("orc.compress", "LZ4");

    final OrcBulkWriterFactory<Record> factory =
            new OrcBulkWriterFactory<>(new RecordVectorizer(schema), writerProps, new Configuration());

    env.setParallelism(1);
    env.enableCheckpointing(100);

    DataStream<Record> stream =
            env.addSource(new FiniteTestSource<>(testData), TypeInformation.of(Record.class));
    stream.map(str -> str)
            .addSink(
                    StreamingFileSink.forBulkFormat(new Path(outDir.toURI()), factory)
                            .withBucketAssigner(new UniqueBucketAssigner<>("test"))
                            .build());

    env.execute();

    OrcBulkWriterTestUtil.validate(outDir, testData);
}
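The RecordVectorizer passed to the OrcBulkWriterFactory is defined elsewhere in the test. As a rough sketch of the Vectorizer contract it has to satisfy, assuming Record exposes name and age fields matching a struct<name:string,age:int> schema, an implementation could look like the following (the field names and getters are assumptions):

import java.io.IOException;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;

import org.apache.flink.orc.vector.Vectorizer;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

// Sketch only: the real RecordVectorizer in the Flink test may differ.
public class RecordVectorizer extends Vectorizer<Record> implements Serializable {

    public RecordVectorizer(String schema) {
        super(schema);
    }

    @Override
    public void vectorize(Record element, VectorizedRowBatch batch) throws IOException {
        BytesColumnVector nameColVector = (BytesColumnVector) batch.cols[0];
        LongColumnVector ageColVector = (LongColumnVector) batch.cols[1];
        int row = batch.size++; // claim the next row slot in the current batch
        nameColVector.setVal(row, element.getName().getBytes(StandardCharsets.UTF_8));
        ageColVector.vector[row] = element.getAge();
    }
}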
Use of org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.UniqueBucketAssigner in project flink by apache.
The class CompressionFactoryITCase, method testWriteCompressedFile:
@Test
public void testWriteCompressedFile() throws Exception {
    final File folder = TEMPORARY_FOLDER.newFolder();
    final Path testPath = Path.fromLocalFile(folder);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.enableCheckpointing(100);

    DataStream<String> stream =
            env.addSource(new FiniteTestSource<>(testData), TypeInformation.of(String.class));
    stream.map(str -> str)
            .addSink(
                    StreamingFileSink.forBulkFormat(
                                    testPath,
                                    CompressWriters.forExtractor(new DefaultExtractor<String>())
                                            .withHadoopCompression(TEST_CODEC_NAME))
                            .withBucketAssigner(new UniqueBucketAssigner<>("test"))
                            .build());
    env.execute();

    validateResults(folder, testData, new CompressionCodecFactory(configuration).getCodecByName(TEST_CODEC_NAME));
}
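The validateResults helper is not shown above. A minimal sketch of such a check, assuming one bucket directory of compressed part files containing newline-separated records, could decompress each file through the Hadoop codec and compare against the expected data; the directory layout and the loose containment assertion are assumptions, not the actual helper from CompressionFactoryITCase.

// Sketch only: decompress every part file with the given codec and check the expected lines are present.
private void validateResults(File folder, List<String> expected, CompressionCodec codec) throws Exception {
    List<String> actual = new ArrayList<>();
    File[] buckets = folder.listFiles();
    assertNotNull(buckets);
    for (File bucket : buckets) {
        File[] parts = bucket.listFiles();
        assertNotNull(parts);
        for (File part : parts) {
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                    codec.createInputStream(new FileInputStream(part)), StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    actual.add(line);
                }
            }
        }
    }
    // FiniteTestSource may emit the data more than once, so only a containment check is made here.
    assertTrue(actual.containsAll(expected));
}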