Use of org.apache.flink.connector.file.table.stream.compact.CompactMessages.CoordinatorOutput in project flink by apache.
The class StreamingSink, method compactionWriter.
/**
 * Create a file writer with compaction operators from the input stream. In addition, it can emit
 * {@link PartitionCommitInfo} downstream.
 */
public static <T> DataStream<PartitionCommitInfo> compactionWriter(
        ProviderContext providerContext,
        DataStream<T> inputStream,
        long bucketCheckInterval,
        StreamingFileSink.BucketsBuilder<T, String, ? extends StreamingFileSink.BucketsBuilder<T, String, ?>> bucketsBuilder,
        FileSystemFactory fsFactory,
        Path path,
        CompactReader.Factory<T> readFactory,
        long targetFileSize,
        int parallelism) {
    CompactFileWriter<T> writer = new CompactFileWriter<>(bucketCheckInterval, bucketsBuilder);
    SupplierWithException<FileSystem, IOException> fsSupplier =
            (SupplierWithException<FileSystem, IOException> & Serializable)
                    () -> fsFactory.create(path.toUri());
    CompactCoordinator coordinator = new CompactCoordinator(fsSupplier, targetFileSize);
    SingleOutputStreamOperator<CoordinatorOutput> coordinatorOp =
            inputStream
                    .transform("streaming-writer", TypeInformation.of(CoordinatorInput.class), writer)
                    .uid(providerContext.generateUid("streaming-writer").get())
                    .setParallelism(parallelism)
                    .transform("compact-coordinator", TypeInformation.of(CoordinatorOutput.class), coordinator)
                    .uid(providerContext.generateUid("compact-coordinator").get())
                    .setParallelism(1)
                    .setMaxParallelism(1);
    CompactWriter.Factory<T> writerFactory =
            CompactBucketWriter.factory(
                    (SupplierWithException<BucketWriter<T, String>, IOException> & Serializable)
                            bucketsBuilder::createBucketWriter);
    CompactOperator<T> compacter = new CompactOperator<>(fsSupplier, readFactory, writerFactory);
    return coordinatorOp
            .broadcast()
            .transform("compact-operator", TypeInformation.of(PartitionCommitInfo.class), compacter)
            .uid(providerContext.generateUid("compact-operator").get())
            .setParallelism(parallelism);
}
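Both casts above rely on Java intersection types: the cast target (SupplierWithException<...> & Serializable) forces the compiler to emit a lambda that also implements Serializable, which these suppliers need because they are stored as fields of operators that Flink serializes at job submission. A minimal, self-contained sketch of that language pattern (class and variable names here are illustrative, not from Flink):

import java.io.Serializable;
import java.util.function.Supplier;

public class SerializableLambdaSketch {
    public static void main(String[] args) {
        // Without the cast, the lambda only implements Supplier and is not serializable.
        Supplier<String> plain = () -> "hello";
        // The intersection cast makes the compiled lambda also implement Serializable.
        Supplier<String> serializable = (Supplier<String> & Serializable) () -> "hello";
        System.out.println(plain instanceof Serializable);        // false
        System.out.println(serializable instanceof Serializable); // true
    }
}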
Use of org.apache.flink.connector.file.table.stream.compact.CompactMessages.CoordinatorOutput in project flink by apache.
The class CompactOperator, method processElement.
@Override
public void processElement(StreamRecord<CoordinatorOutput> element) throws Exception {
    CoordinatorOutput value = element.getValue();
    if (value instanceof CompactionUnit) {
        CompactionUnit unit = (CompactionUnit) value;
        if (unit.isTaskMessage(
                getRuntimeContext().getNumberOfParallelSubtasks(),
                getRuntimeContext().getIndexOfThisSubtask())) {
            String partition = unit.getPartition();
            List<Path> paths = unit.getPaths();
            doCompact(partition, paths);
            this.partitions.add(partition);
            // The expired files may only be deleted after the current checkpoint
            // completes successfully, which guarantees they still exist until then.
            this.currentExpiredFiles.addAll(paths);
        }
    } else if (value instanceof EndCompaction) {
        endCompaction(((EndCompaction) value).getCheckpointId());
    }
}
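The isTaskMessage check is what turns the broadcast coordinator output into a partitioned workload: every subtask receives every CompactionUnit and keeps only the units assigned to it. Below is a standalone sketch of one plausible assignment rule, round-robin by unit id; this isTaskMessage is a hypothetical reconstruction for illustration, not the Flink class:

public class UnitSelectionSketch {
    // Hypothetical assignment rule: unit i belongs to subtask (i mod parallelism).
    static boolean isTaskMessage(int unitId, int numSubtasks, int subtaskIndex) {
        return unitId % numSubtasks == subtaskIndex;
    }

    public static void main(String[] args) {
        int parallelism = 2;
        for (int unitId = 0; unitId < 4; unitId++) {
            for (int subtask = 0; subtask < parallelism; subtask++) {
                if (isTaskMessage(unitId, parallelism, subtask)) {
                    System.out.printf("unit %d -> subtask %d%n", unitId, subtask);
                }
            }
        }
        // Prints: unit 0 -> subtask 0, unit 1 -> subtask 1,
        //         unit 2 -> subtask 0, unit 3 -> subtask 1
    }
}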
Use of org.apache.flink.connector.file.table.stream.compact.CompactMessages.CoordinatorOutput in project flink by apache.
The class CompactOperatorTest, method testUnitSelection.
@Test
public void testUnitSelection() throws Exception {
    OneInputStreamOperatorTestHarness<CoordinatorOutput, PartitionCommitInfo> harness0 = create(2, 0);
    harness0.setup();
    harness0.open();
    OneInputStreamOperatorTestHarness<CoordinatorOutput, PartitionCommitInfo> harness1 = create(2, 1);
    harness1.setup();
    harness1.open();
    Path f0 = newFile(".uncompacted-f0", 3);
    Path f1 = newFile(".uncompacted-f1", 2);
    Path f2 = newFile(".uncompacted-f2", 2);
    Path f3 = newFile(".uncompacted-f3", 5);
    Path f4 = newFile(".uncompacted-f4", 1);
    Path f5 = newFile(".uncompacted-f5", 5);
    Path f6 = newFile(".uncompacted-f6", 4);
    FileSystem fs = f0.getFileSystem();
    // Feed subtask 0 the full broadcast of units.
    harness0.processElement(new CompactionUnit(0, "p0", Arrays.asList(f0, f1, f4)), 0);
    harness0.processElement(new CompactionUnit(1, "p0", Collections.singletonList(f3)), 0);
    harness0.processElement(new CompactionUnit(2, "p0", Arrays.asList(f2, f5)), 0);
    harness0.processElement(new CompactionUnit(3, "p0", Collections.singletonList(f6)), 0);
    harness0.processElement(new EndCompaction(1), 0);
    // Check that the compacted files were generated.
    Assert.assertTrue(fs.exists(new Path(folder, "compacted-f0")));
    Assert.assertTrue(fs.exists(new Path(folder, "compacted-f2")));
    // f3 and f6 are handled by the other task.
    Assert.assertFalse(fs.exists(new Path(folder, "compacted-f3")));
    Assert.assertFalse(fs.exists(new Path(folder, "compacted-f6")));
    // Feed subtask 1 the same broadcast; it compacts the remaining units.
    harness1.processElement(new CompactionUnit(0, "p0", Arrays.asList(f0, f1, f4)), 0);
    harness1.processElement(new CompactionUnit(1, "p0", Collections.singletonList(f3)), 0);
    harness1.processElement(new CompactionUnit(2, "p0", Arrays.asList(f2, f5)), 0);
    harness1.processElement(new CompactionUnit(3, "p0", Collections.singletonList(f6)), 0);
    harness1.processElement(new EndCompaction(1), 0);
    // Check that the remaining compacted files were generated.
    Assert.assertTrue(fs.exists(new Path(folder, "compacted-f3")));
    Assert.assertTrue(fs.exists(new Path(folder, "compacted-f6")));
    harness0.close();
    harness1.close();
}
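With parallelism 2, the assertions match a round-robin reading of the unit ids: units 0 and 2 are picked up by subtask 0 (hence compacted-f0 and compacted-f2 exist after the first harness runs), and units 1 and 3 by subtask 1 (compacted-f3 and compacted-f6 appear only after the second), consistent with the assignment rule sketched after processElement above.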
Use of org.apache.flink.connector.file.table.stream.compact.CompactMessages.CoordinatorOutput in project flink by apache.
The class CompactCoordinatorTest, method testCoordinatorCrossCheckpoints.
@Test
public void testCoordinatorCrossCheckpoints() throws Exception {
    AtomicReference<OperatorSubtaskState> state = new AtomicReference<>();
    runCoordinator(harness -> {
        harness.setup();
        harness.open();
        harness.processElement(new InputFile("p0", newFile("f0", 3)), 0);
        harness.processElement(new InputFile("p0", newFile("f1", 2)), 0);
        harness.processElement(new InputFile("p1", newFile("f2", 2)), 0);
        harness.processElement(new InputFile("p0", newFile("f3", 5)), 0);
        harness.processElement(new InputFile("p0", newFile("f4", 1)), 0);
        harness.processElement(new InputFile("p1", newFile("f5", 5)), 0);
        harness.processElement(new InputFile("p1", newFile("f6", 4)), 0);
        state.set(harness.snapshot(1, 0));
    });
    runCoordinator(harness -> {
        harness.setup();
        harness.initializeState(state.get());
        harness.open();
        harness.processElement(new InputFile("p0", newFile("f7", 3)), 0);
        harness.processElement(new InputFile("p0", newFile("f8", 2)), 0);
        state.set(harness.snapshot(2, 0));
    });
    runCoordinator(harness -> {
        harness.setup();
        harness.initializeState(state.get());
        harness.open();
        harness.processElement(new EndCheckpoint(2, 0, 1), 0);
        List<CoordinatorOutput> outputs = harness.extractOutputValues();
        Assert.assertEquals(7, outputs.size());
        List<CompactionUnit> cp1Units = new ArrayList<>();
        for (int i = 0; i < 4; i++) {
            CoordinatorOutput output = outputs.get(i);
            Assert.assertTrue(output instanceof CompactionUnit);
            cp1Units.add((CompactionUnit) output);
        }
        cp1Units.sort(
                Comparator.comparing(CompactionUnit::getPartition)
                        .thenComparingInt(CompactionUnit::getUnitId));
        assertUnit(cp1Units.get(0), 0, "p0", Arrays.asList("f0", "f1", "f4"));
        assertUnit(cp1Units.get(1), 1, "p0", Collections.singletonList("f3"));
        assertUnit(cp1Units.get(2), 2, "p1", Arrays.asList("f2", "f5"));
        assertUnit(cp1Units.get(3), 3, "p1", Collections.singletonList("f6"));
        assertEndCompaction(outputs.get(4), 1);
        assertUnit(outputs.get(5), 0, "p0", Arrays.asList("f7", "f8"));
        assertEndCompaction(outputs.get(6), 2);
    });
}
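The third run captures the cross-checkpoint contract: a single EndCheckpoint(2, ...) first flushes the four units buffered under checkpoint 1 and marks them with EndCompaction(1), then flushes checkpoint 2's unit (f7, f8) and marks EndCompaction(2). Below is a simplified, hypothetical sketch of that buffering discipline; it omits partitioning and size-based unit packing entirely, so it is not Flink's CompactCoordinator:

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;

public class CrossCheckpointSketch {
    // Files buffered per checkpoint id, flushed strictly in checkpoint order.
    private final NavigableMap<Long, List<String>> pending = new TreeMap<>();

    void addFile(long checkpointId, String file) {
        pending.computeIfAbsent(checkpointId, k -> new ArrayList<>()).add(file);
    }

    // EndCheckpoint(n) emits the files buffered for every checkpoint <= n,
    // each batch followed by an end-of-compaction marker for that checkpoint.
    List<String> endCheckpoint(long checkpointId) {
        List<String> out = new ArrayList<>();
        Iterator<Map.Entry<Long, List<String>>> it =
                pending.headMap(checkpointId, true).entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry<Long, List<String>> e = it.next();
            out.add("units " + e.getValue() + " for checkpoint " + e.getKey());
            out.add("end-compaction " + e.getKey());
            it.remove();
        }
        return out;
    }

    public static void main(String[] args) {
        CrossCheckpointSketch c = new CrossCheckpointSketch();
        c.addFile(1, "f0");
        c.addFile(1, "f1");
        c.addFile(2, "f7");
        // Flushes checkpoint 1, then checkpoint 2, mirroring the assertion order above.
        c.endCheckpoint(2).forEach(System.out::println);
    }
}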