Use of org.apache.flink.connector.file.sink.compactor.operator.CompactorRequest in project flink by apache.
The class FileSink, method addPreCommitTopology.
@Override
public DataStream<CommittableMessage<FileSinkCommittable>> addPreCommitTopology(
        DataStream<CommittableMessage<FileSinkCommittable>> committableStream) {
    FileCompactStrategy strategy = bucketsBuilder.getCompactStrategy();
    if (strategy == null) {
        // Compaction is not enabled; the handlers are still added to process the remaining
        // state of the compact coordinator and the compactor operators from a previous run.
        SingleOutputStreamOperator<Either<CommittableMessage<FileSinkCommittable>, CompactorRequest>>
                coordinatorOp =
                        committableStream
                                .forward()
                                .transform(
                                        "CompactorCoordinator",
                                        new EitherTypeInfo<>(
                                                committableStream.getType(),
                                                new CompactorRequestTypeInfo(
                                                        bucketsBuilder::getCommittableSerializer)),
                                        new CompactCoordinatorStateHandlerFactory(
                                                bucketsBuilder::getCommittableSerializer))
                                .setParallelism(committableStream.getParallelism())
                                .uid("FileSinkCompactorCoordinator");
        return coordinatorOp
                .forward()
                .transform(
                        "CompactorOperator",
                        committableStream.getType(),
                        new CompactorOperatorStateHandlerFactory(
                                bucketsBuilder::getCommittableSerializer,
                                bucketsBuilder::createBucketWriter))
                .setParallelism(committableStream.getParallelism())
                .uid("FileSinkCompactorOperator");
    }

    // An explicit rebalance is required here; otherwise the partitioner stays forward, which
    // is in fact the partitioner from the writers to the committers.
    SingleOutputStreamOperator<CompactorRequest> coordinatorOp =
            committableStream
                    .rebalance()
                    .transform(
                            "CompactorCoordinator",
                            new CompactorRequestTypeInfo(bucketsBuilder::getCommittableSerializer),
                            new CompactCoordinatorFactory(
                                    strategy, bucketsBuilder::getCommittableSerializer))
                    .setParallelism(1)
                    .uid("FileSinkCompactorCoordinator");

    // The parallelism of the compactors is not configurable at present, since it must be
    // identical to that of the committers; otherwise the committable summary and the
    // committables may be distributed to different committers, which would cause a failure.
    TypeInformation<CommittableMessage<FileSinkCommittable>> committableType =
            committableStream.getType();
    return coordinatorOp
            .transform(
                    "CompactorOperator",
                    committableType,
                    new CompactorOperatorFactory(
                            strategy,
                            bucketsBuilder.getFileCompactor(),
                            bucketsBuilder::getCommittableSerializer,
                            bucketsBuilder::createBucketWriter))
            .setParallelism(committableStream.getParallelism())
            .uid("FileSinkCompactorOperator");
}
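Which branch is taken depends entirely on whether compaction was enabled when the sink was built. Below is a minimal sketch of the user-facing side, assuming the row-format builder exposes enableCompact(FileCompactStrategy, FileCompactor); the path, encoder, and trigger values are placeholders.
// With enableCompact(...) the buckets builder carries a non-null FileCompactStrategy, so
// addPreCommitTopology wires the CompactCoordinator and CompactorOperator; without it, only
// the state handlers shown above are added.
FileSink<String> sink =
        FileSink.forRowFormat(new Path("/tmp/output"), new SimpleStringEncoder<String>("UTF-8"))
                .enableCompact(
                        FileCompactStrategy.Builder.newBuilder()
                                .setSizeThreshold(1024)
                                .enableCompactionOnCheckpoint(5)
                                .build(),
                        new ConcatFileCompactor())
                .build();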
Use of org.apache.flink.connector.file.sink.compactor.operator.CompactorRequest in project flink by apache.
The class CompactCoordinatorTest, method testSizeThreshold.
@Test
public void testSizeThreshold() throws Exception {
    FileCompactStrategy strategy = Builder.newBuilder().setSizeThreshold(10).build();
    CompactCoordinator coordinator =
            new CompactCoordinator(strategy, getTestCommittableSerializer());
    try (OneInputStreamOperatorTestHarness<CommittableMessage<FileSinkCommittable>, CompactorRequest>
            harness = new OneInputStreamOperatorTestHarness<>(coordinator)) {
        harness.setup();
        harness.open();

        FileSinkCommittable committable0 = committable("0", ".0", 5);
        FileSinkCommittable committable1 = committable("0", ".1", 6);

        // 5 bytes stays below the 10-byte threshold, so nothing is emitted yet.
        harness.processElement(message(committable0));
        Assert.assertEquals(0, harness.extractOutputValues().size());

        // The second committable pushes bucket "0" over the threshold and triggers one request.
        harness.processElement(message(committable1));
        List<CompactorRequest> results = harness.extractOutputValues();
        Assert.assertEquals(1, results.size());
        assertToCompact(results.get(0), committable0, committable1);

        // Neither bucket reaches the threshold again, so no further request is emitted.
        harness.processElement(message(committable("0", ".2", 5)));
        harness.processElement(message(committable("1", ".0", 5)));
        Assert.assertEquals(1, harness.extractOutputValues().size());
    }
}
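The committable(...), message(...), and assertToCompact(...) helpers live elsewhere in CompactCoordinatorTest and are not shown here. The following is a purely hypothetical sketch of their shape (the real helpers differ, in particular in how the FileSinkCommittable and its pending file are built, which is why committable(...) is omitted), assuming CompactorRequest exposes a getCommittableToCompact() accessor.
import java.util.Arrays;

import org.apache.flink.connector.file.sink.FileSinkCommittable;
import org.apache.flink.connector.file.sink.compactor.operator.CompactorRequest;
import org.apache.flink.streaming.api.connector.sink2.CommittableMessage;
import org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;

import org.junit.Assert;

// Hypothetical helper sketches; the real helpers in CompactCoordinatorTest differ.
final class CompactCoordinatorTestHelpers {

    private CompactCoordinatorTestHelpers() {}

    // Wrap a committable the way the writer-to-committer topology would before it reaches
    // the CompactCoordinator under test.
    static StreamRecord<CommittableMessage<FileSinkCommittable>> message(
            FileSinkCommittable committable) {
        return new StreamRecord<>(new CommittableWithLineage<>(committable, 1L, 0));
    }

    // Check that a request selects exactly the expected committables for compaction
    // (assumes the getCommittableToCompact() accessor).
    static void assertToCompact(CompactorRequest request, FileSinkCommittable... expected) {
        Assert.assertEquals(Arrays.asList(expected), request.getCommittableToCompact());
    }
}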
Use of org.apache.flink.connector.file.sink.compactor.operator.CompactorRequest in project flink by apache.
The class CompactCoordinatorTest, method testCompactOverMultipleCheckpoints.
@Test
public void testCompactOverMultipleCheckpoints() throws Exception {
    FileCompactStrategy strategy = Builder.newBuilder().enableCompactionOnCheckpoint(3).build();
    CompactCoordinator coordinator =
            new CompactCoordinator(strategy, getTestCommittableSerializer());
    try (OneInputStreamOperatorTestHarness<CommittableMessage<FileSinkCommittable>, CompactorRequest>
            harness = new OneInputStreamOperatorTestHarness<>(coordinator)) {
        harness.setup();
        harness.open();

        FileSinkCommittable committable0 = committable("0", ".0", 5);
        FileSinkCommittable committable1 = committable("0", ".1", 6);
        harness.processElement(message(committable0));
        harness.processElement(message(committable1));
        Assert.assertEquals(0, harness.extractOutputValues().size());

        // The first two checkpoints do not reach the configured count of 3.
        harness.prepareSnapshotPreBarrier(1);
        harness.snapshot(1, 1);
        harness.prepareSnapshotPreBarrier(2);
        harness.snapshot(2, 2);
        Assert.assertEquals(0, harness.extractOutputValues().size());

        // The third checkpoint triggers a single request covering both committables.
        harness.prepareSnapshotPreBarrier(3);
        harness.snapshot(3, 3);
        List<CompactorRequest> results = harness.extractOutputValues();
        Assert.assertEquals(1, results.size());
        assertToCompact(results.get(0), committable0, committable1);
    }
}
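On the job side, the checkpoint-based trigger exercised here is configured through the same builder as the size threshold. A minimal sketch with illustrative values, assuming the two triggers can be combined so that compaction fires on whichever condition is met first:
FileCompactStrategy strategy =
        FileCompactStrategy.Builder.newBuilder()
                .setSizeThreshold(16 * 1024 * 1024) // compact once a bucket has buffered ~16 MiB
                .enableCompactionOnCheckpoint(3)    // or at the latest every 3 checkpoints
                .build();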
Use of org.apache.flink.connector.file.sink.compactor.operator.CompactorRequest in project flink by apache.
The class CompactCoordinatorTest, method testCompactOnEndOfInput.
@Test
public void testCompactOnEndOfInput() throws Exception {
    FileCompactStrategy strategy = Builder.newBuilder().setSizeThreshold(10).build();
    CompactCoordinator coordinator =
            new CompactCoordinator(strategy, getTestCommittableSerializer());
    try (OneInputStreamOperatorTestHarness<CommittableMessage<FileSinkCommittable>, CompactorRequest>
            harness = new OneInputStreamOperatorTestHarness<>(coordinator)) {
        harness.setup();
        harness.open();

        // 5 bytes stays below the 10-byte threshold, so neither processing the element nor
        // taking a checkpoint emits a request.
        FileSinkCommittable committable0 = committable("0", ".0", 5);
        harness.processElement(message(committable0));
        Assert.assertEquals(0, harness.extractOutputValues().size());
        harness.prepareSnapshotPreBarrier(1);
        harness.snapshot(1, 1);
        Assert.assertEquals(0, harness.extractOutputValues().size());

        // At end of input the remaining committable is compacted regardless of the threshold.
        harness.endInput();
        List<CompactorRequest> results = harness.extractOutputValues();
        Assert.assertEquals(1, results.size());
        assertToCompact(results.get(0), committable0);
    }
}
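The endInput() call models a bounded job running out of input: whatever is still buffered is compacted even though the size threshold was never reached. A minimal, hedged sketch of such a bounded job, under the same builder assumptions as the earlier sketch; paths, elements, and thresholds are placeholders.
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.serialization.SimpleStringEncoder;
import org.apache.flink.connector.file.sink.FileSink;
import org.apache.flink.connector.file.sink.compactor.ConcatFileCompactor;
import org.apache.flink.connector.file.sink.compactor.FileCompactStrategy;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class BoundedCompactingFileSinkJob {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Bounded (batch) execution: when the input ends, pending files are compacted even if
        // no trigger has fired yet, mirroring the endInput() path in the test above.
        env.setRuntimeMode(RuntimeExecutionMode.BATCH);

        FileSink<String> sink =
                FileSink.forRowFormat(
                                new Path("/tmp/compacted-output"),
                                new SimpleStringEncoder<String>("UTF-8"))
                        .enableCompact(
                                FileCompactStrategy.Builder.newBuilder()
                                        .setSizeThreshold(1024)
                                        .build(),
                                new ConcatFileCompactor())
                        .build();

        env.fromElements("a", "b", "c").sinkTo(sink).setParallelism(1);
        env.execute("bounded-compacting-file-sink");
    }
}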
Use of org.apache.flink.connector.file.sink.compactor.operator.CompactorRequest in project flink by apache.
The class CompactCoordinatorTest, method testStateHandler.
@Test
public void testStateHandler() throws Exception {
    FileCompactStrategy strategy = Builder.newBuilder().setSizeThreshold(10).build();
    CompactCoordinator coordinator =
            new CompactCoordinator(strategy, getTestCommittableSerializer());

    // with . prefix
    FileSinkCommittable committable0 = committable("0", ".0", 5);
    FileSinkCommittable committable1 = committable("0", ".1", 6);
    // without . prefix
    FileSinkCommittable committable2 = committable("0", "2", 6);

    OperatorSubtaskState state;
    try (OneInputStreamOperatorTestHarness<CommittableMessage<FileSinkCommittable>, CompactorRequest>
            harness = new OneInputStreamOperatorTestHarness<>(coordinator)) {
        harness.setup();
        harness.open();

        // The committable stays below the size threshold and is kept in the coordinator's state.
        harness.processElement(message(committable0));
        Assert.assertEquals(0, harness.extractOutputValues().size());
        harness.prepareSnapshotPreBarrier(1);
        state = harness.snapshot(1, 1);
    }

    // Restore that state into the handler that takes over when compaction is disabled.
    CompactCoordinatorStateHandler handler =
            new CompactCoordinatorStateHandler(getTestCommittableSerializer());
    try (OneInputStreamOperatorTestHarness<
                    CommittableMessage<FileSinkCommittable>,
                    Either<CommittableMessage<FileSinkCommittable>, CompactorRequest>>
            harness = new OneInputStreamOperatorTestHarness<>(handler)) {
        harness.setup(
                new EitherSerializer<>(
                        new SimpleVersionedSerializerTypeSerializerProxy<>(
                                () -> new CommittableMessageSerializer<>(getTestCommittableSerializer())),
                        new SimpleVersionedSerializerTypeSerializerProxy<>(
                                () -> new CompactorRequestSerializer(getTestCommittableSerializer()))));
        harness.initializeState(state);
        harness.open();
        Assert.assertEquals(1, harness.extractOutputValues().size());

        harness.processElement(message(committable1));
        harness.processElement(message(committable2));
        List<Either<CommittableMessage<FileSinkCommittable>, CompactorRequest>> results =
                harness.extractOutputValues();
        Assert.assertEquals(3, results.size());

        // restored request
        Assert.assertTrue(results.get(0).isRight());
        assertToCompact(results.get(0).right(), committable0);

        // a committable with the . prefix should also be passed through
        Assert.assertTrue(
                results.get(1).isLeft() && results.get(1).left() instanceof CommittableWithLineage);
        Assert.assertEquals(
                ((CommittableWithLineage<FileSinkCommittable>) results.get(1).left()).getCommittable(),
                committable1);

        // a committable without the . prefix is passed through normally
        Assert.assertTrue(
                results.get(2).isLeft() && results.get(2).left() instanceof CommittableWithLineage);
        Assert.assertEquals(
                ((CommittableWithLineage<FileSinkCommittable>) results.get(2).left()).getCommittable(),
                committable2);
    }
}