Use of org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage in project flink by apache.
From the class CompactorOperatorTest, the method testStateHandler:
@Test
public void testStateHandler() throws Exception {
    FileCompactor fileCompactor =
            new RecordWiseFileCompactor<>(new DecoderBasedReader.Factory<>(IntDecoder::new));
    CompactorOperator compactor = createTestOperator(fileCompactor);

    OperatorSubtaskState state;
    try (OneInputStreamOperatorTestHarness<CompactorRequest, CommittableMessage<FileSinkCommittable>>
            harness = new OneInputStreamOperatorTestHarness<>(compactor)) {
        harness.setup();
        harness.open();

        harness.processElement(
                request("0", Arrays.asList(committable("0", ".0", 1), committable("0", ".1", 2)), null));
        harness.snapshot(1, 1L);
        harness.processElement(
                request("0", Arrays.asList(committable("0", ".2", 3), committable("0", ".3", 4)), null));
        harness.notifyOfCompletedCheckpoint(1);

        // request 1 is submitted and request 2 is pending
        state = harness.snapshot(2, 2L);
    }

    CompactorOperatorStateHandler handler =
            new CompactorOperatorStateHandler(getTestCommittableSerializer(), createTestBucketWriter());
    try (OneInputStreamOperatorTestHarness<
                    Either<CommittableMessage<FileSinkCommittable>, CompactorRequest>,
                    CommittableMessage<FileSinkCommittable>>
            harness = new OneInputStreamOperatorTestHarness<>(handler)) {
        harness.setup();
        harness.initializeState(state);
        harness.open();

        // remaining requests from the coordinator
        harness.processElement(
                new StreamRecord<>(
                        Either.Right(
                                request("0", Collections.singletonList(committable("0", ".4", 5)), null)
                                        .getValue())));
        harness.processElement(
                new StreamRecord<>(
                        Either.Right(
                                request("0", Collections.singletonList(committable("0", ".5", 6)), null)
                                        .getValue())));
        harness.processElement(
                new StreamRecord<>(Either.Left(new CommittableSummary<>(0, 1, 3L, 2, 2, 0))));

        // remaining in-progress file from the file writer
        harness.processElement(
                new StreamRecord<>(
                        Either.Left(new CommittableWithLineage<>(committable("0", ".6", 7), 3L, 0))));
        // a new pending file written in this run
        harness.processElement(
                new StreamRecord<>(
                        Either.Left(new CommittableWithLineage<>(committable("0", "7", 8), 3L, 0))));

        Assert.assertTrue(handler.isWriterStateDrained());
        Assert.assertFalse(handler.isStateDrained());

        // the result should not be emitted yet, but all requests should already be submitted
        Assert.assertEquals(0, harness.extractOutputValues().size());
        handler.getAllTasksFuture().join();
        // the state should be drained, and all results and held messages should be emitted
        harness.prepareSnapshotPreBarrier(3);
        Assert.assertTrue(handler.isStateDrained());

        // the summaries should be merged into one:
        // 1 summary + 1 passed-through committable + (1 compacted committable + 1 cleanup) * 7
        List<CommittableMessage<FileSinkCommittable>> results = harness.extractOutputValues();
        Assert.assertEquals(16, results.size());
        SinkV2Assertions.assertThat((CommittableSummary<?>) results.get(0)).hasPendingCommittables(15);

        List<FileSinkCommittable> expectedResult =
                Arrays.asList(
                        committable("0", "7", 8),
                        committable("0", "compacted-0", 1),
                        cleanupPath("0", ".0"),
                        committable("0", "compacted-1", 2),
                        cleanupPath("0", ".1"),
                        committable("0", "compacted-2", 3),
                        cleanupPath("0", ".2"),
                        committable("0", "compacted-3", 4),
                        cleanupPath("0", ".3"),
                        committable("0", "compacted-4", 5),
                        cleanupPath("0", ".4"),
                        committable("0", "compacted-5", 6),
                        cleanupPath("0", ".5"),
                        committable("0", "compacted-6", 7),
                        cleanupPath("0", ".6"));
        for (int i = 1; i < results.size(); ++i) {
            SinkV2Assertions.assertThat((CommittableWithLineage<?>) results.get(i))
                    .hasCommittable(expectedResult.get(i - 1));
        }
    }
}
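The helpers committable, cleanupPath, cleanupInprogress, and request used in these tests are private utilities of CompactorOperatorTest whose definitions are not part of this listing. Below is a minimal reconstruction of what they plausibly look like: the FileSinkCommittable constructors and CompactorRequest.addToCompact are real API (the latter is visible in processElement further below), while TestPendingFileRecoverable, TestInProgressFileRecoverable, and addToPassthrough are assumptions, and the real helpers may differ.

// Hypothetical reconstruction of the test helpers; the recoverable stub
// classes and addToPassthrough are assumed, not taken from the Flink sources.
private FileSinkCommittable committable(String bucketId, String name, int size) {
    // a committable carrying a pending file with the given name and size
    return new FileSinkCommittable(bucketId, new TestPendingFileRecoverable(name, size));
}

private FileSinkCommittable cleanupInprogress(String bucketId, String name, int size) {
    // a committable that only asks for an in-progress file to be cleaned up
    return new FileSinkCommittable(bucketId, new TestInProgressFileRecoverable(name, size));
}

private FileSinkCommittable cleanupPath(String bucketId, String name) {
    // a committable that only asks for the given path to be deleted
    return new FileSinkCommittable(bucketId, new Path(name));
}

private StreamRecord<CompactorRequest> request(
        String bucketId,
        List<FileSinkCommittable> toCompact,
        List<FileSinkCommittable> toPassThrough) {
    CompactorRequest request = new CompactorRequest(bucketId);
    if (toCompact != null) {
        toCompact.forEach(request::addToCompact);
    }
    if (toPassThrough != null) {
        toPassThrough.forEach(request::addToPassthrough);
    }
    return new StreamRecord<>(request);
}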
Use of org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage in project flink by apache.
From the class CompactorOperatorTest, the method testPassthrough:
@Test
public void testPassthrough() throws Exception {
    FileCompactor fileCompactor =
            new RecordWiseFileCompactor<>(new DecoderBasedReader.Factory<>(IntDecoder::new));
    CompactorOperator compactor = createTestOperator(fileCompactor);

    try (OneInputStreamOperatorTestHarness<CompactorRequest, CommittableMessage<FileSinkCommittable>>
            harness = new OneInputStreamOperatorTestHarness<>(compactor)) {
        harness.setup();
        harness.open();

        FileSinkCommittable cleanupInprogressRequest = cleanupInprogress("0", "0", 1);
        FileSinkCommittable cleanupPathRequest = cleanupPath("0", "1");

        harness.processElement(request("0", null, Collections.singletonList(cleanupInprogressRequest)));
        harness.processElement(request("0", null, Collections.singletonList(cleanupPathRequest)));
        Assert.assertEquals(0, harness.extractOutputValues().size());

        harness.prepareSnapshotPreBarrier(1);
        harness.snapshot(1, 1L);
        harness.notifyOfCompletedCheckpoint(1);
        compactor.getAllTasksFuture().join();
        Assert.assertEquals(0, harness.extractOutputValues().size());

        harness.prepareSnapshotPreBarrier(2);
        List<CommittableMessage<FileSinkCommittable>> results = harness.extractOutputValues();
        Assert.assertEquals(3, results.size());
        SinkV2Assertions.assertThat((CommittableSummary<?>) results.get(0)).hasPendingCommittables(2);
        SinkV2Assertions.assertThat((CommittableWithLineage<?>) results.get(1))
                .hasCommittable(cleanupInprogressRequest);
        SinkV2Assertions.assertThat((CommittableWithLineage<?>) results.get(2))
                .hasCommittable(cleanupPathRequest);
    }
}
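The same pass-through requests can also be assembled without the request helper. A sketch, again assuming CompactorRequest.addToPassthrough as the counterpart of the addToCompact call visible in processElement further below:

// Building a pass-through (cleanup-only) request directly: nothing is
// compacted, the committable is simply forwarded after the next checkpoint.
CompactorRequest cleanupOnly = new CompactorRequest("0");
cleanupOnly.addToPassthrough(cleanupPath("0", "1"));
harness.processElement(new StreamRecord<>(cleanupOnly));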
Use of org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage in project flink by apache.
From the class CompactorOperatorTest, the method testRestore:
@Test
public void testRestore() throws Exception {
    FileCompactor fileCompactor =
            new RecordWiseFileCompactor<>(new DecoderBasedReader.Factory<>(IntDecoder::new));
    CompactorOperator compactor = createTestOperator(fileCompactor);

    OperatorSubtaskState state;
    try (OneInputStreamOperatorTestHarness<CompactorRequest, CommittableMessage<FileSinkCommittable>>
            harness = new OneInputStreamOperatorTestHarness<>(compactor)) {
        harness.setup();
        harness.open();

        harness.processElement(
                request("0", Arrays.asList(committable("0", ".0", 5), committable("0", ".1", 5)), null));
        harness.snapshot(1, 1L);
        harness.processElement(
                request("0", Arrays.asList(committable("0", ".2", 5), committable("0", ".3", 5)), null));
        harness.notifyOfCompletedCheckpoint(1);

        // request 1 is submitted and request 2 is pending
        state = harness.snapshot(2, 2L);
    }

    compactor = createTestOperator(fileCompactor);
    try (OneInputStreamOperatorTestHarness<CompactorRequest, CommittableMessage<FileSinkCommittable>>
            harness = new OneInputStreamOperatorTestHarness<>(compactor)) {
        harness.setup();
        harness.initializeState(state);
        harness.open();

        // request 1 should be submitted
        compactor.getAllTasksFuture().join();
        harness.prepareSnapshotPreBarrier(3);
        // the result of request 1 should be emitted
        Assert.assertEquals(4, harness.extractOutputValues().size());

        harness.snapshot(3, 3L);
        harness.notifyOfCompletedCheckpoint(3L);
        // request 2 should be submitted
        compactor.getAllTasksFuture().join();
        harness.prepareSnapshotPreBarrier(4);
        // the result of request 2 should be emitted
        Assert.assertEquals(8, harness.extractOutputValues().size());

        // (1 summary + 1 compacted committable + 2 cleanups) * 2; each compacted file
        // is named after its first input (.0 -> compacted-0, .2 -> compacted-2) and its
        // size (10) is the sum of the two size-5 inputs
        List<CommittableMessage<FileSinkCommittable>> results = harness.extractOutputValues();
        Assert.assertEquals(8, results.size());
        SinkV2Assertions.assertThat((CommittableSummary<?>) results.get(0)).hasPendingCommittables(3);
        SinkV2Assertions.assertThat((CommittableWithLineage<?>) results.get(1))
                .hasCommittable(committable("0", "compacted-0", 10));
        SinkV2Assertions.assertThat((CommittableWithLineage<?>) results.get(2))
                .hasCommittable(cleanupPath("0", ".0"));
        SinkV2Assertions.assertThat((CommittableWithLineage<?>) results.get(3))
                .hasCommittable(cleanupPath("0", ".1"));
        SinkV2Assertions.assertThat((CommittableSummary<?>) results.get(4)).hasPendingCommittables(3);
        SinkV2Assertions.assertThat((CommittableWithLineage<?>) results.get(5))
                .hasCommittable(committable("0", "compacted-2", 10));
        SinkV2Assertions.assertThat((CommittableWithLineage<?>) results.get(6))
                .hasCommittable(cleanupPath("0", ".2"));
        SinkV2Assertions.assertThat((CommittableWithLineage<?>) results.get(7))
                .hasCommittable(cleanupPath("0", ".3"));
    }
}
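createTestOperator is another helper whose definition is not part of this listing. Given the operator's dependencies, it presumably wires the compactor together with a compaction strategy, a committable serializer, and a bucket writer roughly as sketched below; the constructor parameter order and the strategy settings are assumptions chosen to match the per-checkpoint compaction behavior seen in these tests.

// Hypothetical sketch of createTestOperator; parameter order and the
// FileCompactStrategy settings are assumptions, not the actual helper.
private CompactorOperator createTestOperator(FileCompactor fileCompactor) {
    return new CompactorOperator(
            FileCompactStrategy.Builder.newBuilder()
                    .enableCompactionOnCheckpoint(1) // compact on every checkpoint
                    .build(),
            getTestCommittableSerializer(),
            fileCompactor,
            createTestBucketWriter());
}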
Use of org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage in project flink by apache.
From the class CommitterOperatorTest, the method testStateRestore:
@Test
void testStateRestore() throws Exception {
    final OneInputStreamOperatorTestHarness<CommittableMessage<String>, CommittableMessage<String>>
            testHarness = createTestHarness(new TestSink.RetryOnceCommitter());
    testHarness.open();

    final CommittableSummary<String> committableSummary =
            new CommittableSummary<>(1, 1, 0L, 1, 1, 0);
    testHarness.processElement(new StreamRecord<>(committableSummary));
    final CommittableWithLineage<String> first = new CommittableWithLineage<>("1", 0L, 1);
    testHarness.processElement(new StreamRecord<>(first));

    final OperatorSubtaskState snapshot = testHarness.snapshot(0L, 2L);

    // Trigger first checkpoint but committer needs retry
    testHarness.notifyOfCompletedCheckpoint(0);
    assertThat(testHarness.getOutput()).isEmpty();
    testHarness.close();

    final ForwardingCommitter committer = new ForwardingCommitter();
    final OneInputStreamOperatorTestHarness<CommittableMessage<String>, CommittableMessage<String>>
            restored = createTestHarness(committer);
    restored.initializeState(snapshot);
    restored.open();

    // Previous committables are immediately committed if possible
    final List<StreamElement> output = fromOutput(restored.getOutput());
    assertThat(output).hasSize(2);
    assertThat(committer.getSuccessfulCommits()).isEqualTo(1);
    SinkV2Assertions.assertThat(toCommittableSummary(output.get(0)))
            .hasFailedCommittables(committableSummary.getNumberOfFailedCommittables())
            .hasOverallCommittables(committableSummary.getNumberOfCommittables())
            .hasPendingCommittables(0);
    SinkV2Assertions.assertThat(toCommittableWithLinage(output.get(1)))
            .isEqualTo(new CommittableWithLineage<>(first.getCommittable(), 1L, 0));
    restored.close();
}
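The positional constructor arguments of the two message types are easy to misread. Assuming the sink V2 signatures of the Flink 1.15 era (worth verifying against the Javadoc of the version at hand), the values used above decode as follows:

// Assumed parameter order of the sink V2 committable messages used above.
CommittableSummary<String> summary =
        new CommittableSummary<>(
                1,   // subtaskId that emits the summary
                1,   // numberOfSubtasks
                0L,  // checkpointId
                1,   // numberOfCommittables in this checkpoint
                1,   // numberOfPendingCommittables
                0);  // numberOfFailedCommittables

CommittableWithLineage<String> withLineage =
        new CommittableWithLineage<>(
                "1", // the wrapped committable
                0L,  // checkpointId it belongs to
                1);  // subtaskId that produced it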
Use of org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage in project flink by apache.
From the class CompactorOperatorStateHandler, the method processElement:
@Override
public void processElement(
        StreamRecord<Either<CommittableMessage<FileSinkCommittable>, CompactorRequest>> element)
        throws Exception {
    Either<CommittableMessage<FileSinkCommittable>, CompactorRequest> record = element.getValue();

    if (stateDrained) {
        // all input should be committable messages to pass through
        output.collect(new StreamRecord<>(record.left()));
        return;
    }

    if (record.isRight()) {
        submit(element.getValue().right());
        return;
    }

    CommittableMessage<FileSinkCommittable> message = record.left();
    if (message instanceof CommittableSummary) {
        checkState(holdingSummary == null, "Duplicate summary before the first checkpoint.");
        holdingSummary = (CommittableSummary<FileSinkCommittable>) message;
        holdingMessages = new ArrayList<>(holdingSummary.getNumberOfCommittables());
    } else {
        boolean compacting = false;
        CommittableWithLineage<FileSinkCommittable> committableWithLineage =
                (CommittableWithLineage<FileSinkCommittable>) message;
        if (committableWithLineage.getCommittable().hasPendingFile()) {
            FileSinkCommittable committable = committableWithLineage.getCommittable();
            PendingFileRecoverable pendingFile = committable.getPendingFile();
            if (pendingFile.getPath() != null && pendingFile.getPath().getName().startsWith(".")) {
                // The pending file is the in-progress file of the previous run, which
                // should be committed and compacted before sending to the committer.
                CompactorRequest request = new CompactorRequest(committable.getBucketId());
                request.addToCompact(committable);
                submit(request);

                compacting = true;
                compactingMessages.add(message);
            } else {
                // A normal file is received, indicating the writer state is drained.
                writerStateDrained = true;
                if (compactingMessages.isEmpty() && compactingRequests.isEmpty()) {
                    // No state needs to be handled, the holding summary and all
                    // committable messages can be sent eagerly.
                    checkState(holdingSummary != null);
                    output.collect(new StreamRecord<>(holdingSummary));
                    holdingSummary = null;

                    this.stateDrained = true;
                    output.collect(new StreamRecord<>(committableWithLineage));
                }
            }
        }
        if (!compacting && !stateDrained) {
            // Compacting messages should not be added.
            // If the state is drained, no further messages need to be added.
            holdingMessages.add(message);
        }
    }
}
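CompactorOperatorTest.testStateHandler above observes that a call to prepareSnapshotPreBarrier, once all compaction tasks have finished, merges the held summaries into one and flushes everything at once. The flush itself is not part of this listing; the following is a simplified sketch of what that drain step has to do, inferred from the assertions in that test. All names beyond those visible in processElement (holdingMessages, stateDrained, output) are assumptions, including drainState, compactingFutures, subtaskId, numberOfSubtasks, and checkpointId.

// Hypothetical sketch of the drain step, not the actual Flink implementation.
private void drainState() throws Exception {
    // 1. Wait for every submitted compaction; each yields the compacted
    //    committable followed by cleanup committables for its inputs.
    List<FileSinkCommittable> compacted = new ArrayList<>();
    for (CompletableFuture<Iterable<FileSinkCommittable>> future : compactingFutures) {
        future.get().forEach(compacted::add);
    }

    // 2. Emit one merged summary covering the held messages and all results
    //    (15 in testStateHandler: 1 pass-through + 7 compacted + 7 cleanups).
    int pending = holdingMessages.size() + compacted.size();
    output.collect(
            new StreamRecord<>(
                    new CommittableSummary<>(
                            subtaskId, numberOfSubtasks, checkpointId, pending, pending, 0)));

    // 3. Held pass-through messages first, then the compaction results.
    for (CommittableMessage<FileSinkCommittable> held : holdingMessages) {
        output.collect(new StreamRecord<>(held));
    }
    for (FileSinkCommittable committable : compacted) {
        output.collect(
                new StreamRecord<>(
                        new CommittableWithLineage<>(committable, checkpointId, subtaskId)));
    }

    // 4. From now on, processElement passes committable messages straight through.
    stateDrained = true;
}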