
Example 6 with CommittableSummary

Use of org.apache.flink.streaming.api.connector.sink2.CommittableSummary in project flink by apache.

From class CompactorOperatorTest, method testStateHandler.

@Test
public void testStateHandler() throws Exception {
    FileCompactor fileCompactor = new RecordWiseFileCompactor<>(new DecoderBasedReader.Factory<>(IntDecoder::new));
    CompactorOperator compactor = createTestOperator(fileCompactor);
    OperatorSubtaskState state;
    try (OneInputStreamOperatorTestHarness<CompactorRequest, CommittableMessage<FileSinkCommittable>> harness = new OneInputStreamOperatorTestHarness<>(compactor)) {
        harness.setup();
        harness.open();
        harness.processElement(request("0", Arrays.asList(committable("0", ".0", 1), committable("0", ".1", 2)), null));
        harness.snapshot(1, 1L);
        harness.processElement(request("0", Arrays.asList(committable("0", ".2", 3), committable("0", ".3", 4)), null));
        harness.notifyOfCompletedCheckpoint(1);
        // request 1 is submitted and request 2 is pending
        state = harness.snapshot(2, 2L);
    }
    CompactorOperatorStateHandler handler = new CompactorOperatorStateHandler(getTestCommittableSerializer(), createTestBucketWriter());
    try (OneInputStreamOperatorTestHarness<Either<CommittableMessage<FileSinkCommittable>, CompactorRequest>, CommittableMessage<FileSinkCommittable>> harness = new OneInputStreamOperatorTestHarness<>(handler)) {
        harness.setup();
        harness.initializeState(state);
        harness.open();
        // remaining requests from coordinator
        harness.processElement(new StreamRecord<>(Either.Right(request("0", Collections.singletonList(committable("0", ".4", 5)), null).getValue())));
        harness.processElement(new StreamRecord<>(Either.Right(request("0", Collections.singletonList(committable("0", ".5", 6)), null).getValue())));
        harness.processElement(new StreamRecord<>(Either.Left(new CommittableSummary<>(0, 1, 3L, 2, 2, 0))));
        // remaining in-progress file from file writer
        harness.processElement(new StreamRecord<>(Either.Left(new CommittableWithLineage<>(committable("0", ".6", 7), 3L, 0))));
        // new pending file written this time
        harness.processElement(new StreamRecord<>(Either.Left(new CommittableWithLineage<>(committable("0", "7", 8), 3L, 0))));
        Assert.assertTrue(handler.isWriterStateDrained());
        Assert.assertFalse(handler.isStateDrained());
        // the result should not be emitted yet, but all requests should already be submitted
        Assert.assertEquals(0, harness.extractOutputValues().size());
        compactor.getAllTasksFuture().join();
        // state should be drained, and all results and holding messages should be emitted
        harness.prepareSnapshotPreBarrier(3);
        Assert.assertTrue(handler.isStateDrained());
        // summary should be merged into one
        // 1 summary + 1 passed-through committable + (1 compacted committable + 1 cleanup) * 7 = 16
        List<CommittableMessage<FileSinkCommittable>> results = harness.extractOutputValues();
        Assert.assertEquals(16, results.size());
        SinkV2Assertions.assertThat((CommittableSummary<?>) results.get(0)).hasPendingCommittables(15);
        List<FileSinkCommittable> expectedResult =
                Arrays.asList(
                        committable("0", "7", 8),
                        committable("0", "compacted-0", 1),
                        cleanupPath("0", ".0"),
                        committable("0", "compacted-1", 2),
                        cleanupPath("0", ".1"),
                        committable("0", "compacted-2", 3),
                        cleanupPath("0", ".2"),
                        committable("0", "compacted-3", 4),
                        cleanupPath("0", ".3"),
                        committable("0", "compacted-4", 5),
                        cleanupPath("0", ".4"),
                        committable("0", "compacted-5", 6),
                        cleanupPath("0", ".5"),
                        committable("0", "compacted-6", 7),
                        cleanupPath("0", ".6"));
        for (int i = 1; i < results.size(); ++i) {
            SinkV2Assertions.assertThat((CommittableWithLineage<?>) results.get(i)).hasCommittable(expectedResult.get(i - 1));
        }
    }
}
Also used: CommittableMessage (org.apache.flink.streaming.api.connector.sink2.CommittableMessage), CommittableSummary (org.apache.flink.streaming.api.connector.sink2.CommittableSummary), CommittableWithLineage (org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage), CompactorOperatorStateHandler (org.apache.flink.connector.file.sink.compactor.operator.CompactorOperatorStateHandler), OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness), OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState), CompactorOperator (org.apache.flink.connector.file.sink.compactor.operator.CompactorOperator), Either (org.apache.flink.types.Either), FileSinkCommittable (org.apache.flink.connector.file.sink.FileSinkCommittable), CompactorRequest (org.apache.flink.connector.file.sink.compactor.operator.CompactorRequest), Test (org.junit.Test)
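A note on the CommittableSummary constructor calls in these tests (e.g. new CommittableSummary<>(0, 1, 3L, 2, 2, 0) above): the six arguments carry the subtask and checkpoint metadata that downstream operators merge and assert on. The sketch below spells out one plausible reading of the argument order, inferred from the accessors exercised elsewhere on this page (getNumberOfCommittables, getNumberOfPendingCommittables, getNumberOfFailedCommittables); verify it against the Flink version you build against.

// Sketch only: argument order inferred from the test usages above, not from the API docs.
CommittableSummary<FileSinkCommittable> summary =
        new CommittableSummary<>(
                0,   // subtask id emitting the summary
                1,   // number of subtasks in the sink
                3L,  // checkpoint id the committables belong to
                2,   // number of committables sent by this subtask
                2,   // number of committables still pending
                0);  // number of failed committables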

Example 7 with CommittableSummary

Use of org.apache.flink.streaming.api.connector.sink2.CommittableSummary in project flink by apache.

From class CompactorOperatorTest, method testPassthrough.

@Test
public void testPassthrough() throws Exception {
    FileCompactor fileCompactor = new RecordWiseFileCompactor<>(new DecoderBasedReader.Factory<>(IntDecoder::new));
    CompactorOperator compactor = createTestOperator(fileCompactor);
    try (OneInputStreamOperatorTestHarness<CompactorRequest, CommittableMessage<FileSinkCommittable>> harness = new OneInputStreamOperatorTestHarness<>(compactor)) {
        harness.setup();
        harness.open();
        FileSinkCommittable cleanupInprogressRequest = cleanupInprogress("0", "0", 1);
        FileSinkCommittable cleanupPathRequest = cleanupPath("0", "1");
        harness.processElement(request("0", null, Collections.singletonList(cleanupInprogressRequest)));
        harness.processElement(request("0", null, Collections.singletonList(cleanupPathRequest)));
        Assert.assertEquals(0, harness.extractOutputValues().size());
        harness.prepareSnapshotPreBarrier(1);
        harness.snapshot(1, 1L);
        harness.notifyOfCompletedCheckpoint(1);
        compactor.getAllTasksFuture().join();
        Assert.assertEquals(0, harness.extractOutputValues().size());
        harness.prepareSnapshotPreBarrier(2);
        List<CommittableMessage<FileSinkCommittable>> results = harness.extractOutputValues();
        Assert.assertEquals(3, results.size());
        SinkV2Assertions.assertThat((CommittableSummary<?>) results.get(0)).hasPendingCommittables(2);
        SinkV2Assertions.assertThat((CommittableWithLineage<?>) results.get(1)).hasCommittable(cleanupInprogressRequest);
        SinkV2Assertions.assertThat((CommittableWithLineage<?>) results.get(2)).hasCommittable(cleanupPathRequest);
    }
}
Also used: CommittableMessage (org.apache.flink.streaming.api.connector.sink2.CommittableMessage), CommittableSummary (org.apache.flink.streaming.api.connector.sink2.CommittableSummary), CommittableWithLineage (org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage), OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness), CompactorOperator (org.apache.flink.connector.file.sink.compactor.operator.CompactorOperator), FileSinkCommittable (org.apache.flink.connector.file.sink.FileSinkCommittable), CompactorRequest (org.apache.flink.connector.file.sink.compactor.operator.CompactorRequest), Test (org.junit.Test)
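The request(...), committable(...), cleanupPath(...) and cleanupInprogress(...) calls are private helpers of CompactorOperatorTest that this page does not reproduce. As a rough, hypothetical sketch, the request helper presumably builds a CompactorRequest for one bucket and wraps it in a StreamRecord for the harness; only addToCompact is visible in Example 10, so the name of the pass-through method below is an assumption.

// Hypothetical reconstruction of the test helper; addToPassthrough is assumed, not confirmed.
private StreamRecord<CompactorRequest> request(
        String bucketId,
        List<FileSinkCommittable> toCompact,
        List<FileSinkCommittable> toPassthrough) {
    CompactorRequest request = new CompactorRequest(bucketId);
    if (toCompact != null) {
        toCompact.forEach(request::addToCompact);
    }
    if (toPassthrough != null) {
        toPassthrough.forEach(request::addToPassthrough);
    }
    return new StreamRecord<>(request);
}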

Example 8 with CommittableSummary

Use of org.apache.flink.streaming.api.connector.sink2.CommittableSummary in project flink by apache.

From class CompactorOperatorTest, method testRestore.

@Test
public void testRestore() throws Exception {
    FileCompactor fileCompactor = new RecordWiseFileCompactor<>(new DecoderBasedReader.Factory<>(IntDecoder::new));
    CompactorOperator compactor = createTestOperator(fileCompactor);
    OperatorSubtaskState state;
    try (OneInputStreamOperatorTestHarness<CompactorRequest, CommittableMessage<FileSinkCommittable>> harness = new OneInputStreamOperatorTestHarness<>(compactor)) {
        harness.setup();
        harness.open();
        harness.processElement(request("0", Arrays.asList(committable("0", ".0", 5), committable("0", ".1", 5)), null));
        harness.snapshot(1, 1L);
        harness.processElement(request("0", Arrays.asList(committable("0", ".2", 5), committable("0", ".3", 5)), null));
        harness.notifyOfCompletedCheckpoint(1);
        // request 1 is submitted and request 2 is pending
        state = harness.snapshot(2, 2L);
    }
    compactor = createTestOperator(fileCompactor);
    try (OneInputStreamOperatorTestHarness<CompactorRequest, CommittableMessage<FileSinkCommittable>> harness = new OneInputStreamOperatorTestHarness<>(compactor)) {
        harness.setup();
        harness.initializeState(state);
        harness.open();
        // request 1 should be submitted
        compactor.getAllTasksFuture().join();
        harness.prepareSnapshotPreBarrier(3);
        // the result of request 1 should be emitted
        Assert.assertEquals(4, harness.extractOutputValues().size());
        harness.snapshot(3, 3L);
        harness.notifyOfCompletedCheckpoint(3L);
        // request 2 should be submitted
        compactor.getAllTasksFuture().join();
        harness.prepareSnapshotPreBarrier(4);
        // the result of request 2 should be emitted
        Assert.assertEquals(8, harness.extractOutputValues().size());
        // (1 summary + 1 compacted + 2 cleanups) * 2 = 8
        List<CommittableMessage<FileSinkCommittable>> results = harness.extractOutputValues();
        Assert.assertEquals(8, results.size());
        SinkV2Assertions.assertThat((CommittableSummary<?>) results.get(0)).hasPendingCommittables(3);
        SinkV2Assertions.assertThat((CommittableWithLineage<?>) results.get(1)).hasCommittable(committable("0", "compacted-0", 10));
        SinkV2Assertions.assertThat((CommittableWithLineage<?>) results.get(2)).hasCommittable(cleanupPath("0", ".0"));
        SinkV2Assertions.assertThat((CommittableWithLineage<?>) results.get(3)).hasCommittable(cleanupPath("0", ".1"));
        SinkV2Assertions.assertThat((CommittableSummary<?>) results.get(4)).hasPendingCommittables(3);
        SinkV2Assertions.assertThat((CommittableWithLineage<?>) results.get(5)).hasCommittable(committable("0", "compacted-2", 10));
        SinkV2Assertions.assertThat((CommittableWithLineage<?>) results.get(6)).hasCommittable(cleanupPath("0", ".2"));
        SinkV2Assertions.assertThat((CommittableWithLineage<?>) results.get(7)).hasCommittable(cleanupPath("0", ".3"));
    }
}
Also used: CommittableMessage (org.apache.flink.streaming.api.connector.sink2.CommittableMessage), CommittableSummary (org.apache.flink.streaming.api.connector.sink2.CommittableSummary), CommittableWithLineage (org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage), OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness), OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState), CompactorOperator (org.apache.flink.connector.file.sink.compactor.operator.CompactorOperator), CompactorRequest (org.apache.flink.connector.file.sink.compactor.operator.CompactorRequest), Test (org.junit.Test)

Example 9 with CommittableSummary

Use of org.apache.flink.streaming.api.connector.sink2.CommittableSummary in project flink by apache.

From class CommitterOperatorTest, method testStateRestore.

@Test
void testStateRestore() throws Exception {
    final OneInputStreamOperatorTestHarness<CommittableMessage<String>, CommittableMessage<String>> testHarness = createTestHarness(new TestSink.RetryOnceCommitter());
    testHarness.open();
    final CommittableSummary<String> committableSummary = new CommittableSummary<>(1, 1, 0L, 1, 1, 0);
    testHarness.processElement(new StreamRecord<>(committableSummary));
    final CommittableWithLineage<String> first = new CommittableWithLineage<>("1", 0L, 1);
    testHarness.processElement(new StreamRecord<>(first));
    final OperatorSubtaskState snapshot = testHarness.snapshot(0L, 2L);
    // Trigger first checkpoint but committer needs retry
    testHarness.notifyOfCompletedCheckpoint(0);
    assertThat(testHarness.getOutput()).isEmpty();
    testHarness.close();
    final ForwardingCommitter committer = new ForwardingCommitter();
    final OneInputStreamOperatorTestHarness<CommittableMessage<String>, CommittableMessage<String>> restored = createTestHarness(committer);
    restored.initializeState(snapshot);
    restored.open();
    // Previous committables are immediately committed if possible
    final List<StreamElement> output = fromOutput(restored.getOutput());
    assertThat(output).hasSize(2);
    assertThat(committer.getSuccessfulCommits()).isEqualTo(1);
    SinkV2Assertions.assertThat(toCommittableSummary(output.get(0)))
            .hasFailedCommittables(committableSummary.getNumberOfFailedCommittables())
            .hasOverallCommittables(committableSummary.getNumberOfCommittables())
            .hasPendingCommittables(0);
    SinkV2Assertions.assertThat(toCommittableWithLinage(output.get(1))).isEqualTo(new CommittableWithLineage<>(first.getCommittable(), 1L, 0));
    restored.close();
}
Also used: CommittableMessage (org.apache.flink.streaming.api.connector.sink2.CommittableMessage), CommittableSummary (org.apache.flink.streaming.api.connector.sink2.CommittableSummary), SinkTestUtil.toCommittableSummary (org.apache.flink.streaming.runtime.operators.sink.SinkTestUtil.toCommittableSummary), CommittableWithLineage (org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage), StreamElement (org.apache.flink.streaming.runtime.streamrecord.StreamElement), OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState), Test (org.junit.jupiter.api.Test), ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)
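The empty output after the first notifyOfCompletedCheckpoint hinges on TestSink.RetryOnceCommitter deferring its first commit attempt, so the committable only surfaces once the restored operator commits it. Below is a minimal sketch of such a committer against the sink2 Committer API (CommitRequest#retryLater), offered as an illustration under that assumption rather than as the actual TestSink code.

import java.util.Collection;
import org.apache.flink.api.connector.sink2.Committer;

// Illustrative retry-once committer: the first commit attempt asks the framework
// to retry every request; later attempts leave the requests untouched, which lets
// them be treated as successfully committed.
class RetryOnceCommitterSketch implements Committer<String> {
    private boolean retried = false;

    @Override
    public void commit(Collection<CommitRequest<String>> requests) {
        if (!retried) {
            retried = true;
            requests.forEach(CommitRequest::retryLater);
        }
    }

    @Override
    public void close() {}
}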

Example 10 with CommittableSummary

Use of org.apache.flink.streaming.api.connector.sink2.CommittableSummary in project flink by apache.

From class CompactorOperatorStateHandler, method processElement.

@Override
public void processElement(StreamRecord<Either<CommittableMessage<FileSinkCommittable>, CompactorRequest>> element) throws Exception {
    Either<CommittableMessage<FileSinkCommittable>, CompactorRequest> record = element.getValue();
    if (stateDrained) {
        // all input should be committable messages to pass through
        output.collect(new StreamRecord<>(record.left()));
        return;
    }
    if (record.isRight()) {
        submit(element.getValue().right());
        return;
    }
    CommittableMessage<FileSinkCommittable> message = record.left();
    if (message instanceof CommittableSummary) {
        checkState(holdingSummary == null, "Duplicate summary before the first checkpoint.");
        holdingSummary = (CommittableSummary<FileSinkCommittable>) message;
        holdingMessages = new ArrayList<>(holdingSummary.getNumberOfCommittables());
    } else {
        boolean compacting = false;
        CommittableWithLineage<FileSinkCommittable> committableWithLineage = (CommittableWithLineage<FileSinkCommittable>) message;
        if (committableWithLineage.getCommittable().hasPendingFile()) {
            FileSinkCommittable committable = committableWithLineage.getCommittable();
            PendingFileRecoverable pendingFile = committable.getPendingFile();
            if (pendingFile.getPath() != null && pendingFile.getPath().getName().startsWith(".")) {
                // The pending file is the in-progress file of the previous run, which
                // should be committed and compacted before sending to the committer.
                CompactorRequest request = new CompactorRequest(committable.getBucketId());
                request.addToCompact(committable);
                submit(request);
                compacting = true;
                compactingMessages.add(message);
            } else {
                // A normal file is received, indicating the writer state is drained.
                writerStateDrained = true;
                if (compactingMessages.isEmpty() && compactingRequests.isEmpty()) {
                    // No state needs to be handled, the holding summary and all committable
                    // messages can be sent eagerly
                    checkState(holdingSummary != null);
                    output.collect(new StreamRecord<>(holdingSummary));
                    holdingSummary = null;
                    this.stateDrained = true;
                    output.collect(new StreamRecord<>(committableWithLineage));
                }
            }
        }
        if (!compacting && !stateDrained) {
            // Compacting messages should not be added
            // If the state is drained, no further messages need to be added
            holdingMessages.add(message);
        }
    }
}
Also used: CommittableMessage (org.apache.flink.streaming.api.connector.sink2.CommittableMessage), CommittableSummary (org.apache.flink.streaming.api.connector.sink2.CommittableSummary), CommittableWithLineage (org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage), PendingFileRecoverable (org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter.PendingFileRecoverable), FileSinkCommittable (org.apache.flink.connector.file.sink.FileSinkCommittable)
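The startsWith(".") check above is what distinguishes restored in-progress files from freshly written pending files in the tests earlier on this page: committables named ".0" through ".6" take the re-compaction branch, while "7" hits the writer-state-drained branch. A small illustration of that condition on org.apache.flink.core.fs.Path:

// Dot-prefixed names are treated as in-progress files left over from the
// previous (pre-compaction) run; everything else is a regular pending file.
Path inProgress = new Path("/bucket-0/.0");
Path pending = new Path("/bucket-0/7");
inProgress.getName().startsWith(".");  // true  -> submit a CompactorRequest first
pending.getName().startsWith(".");     // false -> writer state is drained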

Aggregations (number of examples on this page using each class)

CommittableSummary (org.apache.flink.streaming.api.connector.sink2.CommittableSummary): 10
CommittableWithLineage (org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage): 9
CommittableMessage (org.apache.flink.streaming.api.connector.sink2.CommittableMessage): 8
FileSinkCommittable (org.apache.flink.connector.file.sink.FileSinkCommittable): 5
CompactorOperator (org.apache.flink.connector.file.sink.compactor.operator.CompactorOperator): 4
CompactorRequest (org.apache.flink.connector.file.sink.compactor.operator.CompactorRequest): 4
OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState): 4
OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness): 4
Test (org.junit.Test): 4
ArrayList (java.util.ArrayList): 2
CompletableFuture (java.util.concurrent.CompletableFuture): 2
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 2
PendingFileRecoverable (org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter.PendingFileRecoverable): 2
Either (org.apache.flink.types.Either): 2
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 2
List (java.util.List): 1
Map (java.util.Map): 1
ExecutionException (java.util.concurrent.ExecutionException): 1
Internal (org.apache.flink.annotation.Internal): 1
VisibleForTesting (org.apache.flink.annotation.VisibleForTesting): 1