Search in sources :

Example 26 with RecordingChannelStateWriter

use of org.apache.flink.runtime.checkpoint.channel.RecordingChannelStateWriter in project flink by splunk.

In class CheckpointedInputGateTest, method testPersisting.

/**
 * Exercises a gate that receives the barrier of a newer checkpoint before the barrier of an
 * older, already obsolete one. Such an ordering can occur when the older checkpoint was
 * cancelled and a newer one was triggered right afterwards (for example because Source tasks
 * were not running, or because of a race with notifyCheckpointAborted RPCs), and the Task has
 * to cope with it. In FLINK-21104 the late, obsolete barrier made a checkState fail.
 *
 * <p>After every batch of enqueued buffers and barriers the expected per-channel values are
 * checked on the recording state writer through {@code assertAddedInputSize}.
 *
 * @param drainGate if {@code true}, the gate is polled until it returns an empty result after
 *     each batch of enqueued elements; if {@code false}, only the single poll that triggers
 *     the checkpoint is performed
 * @throws Exception if setting up, feeding or polling the gate fails
 */
public void testPersisting(boolean drainGate) throws Exception {
    final int channelCount = 3;
    final NetworkBufferPool bufferPool = new NetworkBufferPool(channelCount * 3, 1024);
    try {
        final long currentId = 2L;
        final long obsoleteId = 1L;
        final ValidatingCheckpointHandler handler = new ValidatingCheckpointHandler(currentId);
        final RecordingChannelStateWriter recorder = new RecordingChannelStateWriter();
        final CheckpointedInputGate inputGate =
                setupInputGateWithAlternatingController(channelCount, bufferPool, handler, recorder);

        // The barrier of currentId goes in ahead of the obsoleteId one, so obsoleteId is
        // neither triggered nor cancelled at any point.
        enqueue(inputGate, 0, buildSomeBuffer());
        enqueue(inputGate, 0, barrier(currentId));
        enqueue(inputGate, 0, buildSomeBuffer());
        enqueue(inputGate, 1, buildSomeBuffer());
        enqueue(inputGate, 1, barrier(obsoleteId));
        enqueue(inputGate, 1, buildSomeBuffer());
        enqueue(inputGate, 2, buildSomeBuffer());
        assertEquals(0, handler.getTriggeredCheckpointCounter());

        // A single poll is what triggers the checkpoint.
        inputGate.pollNext();
        assertEquals(1, handler.getTriggeredCheckpointCounter());
        assertAddedInputSize(recorder, 0, 1);
        assertAddedInputSize(recorder, 1, 2);
        assertAddedInputSize(recorder, 2, 1);

        enqueue(inputGate, 0, buildSomeBuffer());
        enqueue(inputGate, 1, buildSomeBuffer());
        enqueue(inputGate, 2, buildSomeBuffer());
        if (drainGate) {
            while (inputGate.pollNext().isPresent()) {
                // keep polling until the gate returns an empty result
            }
        }
        assertAddedInputSize(recorder, 0, 1);
        assertAddedInputSize(recorder, 1, 3);
        assertAddedInputSize(recorder, 2, 2);

        enqueue(inputGate, 1, barrier(currentId));
        enqueue(inputGate, 1, buildSomeBuffer());
        // One more obsolete barrier; it is expected to be ignored.
        enqueue(inputGate, 2, barrier(obsoleteId));
        enqueue(inputGate, 2, buildSomeBuffer());
        if (drainGate) {
            while (inputGate.pollNext().isPresent()) {
                // keep polling until the gate returns an empty result
            }
        }
        assertAddedInputSize(recorder, 0, 1);
        assertAddedInputSize(recorder, 1, 3);
        assertAddedInputSize(recorder, 2, 3);

        enqueue(inputGate, 2, barrier(currentId));
        enqueue(inputGate, 2, buildSomeBuffer());
        if (drainGate) {
            while (inputGate.pollNext().isPresent()) {
                // keep polling until the gate returns an empty result
            }
        }
        assertAddedInputSize(recorder, 0, 1);
        assertAddedInputSize(recorder, 1, 3);
        assertAddedInputSize(recorder, 2, 3);
    } finally {
        bufferPool.destroy();
    }
}
Also used : RecordingChannelStateWriter(org.apache.flink.runtime.checkpoint.channel.RecordingChannelStateWriter) NetworkBufferPool(org.apache.flink.runtime.io.network.buffer.NetworkBufferPool)

Example 27 with RecordingChannelStateWriter

use of org.apache.flink.runtime.checkpoint.channel.RecordingChannelStateWriter in project flink-mirror by flink-ci.

In class CheckpointedInputGateTest, method setupInputGateWithAlternatingController.

/**
 * Creates a {@link CheckpointedInputGate} on top of a freshly built {@link SingleInputGate}
 * whose channels are all built as remote channels, and requests the subpartition on each of
 * them before returning.
 *
 * @param numberOfChannels number of channels configured on the gate; also passed as the first
 *     argument when creating the gate's buffer pool
 * @param networkBufferPool pool from which the gate's buffer pool is created and which is set
 *     as the gate's segment provider
 * @param abstractInvokable target handed to {@code TestBarrierHandlerFactory.forTarget}
 * @param stateWriter channel state writer set on the gate and passed to the barrier handler
 *     factory
 * @return the checkpointed gate, with {@code requestSubpartition()} already called on every
 *     channel
 * @throws Exception if building the gate or requesting a subpartition fails
 */
private CheckpointedInputGate setupInputGateWithAlternatingController(
        int numberOfChannels,
        NetworkBufferPool networkBufferPool,
        AbstractInvokable abstractInvokable,
        RecordingChannelStateWriter stateWriter)
        throws Exception {
    final ConnectionManager testConnections = new TestingConnectionManager();
    final SingleInputGate inputGate =
            new SingleInputGateBuilder()
                    .setBufferPoolFactory(
                            networkBufferPool.createBufferPool(numberOfChannels, Integer.MAX_VALUE))
                    .setSegmentProvider(networkBufferPool)
                    .setChannelFactory(
                            (channelBuilder, owningGate) ->
                                    channelBuilder
                                            .setConnectionManager(testConnections)
                                            .buildRemoteChannel(owningGate))
                    .setNumberOfChannels(numberOfChannels)
                    .setChannelStateWriter(stateWriter)
                    .build();
    inputGate.setup();

    final MailboxExecutorImpl mailbox =
            new MailboxExecutorImpl(new TaskMailboxImpl(), 0, StreamTaskActionExecutor.IMMEDIATE);
    final SingleCheckpointBarrierHandler handler =
            TestBarrierHandlerFactory.forTarget(abstractInvokable).create(inputGate, stateWriter);
    final CheckpointedInputGate result =
            new CheckpointedInputGate(
                    inputGate, handler, mailbox, UpstreamRecoveryTracker.forInputGate(inputGate));

    // The factory above builds remote channels, hence the cast before requesting the
    // subpartition on each one.
    for (int channel = 0; channel < numberOfChannels; channel++) {
        final RemoteInputChannel remote = (RemoteInputChannel) result.getChannel(channel);
        remote.requestSubpartition();
    }
    return result;
}
Also used : Deadline(org.apache.flink.api.common.time.Deadline) TestingConnectionManager(org.apache.flink.runtime.io.network.TestingConnectionManager) SystemClock(org.apache.flink.util.clock.SystemClock) EndOfPartitionEvent(org.apache.flink.runtime.io.network.api.EndOfPartitionEvent) HashMap(java.util.HashMap) PartitionRequestClient(org.apache.flink.runtime.io.network.PartitionRequestClient) NetworkBufferPool(org.apache.flink.runtime.io.network.buffer.NetworkBufferPool) CheckedThread(org.apache.flink.core.testutils.CheckedThread) Duration(java.time.Duration) TaskMailboxImpl(org.apache.flink.streaming.runtime.tasks.mailbox.TaskMailboxImpl) BufferBuilderTestUtils.buildSomeBuffer(org.apache.flink.runtime.io.network.buffer.BufferBuilderTestUtils.buildSomeBuffer) InputChannelInfo(org.apache.flink.runtime.checkpoint.channel.InputChannelInfo) MockChannelStateWriter(org.apache.flink.runtime.checkpoint.channel.MockChannelStateWriter) SingleInputGate(org.apache.flink.runtime.io.network.partition.consumer.SingleInputGate) Before(org.junit.Before) CheckpointStorageLocationReference(org.apache.flink.runtime.state.CheckpointStorageLocationReference) EndOfChannelStateEvent(org.apache.flink.runtime.io.network.partition.consumer.EndOfChannelStateEvent) CheckpointType(org.apache.flink.runtime.checkpoint.CheckpointType) SingleInputGateBuilder(org.apache.flink.runtime.io.network.partition.consumer.SingleInputGateBuilder) CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException) EventSerializer(org.apache.flink.runtime.io.network.api.serialization.EventSerializer) AbstractEvent(org.apache.flink.runtime.event.AbstractEvent) RemoteInputChannel(org.apache.flink.runtime.io.network.partition.consumer.RemoteInputChannel) ConnectionManager(org.apache.flink.runtime.io.network.ConnectionManager) Closer(org.apache.flink.shaded.guava30.com.google.common.io.Closer) InputChannelBuilder(org.apache.flink.runtime.io.network.partition.consumer.InputChannelBuilder) 
AbstractInvokable(org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable) Assert.assertTrue(org.junit.Assert.assertTrue) ConnectionID(org.apache.flink.runtime.io.network.ConnectionID) Test(org.junit.Test) IOException(java.io.IOException) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) MailboxExecutorImpl(org.apache.flink.streaming.runtime.tasks.mailbox.MailboxExecutorImpl) Buffer(org.apache.flink.runtime.io.network.buffer.Buffer) CountDownLatch(java.util.concurrent.CountDownLatch) RecordingChannelStateWriter(org.apache.flink.runtime.checkpoint.channel.RecordingChannelStateWriter) CheckpointBarrier(org.apache.flink.runtime.io.network.api.CheckpointBarrier) Assert.assertFalse(org.junit.Assert.assertFalse) Optional(java.util.Optional) TestingPartitionRequestClient(org.apache.flink.runtime.io.network.TestingPartitionRequestClient) DummyEnvironment(org.apache.flink.runtime.operators.testutils.DummyEnvironment) BufferOrEvent(org.apache.flink.runtime.io.network.partition.consumer.BufferOrEvent) Assert.assertEquals(org.junit.Assert.assertEquals) StreamTaskActionExecutor(org.apache.flink.streaming.runtime.tasks.StreamTaskActionExecutor) SingleInputGateBuilder(org.apache.flink.runtime.io.network.partition.consumer.SingleInputGateBuilder) MailboxExecutorImpl(org.apache.flink.streaming.runtime.tasks.mailbox.MailboxExecutorImpl) TestingConnectionManager(org.apache.flink.runtime.io.network.TestingConnectionManager) ConnectionManager(org.apache.flink.runtime.io.network.ConnectionManager) TaskMailboxImpl(org.apache.flink.streaming.runtime.tasks.mailbox.TaskMailboxImpl) TestingConnectionManager(org.apache.flink.runtime.io.network.TestingConnectionManager) SingleInputGate(org.apache.flink.runtime.io.network.partition.consumer.SingleInputGate) RemoteInputChannel(org.apache.flink.runtime.io.network.partition.consumer.RemoteInputChannel)

Aggregations

RecordingChannelStateWriter (org.apache.flink.runtime.checkpoint.channel.RecordingChannelStateWriter)27 Test (org.junit.Test)21 CheckpointBarrier (org.apache.flink.runtime.io.network.api.CheckpointBarrier)18 CheckpointOptions (org.apache.flink.runtime.checkpoint.CheckpointOptions)9 InputChannelInfo (org.apache.flink.runtime.checkpoint.channel.InputChannelInfo)9 Buffer (org.apache.flink.runtime.io.network.buffer.Buffer)9 CheckpointStorageLocationReference (org.apache.flink.runtime.state.CheckpointStorageLocationReference)9 BufferBuilderTestUtils.createFilledFinishedBufferConsumer (org.apache.flink.runtime.io.network.buffer.BufferBuilderTestUtils.createFilledFinishedBufferConsumer)6 BufferConsumer (org.apache.flink.runtime.io.network.buffer.BufferConsumer)6 NetworkBufferPool (org.apache.flink.runtime.io.network.buffer.NetworkBufferPool)6 InputChannelTestUtils.createLocalInputChannel (org.apache.flink.runtime.io.network.partition.InputChannelTestUtils.createLocalInputChannel)6 RemoteInputChannel (org.apache.flink.runtime.io.network.partition.consumer.RemoteInputChannel)6 TestingResultPartitionManager (org.apache.flink.runtime.io.network.partition.consumer.SingleInputGateTest.TestingResultPartitionManager)6 TestCheckpointedInputGateBuilder (org.apache.flink.streaming.util.TestCheckpointedInputGateBuilder)4 IOException (java.io.IOException)3 Duration (java.time.Duration)3 HashMap (java.util.HashMap)3 Optional (java.util.Optional)3 CountDownLatch (java.util.concurrent.CountDownLatch)3 Deadline (org.apache.flink.api.common.time.Deadline)3