Use of org.apache.flink.runtime.checkpoint.channel.RecordingChannelStateWriter in project flink by apache.
The class CheckpointedInputGateTest, method testPersisting.
/**
 * This tests a scenario where an older triggered checkpoint was cancelled and a newer
 * checkpoint was triggered very quickly after the cancellation. In that case a task may first
 * receive the more recent checkpoint barrier and only later the obsolete one. This can happen
 * for many reasons (for example Source tasks not running yet, or simply a race condition with
 * the notifyCheckpointAborted RPCs), and the Task should be able to handle it properly. In
 * FLINK-21104 the problem was that such an obsolete checkpoint barrier caused a checkState to
 * fail.
 */
public void testPersisting(boolean drainGate) throws Exception {
    int numberOfChannels = 3;
    NetworkBufferPool bufferPool = new NetworkBufferPool(numberOfChannels * 3, 1024);
    try {
        long checkpointId = 2L;
        long obsoleteCheckpointId = 1L;
        ValidatingCheckpointHandler validatingHandler =
                new ValidatingCheckpointHandler(checkpointId);
        RecordingChannelStateWriter stateWriter = new RecordingChannelStateWriter();
        CheckpointedInputGate gate =
                setupInputGateWithAlternatingController(
                        numberOfChannels, bufferPool, validatingHandler, stateWriter);

        // enqueue first checkpointId before obsoleteCheckpointId, so that we never trigger
        // and also never cancel the obsoleteCheckpointId
        enqueue(gate, 0, buildSomeBuffer());
        enqueue(gate, 0, barrier(checkpointId));
        enqueue(gate, 0, buildSomeBuffer());
        enqueue(gate, 1, buildSomeBuffer());
        enqueue(gate, 1, barrier(obsoleteCheckpointId));
        enqueue(gate, 1, buildSomeBuffer());
        enqueue(gate, 2, buildSomeBuffer());

        assertEquals(0, validatingHandler.getTriggeredCheckpointCounter());
        // trigger checkpoint
        gate.pollNext();
        assertEquals(1, validatingHandler.getTriggeredCheckpointCounter());
        assertAddedInputSize(stateWriter, 0, 1);
        assertAddedInputSize(stateWriter, 1, 2);
        assertAddedInputSize(stateWriter, 2, 1);

        enqueue(gate, 0, buildSomeBuffer());
        enqueue(gate, 1, buildSomeBuffer());
        enqueue(gate, 2, buildSomeBuffer());
        while (drainGate && gate.pollNext().isPresent()) {
        }
        assertAddedInputSize(stateWriter, 0, 1);
        assertAddedInputSize(stateWriter, 1, 3);
        assertAddedInputSize(stateWriter, 2, 2);

        enqueue(gate, 1, barrier(checkpointId));
        enqueue(gate, 1, buildSomeBuffer());
        // Another obsoleted barrier that should be ignored
        enqueue(gate, 2, barrier(obsoleteCheckpointId));
        enqueue(gate, 2, buildSomeBuffer());
        while (drainGate && gate.pollNext().isPresent()) {
        }
        assertAddedInputSize(stateWriter, 0, 1);
        assertAddedInputSize(stateWriter, 1, 3);
        assertAddedInputSize(stateWriter, 2, 3);

        enqueue(gate, 2, barrier(checkpointId));
        enqueue(gate, 2, buildSomeBuffer());
        while (drainGate && gate.pollNext().isPresent()) {
        }
        assertAddedInputSize(stateWriter, 0, 1);
        assertAddedInputSize(stateWriter, 1, 3);
        assertAddedInputSize(stateWriter, 2, 3);
    } finally {
        bufferPool.destroy();
    }
}
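The assertAddedInputSize helper is not included in this snippet. Based on how RecordingChannelStateWriter is queried in the other examples on this page (getAddedInput() keyed by InputChannelInfo), it presumably amounts to the following sketch; the gate index of 0 and the exact signature are assumptions, not the actual Flink helper.

// Hedged sketch of the assertAddedInputSize helper used above (not shown in the snippet).
// Assumption: the writer's getAddedInput() map is keyed by InputChannelInfo with gate index 0,
// and the assertion simply counts the buffers recorded as in-flight for that channel.
private static void assertAddedInputSize(
        RecordingChannelStateWriter stateWriter, int channelIndex, int expectedBuffers) {
    assertEquals(
            expectedBuffers,
            stateWriter.getAddedInput().get(new InputChannelInfo(0, channelIndex)).size());
}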
Use of org.apache.flink.runtime.checkpoint.channel.RecordingChannelStateWriter in project flink-mirror by flink-ci.
The class LocalInputChannelTest, method testCheckpointingInflightData.
@Test
public void testCheckpointingInflightData() throws Exception {
    SingleInputGate inputGate = new SingleInputGateBuilder().build();
    PipelinedResultPartition parent =
            (PipelinedResultPartition)
                    PartitionTestUtils.createPartition(
                            ResultPartitionType.PIPELINED, NoOpFileChannelManager.INSTANCE);
    ResultSubpartition subpartition = parent.getAllPartitions()[0];
    ResultSubpartitionView subpartitionView = subpartition.createReadView(() -> {});
    TestingResultPartitionManager partitionManager =
            new TestingResultPartitionManager(subpartitionView);
    final RecordingChannelStateWriter stateWriter = new RecordingChannelStateWriter();
    LocalInputChannel channel =
            createLocalInputChannel(
                    inputGate, partitionManager, 0, 0, b -> b.setStateWriter(stateWriter));
    inputGate.setInputChannels(channel);
    channel.requestSubpartition();

    final CheckpointStorageLocationReference location = getDefault();
    CheckpointOptions options = CheckpointOptions.unaligned(CheckpointType.CHECKPOINT, location);
    stateWriter.start(0, options);

    final CheckpointBarrier barrier = new CheckpointBarrier(0, 123L, options);
    channel.checkpointStarted(barrier);

    // add 1 buffer before barrier and 1 buffer afterwards. Only the first buffer should be
    // written.
    subpartition.add(createFilledFinishedBufferConsumer(1));
    assertTrue(channel.getNextBuffer().isPresent());
    subpartition.add(EventSerializer.toBufferConsumer(barrier, true));
    assertTrue(channel.getNextBuffer().isPresent());
    subpartition.add(createFilledFinishedBufferConsumer(2));
    assertTrue(channel.getNextBuffer().isPresent());

    assertArrayEquals(
            stateWriter.getAddedInput().get(channel.getChannelInfo()).stream()
                    .mapToInt(Buffer::getSize)
                    .toArray(),
            new int[] { 1 });
}
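The createFilledFinishedBufferConsumer(...) helper comes from Flink's buffer test utilities and is not shown on this page. As a rough, hedged equivalent, it produces a finished BufferConsumer carrying the requested number of bytes; the segment allocation and recycler below are assumptions, not the actual helper.

// Hedged sketch of a createFilledFinishedBufferConsumer(...) equivalent. Assumptions: an
// unpooled segment of exactly dataSize bytes and the freeing recycler.
private static BufferConsumer createFilledFinishedBufferConsumer(int dataSize) {
    BufferBuilder builder =
            new BufferBuilder(
                    MemorySegmentFactory.allocateUnpooledSegment(dataSize),
                    FreeingBufferRecycler.INSTANCE);
    BufferConsumer consumer = builder.createBufferConsumer();
    // write dataSize bytes so the resulting Buffer reports exactly that size
    builder.appendAndCommit(java.nio.ByteBuffer.allocate(dataSize));
    builder.finish();
    return consumer;
}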
Use of org.apache.flink.runtime.checkpoint.channel.RecordingChannelStateWriter in project flink by splunk.
The class ChannelStatePersisterTest, method testNewBarrierNotOverwrittenByStopPersisting.
@Test
public void testNewBarrierNotOverwrittenByStopPersisting() throws Exception {
    RecordingChannelStateWriter channelStateWriter = new RecordingChannelStateWriter();
    InputChannelInfo channelInfo = new InputChannelInfo(0, 0);
    ChannelStatePersister persister = new ChannelStatePersister(channelStateWriter, channelInfo);

    long checkpointId = 1L;
    channelStateWriter.start(
            checkpointId, CheckpointOptions.unaligned(CheckpointType.CHECKPOINT, getDefault()));

    persister.checkForBarrier(barrier(checkpointId));
    persister.startPersisting(checkpointId, Arrays.asList(buildSomeBuffer()));
    assertEquals(1, channelStateWriter.getAddedInput().get(channelInfo).size());

    persister.maybePersist(buildSomeBuffer());
    assertEquals(1, channelStateWriter.getAddedInput().get(channelInfo).size());

    // meanwhile, checkpoint coordinator timed out the 1st checkpoint and started the 2nd
    // now task thread is picking up the barrier and aborts the 1st:
    persister.checkForBarrier(barrier(checkpointId + 1));
    persister.maybePersist(buildSomeBuffer());
    persister.stopPersisting(checkpointId);
    persister.maybePersist(buildSomeBuffer());

    assertEquals(1, channelStateWriter.getAddedInput().get(channelInfo).size());
    assertTrue(persister.hasBarrierReceived());
}
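The barrier(long) helper passed to checkForBarrier above is also omitted by the aggregation. Judging from the unaligned options used in this test, it presumably serializes a CheckpointBarrier into a priority event Buffer, roughly as sketched below; the timestamp value and the exact serialization call are assumptions.

// Hedged sketch of the barrier(...) helper: wraps a CheckpointBarrier with unaligned
// checkpoint options into a priority event Buffer. The timestamp is an assumption.
private static Buffer barrier(long checkpointId) throws IOException {
    CheckpointBarrier barrier =
            new CheckpointBarrier(
                    checkpointId,
                    System.currentTimeMillis(),
                    CheckpointOptions.unaligned(CheckpointType.CHECKPOINT, getDefault()));
    return EventSerializer.toBuffer(barrier, /* hasPriority */ true);
}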
Use of org.apache.flink.runtime.checkpoint.channel.RecordingChannelStateWriter in project flink by splunk.
The class PipelinedSubpartitionWithReadViewTest, method testBarrierOvertaking.
@Test
public void testBarrierOvertaking() throws Exception {
    final RecordingChannelStateWriter channelStateWriter = new RecordingChannelStateWriter();
    subpartition.setChannelStateWriter(channelStateWriter);

    subpartition.add(createFilledFinishedBufferConsumer(1));
    assertEquals(0, availablityListener.getNumNotifications());
    assertEquals(0, availablityListener.getNumPriorityEvents());

    subpartition.add(createFilledFinishedBufferConsumer(2));
    assertEquals(1, availablityListener.getNumNotifications());
    assertEquals(0, availablityListener.getNumPriorityEvents());

    BufferConsumer eventBuffer = EventSerializer.toBufferConsumer(EndOfSuperstepEvent.INSTANCE, false);
    subpartition.add(eventBuffer);
    assertEquals(1, availablityListener.getNumNotifications());
    assertEquals(0, availablityListener.getNumPriorityEvents());

    subpartition.add(createFilledFinishedBufferConsumer(4));
    assertEquals(1, availablityListener.getNumNotifications());
    assertEquals(0, availablityListener.getNumPriorityEvents());

    CheckpointOptions options =
            CheckpointOptions.unaligned(
                    CheckpointType.CHECKPOINT,
                    new CheckpointStorageLocationReference(new byte[] { 0, 1, 2 }));
    channelStateWriter.start(0, options);
    BufferConsumer barrierBuffer =
            EventSerializer.toBufferConsumer(new CheckpointBarrier(0, 0, options), true);
    subpartition.add(barrierBuffer);
    assertEquals(1, availablityListener.getNumNotifications());
    assertEquals(1, availablityListener.getNumPriorityEvents());

    final List<Buffer> inflight =
            channelStateWriter.getAddedOutput().get(subpartition.getSubpartitionInfo());
    assertEquals(
            Arrays.asList(1, 2, 4),
            inflight.stream().map(Buffer::getSize).collect(Collectors.toList()));
    inflight.forEach(Buffer::recycleBuffer);

    assertNextEvent(readView, barrierBuffer.getWrittenBytes(), CheckpointBarrier.class, true, 2, false, true);
    assertNextBuffer(readView, 1, true, 1, false, true);
    assertNextBuffer(readView, 2, true, 0, true, true);
    assertNextEvent(readView, eventBuffer.getWrittenBytes(), EndOfSuperstepEvent.class, false, 0, false, true);
    assertNextBuffer(readView, 4, false, 0, false, true);
    assertNoNextBuffer(readView);
}
Use of org.apache.flink.runtime.checkpoint.channel.RecordingChannelStateWriter in project flink by splunk.
The class CheckpointedInputGateTest, method setupInputGateWithAlternatingController.
private CheckpointedInputGate setupInputGateWithAlternatingController(
        int numberOfChannels,
        NetworkBufferPool networkBufferPool,
        AbstractInvokable abstractInvokable,
        RecordingChannelStateWriter stateWriter) throws Exception {
    ConnectionManager connectionManager = new TestingConnectionManager();
    SingleInputGate singleInputGate =
            new SingleInputGateBuilder()
                    .setBufferPoolFactory(networkBufferPool.createBufferPool(numberOfChannels, Integer.MAX_VALUE))
                    .setSegmentProvider(networkBufferPool)
                    .setChannelFactory((builder, gate) ->
                            builder.setConnectionManager(connectionManager).buildRemoteChannel(gate))
                    .setNumberOfChannels(numberOfChannels)
                    .setChannelStateWriter(stateWriter)
                    .build();
    singleInputGate.setup();
    MailboxExecutorImpl mailboxExecutor =
            new MailboxExecutorImpl(new TaskMailboxImpl(), 0, StreamTaskActionExecutor.IMMEDIATE);
    SingleCheckpointBarrierHandler barrierHandler =
            TestBarrierHandlerFactory.forTarget(abstractInvokable).create(singleInputGate, stateWriter);
    CheckpointedInputGate checkpointedInputGate =
            new CheckpointedInputGate(
                    singleInputGate, barrierHandler, mailboxExecutor,
                    UpstreamRecoveryTracker.forInputGate(singleInputGate));
    for (int i = 0; i < numberOfChannels; i++) {
        ((RemoteInputChannel) checkpointedInputGate.getChannel(i)).requestSubpartition();
    }
    return checkpointedInputGate;
}
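The companion enqueue(...) helper used by testPersisting above is likewise omitted. Since this setup method builds RemoteInputChannels, it presumably feeds buffers in via onBuffer with a running per-channel sequence number. A sketch under that assumption follows; the three-argument onBuffer variant and the sequence-number bookkeeping are assumptions.

// Hedged sketch of the enqueue(...) helper from testPersisting above. Assumptions: the
// RemoteInputChannel#onBuffer(buffer, sequenceNumber, backlog) variant and simple
// per-channel sequence-number bookkeeping sized for the three channels used in the test.
private final int[] sequenceNumbers = new int[3];

private void enqueue(CheckpointedInputGate gate, int channelIndex, Buffer buffer)
        throws IOException {
    ((RemoteInputChannel) gate.getChannel(channelIndex))
            .onBuffer(buffer, sequenceNumbers[channelIndex]++, 0);
}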