Use of org.apache.flink.runtime.jobgraph.tasks.StatefulTask in the Apache Flink project.
Class Task, method notifyCheckpointComplete.
/**
 * Forwards a "checkpoint complete" notification to the task's invokable.
 *
 * <p>The notification is only forwarded when the task is in state {@code RUNNING}, an
 * invokable is present, and that invokable is a {@link StatefulTask}. In that case a
 * {@link Runnable} wrapping the call is handed to {@code executeAsyncCallRunnable}; otherwise
 * the notification is logged and dropped.
 *
 * <p>If the invokable throws while handling the notification and the task is still
 * {@code RUNNING} at that point, the task is failed via {@code failExternally}. If the task has
 * left {@code RUNNING} in the meantime, the error is ignored.
 *
 * @param checkpointID the id of the checkpoint that has been confirmed
 */
public void notifyCheckpointComplete(final long checkpointID) {
    // snapshot the field into a local; all checks and the cast below use this one reference
    final AbstractInvokable currentInvokable = this.invokable;

    if (executionState != ExecutionState.RUNNING || currentInvokable == null) {
        LOG.debug("Ignoring checkpoint commit notification for non-running task {}.", taskNameWithSubtask);
        return;
    }

    if (!(currentInvokable instanceof StatefulTask)) {
        LOG.error("Task received a checkpoint commit notification, but is not a checkpoint committing task - {}.", taskNameWithSubtask);
        return;
    }

    final StatefulTask checkpointedTask = (StatefulTask) currentInvokable;
    final String callName = "Checkpoint Confirmation for " + taskNameWithSubtask;

    final Runnable confirmation = new Runnable() {
        @Override
        public void run() {
            try {
                checkpointedTask.notifyCheckpointComplete(checkpointID);
            } catch (Throwable t) {
                // a failed confirmation only fails the task while it is still running
                if (getExecutionState() != ExecutionState.RUNNING) {
                    return;
                }
                failExternally(new RuntimeException("Error while confirming checkpoint", t));
            }
        }
    };

    executeAsyncCallRunnable(confirmation, callName);
}
Use of org.apache.flink.runtime.jobgraph.tasks.StatefulTask in the Apache Flink project.
Class BarrierBufferTest, method testMultiChannelAbortCheckpoint.
/**
 * Feeds a three-channel {@code BarrierBuffer} a scripted mix of buffers, checkpoint barriers
 * and cancellation barriers, then checks (a) the order in which elements come back from
 * {@code getNextNonBlocked()} and (b) which checkpoint callbacks reach the registered
 * {@code StatefulTask} mock.
 *
 * <p>Expected outcome per checkpoint id: 1, 3 and 5 are triggered exactly once; 2, 4 and 6 are
 * aborted exactly once with a {@code CheckpointDeclineOnCancellationBarrierException}.
 *
 * <p>NOTE(review): the helper calls read as {@code createBarrier(checkpointId, channel)},
 * {@code createCancellationBarrier(checkpointId, channel)} and {@code createBuffer(channel)};
 * the helpers are defined outside this snippet - confirm there.
 */
@Test
public void testMultiChannelAbortCheckpoint() throws Exception {
// Scripted input for the mock gate. Each /* n */ marker is the array index of the first
// element on its line; the check(sequence[n], ...) calls below refer to these indices.
BufferOrEvent[] sequence = {
/* 0 */ createBuffer(0), createBuffer(2), createBuffer(0),
/* 3 */ createBarrier(1, 1), createBarrier(1, 2),
/* 5 */ createBuffer(2), createBuffer(1),
/* 7 */ createBarrier(1, 0),
/* 8 */ createBuffer(0), createBuffer(2),
/* 10 */ createBarrier(2, 0), createBarrier(2, 2),
/* 12 */ createBuffer(0), createBuffer(2),
/* 14 */ createCancellationBarrier(2, 1),
/* 15 */ createBuffer(2), createBuffer(1),
/* 17 */ createBarrier(3, 1), createBarrier(3, 2), createBarrier(3, 0),
/* 20 */ createBuffer(0), createBuffer(1),
/* 22 */ createCancellationBarrier(4, 1), createBarrier(4, 2),
/* 24 */ createBuffer(0),
/* 25 */ createBarrier(4, 0),
/* 26 */ createBuffer(0), createBuffer(1), createBuffer(2),
/* 29 */ createBarrier(5, 2), createBarrier(5, 1), createBarrier(5, 0),
/* 32 */ createBuffer(0), createBuffer(1),
/* 34 */ createCancellationBarrier(6, 1), createCancellationBarrier(6, 2),
/* 36 */ createBarrier(6, 0),
/* 37 */ createBuffer(0) };
// gate with 3 channels that replays the scripted sequence
MockInputGate gate = new MockInputGate(PAGE_SIZE, 3, Arrays.asList(sequence));
BarrierBuffer buffer = new BarrierBuffer(gate, IO_MANAGER);
// mock handler; all trigger/abort expectations below are verified against it
StatefulTask toNotify = mock(StatefulTask.class);
buffer.registerCheckpointEventHandler(toNotify);
// timestamp taken right before a call that is expected to finish an alignment
long startTs;
// successful first checkpoint, with some aligned buffers
check(sequence[0], buffer.getNextNonBlocked());
check(sequence[1], buffer.getNextNonBlocked());
check(sequence[2], buffer.getNextNonBlocked());
startTs = System.nanoTime();
// the barriers (indices 3, 4, 7) are never returned; the next element handed out is index 5,
// and by then checkpoint 1 must have been triggered
check(sequence[5], buffer.getNextNonBlocked());
verify(toNotify, times(1)).triggerCheckpointOnBarrier(argThat(new CheckpointMatcher(1L)), any(CheckpointOptions.class), any(CheckpointMetrics.class));
// helper defined elsewhere; presumably checks the reported alignment duration against the
// time elapsed since startTs - confirm
validateAlignmentTime(startTs, buffer);
check(sequence[6], buffer.getNextNonBlocked());
check(sequence[8], buffer.getNextNonBlocked());
check(sequence[9], buffer.getNextNonBlocked());
// canceled checkpoint on last barrier
// (checkpoint 2: barriers at indices 10 and 11, cancellation barrier at index 14)
startTs = System.nanoTime();
check(sequence[12], buffer.getNextNonBlocked());
verify(toNotify, times(1)).abortCheckpointOnBarrier(eq(2L), any(CheckpointDeclineOnCancellationBarrierException.class));
validateAlignmentTime(startTs, buffer);
check(sequence[13], buffer.getNextNonBlocked());
// one more successful checkpoint
check(sequence[15], buffer.getNextNonBlocked());
check(sequence[16], buffer.getNextNonBlocked());
startTs = System.nanoTime();
// all three barriers of checkpoint 3 (indices 17-19) arrive back to back
check(sequence[20], buffer.getNextNonBlocked());
verify(toNotify, times(1)).triggerCheckpointOnBarrier(argThat(new CheckpointMatcher(3L)), any(CheckpointOptions.class), any(CheckpointMetrics.class));
validateAlignmentTime(startTs, buffer);
check(sequence[21], buffer.getNextNonBlocked());
// this checkpoint gets immediately canceled
// (the first element seen for checkpoint 4 is the cancellation barrier at index 22)
check(sequence[24], buffer.getNextNonBlocked());
verify(toNotify, times(1)).abortCheckpointOnBarrier(eq(4L), any(CheckpointDeclineOnCancellationBarrierException.class));
// an aborted checkpoint is expected to report an alignment duration of zero
assertEquals(0L, buffer.getAlignmentDurationNanos());
// some buffers
check(sequence[26], buffer.getNextNonBlocked());
check(sequence[27], buffer.getNextNonBlocked());
check(sequence[28], buffer.getNextNonBlocked());
// a simple successful checkpoint
startTs = System.nanoTime();
check(sequence[32], buffer.getNextNonBlocked());
verify(toNotify, times(1)).triggerCheckpointOnBarrier(argThat(new CheckpointMatcher(5L)), any(CheckpointOptions.class), any(CheckpointMetrics.class));
validateAlignmentTime(startTs, buffer);
check(sequence[33], buffer.getNextNonBlocked());
// checkpoint 6 is cancelled on channels 1 and 2 (indices 34, 35) before its only regular
// barrier (index 36) arrives; the test expects a single abort and the buffer at index 37 next
check(sequence[37], buffer.getNextNonBlocked());
verify(toNotify, times(1)).abortCheckpointOnBarrier(eq(6L), any(CheckpointDeclineOnCancellationBarrierException.class));
assertEquals(0L, buffer.getAlignmentDurationNanos());
// all done
// once the scripted input is used up, further calls are expected to keep returning null
assertNull(buffer.getNextNonBlocked());
assertNull(buffer.getNextNonBlocked());
buffer.cleanup();
checkNoTempFilesRemain();
}
Use of org.apache.flink.runtime.jobgraph.tasks.StatefulTask in the Apache Flink project.
Class BarrierBufferTest, method testSingleChannelAbortCheckpoint.
/**
 * Runs a single-channel {@code BarrierBuffer} over a scripted sequence of buffers, checkpoint
 * barriers and cancellation barriers and verifies the callbacks delivered to the registered
 * {@code StatefulTask} mock: checkpoints 1, 2 and 5 are triggered once each, checkpoints 4 and
 * 6 are aborted once each, and the reported alignment duration is zero at every probe.
 */
@Test
public void testSingleChannelAbortCheckpoint() throws Exception {
    final BufferOrEvent[] events = {
        /* 0 */ createBuffer(0),
        /* 1 */ createBarrier(1, 0),
        /* 2 */ createBuffer(0),
        /* 3 */ createBarrier(2, 0),
        /* 4 */ createCancellationBarrier(4, 0),
        /* 5 */ createBarrier(5, 0),
        /* 6 */ createBuffer(0),
        /* 7 */ createCancellationBarrier(6, 0),
        /* 8 */ createBuffer(0)
    };

    final MockInputGate inputGate = new MockInputGate(PAGE_SIZE, 1, Arrays.asList(events));
    final BarrierBuffer barrierBuffer = new BarrierBuffer(inputGate, IO_MANAGER);

    final StatefulTask checkpointHandler = mock(StatefulTask.class);
    barrierBuffer.registerCheckpointEventHandler(checkpointHandler);

    // first two buffers; the barrier of checkpoint 1 sits between them
    check(events[0], barrierBuffer.getNextNonBlocked());
    check(events[2], barrierBuffer.getNextNonBlocked());
    verify(checkpointHandler, times(1)).triggerCheckpointOnBarrier(
        argThat(new CheckpointMatcher(1L)), any(CheckpointOptions.class), any(CheckpointMetrics.class));
    assertEquals(0L, barrierBuffer.getAlignmentDurationNanos());

    // the next buffer comes after barrier 2, cancellation barrier 4 and barrier 5
    check(events[6], barrierBuffer.getNextNonBlocked());
    assertEquals(5L, barrierBuffer.getCurrentCheckpointId());
    verify(checkpointHandler, times(1)).triggerCheckpointOnBarrier(
        argThat(new CheckpointMatcher(2L)), any(CheckpointOptions.class), any(CheckpointMetrics.class));
    verify(checkpointHandler, times(1)).abortCheckpointOnBarrier(
        eq(4L), any(CheckpointDeclineOnCancellationBarrierException.class));
    verify(checkpointHandler, times(1)).triggerCheckpointOnBarrier(
        argThat(new CheckpointMatcher(5L)), any(CheckpointOptions.class), any(CheckpointMetrics.class));
    assertEquals(0L, barrierBuffer.getAlignmentDurationNanos());

    // the last buffer comes after cancellation barrier 6
    check(events[8], barrierBuffer.getNextNonBlocked());
    assertEquals(6L, barrierBuffer.getCurrentCheckpointId());
    verify(checkpointHandler, times(1)).abortCheckpointOnBarrier(
        eq(6L), any(CheckpointDeclineOnCancellationBarrierException.class));
    assertEquals(0L, barrierBuffer.getAlignmentDurationNanos());

    barrierBuffer.cleanup();
    checkNoTempFilesRemain();
}
Use of org.apache.flink.runtime.jobgraph.tasks.StatefulTask in the Apache Flink project.
Class BarrierTrackerTest, method testInterleavedCancellationBarriers.
/**
 * Verifies that when the cancellation barriers of two consecutive checkpoints arrive
 * interleaved across three channels, the handler sees exactly one abort per checkpoint.
 */
@Test
public void testInterleavedCancellationBarriers() throws Exception {
    final BufferOrEvent[] events = {
        createBarrier(1L, 0),
        createCancellationBarrier(2L, 0),
        createCancellationBarrier(1L, 1),
        createCancellationBarrier(2L, 1),
        createCancellationBarrier(1L, 2),
        createCancellationBarrier(2L, 2),
        createBuffer(0)
    };

    final MockInputGate inputGate = new MockInputGate(PAGE_SIZE, 3, Arrays.asList(events));
    final BarrierTracker barrierTracker = new BarrierTracker(inputGate);

    final StatefulTask checkpointHandler = mock(StatefulTask.class);
    barrierTracker.registerCheckpointEventHandler(checkpointHandler);

    for (BufferOrEvent expected : events) {
        if (!expected.isBuffer()) {
            // exact class comparison: only these two event types are skipped, anything else
            // is expected to be returned by the tracker
            final Class<?> eventType = expected.getEvent().getClass();
            if (eventType == CheckpointBarrier.class || eventType == CancelCheckpointMarker.class) {
                continue;
            }
        }
        assertEquals(expected, barrierTracker.getNextNonBlocked());
    }

    verify(checkpointHandler, times(1)).abortCheckpointOnBarrier(eq(1L), any(Throwable.class));
    verify(checkpointHandler, times(1)).abortCheckpointOnBarrier(eq(2L), any(Throwable.class));
}
Use of org.apache.flink.runtime.jobgraph.tasks.StatefulTask in the Apache Flink project.
Class BarrierBufferAlignmentLimitTest, method testAlignmentLimitWithQueuedAlignments.
/**
 * This tests the following case:
 * - an alignment starts
 * - barriers from a second checkpoint queue before the first completes
 * - together they are larger than the threshold
 * - after the first checkpoint (with second checkpoint data queued) aborts, the second completes
 *
 * <p>Concretely: checkpoint 3 is expected to be aborted once with an
 * {@code AlignmentLimitExceededException}, checkpoint 4 is expected to be triggered once, and
 * checkpoint 3 must never be triggered.
 *
 * <p>NOTE(review): the helper calls read as {@code createBuffer(channel, sizeInBytes)} and
 * {@code createBarrier(checkpointId, channel)}; the helpers are defined outside this snippet -
 * confirm there.
 */
@Test
public void testAlignmentLimitWithQueuedAlignments() throws Exception {
// Scripted input for the mock gate. Each /* n */ marker is the array index of the first
// element on its line; the check(sequence[n], ...) calls below refer to these indices.
BufferOrEvent[] sequence = {
/* 0 */ createBuffer(1, 100), createBuffer(2, 70),
/* 2 */ createBarrier(3, 2),
/* 3 */ createBuffer(1, 100), createBuffer(2, 100),
/* 5 */ createBarrier(3, 0),
/* 6 */ createBuffer(0, 100), createBuffer(1, 100),
/* 8 */ createBarrier(4, 0),
/* 9 */ createBuffer(0, 100), createBuffer(0, 120), createBuffer(1, 100),
/* 12 */ createBuffer(2, 100),
/* 13 */ createBarrier(3, 1),
/* 14 */ createBarrier(4, 1),
/* 15 */ createBuffer(0, 100), createBuffer(1, 100), createBuffer(2, 100),
/* 18 */ createBarrier(4, 2),
/* 19 */ createBuffer(0, 100), createBuffer(1, 100), createBuffer(2, 100) };
// the barrier buffer has a limit that only 500 bytes may be spilled in alignment
// (the limit is the third constructor argument below)
MockInputGate gate = new MockInputGate(PAGE_SIZE, 3, Arrays.asList(sequence));
BarrierBuffer buffer = new BarrierBuffer(gate, IO_MANAGER, 500);
// mock handler; all trigger/abort expectations below are verified against it
StatefulTask toNotify = mock(StatefulTask.class);
buffer.registerCheckpointEventHandler(toNotify);
// validating the sequence of buffers
long startTs;
check(sequence[0], buffer.getNextNonBlocked());
check(sequence[1], buffer.getNextNonBlocked());
// start of checkpoint
// (checkpoint 3: first barrier at index 2 on channel 2, then index 5 on channel 0)
startTs = System.nanoTime();
// while the alignment is running only the channel-1 buffers (indices 3, 7, 11) are handed out
check(sequence[3], buffer.getNextNonBlocked());
check(sequence[7], buffer.getNextNonBlocked());
// next checkpoint also in progress
check(sequence[11], buffer.getNextNonBlocked());
// checkpoint alignment aborted due to too much data
// held-back buffers so far: indices 4, 6, 9, 10, 12 = 100 + 100 + 100 + 120 + 100 = 520 bytes,
// which crosses the 500-byte limit at index 12; index 4 is the first one handed out afterwards
check(sequence[4], buffer.getNextNonBlocked());
// helper defined elsewhere; presumably checks the reported alignment duration against the
// time elapsed since startTs - confirm
validateAlignmentTime(startTs, buffer);
verify(toNotify, times(1)).abortCheckpointOnBarrier(eq(3L), any(AlignmentLimitExceededException.class));
// replay buffered data - in the middle, the alignment for checkpoint 4 starts
check(sequence[6], buffer.getNextNonBlocked());
startTs = System.nanoTime();
check(sequence[12], buffer.getNextNonBlocked());
// only checkpoint 4 is pending now - the last checkpoint 3 barrier will not trigger success
check(sequence[17], buffer.getNextNonBlocked());
// checkpoint 4 completed - check and validate buffered replay
check(sequence[9], buffer.getNextNonBlocked());
validateAlignmentTime(startTs, buffer);
verify(toNotify, times(1)).triggerCheckpointOnBarrier(argThat(new CheckpointMatcher(4L)), any(CheckpointOptions.class), any(CheckpointMetrics.class));
check(sequence[10], buffer.getNextNonBlocked());
check(sequence[15], buffer.getNextNonBlocked());
check(sequence[16], buffer.getNextNonBlocked());
// trailing data
check(sequence[19], buffer.getNextNonBlocked());
check(sequence[20], buffer.getNextNonBlocked());
check(sequence[21], buffer.getNextNonBlocked());
// only checkpoint 4 was successfully completed, not checkpoint 3
verify(toNotify, times(0)).triggerCheckpointOnBarrier(argThat(new CheckpointMatcher(3L)), any(CheckpointOptions.class), any(CheckpointMetrics.class));
// once the scripted input is used up, further calls are expected to keep returning null
assertNull(buffer.getNextNonBlocked());
assertNull(buffer.getNextNonBlocked());
buffer.cleanup();
checkNoTempFilesRemain();
}
Aggregations