Search in sources :

Example 1 with StatefulTask

use of org.apache.flink.runtime.jobgraph.tasks.StatefulTask in project flink by apache.

the class Task method notifyCheckpointComplete.

/**
 * Forwards a "checkpoint complete" notification to this task's invokable.
 *
 * <p>The notification is only forwarded when the task's execution state is
 * {@code RUNNING}, an invokable is present, and that invokable is a
 * {@link StatefulTask}. In that case the call to
 * {@link StatefulTask#notifyCheckpointComplete(long)} is wrapped in a
 * {@link Runnable} and handed to {@code executeAsyncCallRunnable}. If the
 * call throws while the task is still {@code RUNNING}, the task is failed via
 * {@code failExternally}; otherwise the throwable is dropped.
 *
 * <p>If the task is not running (or has no invokable) the notification is
 * ignored with a debug log; if the invokable is not a {@link StatefulTask}
 * an error is logged.
 *
 * @param checkpointID the ID of the checkpoint being confirmed
 */
public void notifyCheckpointComplete(final long checkpointID) {
    // read the field once into a local; all checks below use this reference
    final AbstractInvokable currentInvokable = this.invokable;

    // guard: nothing to do unless the task is running and has an invokable
    if (executionState != ExecutionState.RUNNING || currentInvokable == null) {
        LOG.debug("Ignoring checkpoint commit notification for non-running task {}.", taskNameWithSubtask);
        return;
    }

    // guard: only stateful tasks can receive checkpoint confirmations
    if (!(currentInvokable instanceof StatefulTask)) {
        LOG.error("Task received a checkpoint commit notification, but is not a checkpoint committing task - {}.", taskNameWithSubtask);
        return;
    }

    final StatefulTask checkpointedTask = (StatefulTask) currentInvokable;
    final String callName = "Checkpoint Confirmation for " + taskNameWithSubtask;

    final Runnable confirmation = new Runnable() {

        @Override
        public void run() {
            try {
                checkpointedTask.notifyCheckpointComplete(checkpointID);
            } catch (Throwable failure) {
                // only fail the task if it is still running when the confirmation fails
                final boolean stillRunning = getExecutionState() == ExecutionState.RUNNING;
                if (stillRunning) {
                    failExternally(new RuntimeException("Error while confirming checkpoint", failure));
                }
            }
        }
    };

    executeAsyncCallRunnable(confirmation, callName);
}
Also used : StatefulTask(org.apache.flink.runtime.jobgraph.tasks.StatefulTask) AbstractInvokable(org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable)

Example 2 with StatefulTask

use of org.apache.flink.runtime.jobgraph.tasks.StatefulTask in project flink by apache.

the class BarrierBufferTest method testMultiChannelAbortCheckpoint.

/**
 * Runs a sequence of buffers, checkpoint barriers and cancellation barriers
 * (channel indices 0 to 2) through a {@code BarrierBuffer} and verifies, on a
 * mocked {@code StatefulTask} handler, that:
 * <ul>
 *   <li>checkpoints 1, 3 and 5 are each triggered exactly once,</li>
 *   <li>checkpoints 2, 4 and 6 are each aborted exactly once with a
 *       {@code CheckpointDeclineOnCancellationBarrierException},</li>
 *   <li>the reported alignment duration is validated after checkpoints 1, 2, 3 and 5
 *       and is zero after the aborts of checkpoints 4 and 6,</li>
 *   <li>the buffer returns {@code null} once the sequence is exhausted and no
 *       temp files remain after cleanup.</li>
 * </ul>
 *
 * <p>The order of the {@code check}/{@code verify} calls matters: every
 * verification is made right after the element that is expected to follow the
 * corresponding barrier has been pulled from the buffer.
 */
@Test
public void testMultiChannelAbortCheckpoint() throws Exception {
    // each /* n */ marker gives the array index of the element that starts the following line
    BufferOrEvent[] sequence = { /* 0 */
    createBuffer(0), createBuffer(2), createBuffer(0), /* 3 */
    createBarrier(1, 1), createBarrier(1, 2), /* 5 */
    createBuffer(2), createBuffer(1), /* 7 */
    createBarrier(1, 0), /* 8 */
    createBuffer(0), createBuffer(2), /* 10 */
    createBarrier(2, 0), createBarrier(2, 2), /* 12 */
    createBuffer(0), createBuffer(2), /* 14 */
    createCancellationBarrier(2, 1), /* 15 */
    createBuffer(2), createBuffer(1), /* 17 */
    createBarrier(3, 1), createBarrier(3, 2), createBarrier(3, 0), /* 20 */
    createBuffer(0), createBuffer(1), /* 22 */
    createCancellationBarrier(4, 1), createBarrier(4, 2), /* 24 */
    createBuffer(0), /* 25 */
    createBarrier(4, 0), /* 26 */
    createBuffer(0), createBuffer(1), createBuffer(2), /* 29 */
    createBarrier(5, 2), createBarrier(5, 1), createBarrier(5, 0), /* 32 */
    createBuffer(0), createBuffer(1), /* 34 */
    createCancellationBarrier(6, 1), createCancellationBarrier(6, 2), /* 36 */
    createBarrier(6, 0), /* 37 */
    createBuffer(0) };
    MockInputGate gate = new MockInputGate(PAGE_SIZE, 3, Arrays.asList(sequence));
    BarrierBuffer buffer = new BarrierBuffer(gate, IO_MANAGER);
    // mocked handler on which trigger/abort notifications are verified
    StatefulTask toNotify = mock(StatefulTask.class);
    buffer.registerCheckpointEventHandler(toNotify);
    // timestamp taken before an alignment is expected, passed to validateAlignmentTime afterwards
    long startTs;
    // successful first checkpoint, with some aligned buffers
    check(sequence[0], buffer.getNextNonBlocked());
    check(sequence[1], buffer.getNextNonBlocked());
    check(sequence[2], buffer.getNextNonBlocked());
    startTs = System.nanoTime();
    check(sequence[5], buffer.getNextNonBlocked());
    verify(toNotify, times(1)).triggerCheckpointOnBarrier(argThat(new CheckpointMatcher(1L)), any(CheckpointOptions.class), any(CheckpointMetrics.class));
    validateAlignmentTime(startTs, buffer);
    check(sequence[6], buffer.getNextNonBlocked());
    check(sequence[8], buffer.getNextNonBlocked());
    check(sequence[9], buffer.getNextNonBlocked());
    // canceled checkpoint on last barrier
    startTs = System.nanoTime();
    check(sequence[12], buffer.getNextNonBlocked());
    verify(toNotify, times(1)).abortCheckpointOnBarrier(eq(2L), any(CheckpointDeclineOnCancellationBarrierException.class));
    validateAlignmentTime(startTs, buffer);
    check(sequence[13], buffer.getNextNonBlocked());
    // one more successful checkpoint
    check(sequence[15], buffer.getNextNonBlocked());
    check(sequence[16], buffer.getNextNonBlocked());
    startTs = System.nanoTime();
    check(sequence[20], buffer.getNextNonBlocked());
    verify(toNotify, times(1)).triggerCheckpointOnBarrier(argThat(new CheckpointMatcher(3L)), any(CheckpointOptions.class), any(CheckpointMetrics.class));
    validateAlignmentTime(startTs, buffer);
    check(sequence[21], buffer.getNextNonBlocked());
    // this checkpoint gets immediately canceled
    check(sequence[24], buffer.getNextNonBlocked());
    verify(toNotify, times(1)).abortCheckpointOnBarrier(eq(4L), any(CheckpointDeclineOnCancellationBarrierException.class));
    assertEquals(0L, buffer.getAlignmentDurationNanos());
    // some buffers
    check(sequence[26], buffer.getNextNonBlocked());
    check(sequence[27], buffer.getNextNonBlocked());
    check(sequence[28], buffer.getNextNonBlocked());
    // a simple successful checkpoint
    startTs = System.nanoTime();
    check(sequence[32], buffer.getNextNonBlocked());
    verify(toNotify, times(1)).triggerCheckpointOnBarrier(argThat(new CheckpointMatcher(5L)), any(CheckpointOptions.class), any(CheckpointMetrics.class));
    validateAlignmentTime(startTs, buffer);
    check(sequence[33], buffer.getNextNonBlocked());
    // checkpoint 6 receives cancellation barriers (elements 34 and 35) before its regular
    // barrier (element 36); the next element handed out is the final buffer and an abort is expected
    check(sequence[37], buffer.getNextNonBlocked());
    verify(toNotify, times(1)).abortCheckpointOnBarrier(eq(6L), any(CheckpointDeclineOnCancellationBarrierException.class));
    assertEquals(0L, buffer.getAlignmentDurationNanos());
    // all done
    assertNull(buffer.getNextNonBlocked());
    assertNull(buffer.getNextNonBlocked());
    buffer.cleanup();
    checkNoTempFilesRemain();
}
Also used : StatefulTask(org.apache.flink.runtime.jobgraph.tasks.StatefulTask) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) CheckpointDeclineOnCancellationBarrierException(org.apache.flink.runtime.checkpoint.decline.CheckpointDeclineOnCancellationBarrierException) BufferOrEvent(org.apache.flink.runtime.io.network.partition.consumer.BufferOrEvent) Test(org.junit.Test)

Example 3 with StatefulTask

use of org.apache.flink.runtime.jobgraph.tasks.StatefulTask in project flink by apache.

the class BarrierBufferTest method testSingleChannelAbortCheckpoint.

/**
 * Feeds a sequence of buffers, checkpoint barriers and cancellation barriers,
 * all on channel index 0, through a {@code BarrierBuffer} and verifies on a
 * mocked {@code StatefulTask} handler that checkpoints 1, 2 and 5 are
 * triggered once each, that checkpoints 4 and 6 are aborted once each with a
 * {@code CheckpointDeclineOnCancellationBarrierException}, and that the
 * reported alignment duration stays at zero throughout.
 */
@Test
public void testSingleChannelAbortCheckpoint() throws Exception {
    final BufferOrEvent[] events = {
        /* 0 */ createBuffer(0),
        /* 1 */ createBarrier(1, 0),
        /* 2 */ createBuffer(0),
        /* 3 */ createBarrier(2, 0),
        /* 4 */ createCancellationBarrier(4, 0),
        /* 5 */ createBarrier(5, 0),
        /* 6 */ createBuffer(0),
        /* 7 */ createCancellationBarrier(6, 0),
        /* 8 */ createBuffer(0)
    };

    final MockInputGate inputGate = new MockInputGate(PAGE_SIZE, 1, Arrays.asList(events));
    final BarrierBuffer barrierBuffer = new BarrierBuffer(inputGate, IO_MANAGER);

    final StatefulTask checkpointHandler = mock(StatefulTask.class);
    barrierBuffer.registerCheckpointEventHandler(checkpointHandler);

    // first two data buffers; the barrier for checkpoint 1 lies between them
    check(events[0], barrierBuffer.getNextNonBlocked());
    check(events[2], barrierBuffer.getNextNonBlocked());
    verify(checkpointHandler, times(1)).triggerCheckpointOnBarrier(
        argThat(new CheckpointMatcher(1L)), any(CheckpointOptions.class), any(CheckpointMetrics.class));
    assertEquals(0L, barrierBuffer.getAlignmentDurationNanos());

    // next data buffer; barrier 2, cancellation barrier 4 and barrier 5 lie before it
    check(events[6], barrierBuffer.getNextNonBlocked());
    assertEquals(5L, barrierBuffer.getCurrentCheckpointId());
    verify(checkpointHandler, times(1)).triggerCheckpointOnBarrier(
        argThat(new CheckpointMatcher(2L)), any(CheckpointOptions.class), any(CheckpointMetrics.class));
    verify(checkpointHandler, times(1)).abortCheckpointOnBarrier(
        eq(4L), any(CheckpointDeclineOnCancellationBarrierException.class));
    verify(checkpointHandler, times(1)).triggerCheckpointOnBarrier(
        argThat(new CheckpointMatcher(5L)), any(CheckpointOptions.class), any(CheckpointMetrics.class));
    assertEquals(0L, barrierBuffer.getAlignmentDurationNanos());

    // last data buffer; the cancellation barrier for checkpoint 6 lies before it
    check(events[8], barrierBuffer.getNextNonBlocked());
    assertEquals(6L, barrierBuffer.getCurrentCheckpointId());
    verify(checkpointHandler, times(1)).abortCheckpointOnBarrier(
        eq(6L), any(CheckpointDeclineOnCancellationBarrierException.class));
    assertEquals(0L, barrierBuffer.getAlignmentDurationNanos());

    barrierBuffer.cleanup();
    checkNoTempFilesRemain();
}
Also used : StatefulTask(org.apache.flink.runtime.jobgraph.tasks.StatefulTask) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) CheckpointDeclineOnCancellationBarrierException(org.apache.flink.runtime.checkpoint.decline.CheckpointDeclineOnCancellationBarrierException) BufferOrEvent(org.apache.flink.runtime.io.network.partition.consumer.BufferOrEvent) Test(org.junit.Test)

Example 4 with StatefulTask

use of org.apache.flink.runtime.jobgraph.tasks.StatefulTask in project flink by apache.

the class BarrierTrackerTest method testInterleavedCancellationBarriers.

/**
 * Verifies that each checkpoint is aborted exactly once when the cancellation
 * barriers of two consecutive checkpoints (IDs 1 and 2) arrive interleaved
 * across channel indices 0 to 2.
 */
@Test
public void testInterleavedCancellationBarriers() throws Exception {
    final BufferOrEvent[] events = {
        createBarrier(1L, 0),
        createCancellationBarrier(2L, 0),
        createCancellationBarrier(1L, 1),
        createCancellationBarrier(2L, 1),
        createCancellationBarrier(1L, 2),
        createCancellationBarrier(2L, 2),
        createBuffer(0)
    };

    final MockInputGate inputGate = new MockInputGate(PAGE_SIZE, 3, Arrays.asList(events));
    final BarrierTracker barrierTracker = new BarrierTracker(inputGate);

    final StatefulTask checkpointHandler = mock(StatefulTask.class);
    barrierTracker.registerCheckpointEventHandler(checkpointHandler);

    for (BufferOrEvent expected : events) {
        if (!expected.isBuffer()) {
            final Class<?> eventType = expected.getEvent().getClass();
            if (eventType == CheckpointBarrier.class || eventType == CancelCheckpointMarker.class) {
                // the test does not expect barriers or cancellation markers
                // to be returned by getNextNonBlocked()
                continue;
            }
        }
        assertEquals(expected, barrierTracker.getNextNonBlocked());
    }

    // one abort per checkpoint, regardless of how many cancellation barriers arrived
    verify(checkpointHandler, times(1)).abortCheckpointOnBarrier(eq(1L), any(Throwable.class));
    verify(checkpointHandler, times(1)).abortCheckpointOnBarrier(eq(2L), any(Throwable.class));
}
Also used : CheckpointBarrier(org.apache.flink.runtime.io.network.api.CheckpointBarrier) StatefulTask(org.apache.flink.runtime.jobgraph.tasks.StatefulTask) CancelCheckpointMarker(org.apache.flink.runtime.io.network.api.CancelCheckpointMarker) BufferOrEvent(org.apache.flink.runtime.io.network.partition.consumer.BufferOrEvent) Test(org.junit.Test)

Example 5 with StatefulTask

use of org.apache.flink.runtime.jobgraph.tasks.StatefulTask in project flink by apache.

the class BarrierBufferAlignmentLimitTest method testAlignmentLimitWithQueuedAlignments.

/**
 * This tests the following case:
 * <ul>
 *   <li>an alignment starts (checkpoint 3)</li>
 *   <li>barriers from a second checkpoint (checkpoint 4) queue before the first completes</li>
 *   <li>together they are larger than the threshold</li>
 *   <li>after the first checkpoint (with second checkpoint data queued) aborts,
 *       the second completes</li>
 * </ul>
 *
 * <p>Verified on a mocked {@code StatefulTask} handler: checkpoint 3 is aborted once
 * with an {@code AlignmentLimitExceededException} and is never triggered, while
 * checkpoint 4 is triggered exactly once.
 */
@Test
public void testAlignmentLimitWithQueuedAlignments() throws Exception {
    // each /* n */ marker gives the array index of the element that starts the following line;
    // the second argument of createBuffer is presumably the buffer size in bytes - TODO confirm
    BufferOrEvent[] sequence = { /*  0 */
    createBuffer(1, 100), createBuffer(2, 70), /*  2 */
    createBarrier(3, 2), /*  3 */
    createBuffer(1, 100), createBuffer(2, 100), /*  5 */
    createBarrier(3, 0), /*  6 */
    createBuffer(0, 100), createBuffer(1, 100), /*  8 */
    createBarrier(4, 0), /*  9 */
    createBuffer(0, 100), createBuffer(0, 120), createBuffer(1, 100), /* 12 */
    createBuffer(2, 100), /* 13 */
    createBarrier(3, 1), /* 14 */
    createBarrier(4, 1), /* 15 */
    createBuffer(0, 100), createBuffer(1, 100), createBuffer(2, 100), /* 18 */
    createBarrier(4, 2), /* 19 */
    createBuffer(0, 100), createBuffer(1, 100), createBuffer(2, 100) };
    // the barrier buffer is created with a limit of 500 (third constructor argument),
    // presumably the maximum number of bytes that may be spilled in alignment
    MockInputGate gate = new MockInputGate(PAGE_SIZE, 3, Arrays.asList(sequence));
    BarrierBuffer buffer = new BarrierBuffer(gate, IO_MANAGER, 500);
    // mocked handler on which trigger/abort notifications are verified
    StatefulTask toNotify = mock(StatefulTask.class);
    buffer.registerCheckpointEventHandler(toNotify);
    // validating the sequence of buffers
    long startTs;
    check(sequence[0], buffer.getNextNonBlocked());
    check(sequence[1], buffer.getNextNonBlocked());
    // start of checkpoint
    startTs = System.nanoTime();
    check(sequence[3], buffer.getNextNonBlocked());
    check(sequence[7], buffer.getNextNonBlocked());
    // next checkpoint also in progress
    check(sequence[11], buffer.getNextNonBlocked());
    // checkpoint alignment aborted due to too much data
    check(sequence[4], buffer.getNextNonBlocked());
    validateAlignmentTime(startTs, buffer);
    verify(toNotify, times(1)).abortCheckpointOnBarrier(eq(3L), any(AlignmentLimitExceededException.class));
    // replay buffered data - in the middle, the alignment for checkpoint 4 starts
    check(sequence[6], buffer.getNextNonBlocked());
    startTs = System.nanoTime();
    check(sequence[12], buffer.getNextNonBlocked());
    // only checkpoint 4 is pending now - the last checkpoint 3 barrier will not trigger success
    check(sequence[17], buffer.getNextNonBlocked());
    // checkpoint 4 completed - check and validate buffered replay
    check(sequence[9], buffer.getNextNonBlocked());
    validateAlignmentTime(startTs, buffer);
    verify(toNotify, times(1)).triggerCheckpointOnBarrier(argThat(new CheckpointMatcher(4L)), any(CheckpointOptions.class), any(CheckpointMetrics.class));
    check(sequence[10], buffer.getNextNonBlocked());
    check(sequence[15], buffer.getNextNonBlocked());
    check(sequence[16], buffer.getNextNonBlocked());
    // trailing data
    check(sequence[19], buffer.getNextNonBlocked());
    check(sequence[20], buffer.getNextNonBlocked());
    check(sequence[21], buffer.getNextNonBlocked());
    // only checkpoint 4 was successfully completed, not checkpoint 3
    verify(toNotify, times(0)).triggerCheckpointOnBarrier(argThat(new CheckpointMatcher(3L)), any(CheckpointOptions.class), any(CheckpointMetrics.class));
    // the sequence is exhausted: further reads return null
    assertNull(buffer.getNextNonBlocked());
    assertNull(buffer.getNextNonBlocked());
    buffer.cleanup();
    checkNoTempFilesRemain();
}
Also used : StatefulTask(org.apache.flink.runtime.jobgraph.tasks.StatefulTask) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) AlignmentLimitExceededException(org.apache.flink.runtime.checkpoint.decline.AlignmentLimitExceededException) BufferOrEvent(org.apache.flink.runtime.io.network.partition.consumer.BufferOrEvent) Test(org.junit.Test)

Aggregations

StatefulTask (org.apache.flink.runtime.jobgraph.tasks.StatefulTask): 11; BufferOrEvent (org.apache.flink.runtime.io.network.partition.consumer.BufferOrEvent): 9; Test (org.junit.Test): 9; CheckpointMetrics (org.apache.flink.runtime.checkpoint.CheckpointMetrics): 8; CheckpointOptions (org.apache.flink.runtime.checkpoint.CheckpointOptions): 8; CheckpointDeclineOnCancellationBarrierException (org.apache.flink.runtime.checkpoint.decline.CheckpointDeclineOnCancellationBarrierException): 5; CheckpointMetaData (org.apache.flink.runtime.checkpoint.CheckpointMetaData): 4; AlignmentLimitExceededException (org.apache.flink.runtime.checkpoint.decline.AlignmentLimitExceededException): 2; CheckpointDeclineSubsumedException (org.apache.flink.runtime.checkpoint.decline.CheckpointDeclineSubsumedException): 2; AbstractInvokable (org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable): 2; IOException (java.io.IOException): 1; HashMap (java.util.HashMap): 1; Map (java.util.Map): 1; ExecutorService (java.util.concurrent.ExecutorService): 1; Future (java.util.concurrent.Future): 1; RejectedExecutionException (java.util.concurrent.RejectedExecutionException): 1; TimeoutException (java.util.concurrent.TimeoutException): 1; ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 1; Path (org.apache.flink.core.fs.Path): 1; CheckpointDeclineTaskNotCheckpointingException (org.apache.flink.runtime.checkpoint.decline.CheckpointDeclineTaskNotCheckpointingException): 1