Search in sources :

Example 6 with StatefulTask

use of org.apache.flink.runtime.jobgraph.tasks.StatefulTask in project flink by apache.

the class BarrierBufferAlignmentLimitTest method testBreakCheckpointAtAlignmentLimit.

// ------------------------------------------------------------------------
//  Tests
// ------------------------------------------------------------------------
/**
 * Tests that a single checkpoint alignment which buffers too much data is canceled.
 *
 * <p>The barrier buffer is created with a limit of 1000 (per the original author's note,
 * the number of bytes that may be spilled during alignment). The test expects the task to
 * be notified of the abort of checkpoint 7 with an {@link AlignmentLimitExceededException}
 * and expects that no checkpoint is ever triggered.
 */
@Test
public void testBreakCheckpointAtAlignmentLimit() throws Exception {
    // createBuffer(channel, size) / createBarrier(checkpointId, channel) - the leading
    // comment on each row is the array index of the row's first element
    BufferOrEvent[] events = {
        /*  0 */ createBuffer(1, 100), createBuffer(2, 70),
        /*  2 */ createBuffer(0, 42), createBuffer(2, 111),
        /*  4 */ createBarrier(7, 1),
        /*  5 */ createBuffer(1, 100), createBuffer(2, 200), createBuffer(1, 300), createBuffer(0, 50),
        /*  9 */ createBarrier(7, 0),
        /* 10 */ createBuffer(2, 100), createBuffer(0, 100), createBuffer(1, 200), createBuffer(0, 200),
        /* 14 */ createBuffer(0, 101),
        /* 15 */ createBuffer(0, 100), createBuffer(1, 100), createBuffer(2, 100),
        /* 18 */ createBarrier(7, 2),
        /* 19 */ createBuffer(0, 100), createBuffer(1, 100), createBuffer(2, 100)
    };

    // three input channels; alignment limit of 1000
    MockInputGate inputGate = new MockInputGate(PAGE_SIZE, 3, Arrays.asList(events));
    BarrierBuffer barrierBuffer = new BarrierBuffer(inputGate, IO_MANAGER, 1000);

    StatefulTask task = mock(StatefulTask.class);
    barrierBuffer.registerCheckpointEventHandler(task);

    // data before the first barrier is passed through in order
    check(events[0], barrierBuffer.getNextNonBlocked());
    check(events[1], barrierBuffer.getNextNonBlocked());
    check(events[2], barrierBuffer.getNextNonBlocked());
    check(events[3], barrierBuffer.getNextNonBlocked());

    // the checkpoint alignment begins here
    long alignmentStartNanos = System.nanoTime();
    check(events[6], barrierBuffer.getNextNonBlocked());
    check(events[8], barrierBuffer.getNextNonBlocked());
    check(events[10], barrierBuffer.getNextNonBlocked());

    // pulling the next element overflows the alignment limit, so the
    // buffered elements are replayed and the checkpoint is aborted
    check(events[5], barrierBuffer.getNextNonBlocked());
    validateAlignmentTime(alignmentStartNanos, barrierBuffer);
    verify(task, times(1)).abortCheckpointOnBarrier(eq(7L), any(AlignmentLimitExceededException.class));

    // remaining buffered elements are played back
    check(events[7], barrierBuffer.getNextNonBlocked());
    check(events[11], barrierBuffer.getNextNonBlocked());
    check(events[12], barrierBuffer.getNextNonBlocked());
    check(events[13], barrierBuffer.getNextNonBlocked());
    check(events[14], barrierBuffer.getNextNonBlocked());

    // data following the aborted alignment (the barrier at index 18 is not emitted)
    check(events[15], barrierBuffer.getNextNonBlocked());
    check(events[16], barrierBuffer.getNextNonBlocked());
    check(events[17], barrierBuffer.getNextNonBlocked());
    check(events[19], barrierBuffer.getNextNonBlocked());
    check(events[20], barrierBuffer.getNextNonBlocked());
    check(events[21], barrierBuffer.getNextNonBlocked());

    // a completed checkpoint must never have been reported
    verify(task, times(0)).triggerCheckpointOnBarrier(
        any(CheckpointMetaData.class), any(CheckpointOptions.class), any(CheckpointMetrics.class));

    // the input is exhausted
    assertNull(barrierBuffer.getNextNonBlocked());
    assertNull(barrierBuffer.getNextNonBlocked());

    barrierBuffer.cleanup();
    checkNoTempFilesRemain();
}
Also used : StatefulTask(org.apache.flink.runtime.jobgraph.tasks.StatefulTask) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) AlignmentLimitExceededException(org.apache.flink.runtime.checkpoint.decline.AlignmentLimitExceededException) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) BufferOrEvent(org.apache.flink.runtime.io.network.partition.consumer.BufferOrEvent) Test(org.junit.Test)

Example 7 with StatefulTask

use of org.apache.flink.runtime.jobgraph.tasks.StatefulTask in project flink by apache.

the class BarrierBufferTest method testIgnoreCancelBarrierIfCheckpointSubsumed.

/**
 * Tests the case where a cancellation barrier is received for a checkpoint that was
 * already canceled because a barrier of a newer checkpoint had been received.
 *
 * <p>Checkpoint 3 is expected to be aborted exactly once (as subsumed), and checkpoint 5
 * is expected to be triggered exactly once.
 */
@Test
public void testIgnoreCancelBarrierIfCheckpointSubsumed() throws Exception {
    // the leading comment on each row is the array index of the row's first element
    BufferOrEvent[] events = {
        /*  0 */ createBuffer(2),
        /*  1 */ createBarrier(3, 1), createBarrier(3, 0),
        /*  3 */ createBuffer(0), createBuffer(1), createBuffer(2),
        /*  6 */ createBarrier(5, 2),
        /*  7 */ createBuffer(2), createBuffer(1), createBuffer(0),
        /* 10 */ createCancellationBarrier(3, 2),
        /* 11 */ createBuffer(2), createBuffer(0), createBuffer(1),
        /* 14 */ createBarrier(5, 0), createBarrier(5, 1),
        /* 16 */ createBuffer(0), createBuffer(1), createBuffer(2)
    };

    MockInputGate inputGate = new MockInputGate(PAGE_SIZE, 3, Arrays.asList(events));
    BarrierBuffer barrierBuffer = new BarrierBuffer(inputGate, IO_MANAGER);

    StatefulTask task = mock(StatefulTask.class);
    barrierBuffer.registerCheckpointEventHandler(task);

    // data ahead of any barrier
    check(events[0], barrierBuffer.getNextNonBlocked());

    // first checkpoint (id 3) begins
    check(events[5], barrierBuffer.getNextNonBlocked());

    // a barrier of the later checkpoint 5 aborts checkpoint 3
    final long alignmentStartNanos = System.nanoTime();
    check(events[3], barrierBuffer.getNextNonBlocked());
    verify(task).abortCheckpointOnBarrier(eq(3L), any(CheckpointDeclineSubsumedException.class));
    check(events[4], barrierBuffer.getNextNonBlocked());

    // alignment phase of checkpoint 5
    check(events[8], barrierBuffer.getNextNonBlocked());
    check(events[9], barrierBuffer.getNextNonBlocked());
    check(events[12], barrierBuffer.getNextNonBlocked());
    check(events[13], barrierBuffer.getNextNonBlocked());

    // checkpoint 5 completes
    check(events[7], barrierBuffer.getNextNonBlocked());
    validateAlignmentTime(alignmentStartNanos, barrierBuffer);
    verify(task).triggerCheckpointOnBarrier(
        argThat(new CheckpointMatcher(5L)), any(CheckpointOptions.class), any(CheckpointMetrics.class));
    check(events[11], barrierBuffer.getNextNonBlocked());

    // trailing data
    check(events[16], barrierBuffer.getNextNonBlocked());
    check(events[17], barrierBuffer.getNextNonBlocked());
    check(events[18], barrierBuffer.getNextNonBlocked());

    // input exhausted
    assertNull(barrierBuffer.getNextNonBlocked());
    assertNull(barrierBuffer.getNextNonBlocked());

    barrierBuffer.cleanup();
    checkNoTempFilesRemain();

    // overall: exactly one trigger and exactly one abort notification
    verify(task, times(1)).triggerCheckpointOnBarrier(
        any(CheckpointMetaData.class), any(CheckpointOptions.class), any(CheckpointMetrics.class));
    verify(task, times(1)).abortCheckpointOnBarrier(anyLong(), any(Throwable.class));
}
Also used : StatefulTask(org.apache.flink.runtime.jobgraph.tasks.StatefulTask) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) BufferOrEvent(org.apache.flink.runtime.io.network.partition.consumer.BufferOrEvent) CheckpointDeclineSubsumedException(org.apache.flink.runtime.checkpoint.decline.CheckpointDeclineSubsumedException) Test(org.junit.Test)

Example 8 with StatefulTask

use of org.apache.flink.runtime.jobgraph.tasks.StatefulTask in project flink by apache.

the class BarrierBufferTest method testMultiChannelSkippingCheckpoints.

/**
 * Validates that the buffer skips over the current checkpoint if it
 * receives a barrier from a later checkpoint on a non-blocked input.
 *
 * <p>Expected notifications: checkpoint 1 is triggered, checkpoint 2 is aborted as
 * subsumed, and checkpoint 3 is aborted.
 *
 * @throws Exception any failure is propagated to the test runner so that the original
 *                   exception and its stack trace show up in the test report
 */
@Test
public void testMultiChannelSkippingCheckpoints() throws Exception {
    // the leading comment on each row is the array index of the row's first element
    BufferOrEvent[] sequence = {
        // checkpoint 1 - with blocked data
        /*  0 */ createBuffer(0), createBuffer(2), createBuffer(0),
        /*  3 */ createBarrier(1, 1), createBarrier(1, 2),
        /*  5 */ createBuffer(2), createBuffer(1), createBuffer(0),
        /*  8 */ createBarrier(1, 0),
        /*  9 */ createBuffer(1), createBuffer(0),
        // checkpoint 2 will not complete: pre-mature barrier from checkpoint 3
        /* 11 */ createBarrier(2, 1),
        /* 12 */ createBuffer(1), createBuffer(2),
        /* 14 */ createBarrier(2, 0),
        /* 15 */ createBuffer(2), createBuffer(0),
        /* 17 */ createBarrier(3, 2),
        /* 18 */ createBuffer(2), createBuffer(1),
        /* 20 */ createEndOfPartition(1),
        /* 21 */ createBuffer(2),
        /* 22 */ createEndOfPartition(2),
        /* 23 */ createBuffer(0),
        /* 24 */ createEndOfPartition(0)
    };

    MockInputGate gate = new MockInputGate(PAGE_SIZE, 3, Arrays.asList(sequence));
    BarrierBuffer buffer = new BarrierBuffer(gate, IO_MANAGER);

    StatefulTask toNotify = mock(StatefulTask.class);
    buffer.registerCheckpointEventHandler(toNotify);

    long startTs;

    // initial data
    check(sequence[0], buffer.getNextNonBlocked());
    check(sequence[1], buffer.getNextNonBlocked());
    check(sequence[2], buffer.getNextNonBlocked());

    // align checkpoint 1
    startTs = System.nanoTime();
    check(sequence[7], buffer.getNextNonBlocked());
    assertEquals(1L, buffer.getCurrentCheckpointId());

    // checkpoint done - replay buffered
    check(sequence[5], buffer.getNextNonBlocked());
    validateAlignmentTime(startTs, buffer);
    verify(toNotify).triggerCheckpointOnBarrier(
        argThat(new CheckpointMatcher(1L)), any(CheckpointOptions.class), any(CheckpointMetrics.class));
    check(sequence[6], buffer.getNextNonBlocked());

    check(sequence[9], buffer.getNextNonBlocked());
    check(sequence[10], buffer.getNextNonBlocked());

    // alignment of checkpoint 2
    startTs = System.nanoTime();
    check(sequence[13], buffer.getNextNonBlocked());
    check(sequence[15], buffer.getNextNonBlocked());

    // checkpoint 2 aborted, checkpoint 3 started
    check(sequence[12], buffer.getNextNonBlocked());
    assertEquals(3L, buffer.getCurrentCheckpointId());
    validateAlignmentTime(startTs, buffer);
    verify(toNotify).abortCheckpointOnBarrier(eq(2L), any(CheckpointDeclineSubsumedException.class));
    check(sequence[16], buffer.getNextNonBlocked());

    // checkpoint 3 alignment in progress
    check(sequence[19], buffer.getNextNonBlocked());

    // checkpoint 3 aborted (end of partition)
    check(sequence[20], buffer.getNextNonBlocked());
    // NOTE(review): this abort is described as caused by an end-of-partition event, yet the
    // matcher names the "subsumed" exception type. Depending on the Mockito version,
    // any(Class) may not type-check its argument - confirm which exception type
    // BarrierBuffer reports here and tighten the matcher accordingly.
    verify(toNotify).abortCheckpointOnBarrier(eq(3L), any(CheckpointDeclineSubsumedException.class));

    // replay buffered data from checkpoint 3
    check(sequence[18], buffer.getNextNonBlocked());

    // all the remaining messages
    check(sequence[21], buffer.getNextNonBlocked());
    check(sequence[22], buffer.getNextNonBlocked());
    check(sequence[23], buffer.getNextNonBlocked());
    check(sequence[24], buffer.getNextNonBlocked());

    assertNull(buffer.getNextNonBlocked());
    assertNull(buffer.getNextNonBlocked());

    buffer.cleanup();
    checkNoTempFilesRemain();
}
Also used : StatefulTask(org.apache.flink.runtime.jobgraph.tasks.StatefulTask) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) CheckpointDeclineOnCancellationBarrierException(org.apache.flink.runtime.checkpoint.decline.CheckpointDeclineOnCancellationBarrierException) CheckpointDeclineSubsumedException(org.apache.flink.runtime.checkpoint.decline.CheckpointDeclineSubsumedException) BufferOrEvent(org.apache.flink.runtime.io.network.partition.consumer.BufferOrEvent) Test(org.junit.Test)

Example 9 with StatefulTask

use of org.apache.flink.runtime.jobgraph.tasks.StatefulTask in project flink by apache.

the class BarrierBufferTest method testAbortViaQueuedBarriers.

/**
 * Tests the abort of a checkpoint through a cancellation barrier that is read back from
 * the queued events after an earlier checkpoint has finished.
 *
 * <p>Checkpoint 1 is expected to be triggered exactly once, and checkpoint 2 is expected
 * to be aborted exactly once with a {@link CheckpointDeclineOnCancellationBarrierException};
 * after the abort the reported alignment duration is expected to be zero.
 */
@Test
public void testAbortViaQueuedBarriers() throws Exception {
    // the leading comment on each row is the array index of the row's first element
    BufferOrEvent[] events = {
        /*  0 */ createBuffer(1),
        /*  1 */ createBarrier(1, 1), createBarrier(1, 2),
        /*  3 */ createBuffer(2), createBuffer(0), createBuffer(1),
        /*  6 */ createCancellationBarrier(2, 2),
        /*  7 */ createBarrier(2, 1),
        /*  8 */ createBuffer(0), createBuffer(1), createBuffer(2),
        /* 11 */ createBarrier(1, 0),
        /* 12 */ createBuffer(2), createBuffer(1), createBuffer(0),
        /* 15 */ createBarrier(2, 0),
        /* 16 */ createBuffer(0), createBuffer(1), createBuffer(2)
    };

    MockInputGate inputGate = new MockInputGate(PAGE_SIZE, 3, Arrays.asList(events));
    BarrierBuffer barrierBuffer = new BarrierBuffer(inputGate, IO_MANAGER);

    StatefulTask task = mock(StatefulTask.class);
    barrierBuffer.registerCheckpointEventHandler(task);

    check(events[0], barrierBuffer.getNextNonBlocked());

    // first checkpoint begins
    final long alignmentStartNanos = System.nanoTime();
    check(events[4], barrierBuffer.getNextNonBlocked());
    check(events[8], barrierBuffer.getNextNonBlocked());

    // first checkpoint is complete
    check(events[3], barrierBuffer.getNextNonBlocked());
    verify(task).triggerCheckpointOnBarrier(
        argThat(new CheckpointMatcher(1L)), any(CheckpointOptions.class), any(CheckpointMetrics.class));
    validateAlignmentTime(alignmentStartNanos, barrierBuffer);

    check(events[5], barrierBuffer.getNextNonBlocked());

    // the queued cancellation barrier is read again and aborts checkpoint 2
    check(events[9], barrierBuffer.getNextNonBlocked());
    verify(task).abortCheckpointOnBarrier(eq(2L), any(CheckpointDeclineOnCancellationBarrierException.class));
    assertEquals(0L, barrierBuffer.getAlignmentDurationNanos());

    check(events[10], barrierBuffer.getNextNonBlocked());
    check(events[12], barrierBuffer.getNextNonBlocked());
    check(events[13], barrierBuffer.getNextNonBlocked());
    check(events[14], barrierBuffer.getNextNonBlocked());

    check(events[16], barrierBuffer.getNextNonBlocked());
    check(events[17], barrierBuffer.getNextNonBlocked());
    check(events[18], barrierBuffer.getNextNonBlocked());

    // there must not have been another alignment
    assertEquals(0L, barrierBuffer.getAlignmentDurationNanos());

    // and no additional trigger / abort notifications
    verify(task, times(1)).triggerCheckpointOnBarrier(
        any(CheckpointMetaData.class), any(CheckpointOptions.class), any(CheckpointMetrics.class));
    verify(task, times(1)).abortCheckpointOnBarrier(
        anyLong(), any(CheckpointDeclineOnCancellationBarrierException.class));

    // input exhausted
    assertNull(barrierBuffer.getNextNonBlocked());
    assertNull(barrierBuffer.getNextNonBlocked());

    barrierBuffer.cleanup();
    checkNoTempFilesRemain();
}
Also used : StatefulTask(org.apache.flink.runtime.jobgraph.tasks.StatefulTask) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) CheckpointDeclineOnCancellationBarrierException(org.apache.flink.runtime.checkpoint.decline.CheckpointDeclineOnCancellationBarrierException) BufferOrEvent(org.apache.flink.runtime.io.network.partition.consumer.BufferOrEvent) Test(org.junit.Test)

Example 10 with StatefulTask

use of org.apache.flink.runtime.jobgraph.tasks.StatefulTask in project flink by apache.

the class BarrierBufferTest method testAbortWhileHavingQueuedBarriers.

/**
 * Tests the case where a replay of queued checkpoint barriers meets
 * a canceled checkpoint.
 *
 * <p>The replayed barrier of the newer checkpoint must not try to cancel the checkpoint
 * that was already canceled: overall, exactly one abort (checkpoint 1) and exactly one
 * trigger (checkpoint 2) notification are expected.
 */
@Test
public void testAbortWhileHavingQueuedBarriers() throws Exception {
    // the leading comment on each row is the array index of the row's first element
    BufferOrEvent[] events = {
        /*  0 */ createBuffer(1),
        /*  1 */ createBarrier(1, 1),
        /*  2 */ createBuffer(2), createBuffer(0), createBuffer(1),
        /*  5 */ createBarrier(2, 1),
        /*  6 */ createBuffer(2), createBuffer(1),
        /*  8 */ createCancellationBarrier(1, 0),
        /*  9 */ createBuffer(2), createBuffer(1), createBuffer(0),
        /* 12 */ createBarrier(1, 2),
        /* 13 */ createBuffer(0), createBuffer(1), createBuffer(2),
        /* 16 */ createBarrier(2, 0), createBarrier(2, 2),
        /* 18 */ createBuffer(0), createBuffer(1), createBuffer(2)
    };

    MockInputGate inputGate = new MockInputGate(PAGE_SIZE, 3, Arrays.asList(events));
    BarrierBuffer barrierBuffer = new BarrierBuffer(inputGate, IO_MANAGER);

    StatefulTask task = mock(StatefulTask.class);
    barrierBuffer.registerCheckpointEventHandler(task);

    check(events[0], barrierBuffer.getNextNonBlocked());

    // first checkpoint begins
    long alignmentStartNanos = System.nanoTime();
    check(events[2], barrierBuffer.getNextNonBlocked());
    check(events[3], barrierBuffer.getNextNonBlocked());
    check(events[6], barrierBuffer.getNextNonBlocked());

    // the cancellation barrier aborts checkpoint 1
    check(events[4], barrierBuffer.getNextNonBlocked());
    validateAlignmentTime(alignmentStartNanos, barrierBuffer);
    verify(task).abortCheckpointOnBarrier(eq(1L), any(CheckpointDeclineOnCancellationBarrierException.class));

    // alignment of the next checkpoint starts at this point
    alignmentStartNanos = System.nanoTime();
    check(events[9], barrierBuffer.getNextNonBlocked());
    check(events[11], barrierBuffer.getNextNonBlocked());
    check(events[13], barrierBuffer.getNextNonBlocked());
    check(events[15], barrierBuffer.getNextNonBlocked());

    // checkpoint 2 completes
    check(events[7], barrierBuffer.getNextNonBlocked());
    validateAlignmentTime(alignmentStartNanos, barrierBuffer);
    verify(task).triggerCheckpointOnBarrier(
        argThat(new CheckpointMatcher(2L)), any(CheckpointOptions.class), any(CheckpointMetrics.class));

    // data that was queued during the alignment
    check(events[10], barrierBuffer.getNextNonBlocked());
    check(events[14], barrierBuffer.getNextNonBlocked());

    // data after the last barrier
    check(events[18], barrierBuffer.getNextNonBlocked());
    check(events[19], barrierBuffer.getNextNonBlocked());
    check(events[20], barrierBuffer.getNextNonBlocked());

    // input exhausted
    assertNull(barrierBuffer.getNextNonBlocked());
    assertNull(barrierBuffer.getNextNonBlocked());

    barrierBuffer.cleanup();
    checkNoTempFilesRemain();

    // overall: exactly one trigger and exactly one abort notification
    verify(task, times(1)).triggerCheckpointOnBarrier(
        any(CheckpointMetaData.class), any(CheckpointOptions.class), any(CheckpointMetrics.class));
    verify(task, times(1)).abortCheckpointOnBarrier(anyLong(), any(Throwable.class));
}
Also used : StatefulTask(org.apache.flink.runtime.jobgraph.tasks.StatefulTask) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) CheckpointDeclineOnCancellationBarrierException(org.apache.flink.runtime.checkpoint.decline.CheckpointDeclineOnCancellationBarrierException) BufferOrEvent(org.apache.flink.runtime.io.network.partition.consumer.BufferOrEvent) Test(org.junit.Test)

Aggregations

StatefulTask (org.apache.flink.runtime.jobgraph.tasks.StatefulTask)12 BufferOrEvent (org.apache.flink.runtime.io.network.partition.consumer.BufferOrEvent)9 Test (org.junit.Test)9 CheckpointMetrics (org.apache.flink.runtime.checkpoint.CheckpointMetrics)8 CheckpointOptions (org.apache.flink.runtime.checkpoint.CheckpointOptions)8 CheckpointMetaData (org.apache.flink.runtime.checkpoint.CheckpointMetaData)5 CheckpointDeclineOnCancellationBarrierException (org.apache.flink.runtime.checkpoint.decline.CheckpointDeclineOnCancellationBarrierException)5 AbstractInvokable (org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable)3 IOException (java.io.IOException)2 RejectedExecutionException (java.util.concurrent.RejectedExecutionException)2 TimeoutException (java.util.concurrent.TimeoutException)2 AlignmentLimitExceededException (org.apache.flink.runtime.checkpoint.decline.AlignmentLimitExceededException)2 CheckpointDeclineSubsumedException (org.apache.flink.runtime.checkpoint.decline.CheckpointDeclineSubsumedException)2 CheckpointDeclineTaskNotCheckpointingException (org.apache.flink.runtime.checkpoint.decline.CheckpointDeclineTaskNotCheckpointingException)2 CheckpointDeclineTaskNotReadyException (org.apache.flink.runtime.checkpoint.decline.CheckpointDeclineTaskNotReadyException)2 CancelTaskException (org.apache.flink.runtime.execution.CancelTaskException)2 PartitionProducerDisposedException (org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException)2 HashMap (java.util.HashMap)1 Map (java.util.Map)1 ExecutorService (java.util.concurrent.ExecutorService)1