Search in sources :

Example 21 with CheckpointCoordinatorBuilder

use of org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder in project flink by apache.

the class CheckpointCoordinatorTest method testCheckpointStatsTrackerPendingCheckpointCallback.

/**
 * Verifies that triggering a checkpoint reports exactly one pending checkpoint (ID 1, with
 * never-retained checkpoint properties) to the configured {@code CheckpointStatsTracker}.
 */
@Test
public void testCheckpointStatsTrackerPendingCheckpointCallback() throws Exception {
    // coordinator under test, wired to a mocked stats tracker and the manually driven timer
    CheckpointStatsTracker statsTracker = mock(CheckpointStatsTracker.class);
    CheckpointCoordinator coordinator =
            new CheckpointCoordinatorBuilder()
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .setCheckpointStatsTracker(statsTracker)
                    .build();
    when(statsTracker.reportPendingCheckpoint(
                    anyLong(), anyLong(), any(CheckpointProperties.class), any(Map.class)))
            .thenReturn(mock(PendingCheckpointStats.class));

    // trigger a single checkpoint and run everything queued on the manual executor
    CompletableFuture<CompletedCheckpoint> triggerResult = coordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    FutureUtils.throwIfCompletedExceptionally(triggerResult);

    // the tracker must have been told about checkpoint 1 exactly once
    CheckpointProperties expectedProperties =
            CheckpointProperties.forCheckpoint(
                    CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION);
    verify(statsTracker, times(1))
            .reportPendingCheckpoint(eq(1L), any(Long.class), eq(expectedProperties), any());
}
Also used : AtomicLong(java.util.concurrent.atomic.AtomicLong) ArgumentMatchers.anyLong(org.mockito.ArgumentMatchers.anyLong) Map(java.util.Map) HashMap(java.util.HashMap) Collections.singletonMap(java.util.Collections.singletonMap) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) Test(org.junit.Test)

Example 22 with CheckpointCoordinatorBuilder

use of org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder in project flink by apache.

the class CheckpointCoordinatorTest method testSavepointsAreNotSubsumed.

/**
 * Checks that pending savepoints are never subsumed and that completing a savepoint does not
 * subsume pending checkpoints.
 *
 * <p>Sequence exercised by this test:
 *
 * <ol>
 *   <li>Trigger a savepoint and two checkpoints. The second checkpoint completes and subsumes
 *       the first checkpoint, but not the first savepoint.
 *   <li>Trigger another checkpoint and a second savepoint. The second savepoint completes; the
 *       pending checkpoint and the first savepoint both stay pending.
 *   <li>Acknowledge the first savepoint so that it completes as well.
 *   <li>Trigger and complete one more checkpoint; its acknowledge notification is expected to
 *       carry the ID of the second checkpoint as the last (subsumed) checkpoint ID.
 * </ol>
 */
@Test
public void testSavepointsAreNotSubsumed() throws Exception {
    // two job vertices, each contributing one execution attempt that must acknowledge
    JobVertexID jobVertexID1 = new JobVertexID();
    JobVertexID jobVertexID2 = new JobVertexID();
    ExecutionGraph graph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(jobVertexID1).addJobVertex(jobVertexID2).build();
    ExecutionVertex vertex1 = graph.getJobVertex(jobVertexID1).getTaskVertices()[0];
    ExecutionVertex vertex2 = graph.getJobVertex(jobVertexID2).getTaskVertices()[0];
    ExecutionAttemptID attemptID1 = vertex1.getCurrentExecutionAttempt().getAttemptId();
    ExecutionAttemptID attemptID2 = vertex2.getCurrentExecutionAttempt().getAttemptId();
    // the counter is kept so the test can read back the ID assigned to each trigger
    StandaloneCheckpointIDCounter counter = new StandaloneCheckpointIDCounter();
    // set up the coordinator (as a spy, so sendAcknowledgeMessages can be verified) with
    // unlimited concurrent checkpoints and a completed-checkpoint store created with 1
    CheckpointCoordinator checkpointCoordinator = spy(new CheckpointCoordinatorBuilder().setExecutionGraph(graph).setCheckpointCoordinatorConfiguration(CheckpointCoordinatorConfiguration.builder().setMaxConcurrentCheckpoints(Integer.MAX_VALUE).build()).setCheckpointIDCounter(counter).setCompletedCheckpointStore(new StandaloneCompletedCheckpointStore(1)).setTimer(manuallyTriggeredScheduledExecutor).build());
    String savepointDir = tmpFolder.newFolder().getAbsolutePath();
    // Trigger savepoint and checkpoint
    CompletableFuture<CompletedCheckpoint> savepointFuture1 = checkpointCoordinator.triggerSavepoint(savepointDir, SavepointFormatType.CANONICAL);
    manuallyTriggeredScheduledExecutor.triggerAll();
    long savepointId1 = counter.getLast();
    assertEquals(1, checkpointCoordinator.getNumberOfPendingCheckpoints());
    CompletableFuture<CompletedCheckpoint> checkpointFuture1 = checkpointCoordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    assertEquals(2, checkpointCoordinator.getNumberOfPendingCheckpoints());
    FutureUtils.throwIfCompletedExceptionally(checkpointFuture1);
    CompletableFuture<CompletedCheckpoint> checkpointFuture2 = checkpointCoordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    FutureUtils.throwIfCompletedExceptionally(checkpointFuture2);
    long checkpointId2 = counter.getLast();
    // pending now: savepoint 1, checkpoint 1, checkpoint 2
    assertEquals(3, checkpointCoordinator.getNumberOfPendingCheckpoints());
    // 2nd checkpoint should subsume the 1st checkpoint, but not the savepoint
    checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID1, checkpointId2), TASK_MANAGER_LOCATION_INFO);
    checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID2, checkpointId2), TASK_MANAGER_LOCATION_INFO);
    // no completed checkpoint before checkpointId2.
    verify(checkpointCoordinator, times(1)).sendAcknowledgeMessages(anyList(), eq(checkpointId2), anyLong(), eq(INVALID_CHECKPOINT_ID));
    // only the first savepoint is still pending, and it must be neither disposed nor completed
    assertEquals(1, checkpointCoordinator.getNumberOfPendingCheckpoints());
    assertEquals(1, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
    assertFalse(checkpointCoordinator.getPendingCheckpoints().get(savepointId1).isDisposed());
    assertFalse(savepointFuture1.isDone());
    // trigger a 3rd checkpoint and a 2nd savepoint on top of the still-pending 1st savepoint
    CompletableFuture<CompletedCheckpoint> checkpointFuture3 = checkpointCoordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    FutureUtils.throwIfCompletedExceptionally(checkpointFuture3);
    assertEquals(2, checkpointCoordinator.getNumberOfPendingCheckpoints());
    CompletableFuture<CompletedCheckpoint> savepointFuture2 = checkpointCoordinator.triggerSavepoint(savepointDir, SavepointFormatType.CANONICAL);
    manuallyTriggeredScheduledExecutor.triggerAll();
    long savepointId2 = counter.getLast();
    FutureUtils.throwIfCompletedExceptionally(savepointFuture2);
    assertEquals(3, checkpointCoordinator.getNumberOfPendingCheckpoints());
    // savepoints should not subsume checkpoints
    checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID1, savepointId2), TASK_MANAGER_LOCATION_INFO);
    checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID2, savepointId2), TASK_MANAGER_LOCATION_INFO);
    // we do not send notify checkpoint complete for savepoints
    verify(checkpointCoordinator, times(0)).sendAcknowledgeMessages(anyList(), eq(savepointId2), anyLong(), anyLong());
    // the 2nd savepoint completed; the 3rd checkpoint and the 1st savepoint are still pending
    assertEquals(2, checkpointCoordinator.getNumberOfPendingCheckpoints());
    assertEquals(1, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
    assertFalse(checkpointCoordinator.getPendingCheckpoints().get(savepointId1).isDisposed());
    assertFalse(savepointFuture1.isDone());
    assertNotNull(savepointFuture2.get());
    // Ack first savepoint
    checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID1, savepointId1), TASK_MANAGER_LOCATION_INFO);
    checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID2, savepointId1), TASK_MANAGER_LOCATION_INFO);
    // we do not send notify checkpoint complete for savepoints
    verify(checkpointCoordinator, times(0)).sendAcknowledgeMessages(anyList(), eq(savepointId1), anyLong(), anyLong());
    assertEquals(1, checkpointCoordinator.getNumberOfPendingCheckpoints());
    assertEquals(1, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
    assertNotNull(savepointFuture1.get());
    // trigger and fully acknowledge a 4th checkpoint
    CompletableFuture<CompletedCheckpoint> checkpointFuture4 = checkpointCoordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    FutureUtils.throwIfCompletedExceptionally(checkpointFuture4);
    long checkpointId4 = counter.getLast();
    checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID1, checkpointId4), TASK_MANAGER_LOCATION_INFO);
    checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID2, checkpointId4), TASK_MANAGER_LOCATION_INFO);
    // checkpoint2 would be subsumed: its ID is expected as the last argument of the
    // acknowledge notification sent for checkpointId4
    verify(checkpointCoordinator, times(1)).sendAcknowledgeMessages(anyList(), eq(checkpointId4), anyLong(), eq(checkpointId2));
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Test(org.junit.Test)

Example 23 with CheckpointCoordinatorBuilder

use of org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder in project flink by apache.

the class CheckpointCoordinatorTriggeringTest method testTriggerCheckpointInitializationFailed.

/**
 * Triggers a periodic checkpoint that is expected to fail with
 * {@code TRIGGER_CHECKPOINT_FAILURE} (the ID counter is an {@code UnstableCheckpointIDCounter}),
 * then checks that the coordinator has left the triggering state and that a second periodic
 * trigger does not complete exceptionally.
 */
@Test
public void testTriggerCheckpointInitializationFailed() throws Exception {
    // coordinator whose checkpoint ID counter is configured with the predicate "id == 0"
    CheckpointCoordinator coordinator =
            new CheckpointCoordinatorBuilder()
                    .setCheckpointIDCounter(
                            new UnstableCheckpointIDCounter(checkpointId -> checkpointId == 0))
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .build();
    coordinator.startCheckpointScheduler();

    // first attempt: starts triggering right away, nothing is queued behind it
    final CompletableFuture<CompletedCheckpoint> firstAttempt =
            triggerPeriodicCheckpoint(coordinator);
    assertTrue(coordinator.isTriggering());
    assertEquals(0, coordinator.getTriggerRequestQueue().size());

    manuallyTriggeredScheduledExecutor.triggerAll();

    try {
        firstAttempt.get();
        fail("This checkpoint should fail through UnstableCheckpointIDCounter");
    } catch (ExecutionException e) {
        // the failure must surface as a CheckpointException with the trigger-failure reason
        final Optional<CheckpointException> failure =
                ExceptionUtils.findThrowable(e, CheckpointException.class);
        assertTrue(failure.isPresent());
        assertEquals(
                CheckpointFailureReason.TRIGGER_CHECKPOINT_FAILURE,
                failure.get().getCheckpointFailureReason());
    }

    // the failed attempt must not leave the coordinator stuck in the triggering state
    assertFalse(coordinator.isTriggering());
    assertEquals(0, coordinator.getTriggerRequestQueue().size());

    // second attempt: must not complete exceptionally
    final CompletableFuture<CompletedCheckpoint> secondAttempt =
            triggerPeriodicCheckpoint(coordinator);
    assertTrue(coordinator.isTriggering());

    manuallyTriggeredScheduledExecutor.triggerAll();

    assertFalse(secondAttempt.isCompletedExceptionally());
    assertFalse(coordinator.isTriggering());
    assertEquals(0, coordinator.getTriggerRequestQueue().size());
}
Also used : OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) ScheduledExecutorServiceAdapter(org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter) ExceptionUtils(org.apache.flink.util.ExceptionUtils) CompletableFuture(java.util.concurrent.CompletableFuture) JobStatus(org.apache.flink.api.common.JobStatus) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) HashSet(java.util.HashSet) CoreMatchers.instanceOf(org.hamcrest.CoreMatchers.instanceOf) Assert.assertThat(org.junit.Assert.assertThat) RestoreMode(org.apache.flink.runtime.jobgraph.RestoreMode) TestLogger(org.apache.flink.util.TestLogger) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) Assert.fail(org.junit.Assert.fail) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) Nullable(javax.annotation.Nullable) Before(org.junit.Before) Executor(java.util.concurrent.Executor) CheckpointCoordinatorConfigurationBuilder(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration.CheckpointCoordinatorConfigurationBuilder) Predicate(java.util.function.Predicate) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) Executors(java.util.concurrent.Executors) ExecutorUtils(org.apache.flink.util.ExecutorUtils) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) 
ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) List(java.util.List) Rule(org.junit.Rule) SimpleVersionedSerializer(org.apache.flink.core.io.SimpleVersionedSerializer) Assert.assertFalse(org.junit.Assert.assertFalse) Optional(java.util.Optional) Matchers.is(org.hamcrest.Matchers.is) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) TemporaryFolder(org.junit.rules.TemporaryFolder) Assert.assertEquals(org.junit.Assert.assertEquals) Optional(java.util.Optional) ExecutionException(java.util.concurrent.ExecutionException) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) Test(org.junit.Test)

Example 24 with CheckpointCoordinatorBuilder

use of org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder in project flink by apache.

the class CheckpointCoordinatorTriggeringTest method testTriggerCheckpointRequestQueuedWithFailure.

/**
 * Issues three non-periodic checkpoint triggers back to back, so that two of them are queued
 * behind the first. The first one is expected to fail (the ID counter is an
 * {@code UnstableCheckpointIDCounter}); the two queued requests must still be processed, and
 * exactly two checkpoints must be recorded for the task by the gateway.
 */
@Test
public void testTriggerCheckpointRequestQueuedWithFailure() throws Exception {
    // single-vertex execution graph whose task manager gateway records triggered checkpoints
    JobVertexID vertexId = new JobVertexID();
    CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway recordingGateway =
            new CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway();
    ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(vertexId)
                    .setTaskManagerGateway(recordingGateway)
                    .build();
    ExecutionVertex executionVertex = executionGraph.getJobVertex(vertexId).getTaskVertices()[0];
    ExecutionAttemptID executionAttemptId =
            executionVertex.getCurrentExecutionAttempt().getAttemptId();

    // coordinator whose checkpoint ID counter is configured with the predicate "id == 0"
    CheckpointCoordinator coordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(executionGraph)
                    .setCheckpointIDCounter(
                            new UnstableCheckpointIDCounter(checkpointId -> checkpointId == 0))
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .build();
    coordinator.startCheckpointScheduler();

    // first (non-periodic) trigger: starts immediately, so the request queue stays empty
    final CompletableFuture<CompletedCheckpoint> firstRequest =
            triggerNonPeriodicCheckpoint(coordinator);
    assertTrue(coordinator.isTriggering());
    assertEquals(0, coordinator.getTriggerRequestQueue().size());

    // two more triggers while the first one is still in progress: both get queued
    final CompletableFuture<CompletedCheckpoint> secondRequest =
            triggerNonPeriodicCheckpoint(coordinator);
    final CompletableFuture<CompletedCheckpoint> thirdRequest =
            triggerNonPeriodicCheckpoint(coordinator);
    assertTrue(coordinator.isTriggering());
    assertEquals(2, coordinator.getTriggerRequestQueue().size());

    manuallyTriggeredScheduledExecutor.triggerAll();

    // the first triggered checkpoint fails by design through UnstableCheckpointIDCounter
    assertTrue(firstRequest.isCompletedExceptionally());
    assertFalse(secondRequest.isCompletedExceptionally());
    assertFalse(thirdRequest.isCompletedExceptionally());

    // the queue has been drained and two checkpoints were recorded for the task
    assertFalse(coordinator.isTriggering());
    assertEquals(0, coordinator.getTriggerRequestQueue().size());
    assertEquals(2, recordingGateway.getTriggeredCheckpoints(executionAttemptId).size());
}
Also used : OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) ScheduledExecutorServiceAdapter(org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter) ExceptionUtils(org.apache.flink.util.ExceptionUtils) CompletableFuture(java.util.concurrent.CompletableFuture) JobStatus(org.apache.flink.api.common.JobStatus) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) HashSet(java.util.HashSet) CoreMatchers.instanceOf(org.hamcrest.CoreMatchers.instanceOf) Assert.assertThat(org.junit.Assert.assertThat) RestoreMode(org.apache.flink.runtime.jobgraph.RestoreMode) TestLogger(org.apache.flink.util.TestLogger) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) Assert.fail(org.junit.Assert.fail) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) Nullable(javax.annotation.Nullable) Before(org.junit.Before) Executor(java.util.concurrent.Executor) CheckpointCoordinatorConfigurationBuilder(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration.CheckpointCoordinatorConfigurationBuilder) Predicate(java.util.function.Predicate) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) Executors(java.util.concurrent.Executors) ExecutorUtils(org.apache.flink.util.ExecutorUtils) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) 
ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) List(java.util.List) Rule(org.junit.Rule) SimpleVersionedSerializer(org.apache.flink.core.io.SimpleVersionedSerializer) Assert.assertFalse(org.junit.Assert.assertFalse) Optional(java.util.Optional) Matchers.is(org.hamcrest.Matchers.is) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) TemporaryFolder(org.junit.rules.TemporaryFolder) Assert.assertEquals(org.junit.Assert.assertEquals) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Test(org.junit.Test)

Example 25 with CheckpointCoordinatorBuilder

use of org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder in project flink by apache.

the class CheckpointStateRestoreTest method testNonRestoredState.

/**
 * Exercises the "allow non restored state" flag of
 * {@code restoreLatestCheckpointedStateToAll}.
 *
 * <p>The flag only matters for state that is contained in the checkpoint: a job vertex without
 * any state in the checkpoint restores fine either way, whereas checkpointed state without a
 * matching job vertex restores only when the flag is set.
 */
@Test
public void testNonRestoredState() throws Exception {
    // --- (1) Create tasks to restore checkpoint with ---
    JobVertexID firstVertexId = new JobVertexID();
    JobVertexID secondVertexId = new JobVertexID();
    OperatorID firstOperatorId = OperatorID.fromJobVertexID(firstVertexId);

    // 1st JobVertex: three subtasks
    ExecutionVertex first0 = mockExecutionVertex(mockExecution(), firstVertexId, 0, 3);
    ExecutionVertex first1 = mockExecutionVertex(mockExecution(), firstVertexId, 1, 3);
    ExecutionVertex first2 = mockExecutionVertex(mockExecution(), firstVertexId, 2, 3);
    // 2nd JobVertex: two subtasks
    ExecutionVertex second0 = mockExecutionVertex(mockExecution(), secondVertexId, 0, 2);
    ExecutionVertex second1 = mockExecutionVertex(mockExecution(), secondVertexId, 1, 2);

    ExecutionJobVertex firstJobVertex =
            mockExecutionJobVertex(
                    firstVertexId, new ExecutionVertex[] { first0, first1, first2 });
    ExecutionJobVertex secondJobVertex =
            mockExecutionJobVertex(secondVertexId, new ExecutionVertex[] { second0, second1 });

    Set<ExecutionJobVertex> tasks = new HashSet<>();
    tasks.add(firstJobVertex);
    tasks.add(secondJobVertex);

    CheckpointCoordinator coordinator = new CheckpointCoordinatorBuilder().build();

    // --- (2) Checkpoint misses state for a jobVertex (should work) ---
    Map<OperatorID, OperatorState> operatorStates = new HashMap<>();
    OperatorState firstOperatorState = new OperatorState(firstOperatorId, 3, 3);
    firstOperatorState.putState(0, OperatorSubtaskState.builder().build());
    firstOperatorState.putState(1, OperatorSubtaskState.builder().build());
    firstOperatorState.putState(2, OperatorSubtaskState.builder().build());
    operatorStates.put(firstOperatorId, firstOperatorState);

    CompletedCheckpoint checkpoint =
            new CompletedCheckpoint(
                    new JobID(),
                    0,
                    1,
                    2,
                    new HashMap<>(operatorStates),
                    Collections.<MasterState>emptyList(),
                    CheckpointProperties.forCheckpoint(
                            CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION),
                    new TestCompletedCheckpointStorageLocation());
    coordinator
            .getCheckpointStore()
            .addCheckpointAndSubsumeOldestOne(checkpoint, new CheckpointsCleaner(), () -> {});

    // the second job vertex has no state in the checkpoint; restore succeeds with either flag
    assertTrue(coordinator.restoreLatestCheckpointedStateToAll(tasks, false));
    assertTrue(coordinator.restoreLatestCheckpointedStateToAll(tasks, true));

    // --- (3) JobVertex missing for task state that is part of the checkpoint ---
    JobVertexID unmatchedVertexId = new JobVertexID();
    OperatorID unmatchedOperatorId = OperatorID.fromJobVertexID(unmatchedVertexId);

    // There is no task for this operator state
    OperatorState unmatchedOperatorState = new OperatorState(unmatchedOperatorId, 1, 1);
    unmatchedOperatorState.putState(0, OperatorSubtaskState.builder().build());
    operatorStates.put(unmatchedOperatorId, unmatchedOperatorState);

    checkpoint =
            new CompletedCheckpoint(
                    new JobID(),
                    1,
                    2,
                    3,
                    new HashMap<>(operatorStates),
                    Collections.<MasterState>emptyList(),
                    CheckpointProperties.forCheckpoint(
                            CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION),
                    new TestCompletedCheckpointStorageLocation());
    coordinator
            .getCheckpointStore()
            .addCheckpointAndSubsumeOldestOne(checkpoint, new CheckpointsCleaner(), () -> {});

    // (i) Allow non restored state (should succeed)
    assertTrue(coordinator.restoreLatestCheckpointedStateToAll(tasks, true));

    // (ii) Don't allow non restored state (should fail)
    try {
        coordinator.restoreLatestCheckpointedStateToAll(tasks, false);
        fail("Did not throw the expected Exception.");
    } catch (IllegalStateException ignored) {
        // expected: the checkpoint contains state that cannot be mapped to any task
    }
}
Also used : HashMap(java.util.HashMap) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) TestCompletedCheckpointStorageLocation(org.apache.flink.runtime.state.testutils.TestCompletedCheckpointStorageLocation) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) JobID(org.apache.flink.api.common.JobID) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

CheckpointCoordinatorBuilder (org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder)46 ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph)41 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)40 Test (org.junit.Test)37 AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint)30 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)27 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)22 ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex)15 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)13 DeclineCheckpoint (org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint)13 HashSet (java.util.HashSet)12 CheckpointCoordinatorConfiguration (org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration)12 ManuallyTriggeredScheduledExecutor (org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor)10 CompletableFuture (java.util.concurrent.CompletableFuture)9 KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange)9 HashMap (java.util.HashMap)8 CheckpointCoordinatorConfigurationBuilder (org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration.CheckpointCoordinatorConfigurationBuilder)8 List (java.util.List)7 ExecutionException (java.util.concurrent.ExecutionException)7 JobID (org.apache.flink.api.common.JobID)7