Search in sources:

Example 16 with ExecutionGraph

Use of org.apache.flink.runtime.executiongraph.ExecutionGraph in the Apache Flink project.

From class CheckpointCoordinatorRestoringTest, method testJobGraphModificationsAreCheckedForInitialCheckpoint.

/**
 * Verifies that restoring the initial checkpoint at startup invokes the finished-state
 * validation of the {@code VertexFinishedStateChecker} produced by the coordinator's factory.
 */
@Test
public void testJobGraphModificationsAreCheckedForInitialCheckpoint() throws Exception {
    final JobVertexID vertexId = new JobVertexID();
    final ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(vertexId, 1, 1)
                    .build();

    // Put a single completed checkpoint (id 2) into the store so there is something to restore.
    final CompletedCheckpointStore store = new EmbeddedCompletedCheckpointStore();
    final CompletedCheckpoint restorable =
            new CompletedCheckpoint(
                    executionGraph.getJobID(),
                    2,
                    System.currentTimeMillis(),
                    System.currentTimeMillis() + 3000,
                    Collections.emptyMap(),
                    Collections.emptyList(),
                    CheckpointProperties.forCheckpoint(
                            CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION),
                    new TestCompletedCheckpointStorageLocation());
    store.addCheckpointAndSubsumeOldestOne(restorable, new CheckpointsCleaner(), () -> {});

    // Flipped to true by the overridden validation hook below.
    final BooleanValue validationInvoked = new BooleanValue(false);
    final CheckpointCoordinator coordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(executionGraph)
                    .setCompletedCheckpointStore(store)
                    .setVertexFinishedStateCheckerFactory(
                            (vertices, states) ->
                                    new VertexFinishedStateChecker(vertices, states) {
                                        @Override
                                        public void validateOperatorsFinishedState() {
                                            validationInvoked.set(true);
                                        }
                                    })
                    .build();

    coordinator.restoreInitialCheckpointIfPresent(
            new HashSet<>(executionGraph.getAllVertices().values()));

    assertTrue(
            "The finished states should be checked when job is restored on startup",
            validationInvoked.get());
}
Also used : CheckpointCoordinatorTestingUtils.generatePartitionableStateHandle(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.generatePartitionableStateHandle) BooleanValue(org.apache.flink.types.BooleanValue) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) Arrays(java.util.Arrays) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) ChainedStateHandle(org.apache.flink.runtime.state.ChainedStateHandle) Tuple2(org.apache.flink.api.java.tuple.Tuple2) ArgumentMatchers.eq(org.mockito.ArgumentMatchers.eq) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) SharedStateRegistry(org.apache.flink.runtime.state.SharedStateRegistry) Random(java.util.Random) CheckpointCoordinatorTestingUtils.mockSubtaskState(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.mockSubtaskState) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) Collections.singletonList(java.util.Collections.singletonList) Map(java.util.Map) TestLogger(org.apache.flink.util.TestLogger) StateHandleDummyUtil.createNewResultSubpartitionStateHandle(org.apache.flink.runtime.checkpoint.StateHandleDummyUtil.createNewResultSubpartitionStateHandle) Assert.fail(org.junit.Assert.fail) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) CheckpointCoordinatorConfigurationBuilder(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration.CheckpointCoordinatorConfigurationBuilder) Collections.emptyList(java.util.Collections.emptyList) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) Collection(java.util.Collection) Set(java.util.Set) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) List(java.util.List) 
StateHandleDummyUtil.createNewInputChannelStateHandle(org.apache.flink.runtime.checkpoint.StateHandleDummyUtil.createNewInputChannelStateHandle) Stream(java.util.stream.Stream) Assert.assertFalse(org.junit.Assert.assertFalse) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) ArgumentMatchers.any(org.mockito.ArgumentMatchers.any) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) TestCompletedCheckpointStorageLocation(org.apache.flink.runtime.state.testutils.TestCompletedCheckpointStorageLocation) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) CheckpointCoordinatorTestingUtils.generateChainedPartitionableStateHandle(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.generateChainedPartitionableStateHandle) JobStatus(org.apache.flink.api.common.JobStatus) ArrayList(java.util.ArrayList) Execution(org.apache.flink.runtime.executiongraph.Execution) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) HashSet(java.util.HashSet) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) Iterables(org.apache.flink.shaded.guava30.com.google.common.collect.Iterables) Before(org.junit.Before) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) Mockito.times(org.mockito.Mockito.times) OperatorIDPair(org.apache.flink.runtime.OperatorIDPair) CheckpointCoordinatorTestingUtils.generateKeyGroupState(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.generateKeyGroupState) File(java.io.File) Mockito.verify(org.mockito.Mockito.verify) CheckpointCoordinatorTestingUtils.comparePartitionableState(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.comparePartitionableState) Executors(org.apache.flink.util.concurrent.Executors) Rule(org.junit.Rule) 
CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) Assert(org.junit.Assert) CheckpointCoordinatorTestingUtils.compareKeyedState(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.compareKeyedState) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) Assert.assertEquals(org.junit.Assert.assertEquals) CheckpointCoordinatorTestingUtils.verifyStateRestore(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.verifyStateRestore) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) TestCompletedCheckpointStorageLocation(org.apache.flink.runtime.state.testutils.TestCompletedCheckpointStorageLocation) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) BooleanValue(org.apache.flink.types.BooleanValue) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Test(org.junit.Test)

Example 17 with ExecutionGraph

Use of org.apache.flink.runtime.executiongraph.ExecutionGraph in the Apache Flink project.

From class CheckpointCoordinatorTest, method testMaxConcurrentAttemptsWithSubsumption.

/**
 * Fills the coordinator up to its concurrent-checkpoint limit, acknowledges checkpoint 2 and
 * then checks that the scheduler reaches the limit again with checkpoints 3 and 4 pending.
 */
@Test
public void testMaxConcurrentAttemptsWithSubsumption() throws Exception {
    final int concurrencyLimit = 2;

    final JobVertexID vertexId = new JobVertexID();
    final ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(vertexId)
                    .build();
    final ExecutionVertex vertex = executionGraph.getJobVertex(vertexId).getTaskVertices()[0];
    final ExecutionAttemptID attemptId = vertex.getCurrentExecutionAttempt().getAttemptId();

    // Periodic interval of 10 ms, a very long timeout (200 s) and no minimum pause.
    final CheckpointCoordinatorConfiguration coordinatorConfig =
            new CheckpointCoordinatorConfiguration.CheckpointCoordinatorConfigurationBuilder()
                    .setCheckpointInterval(10)
                    .setCheckpointTimeout(200000)
                    .setMinPauseBetweenCheckpoints(0L)
                    .setMaxConcurrentCheckpoints(concurrencyLimit)
                    .build();
    final CheckpointCoordinator coordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(executionGraph)
                    .setCheckpointCoordinatorConfiguration(coordinatorConfig)
                    .setCompletedCheckpointStore(new StandaloneCompletedCheckpointStore(2))
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .build();

    coordinator.startCheckpointScheduler();

    // Drive the manual executor until the limit of pending checkpoints is reached.
    do {
        manuallyTriggeredScheduledExecutor.triggerPeriodicScheduledTasks();
        manuallyTriggeredScheduledExecutor.triggerAll();
    } while (coordinator.getNumberOfPendingCheckpoints() < concurrencyLimit);

    // Checkpoints 1 and 2 must both be pending now.
    assertEquals(concurrencyLimit, coordinator.getNumberOfPendingCheckpoints());
    assertNotNull(coordinator.getPendingCheckpoints().get(1L));
    assertNotNull(coordinator.getPendingCheckpoints().get(2L));

    // Acknowledge the second checkpoint. The intent is that this also subsumes the first one,
    // which makes room for two further checkpoints to be triggered.
    coordinator.receiveAcknowledgeMessage(
            new AcknowledgeCheckpoint(executionGraph.getJobID(), attemptId, 2L),
            TASK_MANAGER_LOCATION_INFO);

    // Keep driving the executor until the replacement checkpoints have been triggered.
    do {
        manuallyTriggeredScheduledExecutor.triggerPeriodicScheduledTasks();
        manuallyTriggeredScheduledExecutor.triggerAll();
    } while (coordinator.getNumberOfPendingCheckpoints() < concurrencyLimit);

    // Final state: the limit is reached again, this time with checkpoints 3 and 4.
    assertEquals(concurrencyLimit, coordinator.getNumberOfPendingCheckpoints());
    assertNotNull(coordinator.getPendingCheckpoints().get(3L));
    assertNotNull(coordinator.getPendingCheckpoints().get(4L));

    coordinator.shutdown();
}
Also used : AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) Test(org.junit.Test)

Example 18 with ExecutionGraph

Use of org.apache.flink.runtime.executiongraph.ExecutionGraph in the Apache Flink project.

From class CheckpointCoordinatorTest, method testTasksFinishDuringTriggering.

/**
 * Simulates a task that finishes while its checkpoint trigger RPC is in flight (and whose
 * trigger RPC fails), then checks that the checkpoint future completes exceptionally and that
 * the other task is notified of the abort.
 */
@Test
public void testTasksFinishDuringTriggering() throws Exception {
    final JobVertexID firstVertexId = new JobVertexID();
    final JobVertexID secondVertexId = new JobVertexID();

    // The vertices are not moved to RUNNING by the builder; that is done manually further down.
    final ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .setTransitToRunning(false)
                    .addJobVertex(firstVertexId, 1, 256)
                    .addJobVertex(secondVertexId, 1, 256)
                    .build();
    final ExecutionVertex finishingVertex =
            executionGraph.getJobVertex(firstVertexId).getTaskVertices()[0];
    final ExecutionVertex observingVertex =
            executionGraph.getJobVertex(secondVertexId).getTaskVertices()[0];

    final AtomicBoolean abortNotified = new AtomicBoolean(false);

    // Gateway of the first task: on trigger, mark the task's current attempt as finished and
    // answer the RPC with a failure.
    final LogicalSlot finishingSlot =
            new TestingLogicalSlotBuilder()
                    .setTaskManagerGateway(
                            new SimpleAckingTaskManagerGateway() {
                                @Override
                                public CompletableFuture<Acknowledge> triggerCheckpoint(
                                        ExecutionAttemptID executionAttemptID,
                                        JobID jobId,
                                        long checkpointId,
                                        long timestamp,
                                        CheckpointOptions checkpointOptions) {
                                    finishingVertex.getCurrentExecutionAttempt().markFinished();
                                    return FutureUtils.completedExceptionally(
                                            new RpcException(""));
                                }
                            })
                    .createTestingLogicalSlot();

    // Gateway of the second task: only records that an abort notification arrived.
    final LogicalSlot observingSlot =
            new TestingLogicalSlotBuilder()
                    .setTaskManagerGateway(
                            new SimpleAckingTaskManagerGateway() {
                                @Override
                                public void notifyCheckpointAborted(
                                        ExecutionAttemptID executionAttemptID,
                                        JobID jobId,
                                        long checkpointId,
                                        long latestCompletedCheckpointId,
                                        long timestamp) {
                                    abortNotified.set(true);
                                }
                            })
                    .createTestingLogicalSlot();

    // Assign the slots and switch both attempts to RUNNING.
    ExecutionGraphTestUtils.setVertexResource(finishingVertex, finishingSlot);
    finishingVertex.getCurrentExecutionAttempt().transitionState(ExecutionState.RUNNING);
    ExecutionGraphTestUtils.setVertexResource(observingVertex, observingSlot);
    observingVertex.getCurrentExecutionAttempt().transitionState(ExecutionState.RUNNING);

    final CheckpointCoordinator coordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(executionGraph)
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .setAllowCheckpointsAfterTasksFinished(true)
                    .build();

    // Before triggering, nothing is pending and nothing has been retained.
    assertEquals(0, coordinator.getNumberOfPendingCheckpoints());
    assertEquals(0, coordinator.getNumberOfRetainedSuccessfulCheckpoints());

    // Trigger the first checkpoint. Checkpointing with finished tasks is allowed, so the
    // failure checked below is expected to stem from the failing trigger RPC of the first task.
    final CompletableFuture<CompletedCheckpoint> triggerResult =
            coordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();

    assertTrue(triggerResult.isCompletedExceptionally());
    assertTrue(abortNotified.get());
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) LogicalSlot(org.apache.flink.runtime.jobmaster.LogicalSlot) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) SimpleAckingTaskManagerGateway(org.apache.flink.runtime.executiongraph.utils.SimpleAckingTaskManagerGateway) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) RpcException(org.apache.flink.runtime.rpc.exceptions.RpcException) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) TestingLogicalSlotBuilder(org.apache.flink.runtime.jobmaster.TestingLogicalSlotBuilder) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 19 with ExecutionGraph

Use of org.apache.flink.runtime.executiongraph.ExecutionGraph in the Apache Flink project.

From class CheckpointCoordinatorTest, method testMinCheckpointPause.

/**
 * Checks that a queued second checkpoint request is not started within half of the configured
 * minimum pause after the first checkpoint was acknowledged, but is started eventually.
 */
@Test
public void testMinCheckpointPause() throws Exception {
    // The timer runs on its own thread so that checkpoint triggering can happen before
    // receiveAcknowledgeMessage returns.
    final ScheduledExecutorService timerThread = Executors.newSingleThreadScheduledExecutor();
    CheckpointCoordinator coordinator = null;
    try {
        final int pauseMillis = 1000;
        final JobVertexID vertexId = new JobVertexID();
        final ExecutionGraph executionGraph =
                new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                        .addJobVertex(vertexId)
                        .setMainThreadExecutor(
                                ComponentMainThreadExecutorServiceAdapter
                                        .forSingleThreadExecutor(
                                                new DirectScheduledExecutorService()))
                        .build();
        final ExecutionVertex vertex =
                executionGraph.getJobVertex(vertexId).getTaskVertices()[0];
        final ExecutionAttemptID attemptId =
                vertex.getCurrentExecutionAttempt().getAttemptId();

        // Interval and minimum pause are both set to the pause value; one checkpoint at a time.
        final CheckpointCoordinatorConfiguration coordinatorConfig =
                CheckpointCoordinatorConfiguration.builder()
                        .setCheckpointInterval(pauseMillis)
                        .setCheckpointTimeout(Long.MAX_VALUE)
                        .setMaxConcurrentCheckpoints(1)
                        .setMinPauseBetweenCheckpoints(pauseMillis)
                        .build();
        coordinator =
                new CheckpointCoordinatorBuilder()
                        .setTimer(new ScheduledExecutorServiceAdapter(timerThread))
                        .setCheckpointCoordinatorConfiguration(coordinatorConfig)
                        .setExecutionGraph(executionGraph)
                        .build();
        coordinator.startCheckpointScheduler();

        // First request: trigger, execute, and later complete by receiveAcknowledgeMessage.
        coordinator.triggerCheckpoint(true);
        // Second request: enqueue and later see if it gets executed in the middle of
        // receiveAcknowledgeMessage.
        coordinator.triggerCheckpoint(true);

        // Wait for at least one request to be fully processed.
        while (coordinator.getNumberOfPendingCheckpoints() == 0) {
            Thread.sleep(10);
        }

        coordinator.receiveAcknowledgeMessage(
                new AcknowledgeCheckpoint(executionGraph.getJobID(), attemptId, 1L),
                TASK_MANAGER_LOCATION_INFO);

        // Half a pause later the second request must still not have been started.
        Thread.sleep(pauseMillis / 2);
        assertEquals(0, coordinator.getNumberOfPendingCheckpoints());

        // Make sure that the second request is eventually processed.
        while (coordinator.getNumberOfPendingCheckpoints() == 0) {
            Thread.sleep(1);
        }
    } finally {
        if (coordinator != null) {
            coordinator.shutdown();
        }
        timerThread.shutdownNow();
    }
}
Also used : DirectScheduledExecutorService(org.apache.flink.runtime.testutils.DirectScheduledExecutorService) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) DirectScheduledExecutorService(org.apache.flink.runtime.testutils.DirectScheduledExecutorService) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ScheduledExecutorServiceAdapter(org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Test(org.junit.Test)

Example 20 with ExecutionGraph

Use of org.apache.flink.runtime.executiongraph.ExecutionGraph in the Apache Flink project.

From class CheckpointCoordinatorTest, method testTriggerAndDeclineCheckpointSimple.

/**
 * Triggers a checkpoint over two tasks, acknowledges it from one task and declines it from the
 * other. The test expects the pending checkpoint to be disposed, its scheduled canceller to be
 * removed, no pending or retained checkpoint to remain, and the fail-job callback to have been
 * invoked exactly once, even when further decline messages arrive afterwards.
 *
 * @param checkpointFailureReason the reason wrapped in the {@code CheckpointException} that is
 *     sent with every decline message
 */
private void testTriggerAndDeclineCheckpointSimple(CheckpointFailureReason checkpointFailureReason) throws Exception {
    final CheckpointException declineCause = new CheckpointException(checkpointFailureReason);

    final JobVertexID decliningVertexId = new JobVertexID();
    final JobVertexID ackingVertexId = new JobVertexID();
    final CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway recorder =
            new CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway();
    final ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(decliningVertexId)
                    .addJobVertex(ackingVertexId)
                    .setTaskManagerGateway(recorder)
                    .build();

    final ExecutionVertex decliningVertex =
            executionGraph.getJobVertex(decliningVertexId).getTaskVertices()[0];
    final ExecutionVertex ackingVertex =
            executionGraph.getJobVertex(ackingVertexId).getTaskVertices()[0];
    final ExecutionAttemptID decliningAttempt =
            decliningVertex.getCurrentExecutionAttempt().getAttemptId();
    final ExecutionAttemptID ackingAttempt =
            ackingVertex.getCurrentExecutionAttempt().getAttemptId();

    final TestFailJobCallback failureCallback = new TestFailJobCallback();

    // Build the coordinator and make sure it starts out empty.
    final CheckpointCoordinator coordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(executionGraph)
                    .setCheckpointCoordinatorConfiguration(
                            CheckpointCoordinatorConfiguration.builder()
                                    .setAlignedCheckpointTimeout(Long.MAX_VALUE)
                                    .setMaxConcurrentCheckpoints(Integer.MAX_VALUE)
                                    .build())
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .setCheckpointFailureManager(
                            new CheckpointFailureManager(0, failureCallback))
                    .build();
    assertEquals(0, coordinator.getNumberOfPendingCheckpoints());
    assertEquals(0, coordinator.getNumberOfRetainedSuccessfulCheckpoints());

    // Triggering the first checkpoint is expected to succeed.
    final CompletableFuture<CompletedCheckpoint> triggerResult =
            coordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    FutureUtils.throwIfCompletedExceptionally(triggerResult);

    // Exactly one checkpoint is pending, nothing has completed yet.
    assertEquals(1, coordinator.getNumberOfPendingCheckpoints());
    assertEquals(0, coordinator.getNumberOfRetainedSuccessfulCheckpoints());

    // Exactly one scheduled task is active: the one that cancels the checkpoint on timeout.
    assertEquals(1, manuallyTriggeredScheduledExecutor.getActiveScheduledTasks().size());

    final long pendingId = coordinator.getPendingCheckpoints().keySet().iterator().next();
    final PendingCheckpoint pending = coordinator.getPendingCheckpoints().get(pendingId);

    assertNotNull(pending);
    assertEquals(pendingId, pending.getCheckpointId());
    assertEquals(executionGraph.getJobID(), pending.getJobId());
    assertEquals(2, pending.getNumberOfNonAcknowledgedTasks());
    assertEquals(0, pending.getNumberOfAcknowledgedTasks());
    assertEquals(0, pending.getOperatorStates().size());
    assertFalse(pending.isDisposed());
    assertFalse(pending.areTasksFullyAcknowledged());

    // Both tasks must have received the trigger message for this checkpoint.
    for (ExecutionVertex triggered : Arrays.asList(decliningVertex, ackingVertex)) {
        final CheckpointCoordinatorTestingUtils.TriggeredCheckpoint recorded =
                recorder.getOnlyTriggeredCheckpoint(
                        triggered.getCurrentExecutionAttempt().getAttemptId());
        assertEquals(pendingId, recorded.checkpointId);
        assertEquals(pending.getCheckpointTimestamp(), recorded.timestamp);
        assertEquals(
                CheckpointOptions.forCheckpointWithDefaultLocation(),
                recorded.checkpointOptions);
    }

    // One of the two tasks acknowledges.
    coordinator.receiveAcknowledgeMessage(
            new AcknowledgeCheckpoint(executionGraph.getJobID(), ackingAttempt, pendingId),
            "Unknown location");
    assertEquals(1, pending.getNumberOfAcknowledgedTasks());
    assertEquals(1, pending.getNumberOfNonAcknowledgedTasks());
    assertFalse(pending.isDisposed());
    assertFalse(pending.areTasksFullyAcknowledged());

    // A duplicate acknowledgement from the same task must not change anything.
    coordinator.receiveAcknowledgeMessage(
            new AcknowledgeCheckpoint(executionGraph.getJobID(), ackingAttempt, pendingId),
            "Unknown location");
    assertFalse(pending.isDisposed());
    assertFalse(pending.areTasksFullyAcknowledged());

    // The other task declines, which must cancel (dispose) the pending checkpoint.
    coordinator.receiveDeclineMessage(
            new DeclineCheckpoint(
                    executionGraph.getJobID(), decliningAttempt, pendingId, declineCause),
            TASK_MANAGER_LOCATION_INFO);
    assertTrue(pending.isDisposed());

    // The timeout canceller has been removed as well.
    assertEquals(0, manuallyTriggeredScheduledExecutor.getActiveScheduledTasks().size());

    // No checkpoint is pending and none was retained.
    assertEquals(0, coordinator.getNumberOfPendingCheckpoints());
    assertEquals(0, coordinator.getNumberOfRetainedSuccessfulCheckpoints());

    // Further declines, from the same task and from the other one, must have no effect.
    coordinator.receiveDeclineMessage(
            new DeclineCheckpoint(
                    executionGraph.getJobID(), decliningAttempt, pendingId, declineCause),
            TASK_MANAGER_LOCATION_INFO);
    coordinator.receiveDeclineMessage(
            new DeclineCheckpoint(
                    executionGraph.getJobID(), ackingAttempt, pendingId, declineCause),
            TASK_MANAGER_LOCATION_INFO);
    assertTrue(pending.isDisposed());

    // The fail-job callback fired exactly once across all decline messages.
    assertEquals(1, failureCallback.getInvokeCounter());

    coordinator.shutdown();
}
Also used : DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph)

Aggregations

ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph)120 Test (org.junit.Test)96 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)77 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)53 CheckpointCoordinatorBuilder (org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder)40 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)36 AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint)35 ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex)31 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)24 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)24 HashMap (java.util.HashMap)20 CompletableFuture (java.util.concurrent.CompletableFuture)19 JobID (org.apache.flink.api.common.JobID)19 ArrayList (java.util.ArrayList)17 HashSet (java.util.HashSet)17 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)17 DeclineCheckpoint (org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint)17 ExecutionException (java.util.concurrent.ExecutionException)13 Executor (java.util.concurrent.Executor)13 IOException (java.io.IOException)12