Search in sources :

Example 41 with AcknowledgeCheckpoint

use of org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint in project flink by apache.

the class CheckpointCoordinatorTest method testSuccessfulCheckpointSubsumesUnsuccessful.

/**
 * Verifies that a later checkpoint which gets fully acknowledged subsumes an earlier checkpoint
 * that is still pending.
 *
 * <p>The scenario triggers two checkpoints, acknowledges the first one only partially and the
 * second one completely, and then asserts that: both pending checkpoints are disposed, exactly
 * one completed checkpoint (the second) is retained, the states acknowledged for the first
 * checkpoint are discarded, and the states of the second checkpoint are discarded only after
 * the coordinator and the completed checkpoint store have been shut down.
 */
@Test
public void testSuccessfulCheckpointSubsumesUnsuccessful() throws Exception {
    JobVertexID jobVertexID1 = new JobVertexID();
    JobVertexID jobVertexID2 = new JobVertexID();
    JobVertexID jobVertexID3 = new JobVertexID();
    // gateway that records the trigger / completion notifications asserted on further below
    CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway gateway = new CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway();
    // NOTE(review): vertex3 is added with a 'false' flag and is never checked for trigger
    // messages below - presumably the flag excludes it from triggering; confirm against the builder
    ExecutionGraph graph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(jobVertexID1).addJobVertex(jobVertexID2).addJobVertex(jobVertexID3, false).setTaskManagerGateway(gateway).build();
    ExecutionVertex vertex1 = graph.getJobVertex(jobVertexID1).getTaskVertices()[0];
    ExecutionVertex vertex2 = graph.getJobVertex(jobVertexID2).getTaskVertices()[0];
    ExecutionVertex vertex3 = graph.getJobVertex(jobVertexID3).getTaskVertices()[0];
    ExecutionAttemptID attemptID1 = vertex1.getCurrentExecutionAttempt().getAttemptId();
    ExecutionAttemptID attemptID2 = vertex2.getCurrentExecutionAttempt().getAttemptId();
    ExecutionAttemptID attemptID3 = vertex3.getCurrentExecutionAttempt().getAttemptId();
    // set up the coordinator and validate the initial state
    final StandaloneCompletedCheckpointStore completedCheckpointStore = new StandaloneCompletedCheckpointStore(10);
    // no limit on concurrent checkpoints, so the second checkpoint can start while the first is pending
    CheckpointCoordinator checkpointCoordinator = new CheckpointCoordinatorBuilder().setExecutionGraph(graph).setCheckpointCoordinatorConfiguration(CheckpointCoordinatorConfiguration.builder().setMaxConcurrentCheckpoints(Integer.MAX_VALUE).build()).setCompletedCheckpointStore(completedCheckpointStore).setTimer(manuallyTriggeredScheduledExecutor).build();
    assertEquals(0, checkpointCoordinator.getNumberOfPendingCheckpoints());
    assertEquals(0, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
    // trigger the first checkpoint. Triggering should succeed
    final CompletableFuture<CompletedCheckpoint> checkpointFuture1 = checkpointCoordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    FutureUtils.throwIfCompletedExceptionally(checkpointFuture1);
    assertEquals(1, checkpointCoordinator.getNumberOfPendingCheckpoints());
    assertEquals(0, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
    PendingCheckpoint pending1 = checkpointCoordinator.getPendingCheckpoints().values().iterator().next();
    long checkpointId1 = pending1.getCheckpointId();
    // trigger messages should have been sent to vertex1 and vertex2
    for (ExecutionVertex vertex : Arrays.asList(vertex1, vertex2)) {
        ExecutionAttemptID attemptId = vertex.getCurrentExecutionAttempt().getAttemptId();
        assertEquals(checkpointId1, gateway.getOnlyTriggeredCheckpoint(attemptId).checkpointId);
    }
    OperatorID opID1 = vertex1.getJobVertex().getOperatorIDs().get(0).getGeneratedOperatorID();
    OperatorID opID2 = vertex2.getJobVertex().getOperatorIDs().get(0).getGeneratedOperatorID();
    OperatorID opID3 = vertex3.getJobVertex().getOperatorIDs().get(0).getGeneratedOperatorID();
    // per-task state snapshots for the first checkpoint; the subtask states are mocks so that
    // calls to discardState() can be verified later
    TaskStateSnapshot taskOperatorSubtaskStates11 = spy(new TaskStateSnapshot());
    TaskStateSnapshot taskOperatorSubtaskStates12 = spy(new TaskStateSnapshot());
    TaskStateSnapshot taskOperatorSubtaskStates13 = spy(new TaskStateSnapshot());
    OperatorSubtaskState subtaskState11 = mock(OperatorSubtaskState.class);
    OperatorSubtaskState subtaskState12 = mock(OperatorSubtaskState.class);
    OperatorSubtaskState subtaskState13 = mock(OperatorSubtaskState.class);
    taskOperatorSubtaskStates11.putSubtaskStateByOperatorID(opID1, subtaskState11);
    taskOperatorSubtaskStates12.putSubtaskStateByOperatorID(opID2, subtaskState12);
    taskOperatorSubtaskStates13.putSubtaskStateByOperatorID(opID3, subtaskState13);
    // acknowledge one of the three tasks (task 2) for the first checkpoint
    checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID2, checkpointId1, new CheckpointMetrics(), taskOperatorSubtaskStates12), TASK_MANAGER_LOCATION_INFO);
    // start the second checkpoint
    gateway.resetCount();
    final CompletableFuture<CompletedCheckpoint> checkpointFuture2 = checkpointCoordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    FutureUtils.throwIfCompletedExceptionally(checkpointFuture2);
    assertEquals(2, checkpointCoordinator.getNumberOfPendingCheckpoints());
    assertEquals(0, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
    PendingCheckpoint pending2;
    {
        // of the two pending checkpoints, pick the one that is not pending1
        Iterator<PendingCheckpoint> all = checkpointCoordinator.getPendingCheckpoints().values().iterator();
        PendingCheckpoint cc1 = all.next();
        PendingCheckpoint cc2 = all.next();
        pending2 = pending1 == cc1 ? cc2 : cc1;
    }
    long checkpointId2 = pending2.getCheckpointId();
    // per-task state snapshots for the second checkpoint, again backed by mocks
    TaskStateSnapshot taskOperatorSubtaskStates21 = spy(new TaskStateSnapshot());
    TaskStateSnapshot taskOperatorSubtaskStates22 = spy(new TaskStateSnapshot());
    TaskStateSnapshot taskOperatorSubtaskStates23 = spy(new TaskStateSnapshot());
    OperatorSubtaskState subtaskState21 = mock(OperatorSubtaskState.class);
    OperatorSubtaskState subtaskState22 = mock(OperatorSubtaskState.class);
    OperatorSubtaskState subtaskState23 = mock(OperatorSubtaskState.class);
    taskOperatorSubtaskStates21.putSubtaskStateByOperatorID(opID1, subtaskState21);
    taskOperatorSubtaskStates22.putSubtaskStateByOperatorID(opID2, subtaskState22);
    taskOperatorSubtaskStates23.putSubtaskStateByOperatorID(opID3, subtaskState23);
    // trigger messages for the second checkpoint should have been sent to vertex1 and vertex2
    for (ExecutionVertex vertex : Arrays.asList(vertex1, vertex2)) {
        ExecutionAttemptID attemptId = vertex.getCurrentExecutionAttempt().getAttemptId();
        assertEquals(checkpointId2, gateway.getOnlyTriggeredCheckpoint(attemptId).checkpointId);
    }
    // we acknowledge one more task (task 1) for the first checkpoint and all three tasks for
    // the second checkpoint. The second checkpoint should then subsume the first checkpoint
    checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID3, checkpointId2, new CheckpointMetrics(), taskOperatorSubtaskStates23), TASK_MANAGER_LOCATION_INFO);
    checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID1, checkpointId2, new CheckpointMetrics(), taskOperatorSubtaskStates21), TASK_MANAGER_LOCATION_INFO);
    checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID1, checkpointId1, new CheckpointMetrics(), taskOperatorSubtaskStates11), TASK_MANAGER_LOCATION_INFO);
    checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID2, checkpointId2, new CheckpointMetrics(), taskOperatorSubtaskStates22), TASK_MANAGER_LOCATION_INFO);
    // now, the second checkpoint should be confirmed, and the first discarded.
    // Actually both pending checkpoints are disposed, and the second has been transformed
    // into a successful checkpoint
    assertTrue(pending1.isDisposed());
    assertTrue(pending2.isDisposed());
    assertEquals(0, checkpointCoordinator.getNumberOfPendingCheckpoints());
    assertEquals(1, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
    // validate that all received subtask states in the first checkpoint have been discarded
    verify(subtaskState11, times(1)).discardState();
    verify(subtaskState12, times(1)).discardState();
    // validate that all subtask states in the second checkpoint are not discarded
    verify(subtaskState21, never()).discardState();
    verify(subtaskState22, never()).discardState();
    verify(subtaskState23, never()).discardState();
    // validate the committed checkpoints
    List<CompletedCheckpoint> scs = checkpointCoordinator.getSuccessfulCheckpoints();
    CompletedCheckpoint success = scs.get(0);
    assertEquals(checkpointId2, success.getCheckpointID());
    assertEquals(graph.getJobID(), success.getJobId());
    assertEquals(3, success.getOperatorStates().size());
    // the completion notification for the second checkpoint should have gone out to all three tasks
    for (ExecutionVertex vertex : Arrays.asList(vertex1, vertex2, vertex3)) {
        ExecutionAttemptID attemptId = vertex.getCurrentExecutionAttempt().getAttemptId();
        assertEquals(checkpointId2, gateway.getOnlyNotifiedCompletedCheckpoint(attemptId).checkpointId);
    }
    // send the last remaining ack for the first checkpoint. The checkpoint is already gone, so
    // the only expected effect is that the state carried by the late ack is discarded
    checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID3, checkpointId1, new CheckpointMetrics(), taskOperatorSubtaskStates13), TASK_MANAGER_LOCATION_INFO);
    verify(subtaskState13, times(1)).discardState();
    checkpointCoordinator.shutdown();
    completedCheckpointStore.shutdown(JobStatus.FINISHED, new CheckpointsCleaner());
    // validate that the states in the second checkpoint have been discarded after the shutdown
    verify(subtaskState21, times(1)).discardState();
    verify(subtaskState22, times(1)).discardState();
    verify(subtaskState23, times(1)).discardState();
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Iterator(java.util.Iterator) Test(org.junit.Test)

Example 42 with AcknowledgeCheckpoint

use of org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint in project flink by apache.

the class CheckpointCoordinatorTest method testTriggerAndDeclineCheckpointThenFailureManagerThrowsException.

/**
 * Triggers a checkpoint, acknowledges it from one task and declines it from the other, and
 * expects the decline to surface an exception carrying the message that was handed to the
 * failure manager.
 */
@Test
public void testTriggerAndDeclineCheckpointThenFailureManagerThrowsException() throws Exception {
    final String expectedMessage = "Exceeded checkpoint failure tolerance number!";

    // two-vertex graph; one task will acknowledge, the other will decline
    final JobVertexID decliningVertexId = new JobVertexID();
    final JobVertexID acknowledgingVertexId = new JobVertexID();
    final ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(decliningVertexId)
                    .addJobVertex(acknowledgingVertexId)
                    .build();
    final ExecutionVertex decliningVertex =
            executionGraph.getJobVertex(decliningVertexId).getTaskVertices()[0];
    final ExecutionVertex acknowledgingVertex =
            executionGraph.getJobVertex(acknowledgingVertexId).getTaskVertices()[0];
    final ExecutionAttemptID decliningAttempt =
            decliningVertex.getCurrentExecutionAttempt().getAttemptId();
    final ExecutionAttemptID acknowledgingAttempt =
            acknowledgingVertex.getCurrentExecutionAttempt().getAttemptId();

    // coordinator wired to the failure manager built for the expected message
    final CheckpointFailureManager failureManager = getCheckpointFailureManager(expectedMessage);
    final CheckpointCoordinator coordinator = getCheckpointCoordinator(executionGraph, failureManager);

    try {
        // triggering itself must not fail
        final CompletableFuture<CompletedCheckpoint> triggerResult =
                coordinator.triggerCheckpoint(false);
        manuallyTriggeredScheduledExecutor.triggerAll();
        FutureUtils.throwIfCompletedExceptionally(triggerResult);

        final long pendingId = coordinator.getPendingCheckpoints().keySet().iterator().next();
        final PendingCheckpoint pending = coordinator.getPendingCheckpoints().get(pendingId);

        // one acknowledgement leaves the checkpoint alive but incomplete
        final AcknowledgeCheckpoint ack =
                new AcknowledgeCheckpoint(executionGraph.getJobID(), acknowledgingAttempt, pendingId);
        coordinator.receiveAcknowledgeMessage(ack, TASK_MANAGER_LOCATION_INFO);
        assertFalse(pending.isDisposed());
        assertFalse(pending.areTasksFullyAcknowledged());

        // the decline from the other task is expected to throw
        final DeclineCheckpoint decline =
                new DeclineCheckpoint(
                        executionGraph.getJobID(),
                        decliningAttempt,
                        pendingId,
                        new CheckpointException(CHECKPOINT_DECLINED));
        coordinator.receiveDeclineMessage(decline, TASK_MANAGER_LOCATION_INFO);

        // reaching this line means no exception was raised
        fail("Test failed.");
    } catch (Exception e) {
        ExceptionUtils.assertThrowableWithMessage(e, expectedMessage);
    } finally {
        coordinator.shutdown();
    }
}
Also used : DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) TriFunctionWithException(org.apache.flink.util.function.TriFunctionWithException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) RpcException(org.apache.flink.runtime.rpc.exceptions.RpcException) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Test(org.junit.Test)

Example 43 with AcknowledgeCheckpoint

use of org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint in project flink by apache.

the class CheckpointCoordinatorTriggeringTest method testTriggeringFullSnapshotAfterJobmasterFailover.

/**
 * Restores from a savepoint in NO_CLAIM mode, replaces the coordinator to imitate a job manager
 * failover, and checks that the next triggered checkpoint is a FULL_CHECKPOINT.
 */
@Test
public void testTriggeringFullSnapshotAfterJobmasterFailover() throws Exception {
    final CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway recorder =
            new CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway();
    final JobVertexID vertexId = new JobVertexID();
    final ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(vertexId)
                    .setTaskManagerGateway(recorder)
                    .build();
    final ExecutionVertex onlyVertex = executionGraph.getJobVertex(vertexId).getTaskVertices()[0];
    final ExecutionAttemptID attempt = onlyVertex.getCurrentExecutionAttempt().getAttemptId();

    // take a savepoint that the first coordinator restores from
    final CompletedCheckpoint restoredFrom = takeSavepoint(executionGraph, attempt);

    // the store and id counter are shared by both coordinators
    final StandaloneCompletedCheckpointStore sharedStore = new StandaloneCompletedCheckpointStore(1);
    final StandaloneCheckpointIDCounter sharedCounter = new StandaloneCheckpointIDCounter();

    // first coordinator: restore from the savepoint in NO_CLAIM mode, then go away
    final CheckpointCoordinator beforeFailover =
            createCheckpointCoordinator(executionGraph, sharedStore, sharedCounter);
    final SavepointRestoreSettings restoreSettings =
            SavepointRestoreSettings.forPath(
                    restoredFrom.getExternalPointer(), true, RestoreMode.NO_CLAIM);
    beforeFailover.restoreSavepoint(
            restoreSettings, executionGraph.getAllVertices(), getClass().getClassLoader());
    beforeFailover.shutdown();

    // imitate job manager failover: a new coordinator over the same store and counter
    recorder.resetCount();
    final CheckpointCoordinator afterFailover =
            createCheckpointCoordinator(executionGraph, sharedStore, sharedCounter);
    afterFailover.restoreLatestCheckpointedStateToAll(
            new HashSet<>(executionGraph.getAllVertices().values()), true);
    afterFailover.startCheckpointScheduler();

    final CompletableFuture<CompletedCheckpoint> triggered = afterFailover.triggerCheckpoint(true);
    manuallyTriggeredScheduledExecutor.triggerAll();
    // NOTE(review): the acknowledged checkpoint id is hard-coded to 2 - presumably id 1 was
    // used by the savepoint taken above; confirm against takeSavepoint
    afterFailover.receiveAcknowledgeMessage(
            new AcknowledgeCheckpoint(executionGraph.getJobID(), attempt, 2),
            TASK_MANAGER_LOCATION_INFO);
    triggered.get();

    assertThat(
            recorder.getOnlyTriggeredCheckpoint(attempt).checkpointOptions.getCheckpointType(),
            is(CheckpointType.FULL_CHECKPOINT));
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Test(org.junit.Test)

Example 44 with AcknowledgeCheckpoint

use of org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint in project flink by apache.

the class CheckpointCoordinatorTriggeringTest method testMinTimeBetweenCheckpointsInterval.

/**
 * Verifies that, after a checkpoint has been acknowledged, the next checkpoint is not triggered
 * before the configured minimum pause between checkpoints has elapsed.
 */
@Test
public void testMinTimeBetweenCheckpointsInterval() throws Exception {
    final long delay = 50;
    final long checkpointInterval = 12;

    final JobVertexID vertexId = new JobVertexID();
    final CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway recorder =
            new CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway();
    final ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(vertexId)
                    .setTaskManagerGateway(recorder)
                    .build();
    final ExecutionVertex onlyVertex = executionGraph.getJobVertex(vertexId).getTaskVertices()[0];
    final ExecutionAttemptID attempt = onlyVertex.getCurrentExecutionAttempt().getAttemptId();

    final CheckpointCoordinatorConfiguration coordinatorConfig =
            new CheckpointCoordinatorConfigurationBuilder()
                    // periodic interval is 12 ms
                    .setCheckpointInterval(checkpointInterval)
                    // timeout is very long (200 s)
                    .setCheckpointTimeout(200_000)
                    // 50 ms delay between checkpoints
                    .setMinPauseBetweenCheckpoints(delay)
                    .setMaxConcurrentCheckpoints(1)
                    .build();
    final CheckpointCoordinator coordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(executionGraph)
                    .setCheckpointCoordinatorConfiguration(coordinatorConfig)
                    .setCompletedCheckpointStore(new StandaloneCompletedCheckpointStore(2))
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .build();

    try {
        coordinator.startCheckpointScheduler();
        manuallyTriggeredScheduledExecutor.triggerPeriodicScheduledTasks();
        manuallyTriggeredScheduledExecutor.triggerAll();

        // the first triggered checkpoint must carry id 1
        final long firstId = recorder.getTriggeredCheckpoints(attempt).get(0).checkpointId;
        assertEquals(1L, firstId);

        // tell the coordinator that the checkpoint is done and remember when that happened
        final AcknowledgeCheckpoint firstAck =
                new AcknowledgeCheckpoint(executionGraph.getJobID(), attempt, 1L);
        final long ackNanos = System.nanoTime();
        coordinator.receiveAcknowledgeMessage(firstAck, TASK_MANAGER_LOCATION_INFO);
        recorder.resetCount();

        // keep driving the manual executor until the next checkpoint shows up, sleeping one
        // checkpoint interval between rounds to simulate periodic scheduling
        for (;;) {
            manuallyTriggeredScheduledExecutor.triggerPeriodicScheduledTasks();
            manuallyTriggeredScheduledExecutor.triggerAll();
            if (!recorder.getTriggeredCheckpoints(attempt).isEmpty()) {
                break;
            }
            Thread.sleep(checkpointInterval);
        }

        final long secondId = recorder.getTriggeredCheckpoints(attempt).get(0).checkpointId;
        final long secondTriggerNanos = System.nanoTime();
        assertEquals(2L, secondId);

        final long delayMillis = (secondTriggerNanos - ackNanos) / 1_000_000;
        // one extra millisecond absorbs the rounding of the nanosecond difference
        if (delayMillis + 1 < delay) {
            fail("checkpoint came too early: delay was " + delayMillis + " but should have been at least " + delay);
        }
    } finally {
        coordinator.stopCheckpointScheduler();
        coordinator.shutdown();
    }
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) CheckpointCoordinatorConfigurationBuilder(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration.CheckpointCoordinatorConfigurationBuilder) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Test(org.junit.Test)

Example 45 with AcknowledgeCheckpoint

use of org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint in project flink by apache.

the class CheckpointCoordinatorMasterHooksTest method testHooksAreCalledOnTrigger.

// ------------------------------------------------------------------------
// trigger / restore behavior
// ------------------------------------------------------------------------
/**
 * Registers two hooks that return state and one that has no stubbed state, triggers a
 * checkpoint, and checks that every hook was invoked exactly once and that the completed
 * checkpoint holds the serialized state of the two stateful hooks.
 */
@Test
public void testHooksAreCalledOnTrigger() throws Exception {
    // identifiers and the state each stateful hook hands back on trigger
    final String stringHookId = "id1";
    final String longHookId = "id2";
    final String stringState = "the-test-string-state";
    final byte[] expectedStringBytes = new StringSerializer().serialize(stringState);
    final long longState = 987654321L;
    final byte[] expectedLongBytes = new LongSerializer().serialize(longState);

    final MasterTriggerRestoreHook<String> stringHook = mockGeneric(MasterTriggerRestoreHook.class);
    when(stringHook.getIdentifier()).thenReturn(stringHookId);
    when(stringHook.createCheckpointDataSerializer()).thenReturn(new StringSerializer());
    when(stringHook.triggerCheckpoint(anyLong(), anyLong(), any(Executor.class)))
            .thenReturn(CompletableFuture.completedFuture(stringState));

    final MasterTriggerRestoreHook<Long> longHook = mockGeneric(MasterTriggerRestoreHook.class);
    when(longHook.getIdentifier()).thenReturn(longHookId);
    when(longHook.createCheckpointDataSerializer()).thenReturn(new LongSerializer());
    when(longHook.triggerCheckpoint(anyLong(), anyLong(), any(Executor.class)))
            .thenReturn(CompletableFuture.completedFuture(longState));

    final MasterTriggerRestoreHook<Void> hookWithoutState = mockGeneric(MasterTriggerRestoreHook.class);
    when(hookWithoutState.getIdentifier()).thenReturn("some-id");

    // build the coordinator over a single-vertex graph and register the hooks
    final JobVertexID vertexId = new JobVertexID();
    final ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(vertexId)
                    .build();
    final ManuallyTriggeredScheduledExecutor timer = new ManuallyTriggeredScheduledExecutor();
    final CheckpointCoordinator coordinator = instantiateCheckpointCoordinator(executionGraph, timer);
    coordinator.addMasterHook(stringHook);
    coordinator.addMasterHook(hookWithoutState);
    coordinator.addMasterHook(longHook);

    // trigger a checkpoint; it must be pending and every hook must have been called once
    final CompletableFuture<CompletedCheckpoint> triggerResult = coordinator.triggerCheckpoint(false);
    timer.triggerAll();
    assertFalse(triggerResult.isCompletedExceptionally());
    assertEquals(1, coordinator.getNumberOfPendingCheckpoints());
    verify(stringHook, times(1)).triggerCheckpoint(anyLong(), anyLong(), any(Executor.class));
    verify(longHook, times(1)).triggerCheckpoint(anyLong(), anyLong(), any(Executor.class));
    verify(hookWithoutState, times(1)).triggerCheckpoint(anyLong(), anyLong(), any(Executor.class));

    // acknowledge from the only task so that the checkpoint completes
    final ExecutionAttemptID attempt =
            executionGraph
                    .getJobVertex(vertexId)
                    .getTaskVertices()[0]
                    .getCurrentExecutionAttempt()
                    .getAttemptId();
    final long pendingId =
            coordinator.getPendingCheckpoints().values().iterator().next().getCheckpointId();
    final AcknowledgeCheckpoint ack =
            new AcknowledgeCheckpoint(executionGraph.getJobID(), attempt, pendingId);
    coordinator.receiveAcknowledgeMessage(ack, "Unknown location");
    assertEquals(0, coordinator.getNumberOfPendingCheckpoints());
    assertEquals(1, coordinator.getNumberOfRetainedSuccessfulCheckpoints());

    // exactly the two stateful hooks must have contributed master state
    final CompletedCheckpoint completed = coordinator.getCheckpointStore().getLatestCheckpoint();
    final Collection<MasterState> hookStates = completed.getMasterHookStates();
    assertEquals(2, hookStates.size());
    for (MasterState hookState : hookStates) {
        final String hookName = hookState.name();
        if (hookName.equals(stringHookId)) {
            assertArrayEquals(expectedStringBytes, hookState.bytes());
            assertEquals(StringSerializer.VERSION, hookState.version());
        } else if (hookName.equals(longHookId)) {
            assertArrayEquals(expectedLongBytes, hookState.bytes());
            assertEquals(LongSerializer.VERSION, hookState.version());
        } else {
            fail("unrecognized state name: " + hookState.name());
        }
    }
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) ScheduledExecutor(org.apache.flink.util.concurrent.ScheduledExecutor) Executor(java.util.concurrent.Executor) Mockito.anyLong(org.mockito.Mockito.anyLong) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) StringSerializer(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.StringSerializer) Test(org.junit.Test)

Aggregations

AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) 45, JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID) 35, Test (org.junit.Test) 33, ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex) 32, ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID) 29, ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph) 29, CheckpointCoordinatorBuilder (org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) 23, DeclineCheckpoint (org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) 18, JobID (org.apache.flink.api.common.JobID) 15, OperatorID (org.apache.flink.runtime.jobgraph.OperatorID) 14, KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange) 13, ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex) 12, HashMap (java.util.HashMap) 9, IOException (java.io.IOException) 8, ArrayList (java.util.ArrayList) 8, StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle) 8, KeyGroupsStateHandle (org.apache.flink.runtime.state.KeyGroupsStateHandle) 7, ByteStreamStateHandle (org.apache.flink.runtime.state.memory.ByteStreamStateHandle) 7, ManuallyTriggeredScheduledExecutor (org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) 7, HashSet (java.util.HashSet) 6