use of org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder in project flink by apache.
the class CheckpointCoordinatorTest method testSavepointScheduledInUnalignedMode.
/**
 * Checks how a savepoint request is scheduled when unaligned checkpoints are enabled and the
 * coordinator permits a single concurrent checkpoint.
 *
 * <p>The test submits ten checkpoint requests, adds a savepoint request on top of them, declines
 * checkpoint 1 and finally asserts that the only pending checkpoint is a savepoint whose
 * properties do not force the checkpoint.
 */
@Test
public void testSavepointScheduledInUnalignedMode() throws Exception {
    final int maxConcurrentCheckpoints = 1;
    final int checkpointRequestsToSend = 10;
    ExecutionGraph graph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(new JobVertexID())
                    .build();
    CheckpointCoordinator coordinator =
            new CheckpointCoordinatorBuilder()
                    .setCheckpointCoordinatorConfiguration(
                            CheckpointCoordinatorConfiguration.builder()
                                    .setUnalignedCheckpointsEnabled(true)
                                    .setMaxConcurrentCheckpoints(maxConcurrentCheckpoints)
                                    .build())
                    .setExecutionGraph(graph)
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .build();
    try {
        List<Future<?>> checkpointFutures = new ArrayList<>(checkpointRequestsToSend);
        coordinator.startCheckpointScheduler();
        for (int sent = 0; sent < checkpointRequestsToSend; sent++) {
            checkpointFutures.add(coordinator.triggerCheckpoint(true));
        }
        // every request sent so far is accounted for
        int activeRequests = checkpointRequestsToSend;
        assertEquals(
                activeRequests - maxConcurrentCheckpoints, coordinator.getNumQueuedRequests());

        Future<?> savepointFuture =
                coordinator.triggerSavepoint("/tmp", SavepointFormatType.CANONICAL);
        manuallyTriggeredScheduledExecutor.triggerAll();
        // the savepoint request is counted in addition to the checkpoint requests
        activeRequests++;
        assertEquals(
                activeRequests - maxConcurrentCheckpoints, coordinator.getNumQueuedRequests());

        DeclineCheckpoint decline =
                new DeclineCheckpoint(
                        graph.getJobID(),
                        new ExecutionAttemptID(),
                        1L,
                        new CheckpointException(CHECKPOINT_DECLINED));
        coordinator.receiveDeclineMessage(decline, "none");
        manuallyTriggeredScheduledExecutor.triggerAll();

        // the savepoint has been triggered, so one request less is outstanding
        activeRequests--;
        assertEquals(
                activeRequests - maxConcurrentCheckpoints, coordinator.getNumQueuedRequests());
        long finishedCheckpoints = checkpointFutures.stream().filter(Future::isDone).count();
        assertEquals(1, finishedCheckpoints);
        assertFalse(savepointFuture.isDone());
        assertEquals(maxConcurrentCheckpoints, coordinator.getNumberOfPendingCheckpoints());

        CheckpointProperties props =
                coordinator.getPendingCheckpoints().values().iterator().next().getProps();
        assertTrue(props.isSavepoint());
        assertFalse(props.forceCheckpoint());
    } finally {
        coordinator.shutdown();
    }
}
use of org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder in project flink by apache.
the class CheckpointCoordinatorTest method testStateCleanupForLateOrUnknownMessages.
/**
 * Tests that the state carried by late acknowledge messages is discarded. It also checks that
 * state from acknowledge messages of unknown execution attempts of the same job is discarded,
 * whereas state from messages of other jobs is left untouched. A late acknowledge message is an
 * acknowledge message that arrives after the checkpoint has been declined.
 *
 * @throws Exception if setting up the execution graph or driving the coordinator fails
 */
@Test
public void testStateCleanupForLateOrUnknownMessages() throws Exception {
    JobVertexID triggerVertexId = new JobVertexID();
    JobVertexID ackVertexId = new JobVertexID();
    CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway gateway =
            new CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway();
    ExecutionGraph graph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(triggerVertexId)
                    .addJobVertex(ackVertexId, false)
                    .setTaskManagerGateway(gateway)
                    .build();
    ExecutionVertex triggerVertex = graph.getJobVertex(triggerVertexId).getTaskVertices()[0];
    ExecutionVertex ackVertex = graph.getJobVertex(ackVertexId).getTaskVertices()[0];
    ExecutionAttemptID triggerAttempt = triggerVertex.getCurrentExecutionAttempt().getAttemptId();
    ExecutionAttemptID ackAttempt = ackVertex.getCurrentExecutionAttempt().getAttemptId();

    CheckpointCoordinatorConfiguration singleCheckpointConfig =
            new CheckpointCoordinatorConfiguration.CheckpointCoordinatorConfigurationBuilder()
                    .setMaxConcurrentCheckpoints(1)
                    .build();
    CheckpointCoordinator checkpointCoordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(graph)
                    .setCheckpointCoordinatorConfiguration(singleCheckpointConfig)
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .build();

    final CompletableFuture<CompletedCheckpoint> checkpointFuture =
            checkpointCoordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    FutureUtils.throwIfCompletedExceptionally(checkpointFuture);
    assertEquals(1, checkpointCoordinator.getNumberOfPendingCheckpoints());

    PendingCheckpoint pendingCheckpoint =
            checkpointCoordinator.getPendingCheckpoints().values().iterator().next();
    long checkpointId = pendingCheckpoint.getCheckpointId();
    JobID jobId = graph.getJobID();

    OperatorID triggerOperatorId =
            triggerVertex.getJobVertex().getOperatorIDs().get(0).getGeneratedOperatorID();
    TaskStateSnapshot triggerSnapshot = spy(new TaskStateSnapshot());
    OperatorSubtaskState triggerOperatorState = mock(OperatorSubtaskState.class);
    triggerSnapshot.putSubtaskStateByOperatorID(triggerOperatorId, triggerOperatorState);

    // first acknowledgement from the trigger vertex: its state must be kept
    AcknowledgeCheckpoint firstAck =
            new AcknowledgeCheckpoint(
                    jobId, triggerAttempt, checkpointId, new CheckpointMetrics(), triggerSnapshot);
    checkpointCoordinator.receiveAcknowledgeMessage(firstAck, TASK_MANAGER_LOCATION_INFO);
    verify(triggerOperatorState, never()).discardState();

    // acknowledgement from an unknown execution attempt of our job: its state is discarded
    TaskStateSnapshot unknownAttemptState = mock(TaskStateSnapshot.class);
    AcknowledgeCheckpoint unknownAttemptAck =
            new AcknowledgeCheckpoint(
                    jobId,
                    new ExecutionAttemptID(),
                    checkpointId,
                    new CheckpointMetrics(),
                    unknownAttemptState);
    checkpointCoordinator.receiveAcknowledgeMessage(
            unknownAttemptAck, TASK_MANAGER_LOCATION_INFO);
    verify(unknownAttemptState, times(1)).discardState();

    // acknowledgement that belongs to a different job: we must not touch its state
    TaskStateSnapshot otherJobState = mock(TaskStateSnapshot.class);
    AcknowledgeCheckpoint otherJobAck =
            new AcknowledgeCheckpoint(
                    new JobID(),
                    new ExecutionAttemptID(),
                    checkpointId,
                    new CheckpointMetrics(),
                    otherJobState);
    checkpointCoordinator.receiveAcknowledgeMessage(otherJobAck, TASK_MANAGER_LOCATION_INFO);
    verify(otherJobState, never()).discardState();

    // duplicate acknowledgement from the trigger vertex: its state is not discarded
    TaskStateSnapshot duplicateAckState = mock(TaskStateSnapshot.class);
    AcknowledgeCheckpoint duplicateAck =
            new AcknowledgeCheckpoint(
                    jobId,
                    triggerAttempt,
                    checkpointId,
                    new CheckpointMetrics(),
                    duplicateAckState);
    checkpointCoordinator.receiveAcknowledgeMessage(duplicateAck, TASK_MANAGER_LOCATION_INFO);
    verify(duplicateAckState, never()).discardState();

    // decline the checkpoint from the vertex that already acknowledged it
    reset(triggerOperatorState);
    DeclineCheckpoint decline =
            new DeclineCheckpoint(
                    jobId,
                    triggerAttempt,
                    checkpointId,
                    new CheckpointException(CHECKPOINT_DECLINED));
    checkpointCoordinator.receiveDeclineMessage(decline, TASK_MANAGER_LOCATION_INFO);
    assertTrue(pendingCheckpoint.isDisposed());
    // the state acknowledged before the decline has been cleaned up
    verify(triggerOperatorState, times(1)).discardState();

    // late acknowledgement from the second vertex: its state is discarded as well
    TaskStateSnapshot lateAckState = mock(TaskStateSnapshot.class);
    AcknowledgeCheckpoint lateAck =
            new AcknowledgeCheckpoint(
                    jobId, ackAttempt, checkpointId, new CheckpointMetrics(), lateAckState);
    checkpointCoordinator.receiveAcknowledgeMessage(lateAck, TASK_MANAGER_LOCATION_INFO);
    verify(lateAckState, times(1)).discardState();

    // after the decline, messages of other jobs are still left alone
    reset(otherJobState);
    AcknowledgeCheckpoint lateOtherJobAck =
            new AcknowledgeCheckpoint(
                    new JobID(),
                    new ExecutionAttemptID(),
                    checkpointId,
                    new CheckpointMetrics(),
                    otherJobState);
    checkpointCoordinator.receiveAcknowledgeMessage(lateOtherJobAck, TASK_MANAGER_LOCATION_INFO);
    verify(otherJobState, never()).discardState();

    // after the decline, state from unknown attempts of our job is still discarded
    TaskStateSnapshot lateUnknownAttemptState = mock(TaskStateSnapshot.class);
    AcknowledgeCheckpoint lateUnknownAttemptAck =
            new AcknowledgeCheckpoint(
                    jobId,
                    new ExecutionAttemptID(),
                    checkpointId,
                    new CheckpointMetrics(),
                    lateUnknownAttemptState);
    checkpointCoordinator.receiveAcknowledgeMessage(
            lateUnknownAttemptAck, TASK_MANAGER_LOCATION_INFO);
    verify(lateUnknownAttemptState, times(1)).discardState();
}
use of org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder in project flink by apache.
the class CheckpointCoordinatorTest method testCompleteCheckpointFailureWithExternallyInducedSource.
/**
 * Exercises the failure path of checkpoint completion when all task acknowledgements arrive from
 * inside a master hook, i.e. before the master state snapshot has finished.
 *
 * <p>The checkpoint storage used here throws an {@link IOException} when the metadata output
 * stream is created, so the test asserts that the checkpoint future completes exceptionally and
 * that no successful checkpoint is recorded.
 */
@Test
public void testCompleteCheckpointFailureWithExternallyInducedSource() throws Exception {
    JobVertexID firstVertexId = new JobVertexID();
    JobVertexID secondVertexId = new JobVertexID();
    ExecutionGraph graph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(firstVertexId)
                    .addJobVertex(secondVertexId)
                    .build();
    ExecutionVertex firstVertex = graph.getJobVertex(firstVertexId).getTaskVertices()[0];
    ExecutionVertex secondVertex = graph.getJobVertex(secondVertexId).getTaskVertices()[0];
    ExecutionAttemptID firstAttempt = firstVertex.getCurrentExecutionAttempt().getAttemptId();
    ExecutionAttemptID secondAttempt = secondVertex.getCurrentExecutionAttempt().getAttemptId();
    OperatorID firstOperatorId =
            firstVertex.getJobVertex().getOperatorIDs().get(0).getGeneratedOperatorID();
    OperatorID secondOperatorId =
            secondVertex.getJobVertex().getOperatorIDs().get(0).getGeneratedOperatorID();

    TaskStateSnapshot firstSnapshot = new TaskStateSnapshot();
    TaskStateSnapshot secondSnapshot = new TaskStateSnapshot();
    OperatorSubtaskState firstOperatorState = OperatorSubtaskState.builder().build();
    OperatorSubtaskState secondOperatorState = OperatorSubtaskState.builder().build();
    firstSnapshot.putSubtaskStateByOperatorID(firstOperatorId, firstOperatorState);
    secondSnapshot.putSubtaskStateByOperatorID(secondOperatorId, secondOperatorState);

    // Operator coordinator context that records the call and completes its checkpoint
    // right away.
    AtomicBoolean coordCheckpointDone = new AtomicBoolean(false);
    OperatorCoordinatorCheckpointContext coordinatorCheckpointContext =
            new CheckpointCoordinatorTestingUtils.MockOperatorCheckpointCoordinatorContextBuilder()
                    .setOnCallingCheckpointCoordinator(
                            (id, resultFuture) -> {
                                coordCheckpointDone.set(true);
                                resultFuture.complete(new byte[0]);
                            })
                    .setOperatorID(firstOperatorId)
                    .build();

    // Checkpoint storage whose metadata output stream can never be created, which makes
    // finalizing the checkpoint fail.
    JobManagerCheckpointStorage failingStorage =
            new JobManagerCheckpointStorage() {
                private static final long serialVersionUID = 8134582566514272546L;

                @Override
                public CheckpointStorageAccess createCheckpointStorage(JobID jobId)
                        throws IOException {
                    return new MemoryBackendCheckpointStorageAccess(jobId, null, null, 100) {
                        @Override
                        public CheckpointStorageLocation initializeLocationForCheckpoint(
                                long checkpointId) throws IOException {
                            return new NonPersistentMetadataCheckpointStorageLocation(1000) {
                                @Override
                                public CheckpointMetadataOutputStream createMetadataOutputStream()
                                        throws IOException {
                                    throw new IOException("Artificial Exception");
                                }
                            };
                        }
                    };
                }
            };

    // set up the coordinator
    CheckpointCoordinator checkpointCoordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(graph)
                    .setCheckpointCoordinatorConfiguration(
                            CheckpointCoordinatorConfiguration.builder()
                                    .setMaxConcurrentCheckpoints(Integer.MAX_VALUE)
                                    .build())
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .setCoordinatorsToCheckpoint(
                            Collections.singleton(coordinatorCheckpointContext))
                    .setCheckpointStorage(failingStorage)
                    .build();

    AtomicReference<Long> checkpointIdRef = new AtomicReference<>();

    // Master hook that acknowledges both task checkpoints from within its trigger call, so
    // the task checkpoints are complete before the master state snapshot is.
    MasterTriggerRestoreHook<Integer> acknowledgingHook =
            new MasterTriggerRestoreHook<Integer>() {
                @Override
                public String getIdentifier() {
                    return "anything";
                }

                @Override
                @Nullable
                public CompletableFuture<Integer> triggerCheckpoint(
                        long checkpointId, long timestamp, Executor executor) throws Exception {
                    assertTrue(
                            "The coordinator checkpoint should have finished.",
                            coordCheckpointDone.get());
                    checkpointIdRef.set(checkpointId);
                    AcknowledgeCheckpoint firstAck =
                            new AcknowledgeCheckpoint(
                                    graph.getJobID(),
                                    firstAttempt,
                                    checkpointId,
                                    new CheckpointMetrics(),
                                    firstSnapshot);
                    AcknowledgeCheckpoint secondAck =
                            new AcknowledgeCheckpoint(
                                    graph.getJobID(),
                                    secondAttempt,
                                    checkpointId,
                                    new CheckpointMetrics(),
                                    secondSnapshot);
                    checkpointCoordinator.receiveAcknowledgeMessage(
                            firstAck, TASK_MANAGER_LOCATION_INFO);
                    checkpointCoordinator.receiveAcknowledgeMessage(
                            secondAck, TASK_MANAGER_LOCATION_INFO);
                    return null;
                }

                @Override
                public void restoreCheckpoint(long checkpointId, Integer checkpointData)
                        throws Exception {}

                @Override
                public SimpleVersionedSerializer<Integer> createCheckpointDataSerializer() {
                    return new SimpleVersionedSerializer<Integer>() {
                        @Override
                        public int getVersion() {
                            return 0;
                        }

                        @Override
                        public byte[] serialize(Integer obj) throws IOException {
                            return new byte[0];
                        }

                        @Override
                        public Integer deserialize(int version, byte[] serialized)
                                throws IOException {
                            return 1;
                        }
                    };
                }
            };
    checkpointCoordinator.addMasterHook(acknowledgingHook);

    // Trigger the checkpoint. It is expected to fail, because the storage above throws
    // when the metadata output stream is created.
    final CompletableFuture<CompletedCheckpoint> checkpointFuture =
            checkpointCoordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    assertTrue(checkpointFuture.isCompletedExceptionally());
    assertTrue(checkpointCoordinator.getSuccessfulCheckpoints().isEmpty());
}
use of org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder in project flink by apache.
the class CheckpointCoordinatorTest method testSuccessfulCheckpointSubsumesUnsuccessful.
/**
 * Runs two overlapping checkpoints and completes only the second one.
 *
 * <p>The test asserts that completing the second checkpoint disposes the first pending
 * checkpoint, that the state acknowledged for the first checkpoint is discarded, and that the
 * state of the second checkpoint is discarded only once the coordinator and the completed
 * checkpoint store have been shut down.
 */
@Test
public void testSuccessfulCheckpointSubsumesUnsuccessful() throws Exception {
    JobVertexID vertexIdA = new JobVertexID();
    JobVertexID vertexIdB = new JobVertexID();
    JobVertexID vertexIdC = new JobVertexID();
    CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway gateway =
            new CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway();
    ExecutionGraph graph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(vertexIdA)
                    .addJobVertex(vertexIdB)
                    .addJobVertex(vertexIdC, false)
                    .setTaskManagerGateway(gateway)
                    .build();
    ExecutionVertex vertexA = graph.getJobVertex(vertexIdA).getTaskVertices()[0];
    ExecutionVertex vertexB = graph.getJobVertex(vertexIdB).getTaskVertices()[0];
    ExecutionVertex vertexC = graph.getJobVertex(vertexIdC).getTaskVertices()[0];
    ExecutionAttemptID attemptA = vertexA.getCurrentExecutionAttempt().getAttemptId();
    ExecutionAttemptID attemptB = vertexB.getCurrentExecutionAttempt().getAttemptId();
    ExecutionAttemptID attemptC = vertexC.getCurrentExecutionAttempt().getAttemptId();

    // create the coordinator and check that it starts out empty
    final StandaloneCompletedCheckpointStore completedCheckpointStore =
            new StandaloneCompletedCheckpointStore(10);
    CheckpointCoordinator checkpointCoordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(graph)
                    .setCheckpointCoordinatorConfiguration(
                            CheckpointCoordinatorConfiguration.builder()
                                    .setMaxConcurrentCheckpoints(Integer.MAX_VALUE)
                                    .build())
                    .setCompletedCheckpointStore(completedCheckpointStore)
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .build();
    assertEquals(0, checkpointCoordinator.getNumberOfPendingCheckpoints());
    assertEquals(0, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());

    // trigger the first checkpoint; triggering itself must succeed
    final CompletableFuture<CompletedCheckpoint> firstFuture =
            checkpointCoordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    FutureUtils.throwIfCompletedExceptionally(firstFuture);
    assertEquals(1, checkpointCoordinator.getNumberOfPendingCheckpoints());
    assertEquals(0, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());

    PendingCheckpoint firstPending =
            checkpointCoordinator.getPendingCheckpoints().values().iterator().next();
    long firstCheckpointId = firstPending.getCheckpointId();

    // the trigger messages for the first checkpoint should have been sent
    for (ExecutionVertex triggered : Arrays.asList(vertexA, vertexB)) {
        ExecutionAttemptID triggeredAttempt =
                triggered.getCurrentExecutionAttempt().getAttemptId();
        assertEquals(
                firstCheckpointId,
                gateway.getOnlyTriggeredCheckpoint(triggeredAttempt).checkpointId);
    }

    OperatorID operatorA =
            vertexA.getJobVertex().getOperatorIDs().get(0).getGeneratedOperatorID();
    OperatorID operatorB =
            vertexB.getJobVertex().getOperatorIDs().get(0).getGeneratedOperatorID();
    OperatorID operatorC =
            vertexC.getJobVertex().getOperatorIDs().get(0).getGeneratedOperatorID();

    // snapshots and operator states reported for the first checkpoint
    TaskStateSnapshot firstSnapshotA = spy(new TaskStateSnapshot());
    TaskStateSnapshot firstSnapshotB = spy(new TaskStateSnapshot());
    TaskStateSnapshot firstSnapshotC = spy(new TaskStateSnapshot());
    OperatorSubtaskState firstStateA = mock(OperatorSubtaskState.class);
    OperatorSubtaskState firstStateB = mock(OperatorSubtaskState.class);
    OperatorSubtaskState firstStateC = mock(OperatorSubtaskState.class);
    firstSnapshotA.putSubtaskStateByOperatorID(operatorA, firstStateA);
    firstSnapshotB.putSubtaskStateByOperatorID(operatorB, firstStateB);
    firstSnapshotC.putSubtaskStateByOperatorID(operatorC, firstStateC);

    // acknowledge the first checkpoint for one of the three tasks only
    checkpointCoordinator.receiveAcknowledgeMessage(
            new AcknowledgeCheckpoint(
                    graph.getJobID(),
                    attemptB,
                    firstCheckpointId,
                    new CheckpointMetrics(),
                    firstSnapshotB),
            TASK_MANAGER_LOCATION_INFO);

    // start the second checkpoint
    gateway.resetCount();
    final CompletableFuture<CompletedCheckpoint> secondFuture =
            checkpointCoordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    FutureUtils.throwIfCompletedExceptionally(secondFuture);
    assertEquals(2, checkpointCoordinator.getNumberOfPendingCheckpoints());
    assertEquals(0, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());

    // of the two pending checkpoints, pick the one that is not the first
    Iterator<PendingCheckpoint> pendingIterator =
            checkpointCoordinator.getPendingCheckpoints().values().iterator();
    PendingCheckpoint candidateOne = pendingIterator.next();
    PendingCheckpoint candidateTwo = pendingIterator.next();
    PendingCheckpoint secondPending;
    if (firstPending == candidateOne) {
        secondPending = candidateTwo;
    } else {
        secondPending = candidateOne;
    }
    long secondCheckpointId = secondPending.getCheckpointId();

    // snapshots and operator states reported for the second checkpoint
    TaskStateSnapshot secondSnapshotA = spy(new TaskStateSnapshot());
    TaskStateSnapshot secondSnapshotB = spy(new TaskStateSnapshot());
    TaskStateSnapshot secondSnapshotC = spy(new TaskStateSnapshot());
    OperatorSubtaskState secondStateA = mock(OperatorSubtaskState.class);
    OperatorSubtaskState secondStateB = mock(OperatorSubtaskState.class);
    OperatorSubtaskState secondStateC = mock(OperatorSubtaskState.class);
    secondSnapshotA.putSubtaskStateByOperatorID(operatorA, secondStateA);
    secondSnapshotB.putSubtaskStateByOperatorID(operatorB, secondStateB);
    secondSnapshotC.putSubtaskStateByOperatorID(operatorC, secondStateC);

    // the trigger messages for the second checkpoint should have been sent
    for (ExecutionVertex triggered : Arrays.asList(vertexA, vertexB)) {
        ExecutionAttemptID triggeredAttempt =
                triggered.getCurrentExecutionAttempt().getAttemptId();
        assertEquals(
                secondCheckpointId,
                gateway.getOnlyTriggeredCheckpoint(triggeredAttempt).checkpointId);
    }

    // Acknowledge one more task for the first checkpoint and all tasks for the second one.
    // The second checkpoint should then subsume the first.
    checkpointCoordinator.receiveAcknowledgeMessage(
            new AcknowledgeCheckpoint(
                    graph.getJobID(),
                    attemptC,
                    secondCheckpointId,
                    new CheckpointMetrics(),
                    secondSnapshotC),
            TASK_MANAGER_LOCATION_INFO);
    checkpointCoordinator.receiveAcknowledgeMessage(
            new AcknowledgeCheckpoint(
                    graph.getJobID(),
                    attemptA,
                    secondCheckpointId,
                    new CheckpointMetrics(),
                    secondSnapshotA),
            TASK_MANAGER_LOCATION_INFO);
    checkpointCoordinator.receiveAcknowledgeMessage(
            new AcknowledgeCheckpoint(
                    graph.getJobID(),
                    attemptA,
                    firstCheckpointId,
                    new CheckpointMetrics(),
                    firstSnapshotA),
            TASK_MANAGER_LOCATION_INFO);
    checkpointCoordinator.receiveAcknowledgeMessage(
            new AcknowledgeCheckpoint(
                    graph.getJobID(),
                    attemptB,
                    secondCheckpointId,
                    new CheckpointMetrics(),
                    secondSnapshotB),
            TASK_MANAGER_LOCATION_INFO);

    // Both pending checkpoints are disposed by now: the first was subsumed, the second was
    // turned into a successful checkpoint.
    assertTrue(firstPending.isDisposed());
    assertTrue(secondPending.isDisposed());
    assertEquals(0, checkpointCoordinator.getNumberOfPendingCheckpoints());
    assertEquals(1, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());

    // state received for the first checkpoint has been discarded
    verify(firstStateA, times(1)).discardState();
    verify(firstStateB, times(1)).discardState();

    // state of the second checkpoint is still retained
    verify(secondStateA, never()).discardState();
    verify(secondStateB, never()).discardState();
    verify(secondStateC, never()).discardState();

    // inspect the committed checkpoint
    List<CompletedCheckpoint> successfulCheckpoints =
            checkpointCoordinator.getSuccessfulCheckpoints();
    CompletedCheckpoint completed = successfulCheckpoints.get(0);
    assertEquals(secondCheckpointId, completed.getCheckpointID());
    assertEquals(graph.getJobID(), completed.getJobId());
    assertEquals(3, completed.getOperatorStates().size());

    // every task should have been notified about the completed second checkpoint
    for (ExecutionVertex notified : Arrays.asList(vertexA, vertexB, vertexC)) {
        ExecutionAttemptID notifiedAttempt =
                notified.getCurrentExecutionAttempt().getAttemptId();
        assertEquals(
                secondCheckpointId,
                gateway.getOnlyNotifiedCompletedCheckpoint(notifiedAttempt).checkpointId);
    }

    // the last outstanding ack for the first checkpoint arrives late; its state is discarded
    checkpointCoordinator.receiveAcknowledgeMessage(
            new AcknowledgeCheckpoint(
                    graph.getJobID(),
                    attemptC,
                    firstCheckpointId,
                    new CheckpointMetrics(),
                    firstSnapshotC),
            TASK_MANAGER_LOCATION_INFO);
    verify(firstStateC, times(1)).discardState();

    checkpointCoordinator.shutdown();
    completedCheckpointStore.shutdown(JobStatus.FINISHED, new CheckpointsCleaner());

    // after shutdown the state of the second checkpoint has been discarded too
    verify(secondStateA, times(1)).discardState();
    verify(secondStateB, times(1)).discardState();
    verify(secondStateC, times(1)).discardState();
}
use of org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder in project flink by apache.
the class CheckpointCoordinatorTriggeringTest method testPeriodicTriggering.
/**
 * Drives the checkpoint scheduler by hand through two start/stop cycles.
 *
 * <p>In each cycle the periodic tasks are fired five times and the recorded trigger messages
 * are checked. After the scheduler has been stopped, firing the executor again must not record
 * any further checkpoint.
 *
 * <p>An unexpected exception fails the test with the exception attached as the cause, and the
 * coordinator is shut down even when an assertion fails.
 */
@Test
public void testPeriodicTriggering() {
    try {
        final long start = System.currentTimeMillis();
        CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway gateway =
                new CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway();
        JobVertexID jobVertexID = new JobVertexID();
        ExecutionGraph graph =
                new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                        .addJobVertex(jobVertexID)
                        .setTaskManagerGateway(gateway)
                        .build();
        ExecutionVertex vertex = graph.getJobVertex(jobVertexID).getTaskVertices()[0];
        ExecutionAttemptID attemptID = vertex.getCurrentExecutionAttempt().getAttemptId();
        CheckpointCoordinatorConfiguration checkpointCoordinatorConfiguration =
                new CheckpointCoordinatorConfigurationBuilder()
                        // periodic interval is 10 ms
                        .setCheckpointInterval(10)
                        // timeout is very long (200 s)
                        .setCheckpointTimeout(200000)
                        .setMaxConcurrentCheckpoints(Integer.MAX_VALUE)
                        .build();
        CheckpointCoordinator checkpointCoordinator =
                new CheckpointCoordinatorBuilder()
                        .setExecutionGraph(graph)
                        .setCheckpointCoordinatorConfiguration(checkpointCoordinatorConfiguration)
                        .setCompletedCheckpointStore(new StandaloneCompletedCheckpointStore(2))
                        .setTimer(manuallyTriggeredScheduledExecutor)
                        .build();
        try {
            checkpointCoordinator.startCheckpointScheduler();
            for (int i = 0; i < 5; ++i) {
                manuallyTriggeredScheduledExecutor.triggerPeriodicScheduledTasks();
                manuallyTriggeredScheduledExecutor.triggerAll();
            }
            checkRecordedTriggeredCheckpoints(
                    5, start, gateway.getTriggeredCheckpoints(attemptID));
            checkpointCoordinator.stopCheckpointScheduler();

            // no further calls may come.
            manuallyTriggeredScheduledExecutor.triggerPeriodicScheduledTasks();
            manuallyTriggeredScheduledExecutor.triggerAll();
            assertEquals(5, gateway.getTriggeredCheckpoints(attemptID).size());

            // start another sequence of periodic scheduling
            gateway.resetCount();
            checkpointCoordinator.startCheckpointScheduler();
            for (int i = 0; i < 5; ++i) {
                manuallyTriggeredScheduledExecutor.triggerPeriodicScheduledTasks();
                manuallyTriggeredScheduledExecutor.triggerAll();
            }
            checkRecordedTriggeredCheckpoints(
                    5, start, gateway.getTriggeredCheckpoints(attemptID));
            checkpointCoordinator.stopCheckpointScheduler();

            // no further calls may come
            manuallyTriggeredScheduledExecutor.triggerPeriodicScheduledTasks();
            manuallyTriggeredScheduledExecutor.triggerAll();
            assertEquals(5, gateway.getTriggeredCheckpoints(attemptID).size());
        } finally {
            // shut down on every path, not only on success
            checkpointCoordinator.shutdown();
        }
    } catch (Exception e) {
        // Keep the original exception as the cause so its stack trace shows up in the test
        // report; a message-only failure would lose it and may even have a null message.
        throw new AssertionError("testPeriodicTriggering failed with an unexpected exception", e);
    }
}
Aggregations