use of org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder in project flink by apache.
the class CheckpointCoordinatorTest method testCheckpointStatsTrackerPendingCheckpointCallback.
/**
 * Verifies that triggering a checkpoint reports exactly one pending checkpoint (id 1, with
 * {@code NEVER_RETAIN_AFTER_TERMINATION} checkpoint properties) to the configured
 * {@link CheckpointStatsTracker}.
 */
@Test
public void testCheckpointStatsTrackerPendingCheckpointCallback() throws Exception {
    // coordinator wired to a mocked stats tracker and the manually driven timer
    CheckpointStatsTracker statsTracker = mock(CheckpointStatsTracker.class);
    CheckpointCoordinator coordinator =
            new CheckpointCoordinatorBuilder()
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .setCheckpointStatsTracker(statsTracker)
                    .build();

    // the tracker hands back a mocked stats object for whatever pending checkpoint is reported
    when(statsTracker.reportPendingCheckpoint(
                    anyLong(), anyLong(), any(CheckpointProperties.class), any(Map.class)))
            .thenReturn(mock(PendingCheckpointStats.class));

    // trigger one checkpoint and run the queued trigger work; the trigger must not have failed
    CompletableFuture<CompletedCheckpoint> triggerResult = coordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    FutureUtils.throwIfCompletedExceptionally(triggerResult);

    // the tracker must have been told about checkpoint 1 exactly once
    CheckpointProperties expectedProperties =
            CheckpointProperties.forCheckpoint(
                    CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION);
    verify(statsTracker, times(1))
            .reportPendingCheckpoint(eq(1L), any(Long.class), eq(expectedProperties), any());
}
use of org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder in project flink by apache.
the class CheckpointCoordinatorTest method testSavepointsAreNotSubsumed.
/**
 * Triggers a savepoint and two checkpoints. The second checkpoint completes and subsumes the
 * first checkpoint, but not the first savepoint. Then we trigger another checkpoint and
 * savepoint. The 2nd savepoint completes, but subsumes neither the still pending checkpoint
 * nor the first savepoint. Afterwards the first savepoint is acknowledged and completes as
 * well. Finally a further checkpoint is triggered and completed, and the test verifies the
 * checkpoint ids passed to {@code sendAcknowledgeMessages} for it.
 *
 * <p>Throughout the test it is verified that {@code sendAcknowledgeMessages} is never invoked
 * for either savepoint id.
 */
@Test
public void testSavepointsAreNotSubsumed() throws Exception {
// build a graph with two job vertices and remember the current attempt id of the first task
// vertex of each, so that acknowledgements can be sent on their behalf
JobVertexID jobVertexID1 = new JobVertexID();
JobVertexID jobVertexID2 = new JobVertexID();
ExecutionGraph graph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(jobVertexID1).addJobVertex(jobVertexID2).build();
ExecutionVertex vertex1 = graph.getJobVertex(jobVertexID1).getTaskVertices()[0];
ExecutionVertex vertex2 = graph.getJobVertex(jobVertexID2).getTaskVertices()[0];
ExecutionAttemptID attemptID1 = vertex1.getCurrentExecutionAttempt().getAttemptId();
ExecutionAttemptID attemptID2 = vertex2.getCurrentExecutionAttempt().getAttemptId();
// the counter is kept so the id of the most recently triggered checkpoint can be read back
StandaloneCheckpointIDCounter counter = new StandaloneCheckpointIDCounter();
// set up the coordinator and validate the initial state
// (wrapped in a spy so that calls to sendAcknowledgeMessages can be verified below;
// NOTE(review): StandaloneCompletedCheckpointStore(1) presumably retains a single completed
// checkpoint - confirm against the store's constructor)
CheckpointCoordinator checkpointCoordinator = spy(new CheckpointCoordinatorBuilder().setExecutionGraph(graph).setCheckpointCoordinatorConfiguration(CheckpointCoordinatorConfiguration.builder().setMaxConcurrentCheckpoints(Integer.MAX_VALUE).build()).setCheckpointIDCounter(counter).setCompletedCheckpointStore(new StandaloneCompletedCheckpointStore(1)).setTimer(manuallyTriggeredScheduledExecutor).build());
String savepointDir = tmpFolder.newFolder().getAbsolutePath();
// Trigger savepoint and checkpoint
CompletableFuture<CompletedCheckpoint> savepointFuture1 = checkpointCoordinator.triggerSavepoint(savepointDir, SavepointFormatType.CANONICAL);
manuallyTriggeredScheduledExecutor.triggerAll();
// id of the first savepoint, needed later to look it up and to acknowledge it
long savepointId1 = counter.getLast();
assertEquals(1, checkpointCoordinator.getNumberOfPendingCheckpoints());
CompletableFuture<CompletedCheckpoint> checkpointFuture1 = checkpointCoordinator.triggerCheckpoint(false);
manuallyTriggeredScheduledExecutor.triggerAll();
assertEquals(2, checkpointCoordinator.getNumberOfPendingCheckpoints());
FutureUtils.throwIfCompletedExceptionally(checkpointFuture1);
CompletableFuture<CompletedCheckpoint> checkpointFuture2 = checkpointCoordinator.triggerCheckpoint(false);
manuallyTriggeredScheduledExecutor.triggerAll();
FutureUtils.throwIfCompletedExceptionally(checkpointFuture2);
long checkpointId2 = counter.getLast();
// pending now: savepoint 1, checkpoint 1, checkpoint 2
assertEquals(3, checkpointCoordinator.getNumberOfPendingCheckpoints());
// 2nd checkpoint should subsume the 1st checkpoint, but not the savepoint
checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID1, checkpointId2), TASK_MANAGER_LOCATION_INFO);
checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID2, checkpointId2), TASK_MANAGER_LOCATION_INFO);
// no completed checkpoint before checkpointId2.
verify(checkpointCoordinator, times(1)).sendAcknowledgeMessages(anyList(), eq(checkpointId2), anyLong(), eq(INVALID_CHECKPOINT_ID));
// only the first savepoint is still pending; it is neither disposed nor completed
assertEquals(1, checkpointCoordinator.getNumberOfPendingCheckpoints());
assertEquals(1, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
assertFalse(checkpointCoordinator.getPendingCheckpoints().get(savepointId1).isDisposed());
assertFalse(savepointFuture1.isDone());
// trigger a third checkpoint and a second savepoint on top of the pending first savepoint
CompletableFuture<CompletedCheckpoint> checkpointFuture3 = checkpointCoordinator.triggerCheckpoint(false);
manuallyTriggeredScheduledExecutor.triggerAll();
FutureUtils.throwIfCompletedExceptionally(checkpointFuture3);
assertEquals(2, checkpointCoordinator.getNumberOfPendingCheckpoints());
CompletableFuture<CompletedCheckpoint> savepointFuture2 = checkpointCoordinator.triggerSavepoint(savepointDir, SavepointFormatType.CANONICAL);
manuallyTriggeredScheduledExecutor.triggerAll();
long savepointId2 = counter.getLast();
FutureUtils.throwIfCompletedExceptionally(savepointFuture2);
// pending now: savepoint 1, checkpoint 3, savepoint 2
assertEquals(3, checkpointCoordinator.getNumberOfPendingCheckpoints());
// savepoints should not subsume checkpoints
checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID1, savepointId2), TASK_MANAGER_LOCATION_INFO);
checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID2, savepointId2), TASK_MANAGER_LOCATION_INFO);
// we do not send notify checkpoint complete for savepoints
verify(checkpointCoordinator, times(0)).sendAcknowledgeMessages(anyList(), eq(savepointId2), anyLong(), anyLong());
// completing savepoint 2 removed only itself from the pending set: savepoint 1 and
// checkpoint 3 are still pending and the retained checkpoint count is unchanged
assertEquals(2, checkpointCoordinator.getNumberOfPendingCheckpoints());
assertEquals(1, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
assertFalse(checkpointCoordinator.getPendingCheckpoints().get(savepointId1).isDisposed());
assertFalse(savepointFuture1.isDone());
assertNotNull(savepointFuture2.get());
// Ack first savepoint
checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID1, savepointId1), TASK_MANAGER_LOCATION_INFO);
checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID2, savepointId1), TASK_MANAGER_LOCATION_INFO);
// we do not send notify checkpoint complete for savepoints
verify(checkpointCoordinator, times(0)).sendAcknowledgeMessages(anyList(), eq(savepointId1), anyLong(), anyLong());
// the first savepoint is now complete; one pending checkpoint is left
assertEquals(1, checkpointCoordinator.getNumberOfPendingCheckpoints());
assertEquals(1, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
assertNotNull(savepointFuture1.get());
// trigger and fully acknowledge a fourth checkpoint
CompletableFuture<CompletedCheckpoint> checkpointFuture4 = checkpointCoordinator.triggerCheckpoint(false);
manuallyTriggeredScheduledExecutor.triggerAll();
FutureUtils.throwIfCompletedExceptionally(checkpointFuture4);
long checkpointId4 = counter.getLast();
checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID1, checkpointId4), TASK_MANAGER_LOCATION_INFO);
checkpointCoordinator.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(graph.getJobID(), attemptID2, checkpointId4), TASK_MANAGER_LOCATION_INFO);
// checkpoint2 would be subsumed.
// (the last argument is expected to be checkpointId2, not the id of either savepoint that
// completed in between)
verify(checkpointCoordinator, times(1)).sendAcknowledgeMessages(anyList(), eq(checkpointId4), anyLong(), eq(checkpointId2));
}
use of org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder in project flink by apache.
the class CheckpointCoordinatorTriggeringTest method testTriggerCheckpointInitializationFailed.
/**
 * Verifies that a trigger which fails during initialization (caused here by the
 * {@code UnstableCheckpointIDCounter}) completes its future with a
 * {@code TRIGGER_CHECKPOINT_FAILURE} and leaves the coordinator able to run a subsequent
 * trigger without failure.
 */
@Test
public void testTriggerCheckpointInitializationFailed() throws Exception {
    // coordinator whose id counter is built with the predicate "id == 0"
    CheckpointCoordinator coordinator =
            new CheckpointCoordinatorBuilder()
                    .setCheckpointIDCounter(new UnstableCheckpointIDCounter(id -> id == 0))
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .build();
    coordinator.startCheckpointScheduler();

    // first trigger: in progress, nothing queued behind it
    final CompletableFuture<CompletedCheckpoint> firstTrigger =
            triggerPeriodicCheckpoint(coordinator);
    assertTrue(coordinator.isTriggering());
    assertEquals(0, coordinator.getTriggerRequestQueue().size());

    manuallyTriggeredScheduledExecutor.triggerAll();

    // the first trigger has to fail with a CheckpointException carrying the trigger failure reason
    try {
        firstTrigger.get();
        fail("This checkpoint should fail through UnstableCheckpointIDCounter");
    } catch (ExecutionException e) {
        final Optional<CheckpointException> failure =
                ExceptionUtils.findThrowable(e, CheckpointException.class);
        assertTrue(failure.isPresent());
        assertEquals(
                CheckpointFailureReason.TRIGGER_CHECKPOINT_FAILURE,
                failure.get().getCheckpointFailureReason());
    }

    // the failed trigger must not leave the coordinator stuck in the triggering state
    assertFalse(coordinator.isTriggering());
    assertEquals(0, coordinator.getTriggerRequestQueue().size());

    // second trigger: goes through the same steps but must not complete exceptionally
    final CompletableFuture<CompletedCheckpoint> secondTrigger =
            triggerPeriodicCheckpoint(coordinator);
    assertTrue(coordinator.isTriggering());

    manuallyTriggeredScheduledExecutor.triggerAll();

    assertFalse(secondTrigger.isCompletedExceptionally());
    assertFalse(coordinator.isTriggering());
    assertEquals(0, coordinator.getTriggerRequestQueue().size());
}
use of org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder in project flink by apache.
the class CheckpointCoordinatorTriggeringTest method testTriggerCheckpointRequestQueuedWithFailure.
/**
 * Verifies that trigger requests queued behind a failing trigger are still executed: the first
 * of three non-periodic triggers fails (caused here by the {@code UnstableCheckpointIDCounter}),
 * the two queued ones do not fail and both reach the task manager gateway.
 */
@Test
public void testTriggerCheckpointRequestQueuedWithFailure() throws Exception {
    // single-vertex graph whose gateway records the checkpoints it is asked to trigger
    JobVertexID jobVertexID = new JobVertexID();
    CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway gateway =
            new CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway();
    ExecutionGraph graph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(jobVertexID)
                    .setTaskManagerGateway(gateway)
                    .build();
    ExecutionAttemptID attemptID =
            graph.getJobVertex(jobVertexID)
                    .getTaskVertices()[0]
                    .getCurrentExecutionAttempt()
                    .getAttemptId();

    // coordinator whose id counter is built with the predicate "id == 0"
    CheckpointCoordinator coordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(graph)
                    .setCheckpointIDCounter(new UnstableCheckpointIDCounter(id -> id == 0))
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .build();
    coordinator.startCheckpointScheduler();

    // first (non-periodic) trigger: in progress, nothing queued yet
    final CompletableFuture<CompletedCheckpoint> firstTrigger =
            triggerNonPeriodicCheckpoint(coordinator);
    assertTrue(coordinator.isTriggering());
    assertEquals(0, coordinator.getTriggerRequestQueue().size());

    // two more triggers arrive while the first one is still in progress; both get queued
    final CompletableFuture<CompletedCheckpoint> secondTrigger =
            triggerNonPeriodicCheckpoint(coordinator);
    final CompletableFuture<CompletedCheckpoint> thirdTrigger =
            triggerNonPeriodicCheckpoint(coordinator);
    assertTrue(coordinator.isTriggering());
    assertEquals(2, coordinator.getTriggerRequestQueue().size());

    manuallyTriggeredScheduledExecutor.triggerAll();

    // the first triggered checkpoint fails by design through UnstableCheckpointIDCounter,
    // the queued ones must not
    assertTrue(firstTrigger.isCompletedExceptionally());
    assertFalse(secondTrigger.isCompletedExceptionally());
    assertFalse(thirdTrigger.isCompletedExceptionally());

    // the queue has been drained and both surviving triggers reached the task
    assertFalse(coordinator.isTriggering());
    assertEquals(0, coordinator.getTriggerRequestQueue().size());
    assertEquals(2, gateway.getTriggeredCheckpoints(attemptID).size());
}
use of org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder in project flink by apache.
the class CheckpointStateRestoreTest method testNonRestoredState.
/**
 * Tests the handling of the "allow non restored state" flag when restoring the latest
 * checkpoint.
 *
 * <p>A job vertex without any state in the checkpoint restores fine regardless of the flag.
 * Checkpointed state without a matching job vertex restores only when the flag is set;
 * otherwise an {@link IllegalStateException} is expected.
 */
@Test
public void testNonRestoredState() throws Exception {
    // --- (1) Create tasks to restore checkpoint with ---
    JobVertexID firstVertexId = new JobVertexID();
    JobVertexID secondVertexId = new JobVertexID();
    OperatorID firstOperatorId = OperatorID.fromJobVertexID(firstVertexId);

    // 1st JobVertex
    ExecutionVertex[] firstVertexTasks = {
        mockExecutionVertex(mockExecution(), firstVertexId, 0, 3),
        mockExecutionVertex(mockExecution(), firstVertexId, 1, 3),
        mockExecutionVertex(mockExecution(), firstVertexId, 2, 3)
    };
    // 2nd JobVertex
    ExecutionVertex[] secondVertexTasks = {
        mockExecutionVertex(mockExecution(), secondVertexId, 0, 2),
        mockExecutionVertex(mockExecution(), secondVertexId, 1, 2)
    };

    Set<ExecutionJobVertex> tasks = new HashSet<>();
    tasks.add(mockExecutionJobVertex(firstVertexId, firstVertexTasks));
    tasks.add(mockExecutionJobVertex(secondVertexId, secondVertexTasks));

    CheckpointCoordinator coordinator = new CheckpointCoordinatorBuilder().build();

    // --- (2) Checkpoint misses state for a jobVertex (should work) ---
    Map<OperatorID, OperatorState> operatorStates = new HashMap<>();
    OperatorState firstOperatorState = new OperatorState(firstOperatorId, 3, 3);
    for (int subtaskIndex = 0; subtaskIndex < 3; subtaskIndex++) {
        firstOperatorState.putState(subtaskIndex, OperatorSubtaskState.builder().build());
    }
    operatorStates.put(firstOperatorId, firstOperatorState);

    // the checkpoint receives its own copy of the state map, so later additions do not leak in
    CompletedCheckpoint checkpointWithoutSecondVertex =
            new CompletedCheckpoint(
                    new JobID(),
                    0,
                    1,
                    2,
                    new HashMap<>(operatorStates),
                    Collections.<MasterState>emptyList(),
                    CheckpointProperties.forCheckpoint(
                            CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION),
                    new TestCompletedCheckpointStorageLocation());
    coordinator
            .getCheckpointStore()
            .addCheckpointAndSubsumeOldestOne(
                    checkpointWithoutSecondVertex, new CheckpointsCleaner(), () -> {});

    // restoring succeeds with the flag off as well as on
    assertTrue(coordinator.restoreLatestCheckpointedStateToAll(tasks, false));
    assertTrue(coordinator.restoreLatestCheckpointedStateToAll(tasks, true));

    // --- (3) JobVertex missing for task state that is part of the checkpoint ---
    // There is no task for this operator
    JobVertexID unmatchedVertexId = new JobVertexID();
    OperatorID unmatchedOperatorId = OperatorID.fromJobVertexID(unmatchedVertexId);
    OperatorState unmatchedOperatorState = new OperatorState(unmatchedOperatorId, 1, 1);
    unmatchedOperatorState.putState(0, OperatorSubtaskState.builder().build());
    operatorStates.put(unmatchedOperatorId, unmatchedOperatorState);

    CompletedCheckpoint checkpointWithUnmatchedState =
            new CompletedCheckpoint(
                    new JobID(),
                    1,
                    2,
                    3,
                    new HashMap<>(operatorStates),
                    Collections.<MasterState>emptyList(),
                    CheckpointProperties.forCheckpoint(
                            CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION),
                    new TestCompletedCheckpointStorageLocation());
    coordinator
            .getCheckpointStore()
            .addCheckpointAndSubsumeOldestOne(
                    checkpointWithUnmatchedState, new CheckpointsCleaner(), () -> {});

    // (i) Allow non restored state (should succeed)
    assertTrue(coordinator.restoreLatestCheckpointedStateToAll(tasks, true));

    // (ii) Don't allow non restored state (should fail)
    try {
        coordinator.restoreLatestCheckpointedStateToAll(tasks, false);
        fail("Did not throw the expected Exception.");
    } catch (IllegalStateException ignored) {
        // expected: the unmatched state may not be dropped when the flag is off
    }
}
Aggregations