Use of org.apache.flink.runtime.checkpoint.hooks.TestMasterHook in the Apache Flink project.
Class DefaultSchedulerTest, method restoreStateWhenRestartingTasks.
/**
 * Checks that restarting a failed task restores state: one checkpoint is triggered and
 * acknowledged, the only execution attempt is reported as failed, and after the scheduled
 * restart tasks are triggered the registered master hook must report exactly one restore.
 */
@Test
public void restoreStateWhenRestartingTasks() throws Exception {
    // --- set up a single-vertex job with checkpointing and get its only task running
    final JobGraph graph = singleNonParallelJobVertexJobGraph();
    enableCheckpointing(graph);
    final CountDownLatch triggerLatch = getCheckpointTriggeredLatch();

    final DefaultScheduler scheduler = createSchedulerAndStartScheduling(graph);
    final ArchivedExecutionVertex soleVertex =
            Iterables.getOnlyElement(
                    scheduler
                            .requestJob()
                            .getArchivedExecutionGraph()
                            .getAllExecutionVertices());
    final ExecutionAttemptID runningAttempt =
            soleVertex.getCurrentExecutionAttempt().getAttemptId();
    transitionToRunning(scheduler, runningAttempt);

    // --- attach a master hook whose restore count is inspected at the end
    final CheckpointCoordinator coordinator = getCheckpointCoordinator(scheduler);
    final TestMasterHook restoreTrackingHook = TestMasterHook.fromId("testHook");
    coordinator.addMasterHook(restoreTrackingHook);

    // --- produce one completed checkpoint so that there is something to restore from;
    //     the pending checkpoint is only looked up after the trigger latch is released
    coordinator.triggerCheckpoint(false);
    triggerLatch.await();
    final long pendingCheckpointId =
            coordinator.getPendingCheckpoints().keySet().iterator().next();
    acknowledgePendingCheckpoint(scheduler, pendingCheckpointId);

    // --- fail the task and run the scheduled restart
    scheduler.updateTaskExecutionState(createFailedTaskExecutionState(runningAttempt));
    taskRestartExecutor.triggerScheduledTasks();

    assertThat(restoreTrackingHook.getRestoreCount(), is(equalTo(1)));
}
Use of org.apache.flink.runtime.checkpoint.hooks.TestMasterHook in the Apache Flink project.
Class DefaultSchedulerTest, method failGlobalWhenRestoringStateFails.
/**
 * Checks the scheduler's reaction when restoring state fails during a task restart. A master
 * hook is configured to fail on restore, one checkpoint is completed, and the only task is
 * failed. After the first round of scheduled restart tasks the vertex must have been deployed
 * only once; after restore failures are switched off and the scheduled tasks are triggered
 * again, the same vertex must appear a second time in the list of deployed vertices.
 */
@Test
public void failGlobalWhenRestoringStateFails() throws Exception {
    // --- set up a single-vertex job with checkpointing and get its only task running
    final JobGraph graph = singleNonParallelJobVertexJobGraph();
    final JobVertex soleJobVertex = getOnlyJobVertex(graph);
    enableCheckpointing(graph);
    final CountDownLatch triggerLatch = getCheckpointTriggeredLatch();

    final DefaultScheduler scheduler = createSchedulerAndStartScheduling(graph);
    final ArchivedExecutionVertex soleVertex =
            Iterables.getOnlyElement(
                    scheduler
                            .requestJob()
                            .getArchivedExecutionGraph()
                            .getAllExecutionVertices());
    final ExecutionAttemptID runningAttempt =
            soleVertex.getCurrentExecutionAttempt().getAttemptId();
    transitionToRunning(scheduler, runningAttempt);

    // --- attach a master hook that is told to fail when asked to restore
    final CheckpointCoordinator coordinator = getCheckpointCoordinator(scheduler);
    final TestMasterHook failingHook = TestMasterHook.fromId("testHook");
    failingHook.enableFailOnRestore();
    coordinator.addMasterHook(failingHook);

    // --- produce one completed checkpoint so that a restore is attempted on restart;
    //     the pending checkpoint is only looked up after the trigger latch is released
    coordinator.triggerCheckpoint(false);
    triggerLatch.await();
    final long pendingCheckpointId =
            coordinator.getPendingCheckpoints().keySet().iterator().next();
    acknowledgePendingCheckpoint(scheduler, pendingCheckpointId);

    // --- fail the task and run the scheduled restart (state restore fails here)
    scheduler.updateTaskExecutionState(createFailedTaskExecutionState(runningAttempt));
    taskRestartExecutor.triggerScheduledTasks();

    final ExecutionVertexID expectedVertexId =
            new ExecutionVertexID(soleJobVertex.getID(), 0);
    // fetched once on purpose: the very same list instance is asserted on twice below
    final List<ExecutionVertexID> deployed = testExecutionVertexOperations.getDeployedVertices();

    // the task failover was skipped because restoring state failed, so only the
    // initial deployment has been recorded so far
    assertThat(deployed, contains(expectedVertexId));

    // the restore failure should have triggered a global failure; with the hook no longer
    // failing, running the scheduled tasks again redeploys the vertex
    failingHook.disableFailOnRestore();
    taskRestartExecutor.triggerScheduledTasks();
    assertThat(deployed, contains(expectedVertexId, expectedVertexId));
}
Aggregations