Usage example of org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex in the Apache Flink project.
From class DefaultSchedulerLocalRecoveryITCase, method assertNonLocalRecoveredTasksEquals:
/**
 * Asserts that exactly {@code expected} tasks recovered non-locally, i.e. their
 * current execution attempt was assigned a different slot allocation than the
 * immediately preceding attempt.
 *
 * @param graph the archived execution graph to inspect
 * @param expected the expected number of non-locally recovered tasks
 */
private void assertNonLocalRecoveredTasksEquals(ArchivedExecutionGraph graph, int expected) {
    int nonLocalCount = 0;
    for (ArchivedExecutionVertex executionVertex : graph.getAllExecutionVertices()) {
        final int attemptNumber = executionVertex.getCurrentExecutionAttempt().getAttemptNumber();
        if (attemptNumber > 0) {
            // Only restarted tasks (attempt number > 0) have a prior attempt to compare against;
            // tasks that never restarted did not need to recover at all.
            final AllocationID previousAllocation =
                    executionVertex.getPriorExecutionAttempt(attemptNumber - 1).getAssignedAllocationID();
            final AllocationID latestAllocation =
                    executionVertex.getCurrentExecutionAttempt().getAssignedAllocationID();
            assertNotNull(previousAllocation);
            assertNotNull(latestAllocation);
            if (!latestAllocation.equals(previousAllocation)) {
                // Different allocation for the restarted attempt => recovery was not local.
                nonLocalCount++;
            }
        }
    }
    assertThat(nonLocalCount, is(expected));
}
Usage example of org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex in the Apache Flink project.
From class DefaultSchedulerTest, method failureInfoIsSetAfterTaskFailure:
@Test
public void failureInfoIsSetAfterTaskFailure() {
    // Schedule a single non-parallel vertex so there is exactly one execution attempt.
    final String exceptionMessage = "expected exception";
    final JobGraph jobGraph = singleNonParallelJobVertexJobGraph();
    final DefaultScheduler scheduler = createSchedulerAndStartScheduling(jobGraph);
    final ArchivedExecutionVertex soleVertex =
            Iterables.getOnlyElement(
                    scheduler.requestJob().getArchivedExecutionGraph().getAllExecutionVertices());
    final ExecutionAttemptID attemptId = soleVertex.getCurrentExecutionAttempt().getAttemptId();

    // Report the task as FAILED with a known exception message.
    scheduler.updateTaskExecutionState(
            new TaskExecutionState(
                    attemptId, ExecutionState.FAILED, new RuntimeException(exceptionMessage)));

    // The archived graph's failure info must be populated and carry the original message.
    final ErrorInfo failureInfo =
            scheduler.requestJob().getArchivedExecutionGraph().getFailureInfo();
    assertThat(failureInfo, is(notNullValue()));
    assertThat(failureInfo.getExceptionAsString(), containsString(exceptionMessage));
}
Usage example of org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex in the Apache Flink project.
From class DefaultSchedulerTest, method restoreStateWhenRestartingTasks:
@Test
public void restoreStateWhenRestartingTasks() throws Exception {
    // Set up a single-vertex job with checkpointing enabled.
    final JobGraph jobGraph = singleNonParallelJobVertexJobGraph();
    enableCheckpointing(jobGraph);
    final CountDownLatch triggerLatch = getCheckpointTriggeredLatch();
    final DefaultScheduler scheduler = createSchedulerAndStartScheduling(jobGraph);
    final ArchivedExecutionVertex soleVertex =
            Iterables.getOnlyElement(
                    scheduler.requestJob().getArchivedExecutionGraph().getAllExecutionVertices());
    final ExecutionAttemptID executionAttemptId =
            soleVertex.getCurrentExecutionAttempt().getAttemptId();
    transitionToRunning(scheduler, executionAttemptId);

    // Install a stateful master hook so we can observe whether state restore happens.
    final CheckpointCoordinator coordinator = getCheckpointCoordinator(scheduler);
    final TestMasterHook hook = TestMasterHook.fromId("testHook");
    coordinator.addMasterHook(hook);

    // Complete a checkpoint that a later restart can restore from.
    coordinator.triggerCheckpoint(false);
    triggerLatch.await();
    final long pendingCheckpointId =
            coordinator.getPendingCheckpoints().keySet().iterator().next();
    acknowledgePendingCheckpoint(scheduler, pendingCheckpointId);

    // Fail the task and run the scheduled restart; the hook must be asked to restore once.
    scheduler.updateTaskExecutionState(createFailedTaskExecutionState(executionAttemptId));
    taskRestartExecutor.triggerScheduledTasks();
    assertThat(hook.getRestoreCount(), is(equalTo(1)));
}
Usage example of org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex in the Apache Flink project.
From class DefaultSchedulerTest, method handleGlobalFailureWithLocalFailure:
/**
 * Covers a global fail-over that is immediately followed by a local task failure.
 * Besides checking the expected deployment order, this verifies that the assertion in
 * {@link SchedulerBase#restoreState}'s global recovery path is not tripped by the
 * version updates caused by the interleaved local failure.
 */
@Test
public void handleGlobalFailureWithLocalFailure() {
    final JobGraph jobGraph = singleJobVertexJobGraph(2);
    final JobVertex soleJobVertex = getOnlyJobVertex(jobGraph);
    enableCheckpointing(jobGraph);
    final DefaultScheduler scheduler = createSchedulerAndStartScheduling(jobGraph);

    // Collect the attempt ids of both subtasks of the only vertex.
    final Iterable<ArchivedExecutionVertex> vertices =
            scheduler.requestJob().getArchivedExecutionGraph().getAllExecutionVertices();
    final List<ExecutionAttemptID> attemptIds =
            StreamSupport.stream(vertices.spliterator(), false)
                    .map(vertex -> vertex.getCurrentExecutionAttempt().getAttemptId())
                    .collect(Collectors.toList());
    final ExecutionAttemptID locallyFailingAttemptId = attemptIds.get(0);

    scheduler.handleGlobalFailure(new Exception("global failure"));
    // A local failure arriving during the global fail-over must not disturb it.
    scheduler.updateTaskExecutionState(
            new TaskExecutionState(
                    locallyFailingAttemptId, ExecutionState.FAILED, new Exception("local failure")));
    for (ExecutionAttemptID attemptId : attemptIds) {
        scheduler.updateTaskExecutionState(
                new TaskExecutionState(attemptId, ExecutionState.CANCELED));
    }
    taskRestartExecutor.triggerScheduledTasks();

    final ExecutionVertexID executionVertexId0 = new ExecutionVertexID(soleJobVertex.getID(), 0);
    final ExecutionVertexID executionVertexId1 = new ExecutionVertexID(soleJobVertex.getID(), 1);
    assertThat(
            "The execution vertices should be deployed in a specific order reflecting the scheduling start and the global fail-over afterwards.",
            testExecutionVertexOperations.getDeployedVertices(),
            contains(executionVertexId0, executionVertexId1, executionVertexId0, executionVertexId1));
}
Usage example of org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex in the Apache Flink project.
From class DefaultSchedulerTest, method handleGlobalFailure:
@Test
public void handleGlobalFailure() {
    // Single non-parallel vertex: one subtask, one execution attempt.
    final JobGraph jobGraph = singleNonParallelJobVertexJobGraph();
    final JobVertex soleJobVertex = getOnlyJobVertex(jobGraph);
    final DefaultScheduler scheduler = createSchedulerAndStartScheduling(jobGraph);

    scheduler.handleGlobalFailure(new Exception("forced failure"));

    // Acknowledge cancellation of the (only) running attempt, then run the restart.
    final ArchivedExecutionVertex soleVertex =
            Iterables.getOnlyElement(
                    scheduler.requestJob().getArchivedExecutionGraph().getAllExecutionVertices());
    final ExecutionAttemptID attemptId = soleVertex.getCurrentExecutionAttempt().getAttemptId();
    scheduler.updateTaskExecutionState(
            new TaskExecutionState(attemptId, ExecutionState.CANCELED));
    taskRestartExecutor.triggerScheduledTasks();

    // The vertex must have been deployed twice: initial scheduling plus the global restart.
    final ExecutionVertexID expectedVertexId = new ExecutionVertexID(soleJobVertex.getID(), 0);
    final List<ExecutionVertexID> deployedExecutionVertices =
            testExecutionVertexOperations.getDeployedVertices();
    assertThat(deployedExecutionVertices, contains(expectedVertexId, expectedVertexId));
}
Aggregations