Use of org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex in the Apache Flink project.
Class DefaultSchedulerTest, method jobStatusIsRestartingIfOneVertexIsWaitingForRestart.
/**
 * Checks the job status reported while two failed subtasks are restarted one after another.
 *
 * <p>The status is sampled three times: right after the first subtask is reported as failed,
 * after the second subtask is reported as failed and one scheduled restart task has been
 * triggered, and after one more scheduled restart task has been triggered. The first two samples
 * are expected to be {@code RESTARTING}, the last one {@code RUNNING}.
 */
@Test
public void jobStatusIsRestartingIfOneVertexIsWaitingForRestart() {
    final DefaultScheduler scheduler =
            createSchedulerAndStartScheduling(singleJobVertexJobGraph(2));

    // Resolve the current attempt of each of the two subtasks.
    final Iterable<ArchivedExecutionVertex> vertices =
            scheduler.requestJob().getArchivedExecutionGraph().getAllExecutionVertices();
    final Iterator<ArchivedExecutionVertex> it = vertices.iterator();
    final ArchivedExecutionVertex firstVertex = it.next();
    final ExecutionAttemptID firstAttempt =
            firstVertex.getCurrentExecutionAttempt().getAttemptId();
    final ArchivedExecutionVertex secondVertex = it.next();
    final ExecutionAttemptID secondAttempt =
            secondVertex.getCurrentExecutionAttempt().getAttemptId();

    // First failure: sample the status before anything else happens.
    final TaskExecutionState firstFailure =
            new TaskExecutionState(
                    firstAttempt, ExecutionState.FAILED, new RuntimeException("expected"));
    scheduler.updateTaskExecutionState(firstFailure);
    final JobStatus statusAfterFirstFailure = scheduler.requestJobStatus();

    // Second failure, then run a single scheduled restart task.
    final TaskExecutionState secondFailure =
            new TaskExecutionState(
                    secondAttempt, ExecutionState.FAILED, new RuntimeException("expected"));
    scheduler.updateTaskExecutionState(secondFailure);
    taskRestartExecutor.triggerNonPeriodicScheduledTask();
    final JobStatus statusWithRestartStillPending = scheduler.requestJobStatus();

    // Run the remaining scheduled restart task.
    taskRestartExecutor.triggerNonPeriodicScheduledTask();
    final JobStatus statusAfterAllRestarts = scheduler.requestJobStatus();

    assertThat(statusAfterFirstFailure, equalTo(JobStatus.RESTARTING));
    assertThat(statusWithRestartStillPending, equalTo(JobStatus.RESTARTING));
    assertThat(statusAfterAllRestarts, equalTo(JobStatus.RUNNING));
}
Use of org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex in the Apache Flink project.
Class DefaultSchedulerTest, method abortPendingCheckpointsWhenRestartingTasks.
/**
 * Checks that a pending checkpoint is no longer pending once the only task has failed and the
 * scheduled restart tasks have run.
 *
 * <p>The test triggers one checkpoint on a running single-subtask job, waits for the trigger
 * latch, expects exactly one pending checkpoint, then reports the task as failed, runs the
 * scheduled tasks of the restart executor and expects zero pending checkpoints.
 *
 * @throws Exception if waiting on the latch or any scheduler interaction fails
 */
@Test
public void abortPendingCheckpointsWhenRestartingTasks() throws Exception {
    final JobGraph graph = singleNonParallelJobVertexJobGraph();
    enableCheckpointing(graph);

    // The latch is obtained before the scheduler is created, as in the original setup order.
    final CountDownLatch triggerLatch = getCheckpointTriggeredLatch();
    final DefaultScheduler scheduler = createSchedulerAndStartScheduling(graph);

    final ArchivedExecutionVertex soleVertex =
            Iterables.getOnlyElement(
                    scheduler.requestJob().getArchivedExecutionGraph().getAllExecutionVertices());
    final ExecutionAttemptID soleAttempt =
            soleVertex.getCurrentExecutionAttempt().getAttemptId();
    transitionToRunning(scheduler, soleAttempt);

    final CheckpointCoordinator coordinator = getCheckpointCoordinator(scheduler);
    coordinator.triggerCheckpoint(false);
    triggerLatch.await();
    final int pendingBeforeFailure = coordinator.getNumberOfPendingCheckpoints();
    assertThat(pendingBeforeFailure, is(equalTo(1)));

    // Fail the task and let the restart executor run its scheduled work.
    scheduler.updateTaskExecutionState(createFailedTaskExecutionState(soleAttempt));
    taskRestartExecutor.triggerScheduledTasks();
    final int pendingAfterRestart = coordinator.getNumberOfPendingCheckpoints();
    assertThat(pendingAfterRestart, is(equalTo(0)));
}
Use of org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex in the Apache Flink project.
Class DefaultSchedulerTest, method cancelWhileRestartingShouldWaitForRunningTasks.
/**
 * Checks how cancellation behaves when it is requested after one of two subtasks has failed.
 *
 * <p>After the first subtask is reported as failed and {@code cancel()} is called, the second
 * subtask is expected to be in {@code CANCELING} and the job in {@code CANCELLING}. Only after the
 * second subtask is reported as {@code CANCELED} is the job expected to be {@code CANCELED}.
 */
@Test
public void cancelWhileRestartingShouldWaitForRunningTasks() {
    final DefaultScheduler scheduler =
            createSchedulerAndStartScheduling(singleJobVertexJobGraph(2));
    final SchedulingTopology schedulingTopology = scheduler.getSchedulingTopology();

    // Resolve the current attempt of each of the two subtasks.
    final Iterable<ArchivedExecutionVertex> vertices =
            scheduler.requestJob().getArchivedExecutionGraph().getAllExecutionVertices();
    final Iterator<ArchivedExecutionVertex> it = vertices.iterator();
    final ArchivedExecutionVertex firstVertex = it.next();
    final ExecutionAttemptID failingAttempt =
            firstVertex.getCurrentExecutionAttempt().getAttemptId();
    final ArchivedExecutionVertex secondVertex = it.next();
    final ExecutionAttemptID survivingAttempt =
            secondVertex.getCurrentExecutionAttempt().getAttemptId();
    final ExecutionVertexID survivingVertexId =
            scheduler.getExecutionVertexIdOrThrow(survivingAttempt);

    // Fail the first subtask, then cancel the job.
    final TaskExecutionState failure =
            new TaskExecutionState(
                    failingAttempt, ExecutionState.FAILED, new RuntimeException("expected"));
    scheduler.updateTaskExecutionState(failure);
    scheduler.cancel();

    // Sample both the second subtask's state and the job status before acknowledging the cancel.
    final ExecutionState survivingVertexState =
            schedulingTopology.getVertex(survivingVertexId).getState();
    final JobStatus jobStatusWhileCancelling = scheduler.requestJobStatus();

    final TaskExecutionState cancellationAck =
            new TaskExecutionState(
                    survivingAttempt, ExecutionState.CANCELED, new RuntimeException("expected"));
    scheduler.updateTaskExecutionState(cancellationAck);

    assertThat(survivingVertexState, is(equalTo(ExecutionState.CANCELING)));
    assertThat(jobStatusWhileCancelling, is(equalTo(JobStatus.CANCELLING)));
    assertThat(scheduler.requestJobStatus(), is(equalTo(JobStatus.CANCELED)));
}
Use of org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex in the Apache Flink project.
Class DefaultSchedulerTest, method testExceptionHistoryWithRestartableFailure.
/**
 * Checks the exception history after a failure that is followed by a restart and a second
 * failure that happens once restarting has been disabled.
 *
 * <p>The history is expected to contain, in order, an entry matching the first failure (with the
 * failed vertex's task name and assigned resource location) and an entry matching a global
 * failure carrying the second exception.
 */
@Test
public void testExceptionHistoryWithRestartableFailure() {
    final JobGraph graph = singleNonParallelJobVertexJobGraph();

    // Hand out slots that are bound to a known task manager location.
    final TaskManagerLocation taskManagerLocation = new LocalTaskManagerLocation();
    final TestingLogicalSlotBuilder slotBuilder = new TestingLogicalSlotBuilder();
    slotBuilder.setTaskManagerLocation(taskManagerLocation);
    executionSlotAllocatorFactory = new TestExecutionSlotAllocatorFactory(slotBuilder);

    final DefaultScheduler scheduler = createSchedulerAndStartScheduling(graph);

    // First failure: restarting is still allowed at this point.
    final ArchivedExecutionVertex firstFailedVertex =
            Iterables.getOnlyElement(
                    scheduler.requestJob().getArchivedExecutionGraph().getAllExecutionVertices());
    final RuntimeException restartableException = new RuntimeException("restartable exception");
    final ExecutionAttemptID firstFailedAttempt =
            firstFailedVertex.getCurrentExecutionAttempt().getAttemptId();
    final long restartFailureTimestamp =
            initiateFailure(scheduler, firstFailedAttempt, restartableException);
    taskRestartExecutor.triggerNonPeriodicScheduledTask();

    // Second failure: restarting is switched off beforehand.
    testRestartBackoffTimeStrategy.setCanRestart(false);
    final ArchivedExecutionVertex vertexAfterRestart =
            Iterables.getOnlyElement(
                    scheduler.requestJob().getArchivedExecutionGraph().getAllExecutionVertices());
    final ExecutionAttemptID secondFailedAttempt =
            vertexAfterRestart.getCurrentExecutionAttempt().getAttemptId();
    final RuntimeException failingException = new RuntimeException("failing exception");
    final long jobFailureTimestamp =
            initiateFailure(scheduler, secondFailedAttempt, failingException);

    final Iterable<RootExceptionHistoryEntry> exceptionHistory = scheduler.getExceptionHistory();

    // Expected details of the first entry come from the vertex snapshot taken before the restart.
    final String expectedTaskName = firstFailedVertex.getTaskNameWithSubtaskIndex();
    final TaskManagerLocation expectedLocation =
            firstFailedVertex.getCurrentAssignedResourceLocation();

    assertThat(
            exceptionHistory,
            IsIterableContainingInOrder.contains(
                    ExceptionHistoryEntryMatcher.matchesFailure(
                            restartableException,
                            restartFailureTimestamp,
                            expectedTaskName,
                            expectedLocation),
                    ExceptionHistoryEntryMatcher.matchesGlobalFailure(
                            failingException, jobFailureTimestamp)));
}
Use of org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex in the Apache Flink project.
Class JobExceptionsHandlerTest, method createArchivedExecutionJobVertex.
/**
 * Builds an {@link ArchivedExecutionJobVertex} test fixture holding a single archived subtask.
 *
 * <p>The subtask has index 1 and task name {@code "test task"}. Its current execution is attempt
 * 2 in state {@code RUNNING}, carries an {@link ErrorInfo} wrapping a {@code RuntimeException}
 * stamped with the current time, and uses a freshly created task manager location and allocation
 * id. The job vertex uses {@code jobVertexID} both as id and, via {@code toString()}, as the
 * string argument that follows it.
 *
 * @param jobVertexID id assigned to the created job vertex
 * @return the archived job vertex fixture
 */
private static ArchivedExecutionJobVertex createArchivedExecutionJobVertex(JobVertexID jobVertexID) {
    final StringifiedAccumulatorResult[] vertexAccumulators = new StringifiedAccumulatorResult[0];
    // One timestamp slot per execution state; all slots keep their default value.
    final long[] stateTimestamps = new long[ExecutionState.values().length];
    final LocalTaskManagerLocation location = new LocalTaskManagerLocation();
    final AllocationID allocationId = new AllocationID();
    final int subtaskIndex = 1;
    final int attemptNumber = 2;

    // The execution gets its own empty accumulator array, separate from the job vertex's.
    final StringifiedAccumulatorResult[] executionAccumulators = new StringifiedAccumulatorResult[0];
    final ExecutionAttemptID attemptId = new ExecutionAttemptID();
    final ErrorInfo failure = new ErrorInfo(new RuntimeException("error"), System.currentTimeMillis());

    final ArchivedExecution currentExecution =
            new ArchivedExecution(
                    executionAccumulators,
                    null,
                    attemptId,
                    attemptNumber,
                    ExecutionState.RUNNING,
                    failure,
                    location,
                    allocationId,
                    subtaskIndex,
                    stateTimestamps);

    final ArchivedExecutionVertex subtask =
            new ArchivedExecutionVertex(
                    subtaskIndex, "test task", currentExecution, new EvictingBoundedList<>(0));

    // NOTE(review): the two 1s are presumably parallelism and max parallelism; confirm against
    // the ArchivedExecutionJobVertex constructor.
    return new ArchivedExecutionJobVertex(
            new ArchivedExecutionVertex[] {subtask},
            jobVertexID,
            jobVertexID.toString(),
            1,
            1,
            ResourceProfile.UNKNOWN,
            vertexAccumulators);
}
Aggregations