Search in sources :

Example 11 with ArchivedExecutionVertex

use of org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex in project flink by apache.

the class DefaultSchedulerTest method jobStatusIsRestartingIfOneVertexIsWaitingForRestart.

/**
 * Reports FAILED for the first two execution vertices of the scheduled job, one after the
 * other, and samples the job status at three points: right after the first failure, after the
 * first triggered restart task, and after the second triggered restart task.
 *
 * <p>The first two samples are expected to be RESTARTING and the last one RUNNING.
 */
@Test
public void jobStatusIsRestartingIfOneVertexIsWaitingForRestart() {
    final JobGraph graph = singleJobVertexJobGraph(2);
    final DefaultScheduler scheduler = createSchedulerAndStartScheduling(graph);

    // Grab the current attempt ids of the first two vertices before anything fails.
    final Iterator<ArchivedExecutionVertex> vertices =
            scheduler.requestJob()
                    .getArchivedExecutionGraph()
                    .getAllExecutionVertices()
                    .iterator();
    final ExecutionAttemptID firstAttempt =
            vertices.next().getCurrentExecutionAttempt().getAttemptId();
    final ExecutionAttemptID secondAttempt =
            vertices.next().getCurrentExecutionAttempt().getAttemptId();

    // First failure: sample the status immediately, before any restart task is triggered.
    final TaskExecutionState firstFailure =
            new TaskExecutionState(
                    firstAttempt, ExecutionState.FAILED, new RuntimeException("expected"));
    scheduler.updateTaskExecutionState(firstFailure);
    final JobStatus statusAfterFirstFailure = scheduler.requestJobStatus();

    // Second failure, then trigger one scheduled restart task and sample again.
    final TaskExecutionState secondFailure =
            new TaskExecutionState(
                    secondAttempt, ExecutionState.FAILED, new RuntimeException("expected"));
    scheduler.updateTaskExecutionState(secondFailure);
    taskRestartExecutor.triggerNonPeriodicScheduledTask();
    final JobStatus statusWithPendingRestart = scheduler.requestJobStatus();

    // Trigger the remaining scheduled restart task and take the final sample.
    taskRestartExecutor.triggerNonPeriodicScheduledTask();
    final JobStatus statusAfterAllRestarts = scheduler.requestJobStatus();

    assertThat(statusAfterFirstFailure, equalTo(JobStatus.RESTARTING));
    assertThat(statusWithPendingRestart, equalTo(JobStatus.RESTARTING));
    assertThat(statusAfterAllRestarts, equalTo(JobStatus.RUNNING));
}
Also used : JobStatus(org.apache.flink.api.common.JobStatus) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ArchivedExecutionVertex(org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex) TaskExecutionState(org.apache.flink.runtime.taskmanager.TaskExecutionState) AdaptiveSchedulerTest(org.apache.flink.runtime.scheduler.adaptive.AdaptiveSchedulerTest) Test(org.junit.Test)

Example 12 with ArchivedExecutionVertex

use of org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex in project flink by apache.

the class DefaultSchedulerTest method abortPendingCheckpointsWhenRestartingTasks.

/**
 * Triggers a checkpoint on a running single-vertex job, waits until the trigger latch is
 * released, then fails the task and runs the scheduled restart tasks.
 *
 * <p>Expects exactly one pending checkpoint before the failure and none after the restart
 * tasks have been triggered.
 *
 * @throws Exception if waiting on the checkpoint-triggered latch fails
 */
@Test
public void abortPendingCheckpointsWhenRestartingTasks() throws Exception {
    final JobGraph graph = singleNonParallelJobVertexJobGraph();
    enableCheckpointing(graph);

    final CountDownLatch triggeredLatch = getCheckpointTriggeredLatch();
    final DefaultScheduler scheduler = createSchedulerAndStartScheduling(graph);

    // The job has a single execution vertex; move its current attempt to running.
    final ExecutionAttemptID runningAttempt =
            Iterables.getOnlyElement(
                            scheduler.requestJob()
                                    .getArchivedExecutionGraph()
                                    .getAllExecutionVertices())
                    .getCurrentExecutionAttempt()
                    .getAttemptId();
    transitionToRunning(scheduler, runningAttempt);

    final CheckpointCoordinator coordinator = getCheckpointCoordinator(scheduler);
    coordinator.triggerCheckpoint(false);
    // Block until the latch signals that the checkpoint has been triggered.
    triggeredLatch.await();
    assertThat(coordinator.getNumberOfPendingCheckpoints(), is(equalTo(1)));

    scheduler.updateTaskExecutionState(createFailedTaskExecutionState(runningAttempt));
    taskRestartExecutor.triggerScheduledTasks();
    assertThat(coordinator.getNumberOfPendingCheckpoints(), is(equalTo(0)));
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) CheckpointCoordinator(org.apache.flink.runtime.checkpoint.CheckpointCoordinator) SchedulerTestingUtils.getCheckpointCoordinator(org.apache.flink.runtime.scheduler.SchedulerTestingUtils.getCheckpointCoordinator) ArchivedExecutionVertex(org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex) CountDownLatch(java.util.concurrent.CountDownLatch) AdaptiveSchedulerTest(org.apache.flink.runtime.scheduler.adaptive.AdaptiveSchedulerTest) Test(org.junit.Test)

Example 13 with ArchivedExecutionVertex

use of org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex in project flink by apache.

the class DefaultSchedulerTest method cancelWhileRestartingShouldWaitForRunningTasks.

/**
 * Fails the first of two vertices, cancels the job, and records the second vertex's state and
 * the job status at that moment. The second vertex is then reported as CANCELED.
 *
 * <p>Expects the second vertex to be CANCELING and the job CANCELLING right after the cancel
 * call, and the job to be CANCELED once the second vertex has been reported as CANCELED.
 */
@Test
public void cancelWhileRestartingShouldWaitForRunningTasks() {
    final JobGraph graph = singleJobVertexJobGraph(2);
    final DefaultScheduler scheduler = createSchedulerAndStartScheduling(graph);
    final SchedulingTopology schedulingTopology = scheduler.getSchedulingTopology();

    final Iterator<ArchivedExecutionVertex> vertices =
            scheduler.requestJob()
                    .getArchivedExecutionGraph()
                    .getAllExecutionVertices()
                    .iterator();
    final ExecutionAttemptID failingAttempt =
            vertices.next().getCurrentExecutionAttempt().getAttemptId();
    final ExecutionAttemptID survivingAttempt =
            vertices.next().getCurrentExecutionAttempt().getAttemptId();
    final ExecutionVertexID survivingVertexId =
            scheduler.getExecutionVertexIdOrThrow(survivingAttempt);

    // Fail one vertex, then cancel the job.
    final TaskExecutionState failure =
            new TaskExecutionState(
                    failingAttempt, ExecutionState.FAILED, new RuntimeException("expected"));
    scheduler.updateTaskExecutionState(failure);
    scheduler.cancel();

    // Snapshot both observations before the second vertex is reported as canceled.
    final ExecutionState survivingVertexStateAfterCancel =
            schedulingTopology.getVertex(survivingVertexId).getState();
    final JobStatus jobStatusAfterCancel = scheduler.requestJobStatus();

    final TaskExecutionState cancellation =
            new TaskExecutionState(
                    survivingAttempt, ExecutionState.CANCELED, new RuntimeException("expected"));
    scheduler.updateTaskExecutionState(cancellation);

    assertThat(survivingVertexStateAfterCancel, is(equalTo(ExecutionState.CANCELING)));
    assertThat(jobStatusAfterCancel, is(equalTo(JobStatus.CANCELLING)));
    assertThat(scheduler.requestJobStatus(), is(equalTo(JobStatus.CANCELED)));
}
Also used : JobStatus(org.apache.flink.api.common.JobStatus) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) TaskExecutionState(org.apache.flink.runtime.taskmanager.TaskExecutionState) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ExecutionVertexID(org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID) ArchivedExecutionVertex(org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex) SchedulingTopology(org.apache.flink.runtime.scheduler.strategy.SchedulingTopology) TaskExecutionState(org.apache.flink.runtime.taskmanager.TaskExecutionState) AdaptiveSchedulerTest(org.apache.flink.runtime.scheduler.adaptive.AdaptiveSchedulerTest) Test(org.junit.Test)

Example 14 with ArchivedExecutionVertex

use of org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex in project flink by apache.

the class DefaultSchedulerTest method testExceptionHistoryWithRestartableFailure.

/**
 * Triggers one task failure, runs a scheduled restart task, then switches the restart strategy
 * to "cannot restart" and triggers a second failure.
 *
 * <p>Expects the exception history to contain, in order, an entry matching the first failure
 * (with its timestamp, task name and assigned resource location) followed by an entry matching
 * a global failure for the second exception.
 */
@Test
public void testExceptionHistoryWithRestartableFailure() {
    final JobGraph graph = singleNonParallelJobVertexJobGraph();

    // Route slot allocation through a slot builder bound to a local task manager location.
    final TaskManagerLocation location = new LocalTaskManagerLocation();
    final TestingLogicalSlotBuilder slotBuilder = new TestingLogicalSlotBuilder();
    slotBuilder.setTaskManagerLocation(location);
    executionSlotAllocatorFactory = new TestExecutionSlotAllocatorFactory(slotBuilder);

    final DefaultScheduler scheduler = createSchedulerAndStartScheduling(graph);

    // initiate restartable failure
    final ArchivedExecutionVertex restartedVertex =
            Iterables.getOnlyElement(
                    scheduler.requestJob()
                            .getArchivedExecutionGraph()
                            .getAllExecutionVertices());
    final RuntimeException restartableFailure = new RuntimeException("restartable exception");
    final long restartableFailureTimestamp =
            initiateFailure(
                    scheduler,
                    restartedVertex.getCurrentExecutionAttempt().getAttemptId(),
                    restartableFailure);
    taskRestartExecutor.triggerNonPeriodicScheduledTask();

    // initiate job failure
    testRestartBackoffTimeStrategy.setCanRestart(false);
    // Re-read the vertex from a fresh job snapshot to obtain its current attempt id.
    final ExecutionAttemptID attemptAfterRestart =
            Iterables.getOnlyElement(
                            scheduler.requestJob()
                                    .getArchivedExecutionGraph()
                                    .getAllExecutionVertices())
                    .getCurrentExecutionAttempt()
                    .getAttemptId();
    final RuntimeException terminalFailure = new RuntimeException("failing exception");
    final long terminalFailureTimestamp =
            initiateFailure(scheduler, attemptAfterRestart, terminalFailure);

    final Iterable<RootExceptionHistoryEntry> exceptionHistory = scheduler.getExceptionHistory();

    // assert restarted attempt
    assertThat(
            exceptionHistory,
            IsIterableContainingInOrder.contains(
                    ExceptionHistoryEntryMatcher.matchesFailure(
                            restartableFailure,
                            restartableFailureTimestamp,
                            restartedVertex.getTaskNameWithSubtaskIndex(),
                            restartedVertex.getCurrentAssignedResourceLocation()),
                    ExceptionHistoryEntryMatcher.matchesGlobalFailure(
                            terminalFailure, terminalFailureTimestamp)));
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) RootExceptionHistoryEntry(org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry) LocalTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) LocalTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation) ArchivedExecutionVertex(org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex) TestingLogicalSlotBuilder(org.apache.flink.runtime.jobmaster.TestingLogicalSlotBuilder) AdaptiveSchedulerTest(org.apache.flink.runtime.scheduler.adaptive.AdaptiveSchedulerTest) Test(org.junit.Test)

Example 15 with ArchivedExecutionVertex

use of org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex in project flink by apache.

the class JobExceptionsHandlerTest method createArchivedExecutionJobVertex.

/**
 * Builds an {@link ArchivedExecutionJobVertex} holding a single {@link ArchivedExecutionVertex}
 * named "test task". Its current execution is in state RUNNING and carries an
 * {@link ErrorInfo} wrapping a {@code RuntimeException("error")} timestamped with the current
 * system time.
 *
 * @param jobVertexID id of the job vertex; its string form is passed as the second
 *     identifying argument of the created job vertex
 * @return the archived job vertex wrapping the single archived execution vertex
 */
private static ArchivedExecutionJobVertex createArchivedExecutionJobVertex(JobVertexID jobVertexID) {
    final int subtaskIndex = 1;
    final int attemptNumber = 2;
    final ExecutionState state = ExecutionState.RUNNING;

    final StringifiedAccumulatorResult[] noAccumulators = new StringifiedAccumulatorResult[0];
    // One timestamp slot per execution state, all left at zero.
    final long[] stateTimestamps = new long[ExecutionState.values().length];
    final LocalTaskManagerLocation location = new LocalTaskManagerLocation();
    final AllocationID allocationId = new AllocationID();

    final ArchivedExecution currentExecution =
            new ArchivedExecution(
                    new StringifiedAccumulatorResult[0],
                    null,
                    new ExecutionAttemptID(),
                    attemptNumber,
                    state,
                    new ErrorInfo(new RuntimeException("error"), System.currentTimeMillis()),
                    location,
                    allocationId,
                    subtaskIndex,
                    stateTimestamps);

    final ArchivedExecutionVertex onlyVertex =
            new ArchivedExecutionVertex(
                    subtaskIndex, "test task", currentExecution, new EvictingBoundedList<>(0));

    return new ArchivedExecutionJobVertex(
            new ArchivedExecutionVertex[] {onlyVertex},
            jobVertexID,
            jobVertexID.toString(),
            1,
            1,
            ResourceProfile.UNKNOWN,
            noAccumulators);
}
Also used : ExecutionState(org.apache.flink.runtime.execution.ExecutionState) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ArchivedExecutionJobVertex(org.apache.flink.runtime.executiongraph.ArchivedExecutionJobVertex) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) ArchivedExecutionVertex(org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex) ErrorInfo(org.apache.flink.runtime.executiongraph.ErrorInfo) StringifiedAccumulatorResult(org.apache.flink.runtime.accumulators.StringifiedAccumulatorResult) ArchivedExecution(org.apache.flink.runtime.executiongraph.ArchivedExecution) EvictingBoundedList(org.apache.flink.runtime.util.EvictingBoundedList) LocalTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation)

Aggregations

ArchivedExecutionVertex (org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex): 24; Test (org.junit.Test): 21; ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID): 19; JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 19; AdaptiveSchedulerTest (org.apache.flink.runtime.scheduler.adaptive.AdaptiveSchedulerTest): 18; JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 7; ExecutionVertexID (org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID): 7; TaskExecutionState (org.apache.flink.runtime.taskmanager.TaskExecutionState): 7; ExecutionState (org.apache.flink.runtime.execution.ExecutionState): 5; JobStatus (org.apache.flink.api.common.JobStatus): 4; ArchivedExecution (org.apache.flink.runtime.executiongraph.ArchivedExecution): 4; LocalTaskManagerLocation (org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation): 4; JobID (org.apache.flink.api.common.JobID): 3; Configuration (org.apache.flink.configuration.Configuration): 3; TestingCheckpointRecoveryFactory (org.apache.flink.runtime.checkpoint.TestingCheckpointRecoveryFactory): 3; AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID): 3; ArchivedExecutionJobVertex (org.apache.flink.runtime.executiongraph.ArchivedExecutionJobVertex): 3; ErrorInfo (org.apache.flink.runtime.executiongraph.ErrorInfo): 3; RootExceptionHistoryEntry (org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry): 3; ArrayList (java.util.ArrayList): 2