Use of org.apache.flink.runtime.scheduler.SchedulerBase in the Apache Flink project.
Class ExecutionGraphRestartTest, method testFailingExecutionAfterRestart.
/**
 * Verifies that failing an execution attempt which belongs to a previous run of the job leaves
 * the restarted job untouched. This matters when a callback handler fails an execution after
 * that execution has already reached a final state and the job has been restarted.
 */
@Test
public void testFailingExecutionAfterRestart() throws Exception {
    final JobVertex firstTask =
            ExecutionGraphTestUtils.createJobVertex("Task1", 1, NoOpInvokable.class);
    final JobVertex secondTask =
            ExecutionGraphTestUtils.createJobVertex("Task2", 1, NoOpInvokable.class);
    final JobGraph jobGraph = JobGraphTestUtils.streamingJobGraph(firstTask, secondTask);

    try (SlotPool slotPool = SlotPoolUtils.createDeclarativeSlotPoolBridge()) {
        final SchedulerBase scheduler =
                SchedulerTestingUtils.newSchedulerBuilder(jobGraph, mainThreadExecutor)
                        .setExecutionSlotAllocatorFactory(
                                createExecutionSlotAllocatorFactory(slotPool))
                        .setRestartBackoffTimeStrategy(
                                new TestRestartBackoffTimeStrategy(true, Long.MAX_VALUE))
                        .setDelayExecutor(taskRestartExecutor)
                        .build();
        final ExecutionGraph executionGraph = scheduler.getExecutionGraph();

        startScheduling(scheduler);
        offerSlots(slotPool, 2);

        // Capture the attempts of the two vertices before anything is restarted.
        final Iterator<ExecutionVertex> vertexIterator =
                executionGraph.getAllExecutionVertices().iterator();
        final ExecutionVertex vertexToFinish = vertexIterator.next();
        final ExecutionVertex vertexToFail = vertexIterator.next();
        final Execution finishedExecution = vertexToFinish.getCurrentExecutionAttempt();
        final Execution failedExecution = vertexToFail.getCurrentExecutionAttempt();

        // One attempt finishes regularly, the other one fails and completes its cancellation.
        finishedExecution.markFinished();
        failedExecution.fail(new Exception("Test Exception"));
        failedExecution.completeCancelling();

        // Run whatever the scheduler handed to the delay executor for the restart.
        taskRestartExecutor.triggerScheduledTasks();
        assertEquals(JobStatus.RUNNING, executionGraph.getState());

        // By now every vertex is expected to have a resource assigned.
        for (ExecutionVertex restartedVertex : executionGraph.getAllExecutionVertices()) {
            assertNotNull(
                    "No assigned resource (test instability).",
                    restartedVertex.getCurrentAssignedResource());
            final Execution currentAttempt = restartedVertex.getCurrentExecutionAttempt();
            currentAttempt.switchToRecovering();
            currentAttempt.switchToRunning();
        }

        // Failing the old, already finished attempt must not influence the current attempts.
        finishedExecution.fail(new Exception("This should have no effect"));

        for (ExecutionVertex restartedVertex : executionGraph.getAllExecutionVertices()) {
            final Execution currentAttempt = restartedVertex.getCurrentExecutionAttempt();
            currentAttempt.markFinished();
        }

        // FINISHED is terminal, so the old attempt must still report it.
        assertEquals(ExecutionState.FINISHED, finishedExecution.getState());
        assertEquals(JobStatus.FINISHED, executionGraph.getState());
    }
}
Use of org.apache.flink.runtime.scheduler.SchedulerBase in the Apache Flink project.
Class ExecutionGraphRestartTest, method testFailWhileCanceling.
/**
 * Checks that a global failure reported while the job is in CANCELLING moves the job to
 * FAILING, and that it ends up in FAILED once all tasks have completed their cancellation.
 */
@Test
public void testFailWhileCanceling() throws Exception {
    try (SlotPool slotPool = SlotPoolUtils.createDeclarativeSlotPoolBridge()) {
        final SchedulerBase scheduler =
                SchedulerTestingUtils.newSchedulerBuilder(createJobGraph(), mainThreadExecutor)
                        .setExecutionSlotAllocatorFactory(
                                createExecutionSlotAllocatorFactory(slotPool))
                        .setRestartBackoffTimeStrategy(
                                new TestRestartBackoffTimeStrategy(false, Long.MAX_VALUE))
                        .build();
        final ExecutionGraph executionGraph = scheduler.getExecutionGraph();

        // Bring the job up and get all of its tasks running.
        startScheduling(scheduler);
        offerSlots(slotPool, NUM_TASKS);
        assertEquals(JobStatus.RUNNING, executionGraph.getState());
        switchAllTasksToRunning(executionGraph);

        // Cancel first ...
        scheduler.cancel();
        assertEquals(JobStatus.CANCELLING, executionGraph.getState());

        // ... then report a global failure while the cancellation is still in progress.
        scheduler.handleGlobalFailure(new Exception("test"));
        assertEquals(JobStatus.FAILING, executionGraph.getState());

        // Once every task has finished cancelling, the job must be FAILED.
        completeCanceling(executionGraph);
        assertEquals(JobStatus.FAILED, executionGraph.getState());
    }
}
Use of org.apache.flink.runtime.scheduler.SchedulerBase in the Apache Flink project.
Class ExecutionGraphSuspendTest, method createScheduler.
/**
 * Builds a scheduler for a streaming job graph that consists of a single {@link NoOpInvokable}
 * vertex named "vertex" with the given parallelism.
 *
 * <p>The scheduler runs on {@code ComponentMainThreadExecutorServiceAdapter.forMainThread()}
 * and takes its slots from a {@code TestingPhysicalSlotProvider} created with
 * {@code parallelism} physical slots and the given gateway.
 *
 * @param gateway task manager gateway handed to the testing physical slot provider
 * @param parallelism parallelism of the vertex, also used as the number of physical slots
 * @return the newly built scheduler
 * @throws Exception if building the scheduler fails
 */
private static SchedulerBase createScheduler(TaskManagerGateway gateway, int parallelism) throws Exception {
    final JobVertex singleVertex = new JobVertex("vertex");
    singleVertex.setInvokableClass(NoOpInvokable.class);
    singleVertex.setParallelism(parallelism);

    return SchedulerTestingUtils.newSchedulerBuilder(
                    JobGraphTestUtils.streamingJobGraph(singleVertex),
                    ComponentMainThreadExecutorServiceAdapter.forMainThread())
            .setExecutionSlotAllocatorFactory(
                    SchedulerTestingUtils.newSlotSharingExecutionSlotAllocatorFactory(
                            TestingPhysicalSlotProvider.createWithLimitedAmountOfPhysicalSlots(
                                    parallelism, gateway)))
            .build();
}
Use of org.apache.flink.runtime.scheduler.SchedulerBase in the Apache Flink project.
Class ExecutionGraphSuspendTest, method testSuspendedOutOfCanceling.
/**
 * Suspending a job that is in CANCELLING must move it to SUSPENDED without sending any further
 * RPC calls.
 */
@Test
public void testSuspendedOutOfCanceling() throws Exception {
    final int parallelism = 10;
    final InteractionsCountingTaskManagerGateway countingGateway =
            new InteractionsCountingTaskManagerGateway(parallelism);
    final SchedulerBase scheduler = createScheduler(countingGateway, parallelism);
    final ExecutionGraph executionGraph = scheduler.getExecutionGraph();

    // Start the job and bring every vertex to RUNNING before cancelling.
    scheduler.startScheduling();
    ExecutionGraphTestUtils.switchAllVerticesToRunning(executionGraph);

    scheduler.cancel();
    assertEquals(JobStatus.CANCELLING, executionGraph.getState());
    validateCancelRpcCalls(countingGateway, parallelism);

    // Closing the scheduler is what suspends the job here.
    scheduler.closeAsync();
    assertEquals(JobStatus.SUSPENDED, executionGraph.getState());

    ensureCannotLeaveSuspendedState(scheduler, countingGateway);
}
Use of org.apache.flink.runtime.scheduler.SchedulerBase in the Apache Flink project.
Class ExecutionGraphSuspendTest, method testSuspendedOutOfRunning.
/**
 * Suspending a job whose vertices are all RUNNING must move it to SUSPENDED and cancel every
 * vertex exactly once via RPC.
 */
@Test
public void testSuspendedOutOfRunning() throws Exception {
    final int parallelism = 10;
    final InteractionsCountingTaskManagerGateway countingGateway =
            new InteractionsCountingTaskManagerGateway(parallelism);
    final SchedulerBase scheduler = createScheduler(countingGateway, parallelism);
    final ExecutionGraph executionGraph = scheduler.getExecutionGraph();

    // Start the job and bring every vertex to RUNNING.
    scheduler.startScheduling();
    ExecutionGraphTestUtils.switchAllVerticesToRunning(executionGraph);

    assertEquals(JobStatus.RUNNING, executionGraph.getState());
    validateAllVerticesInState(executionGraph, ExecutionState.RUNNING);

    // Closing the scheduler is what suspends the job here.
    scheduler.closeAsync();
    assertEquals(JobStatus.SUSPENDED, executionGraph.getState());
    validateCancelRpcCalls(countingGateway, parallelism);

    ensureCannotLeaveSuspendedState(scheduler, countingGateway);
}
Aggregations