Search in sources:

Example 16 with SchedulerBase

use of org.apache.flink.runtime.scheduler.SchedulerBase in project flink by apache.

the class ExecutionGraphRestartTest method testFailingExecutionAfterRestart.

/**
 * Verifies that failing an execution which belongs to an earlier attempt has no effect on a job
 * that has since been restarted. This matters when a callback handler fails an execution after
 * that execution already reached a final state and the job was restarted.
 */
@Test
public void testFailingExecutionAfterRestart() throws Exception {
    JobVertex firstVertex =
            ExecutionGraphTestUtils.createJobVertex("Task1", 1, NoOpInvokable.class);
    JobVertex secondVertex =
            ExecutionGraphTestUtils.createJobVertex("Task2", 1, NoOpInvokable.class);
    JobGraph jobGraph = JobGraphTestUtils.streamingJobGraph(firstVertex, secondVertex);

    try (SlotPool slotPool = SlotPoolUtils.createDeclarativeSlotPoolBridge()) {
        SchedulerBase scheduler =
                SchedulerTestingUtils.newSchedulerBuilder(jobGraph, mainThreadExecutor)
                        .setExecutionSlotAllocatorFactory(
                                createExecutionSlotAllocatorFactory(slotPool))
                        .setRestartBackoffTimeStrategy(
                                new TestRestartBackoffTimeStrategy(true, Long.MAX_VALUE))
                        .setDelayExecutor(taskRestartExecutor)
                        .build();
        ExecutionGraph graph = scheduler.getExecutionGraph();

        startScheduling(scheduler);
        offerSlots(slotPool, 2);

        // Remember the attempts of the first run: one will finish, the other will fail.
        Iterator<ExecutionVertex> vertexIterator = graph.getAllExecutionVertices().iterator();
        Execution staleFinishedAttempt = vertexIterator.next().getCurrentExecutionAttempt();
        Execution failedAttempt = vertexIterator.next().getCurrentExecutionAttempt();

        staleFinishedAttempt.markFinished();
        failedAttempt.fail(new Exception("Test Exception"));
        failedAttempt.completeCancelling();

        // Run whatever was scheduled on the delay executor (presumably the restart).
        taskRestartExecutor.triggerScheduledTasks();
        assertEquals(JobStatus.RUNNING, graph.getState());

        // By now every vertex is expected to have a resource assigned.
        for (ExecutionVertex executionVertex : graph.getAllExecutionVertices()) {
            assertNotNull(
                    "No assigned resource (test instability).",
                    executionVertex.getCurrentAssignedResource());
            Execution currentAttempt = executionVertex.getCurrentExecutionAttempt();
            currentAttempt.switchToRecovering();
            currentAttempt.switchToRunning();
        }

        // Failing the old, already finished attempt must not influence the current run.
        staleFinishedAttempt.fail(new Exception("This should have no effect"));

        for (ExecutionVertex executionVertex : graph.getAllExecutionVertices()) {
            executionVertex.getCurrentExecutionAttempt().markFinished();
        }

        // The old attempt was in a terminal state, so its state should not have changed.
        assertEquals(ExecutionState.FINISHED, staleFinishedAttempt.getState());
        assertEquals(JobStatus.FINISHED, graph.getState());
    }
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) TestRestartBackoffTimeStrategy(org.apache.flink.runtime.executiongraph.failover.flip1.TestRestartBackoffTimeStrategy) SchedulerBase(org.apache.flink.runtime.scheduler.SchedulerBase) IOException(java.io.IOException) SlotPool(org.apache.flink.runtime.jobmaster.slotpool.SlotPool) Test(org.junit.Test)

Example 17 with SchedulerBase

use of org.apache.flink.runtime.scheduler.SchedulerBase in project flink by apache.

the class ExecutionGraphRestartTest method testFailWhileCanceling.

/**
 * Checks the job status sequence when a global failure is reported while the job is being
 * cancelled: CANCELLING, then FAILING, and FAILED once all tasks completed cancelling.
 */
@Test
public void testFailWhileCanceling() throws Exception {
    try (SlotPool slotPool = SlotPoolUtils.createDeclarativeSlotPoolBridge()) {
        SchedulerBase scheduler =
                SchedulerTestingUtils.newSchedulerBuilder(createJobGraph(), mainThreadExecutor)
                        .setExecutionSlotAllocatorFactory(
                                createExecutionSlotAllocatorFactory(slotPool))
                        .setRestartBackoffTimeStrategy(
                                new TestRestartBackoffTimeStrategy(false, Long.MAX_VALUE))
                        .build();
        ExecutionGraph executionGraph = scheduler.getExecutionGraph();

        startScheduling(scheduler);
        offerSlots(slotPool, NUM_TASKS);
        assertEquals(JobStatus.RUNNING, executionGraph.getState());

        switchAllTasksToRunning(executionGraph);

        // Cancel first ...
        scheduler.cancel();
        assertEquals(JobStatus.CANCELLING, executionGraph.getState());

        // ... then report a global failure while cancellation is still in progress.
        scheduler.handleGlobalFailure(new Exception("test"));
        assertEquals(JobStatus.FAILING, executionGraph.getState());

        // Once every task has finished cancelling, the job ends up FAILED.
        completeCanceling(executionGraph);
        assertEquals(JobStatus.FAILED, executionGraph.getState());
    }
}
Also used : TestRestartBackoffTimeStrategy(org.apache.flink.runtime.executiongraph.failover.flip1.TestRestartBackoffTimeStrategy) SchedulerBase(org.apache.flink.runtime.scheduler.SchedulerBase) IOException(java.io.IOException) SlotPool(org.apache.flink.runtime.jobmaster.slotpool.SlotPool) Test(org.junit.Test)

Example 18 with SchedulerBase

use of org.apache.flink.runtime.scheduler.SchedulerBase in project flink by apache.

the class ExecutionGraphSuspendTest method createScheduler.

/**
 * Builds a scheduler for a job graph consisting of a single {@link NoOpInvokable} vertex.
 *
 * @param gateway task manager gateway handed to the testing physical slot provider
 * @param parallelism parallelism of the vertex; also passed as the limit on physical slots
 * @return the scheduler produced by the testing scheduler builder
 */
private static SchedulerBase createScheduler(TaskManagerGateway gateway, int parallelism) throws Exception {
    final JobVertex singleVertex = new JobVertex("vertex");
    singleVertex.setInvokableClass(NoOpInvokable.class);
    singleVertex.setParallelism(parallelism);

    return SchedulerTestingUtils.newSchedulerBuilder(
                    JobGraphTestUtils.streamingJobGraph(singleVertex),
                    ComponentMainThreadExecutorServiceAdapter.forMainThread())
            .setExecutionSlotAllocatorFactory(
                    SchedulerTestingUtils.newSlotSharingExecutionSlotAllocatorFactory(
                            TestingPhysicalSlotProvider.createWithLimitedAmountOfPhysicalSlots(
                                    parallelism, gateway)))
            .build();
}
Also used : JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) SchedulerBase(org.apache.flink.runtime.scheduler.SchedulerBase)

Example 19 with SchedulerBase

use of org.apache.flink.runtime.scheduler.SchedulerBase in project flink by apache.

the class ExecutionGraphSuspendTest method testSuspendedOutOfCanceling.

/**
 * A job that is suspended while CANCELLING must end up SUSPENDED without sending any
 * additional RPC calls.
 */
@Test
public void testSuspendedOutOfCanceling() throws Exception {
    final int parallelism = 10;
    final InteractionsCountingTaskManagerGateway countingGateway =
            new InteractionsCountingTaskManagerGateway(parallelism);
    final SchedulerBase scheduler = createScheduler(countingGateway, parallelism);
    final ExecutionGraph executionGraph = scheduler.getExecutionGraph();

    scheduler.startScheduling();
    ExecutionGraphTestUtils.switchAllVerticesToRunning(executionGraph);

    // Bring the job into CANCELLING and check the cancel RPCs issued so far.
    scheduler.cancel();
    assertEquals(JobStatus.CANCELLING, executionGraph.getState());
    validateCancelRpcCalls(countingGateway, parallelism);

    // Closing the scheduler suspends the job.
    scheduler.closeAsync();
    assertEquals(JobStatus.SUSPENDED, executionGraph.getState());

    ensureCannotLeaveSuspendedState(scheduler, countingGateway);
}
Also used : SchedulerBase(org.apache.flink.runtime.scheduler.SchedulerBase) Test(org.junit.Test)

Example 20 with SchedulerBase

use of org.apache.flink.runtime.scheduler.SchedulerBase in project flink by apache.

the class ExecutionGraphSuspendTest method testSuspendedOutOfRunning.

/**
 * Suspending a job whose vertices are RUNNING should cancel every vertex exactly once via RPC
 * calls and leave the job SUSPENDED.
 */
@Test
public void testSuspendedOutOfRunning() throws Exception {
    final int parallelism = 10;
    final InteractionsCountingTaskManagerGateway countingGateway =
            new InteractionsCountingTaskManagerGateway(parallelism);
    final SchedulerBase scheduler = createScheduler(countingGateway, parallelism);
    final ExecutionGraph executionGraph = scheduler.getExecutionGraph();

    scheduler.startScheduling();
    ExecutionGraphTestUtils.switchAllVerticesToRunning(executionGraph);

    // Precondition: the job and all of its vertices are RUNNING.
    assertEquals(JobStatus.RUNNING, executionGraph.getState());
    validateAllVerticesInState(executionGraph, ExecutionState.RUNNING);

    // Closing the scheduler suspends the job.
    scheduler.closeAsync();
    assertEquals(JobStatus.SUSPENDED, executionGraph.getState());
    validateCancelRpcCalls(countingGateway, parallelism);

    ensureCannotLeaveSuspendedState(scheduler, countingGateway);
}
Also used : SchedulerBase(org.apache.flink.runtime.scheduler.SchedulerBase) Test(org.junit.Test)

Aggregations

SchedulerBase (org.apache.flink.runtime.scheduler.SchedulerBase): 56, Test (org.junit.Test): 49, JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 33, JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 19, JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID): 10, CompletableFuture (java.util.concurrent.CompletableFuture): 8, IOException (java.io.IOException): 7, TestingPhysicalSlotProvider (org.apache.flink.runtime.scheduler.TestingPhysicalSlotProvider): 7, TestRestartBackoffTimeStrategy (org.apache.flink.runtime.executiongraph.failover.flip1.TestRestartBackoffTimeStrategy): 6, TaskExecutionState (org.apache.flink.runtime.taskmanager.TaskExecutionState): 6, ArrayList (java.util.ArrayList): 5, JobStatus (org.apache.flink.api.common.JobStatus): 5, IntermediateResultPartitionID (org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID): 5, ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph): 4, ResultPartitionID (org.apache.flink.runtime.io.network.partition.ResultPartitionID): 4, SlotPool (org.apache.flink.runtime.jobmaster.slotpool.SlotPool): 4, TestingPhysicalSlot (org.apache.flink.runtime.scheduler.TestingPhysicalSlot): 4, VertexParallelismInformation (org.apache.flink.runtime.scheduler.VertexParallelismInformation): 4, VertexParallelismStore (org.apache.flink.runtime.scheduler.VertexParallelismStore): 4, TaskManagerLocation (org.apache.flink.runtime.taskmanager.TaskManagerLocation): 4