Search in sources :

Example 26 with SchedulerBase

use of org.apache.flink.runtime.scheduler.SchedulerBase in project flink by apache.

the class FinalizeOnMasterTest method testFinalizeIsCalledUponSuccess.

/**
 * Runs a two-vertex streaming job to completion and checks that {@code finalizeOnMaster} is
 * invoked exactly once on each job vertex.
 */
@Test
public void testFinalizeIsCalledUponSuccess() throws Exception {
    // Spy on both vertices so the finalizeOnMaster invocations can be verified afterwards.
    final JobVertex firstVertex = spy(new JobVertex("test vertex 1"));
    firstVertex.setInvokableClass(NoOpInvokable.class);
    firstVertex.setParallelism(3);

    final JobVertex secondVertex = spy(new JobVertex("test vertex 2"));
    secondVertex.setInvokableClass(NoOpInvokable.class);
    secondVertex.setParallelism(2);

    final SchedulerBase scheduler =
            createScheduler(
                    JobGraphTestUtils.streamingJobGraph(firstVertex, secondVertex),
                    ComponentMainThreadExecutorServiceAdapter.forMainThread());
    scheduler.startScheduling();

    final ExecutionGraph executionGraph = scheduler.getExecutionGraph();
    assertEquals(JobStatus.RUNNING, executionGraph.getState());

    // Switch every vertex to running, then finish all of them.
    ExecutionGraphTestUtils.switchAllVerticesToRunning(executionGraph);
    ExecutionGraphTestUtils.finishAllVertices(executionGraph);
    assertEquals(JobStatus.FINISHED, executionGraph.waitUntilTerminal());

    // Each vertex must have been finalized exactly once.
    verify(firstVertex, times(1)).finalizeOnMaster(any(ClassLoader.class));
    verify(secondVertex, times(1)).finalizeOnMaster(any(ClassLoader.class));

    // No execution may remain registered once the job has terminated.
    assertEquals(0, executionGraph.getRegisteredExecutions().size());
}
Also used : JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) SchedulerBase(org.apache.flink.runtime.scheduler.SchedulerBase) Test(org.junit.Test)

Example 27 with SchedulerBase

use of org.apache.flink.runtime.scheduler.SchedulerBase in project flink by apache.

the class ExecutionGraphCoLocationRestartTest method testConstraintsAfterRestart.

/**
 * Schedules two connected, strictly co-located vertices that share a slot sharing group, fails
 * one execution vertex to force a restart, and checks via {@code validateConstraints} that the
 * constraints hold both before the failure and after the job is running again.
 */
@Test
public void testConstraintsAfterRestart() throws Exception {
    // timeout passed to the waitFor.../waitUntil... helpers below
    final long timeout = 5000L;
    JobVertex groupVertex = ExecutionGraphTestUtils.createNoOpVertex(NUM_TASKS);
    JobVertex groupVertex2 = ExecutionGraphTestUtils.createNoOpVertex(NUM_TASKS);
    groupVertex2.connectNewDataSetAsInput(groupVertex, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
    // both vertices share one slot sharing group and are strictly co-located with each other
    SlotSharingGroup sharingGroup = new SlotSharingGroup();
    groupVertex.setSlotSharingGroup(sharingGroup);
    groupVertex2.setSlotSharingGroup(sharingGroup);
    groupVertex.setStrictlyCoLocatedWith(groupVertex2);
    // initiate and schedule job
    final JobGraph jobGraph = JobGraphTestUtils.streamingJobGraph(groupVertex, groupVertex2);
    // delayed tasks only run when the test triggers them explicitly (see below)
    final ManuallyTriggeredScheduledExecutorService delayExecutor = new ManuallyTriggeredScheduledExecutorService();
    // every slot request is answered with an already-completed future holding a testing slot;
    // NOTE(review): the factory arguments (1, 0) are presumably "one restart attempt, zero delay"
    // - confirm against FixedDelayRestartBackoffTimeStrategyFactory
    final SchedulerBase scheduler = SchedulerTestingUtils.newSchedulerBuilder(jobGraph, ComponentMainThreadExecutorServiceAdapter.forMainThread()).setExecutionSlotAllocatorFactory(SchedulerTestingUtils.newSlotSharingExecutionSlotAllocatorFactory(TestingPhysicalSlotProvider.create((ignored) -> CompletableFuture.completedFuture(TestingPhysicalSlot.builder().build())))).setDelayExecutor(delayExecutor).setRestartBackoffTimeStrategy(new FixedDelayRestartBackoffTimeStrategy.FixedDelayRestartBackoffTimeStrategyFactory(1, 0).create()).build();
    final ExecutionGraph eg = scheduler.getExecutionGraph();
    // the job must still be in CREATED state before scheduling is started
    assertEquals(JobStatus.CREATED, eg.getState());
    scheduler.startScheduling();
    Predicate<AccessExecution> isDeploying = ExecutionGraphTestUtils.isInExecutionState(ExecutionState.DEPLOYING);
    ExecutionGraphTestUtils.waitForAllExecutionsPredicate(eg, isDeploying, timeout);
    assertEquals(JobStatus.RUNNING, eg.getState());
    // sanity checks
    validateConstraints(eg);
    // fail the first execution vertex; the job is expected to switch to RESTARTING
    eg.getAllExecutionVertices().iterator().next().fail(new FlinkException("Test exception"));
    assertEquals(JobStatus.RESTARTING, eg.getState());
    // trigger registration of restartTasks(...) callback to cancelFuture before completing the
    // cancellation. This ensures the restarting actions to be performed in main thread.
    delayExecutor.triggerNonPeriodicScheduledTask();
    // complete the cancellation of every vertex that is still in CANCELING state
    for (ExecutionVertex vertex : eg.getAllExecutionVertices()) {
        if (vertex.getExecutionState() == ExecutionState.CANCELING) {
            vertex.getCurrentExecutionAttempt().completeCancelling();
        }
    }
    // wait until we have restarted
    ExecutionGraphTestUtils.waitUntilJobStatus(eg, JobStatus.RUNNING, timeout);
    ExecutionGraphTestUtils.waitForAllExecutionsPredicate(eg, isDeploying, timeout);
    // checking execution vertex properties
    validateConstraints(eg);
    // finish all vertices and expect the job to end in FINISHED
    ExecutionGraphTestUtils.finishAllVertices(eg);
    assertThat(eg.getState(), is(FINISHED));
}
Also used : ComponentMainThreadExecutorServiceAdapter(org.apache.flink.runtime.concurrent.ComponentMainThreadExecutorServiceAdapter) TestingPhysicalSlotProvider(org.apache.flink.runtime.scheduler.TestingPhysicalSlotProvider) FlinkException(org.apache.flink.util.FlinkException) SchedulerBase(org.apache.flink.runtime.scheduler.SchedulerBase) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) SlotSharingGroup(org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup) Predicate(java.util.function.Predicate) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) ManuallyTriggeredScheduledExecutorService(org.apache.flink.runtime.concurrent.ManuallyTriggeredScheduledExecutorService) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) Test(org.junit.Test) CompletableFuture(java.util.concurrent.CompletableFuture) JobStatus(org.apache.flink.api.common.JobStatus) Assert.assertThat(org.junit.Assert.assertThat) FINISHED(org.apache.flink.api.common.JobStatus.FINISHED) TestingPhysicalSlot(org.apache.flink.runtime.scheduler.TestingPhysicalSlot) JobGraphTestUtils(org.apache.flink.runtime.jobgraph.JobGraphTestUtils) Matchers.is(org.hamcrest.Matchers.is) FixedDelayRestartBackoffTimeStrategy(org.apache.flink.runtime.executiongraph.failover.flip1.FixedDelayRestartBackoffTimeStrategy) SchedulerTestingUtils(org.apache.flink.runtime.scheduler.SchedulerTestingUtils) DistributionPattern(org.apache.flink.runtime.jobgraph.DistributionPattern) Assert.assertEquals(org.junit.Assert.assertEquals)

Example 28 with SchedulerBase

use of org.apache.flink.runtime.scheduler.SchedulerBase in project flink by apache.

the class ExecutionGraphPartitionReleaseTest method testStrategyNotifiedOfFinishedVerticesAndResultsRespected.

/**
 * Finishes the source, operator and sink executions one after another and checks, after each
 * step, which result partitions the partition tracker was asked to release.
 */
@Test
public void testStrategyNotifiedOfFinishedVerticesAndResultsRespected() throws Exception {
    // Three single-subtask vertices connected source -> operator -> sink via blocking partitions.
    final JobVertex source = ExecutionGraphTestUtils.createNoOpVertex(1);
    final JobVertex operator = ExecutionGraphTestUtils.createNoOpVertex(1);
    final JobVertex sink = ExecutionGraphTestUtils.createNoOpVertex(1);
    operator.connectNewDataSetAsInput(
            source, DistributionPattern.POINTWISE, ResultPartitionType.BLOCKING);
    sink.connectNewDataSetAsInput(
            operator, DistributionPattern.POINTWISE, ResultPartitionType.BLOCKING);

    // Record the first partition id of every release request seen by the tracker.
    final TestingJobMasterPartitionTracker tracker = new TestingJobMasterPartitionTracker();
    final Queue<ResultPartitionID> released = new ArrayDeque<>();
    tracker.setStopTrackingAndReleasePartitionsConsumer(
            ids -> released.add(ids.iterator().next()));

    final SchedulerBase scheduler = createScheduler(tracker, source, operator, sink);
    final ExecutionGraph graph = scheduler.getExecutionGraph();

    // Step 1: finishing the source must not release anything yet.
    mainThreadExecutor.execute(
            () -> {
                final Execution sourceAttempt = getCurrentExecution(source, graph);
                scheduler.updateTaskExecutionState(
                        new TaskExecutionState(
                                sourceAttempt.getAttemptId(), ExecutionState.FINISHED));
                assertThat(released, empty());
            });

    // Step 2: finishing the operator releases the partition produced by the source.
    mainThreadExecutor.execute(
            () -> {
                final Execution sourceAttempt = getCurrentExecution(source, graph);
                final Execution operatorAttempt = getCurrentExecution(operator, graph);
                scheduler.updateTaskExecutionState(
                        new TaskExecutionState(
                                operatorAttempt.getAttemptId(), ExecutionState.FINISHED));
                assertThat(released, hasSize(1));
                final ResultPartitionID actual = released.remove();
                final ResultPartitionID expected =
                        new ResultPartitionID(
                                sourceAttempt
                                        .getVertex()
                                        .getProducedPartitions()
                                        .keySet()
                                        .iterator()
                                        .next(),
                                sourceAttempt.getAttemptId());
                assertThat(actual, equalTo(expected));
            });

    // Step 3: finishing the sink releases the partition produced by the operator.
    mainThreadExecutor.execute(
            () -> {
                final Execution operatorAttempt = getCurrentExecution(operator, graph);
                final Execution sinkAttempt = getCurrentExecution(sink, graph);
                scheduler.updateTaskExecutionState(
                        new TaskExecutionState(
                                sinkAttempt.getAttemptId(), ExecutionState.FINISHED));
                assertThat(released, hasSize(1));
                final ResultPartitionID actual = released.remove();
                final ResultPartitionID expected =
                        new ResultPartitionID(
                                operatorAttempt
                                        .getVertex()
                                        .getProducedPartitions()
                                        .keySet()
                                        .iterator()
                                        .next(),
                                operatorAttempt.getAttemptId());
                assertThat(actual, equalTo(expected));
            });
}
Also used : JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) TestingJobMasterPartitionTracker(org.apache.flink.runtime.io.network.partition.TestingJobMasterPartitionTracker) SchedulerBase(org.apache.flink.runtime.scheduler.SchedulerBase) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) ArrayDeque(java.util.ArrayDeque) TaskExecutionState(org.apache.flink.runtime.taskmanager.TaskExecutionState) Test(org.junit.Test)

Example 29 with SchedulerBase

use of org.apache.flink.runtime.scheduler.SchedulerBase in project flink by apache.

the class ExecutionGraphPartitionReleaseTest method createScheduler.

/**
 * Builds a scheduler for a batch job graph made of the given vertices, wires in the supplied
 * partition tracker, and starts scheduling through {@code mainThreadExecutor}.
 *
 * @param partitionTracker tracker handed to the scheduler builder
 * @param vertices job vertices that make up the batch job graph
 * @return the scheduler, after {@code startScheduling} has been submitted to the executor
 */
private SchedulerBase createScheduler(final JobMasterPartitionTracker partitionTracker, final JobVertex... vertices) throws Exception {
    final SchedulerBase scheduler =
            SchedulerTestingUtils.newSchedulerBuilder(
                            JobGraphTestUtils.batchJobGraph(vertices),
                            mainThreadExecutor.getMainThreadExecutor())
                    .setExecutionSlotAllocatorFactory(
                            SchedulerTestingUtils.newSlotSharingExecutionSlotAllocatorFactory())
                    .setPartitionTracker(partitionTracker)
                    .build();

    mainThreadExecutor.execute(scheduler::startScheduling);
    return scheduler;
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) SchedulerBase(org.apache.flink.runtime.scheduler.SchedulerBase)

Example 30 with SchedulerBase

use of org.apache.flink.runtime.scheduler.SchedulerBase in project flink by apache.

the class DefaultExecutionGraphDeploymentTest method setupScheduler.

/**
 * Configures both vertices as {@code BatchTask} vertices with the given parallelism, builds a
 * scheduler for them, starts scheduling and asserts that one execution per subtask has been
 * registered.
 *
 * @param v1 first job vertex
 * @param dop1 parallelism applied to {@code v1}
 * @param v2 second job vertex
 * @param dop2 parallelism applied to {@code v2}
 * @return the scheduler after scheduling has been started
 */
private SchedulerBase setupScheduler(JobVertex v1, int dop1, JobVertex v2, int dop2) throws Exception {
    v1.setParallelism(dop1);
    v2.setParallelism(dop2);
    v1.setInvokableClass(BatchTask.class);
    v2.setInvokableClass(BatchTask.class);

    final DirectScheduledExecutorService directExecutor = new DirectScheduledExecutorService();

    // execution graph that executes actions synchronously
    final SchedulerBase scheduler =
            SchedulerTestingUtils.newSchedulerBuilder(
                            JobGraphTestUtils.streamingJobGraph(v1, v2),
                            ComponentMainThreadExecutorServiceAdapter.forMainThread())
                    .setExecutionSlotAllocatorFactory(
                            SchedulerTestingUtils.newSlotSharingExecutionSlotAllocatorFactory())
                    .setFutureExecutor(directExecutor)
                    .setBlobWriter(blobWriter)
                    .build();

    final ExecutionGraph executionGraph = scheduler.getExecutionGraph();
    checkJobOffloaded((DefaultExecutionGraph) executionGraph);

    // schedule, this triggers mock deployment
    scheduler.startScheduling();

    // one registered execution is expected per subtask of both vertices
    final Map<ExecutionAttemptID, Execution> registeredExecutions =
            executionGraph.getRegisteredExecutions();
    assertEquals(dop1 + dop2, registeredExecutions.size());
    return scheduler;
}
Also used : DirectScheduledExecutorService(org.apache.flink.runtime.testutils.DirectScheduledExecutorService) SchedulerBase(org.apache.flink.runtime.scheduler.SchedulerBase)

Aggregations

SchedulerBase (org.apache.flink.runtime.scheduler.SchedulerBase): 56, Test (org.junit.Test): 49, JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 33, JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 19, JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID): 10, CompletableFuture (java.util.concurrent.CompletableFuture): 8, IOException (java.io.IOException): 7, TestingPhysicalSlotProvider (org.apache.flink.runtime.scheduler.TestingPhysicalSlotProvider): 7, TestRestartBackoffTimeStrategy (org.apache.flink.runtime.executiongraph.failover.flip1.TestRestartBackoffTimeStrategy): 6, TaskExecutionState (org.apache.flink.runtime.taskmanager.TaskExecutionState): 6, ArrayList (java.util.ArrayList): 5, JobStatus (org.apache.flink.api.common.JobStatus): 5, IntermediateResultPartitionID (org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID): 5, ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph): 4, ResultPartitionID (org.apache.flink.runtime.io.network.partition.ResultPartitionID): 4, SlotPool (org.apache.flink.runtime.jobmaster.slotpool.SlotPool): 4, TestingPhysicalSlot (org.apache.flink.runtime.scheduler.TestingPhysicalSlot): 4, VertexParallelismInformation (org.apache.flink.runtime.scheduler.VertexParallelismInformation): 4, VertexParallelismStore (org.apache.flink.runtime.scheduler.VertexParallelismStore): 4, TaskManagerLocation (org.apache.flink.runtime.taskmanager.TaskManagerLocation): 4