Search in sources:

Example 26 with Scheduler

use of org.apache.flink.runtime.jobmanager.scheduler.Scheduler in project flink by apache.

the class ExecutionGraphConstructionTest method testCannotConnectWrongOrder.

/**
 * Verifies that an execution graph rejects job vertices that are attached in the wrong order.
 *
 * <p>"vertex5" consumes a data set produced by "vertex4", yet it is placed before "vertex4" in
 * the list handed to {@code attachJobGraph}. The call is expected to throw a
 * {@link JobException}; the test fails if it returns normally.
 */
@Test
public void testCannotConnectWrongOrder() throws Exception {
    final JobID jobId = new JobID();
    final String jobName = "Test Job Sample Name";
    final Configuration cfg = new Configuration();

    JobVertex first = new JobVertex("vertex1");
    JobVertex second = new JobVertex("vertex2");
    JobVertex third = new JobVertex("vertex3");
    JobVertex fourth = new JobVertex("vertex4");
    JobVertex fifth = new JobVertex("vertex5");

    first.setParallelism(5);
    second.setParallelism(7);
    third.setParallelism(2);
    fourth.setParallelism(11);
    fifth.setParallelism(4);

    // every vertex uses the same invokable class
    for (JobVertex vertex : Arrays.asList(first, second, third, fourth, fifth)) {
        vertex.setInvokableClass(AbstractInvokable.class);
    }

    // topology: 1 -> 2 -> 4 -> 5, with 3 feeding both 4 and 5
    second.connectNewDataSetAsInput(first, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
    fourth.connectNewDataSetAsInput(second, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
    fourth.connectNewDataSetAsInput(third, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
    fifth.connectNewDataSetAsInput(fourth, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
    fifth.connectNewDataSetAsInput(third, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);

    // deliberately list vertex5 ahead of its producer vertex4
    List<JobVertex> wronglyOrdered = new ArrayList<>(Arrays.asList(first, second, third, fifth, fourth));

    ExecutionGraph graph = new ExecutionGraph(
            TestingUtils.defaultExecutor(),
            TestingUtils.defaultExecutor(),
            jobId,
            jobName,
            cfg,
            new SerializedValue<>(new ExecutionConfig()),
            AkkaUtils.getDefaultTimeout(),
            new NoRestartStrategy(),
            new Scheduler(TestingUtils.defaultExecutionContext()));

    try {
        graph.attachJobGraph(wronglyOrdered);
        fail("Attached wrong jobgraph");
    } catch (JobException expected) {
        // expected: the consumer was attached before its producer
    }
}
Also used : JobException(org.apache.flink.runtime.JobException) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) Configuration(org.apache.flink.configuration.Configuration) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) ArrayList(java.util.ArrayList) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) NoRestartStrategy(org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 27 with Scheduler

use of org.apache.flink.runtime.jobmanager.scheduler.Scheduler in project flink by apache.

the class ExecutionGraphDeploymentTest method testNoResourceAvailableFailure.

/**
 * Tests that a blocking batch job fails if there are not enough resources left to schedule the
 * succeeding tasks. This test case is related to [FLINK-4296] where finished producing tasks
 * swallow the fail exception when scheduling a consumer task.
 */
@Test
public void testNoResourceAvailableFailure() throws Exception {
    final JobID jobId = new JobID();
    final int sourceParallelism = 1;
    final int sinkParallelism = 1;

    JobVertex source = new JobVertex("source");
    JobVertex sink = new JobVertex("sink");
    source.setParallelism(sourceParallelism);
    sink.setParallelism(sinkParallelism);
    source.setInvokableClass(BatchTask.class);
    sink.setInvokableClass(BatchTask.class);
    sink.connectNewDataSetAsInput(source, DistributionPattern.POINTWISE, ResultPartitionType.BLOCKING);

    Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext());
    // register only as many instances as the source has subtasks
    for (int registered = 0; registered < sourceParallelism; registered++) {
        ActorTaskManagerGateway gateway = new ActorTaskManagerGateway(
                new ExecutionGraphTestUtils.SimpleActorGateway(TestingUtils.directExecutionContext()));
        scheduler.newInstanceAvailable(ExecutionGraphTestUtils.getInstance(gateway));
    }

    // execution graph that executes actions synchronously
    ExecutionGraph graph = new ExecutionGraph(
            new DirectScheduledExecutorService(),
            TestingUtils.defaultExecutor(),
            jobId,
            "failing test job",
            new Configuration(),
            new SerializedValue<>(new ExecutionConfig()),
            AkkaUtils.getDefaultTimeout(),
            new NoRestartStrategy(),
            scheduler);
    graph.setQueuedSchedulingAllowed(false);

    List<JobVertex> topology = Arrays.asList(source, sink);
    graph.attachJobGraph(topology);
    assertEquals(sourceParallelism, scheduler.getNumberOfAvailableSlots());

    // schedule, this triggers mock deployment
    graph.scheduleForExecution();

    ExecutionAttemptID sourceAttempt = graph
            .getJobVertex(source.getID())
            .getTaskVertices()[0]
            .getCurrentExecutionAttempt()
            .getAttemptId();

    // drive the source subtask through RUNNING to FINISHED
    graph.updateState(new TaskExecutionState(jobId, sourceAttempt, ExecutionState.RUNNING));
    graph.updateState(new TaskExecutionState(jobId, sourceAttempt, ExecutionState.FINISHED, null));

    assertEquals(JobStatus.FAILED, graph.getState());
}
Also used : Configuration(org.apache.flink.configuration.Configuration) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) DirectScheduledExecutorService(org.apache.flink.runtime.testutils.DirectScheduledExecutorService) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) NoRestartStrategy(org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy) TaskExecutionState(org.apache.flink.runtime.taskmanager.TaskExecutionState) ActorTaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 28 with Scheduler

use of org.apache.flink.runtime.jobmanager.scheduler.Scheduler in project flink by apache.

the class ExecutionGraphDeploymentTest method setupExecution.

/**
 * Builds an execution graph from the two given vertices, registers one instance per expected
 * subtask with a fresh scheduler, and schedules the graph for execution.
 *
 * @param v1 first vertex; receives parallelism {@code dop1} and {@code BatchTask} as invokable
 * @param dop1 parallelism applied to {@code v1}
 * @param v2 second vertex; receives parallelism {@code dop2} and {@code BatchTask} as invokable
 * @param dop2 parallelism applied to {@code v2}
 * @return the scheduled execution graph paired with its registered executions
 * @throws Exception if building, attaching or scheduling the graph fails
 */
private Tuple2<ExecutionGraph, Map<ExecutionAttemptID, Execution>> setupExecution(JobVertex v1, int dop1, JobVertex v2, int dop2) throws Exception {
    final JobID jobId = new JobID();
    final int totalSubtasks = dop1 + dop2;

    v1.setParallelism(dop1);
    v2.setParallelism(dop2);
    v1.setInvokableClass(BatchTask.class);
    v2.setInvokableClass(BatchTask.class);

    Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());
    for (int registered = 0; registered < totalSubtasks; registered++) {
        ActorTaskManagerGateway gateway = new ActorTaskManagerGateway(
                new ExecutionGraphTestUtils.SimpleActorGateway(TestingUtils.directExecutionContext()));
        scheduler.newInstanceAvailable(ExecutionGraphTestUtils.getInstance(gateway));
    }

    // execution graph that executes actions synchronously
    ExecutionGraph graph = new ExecutionGraph(
            new DirectScheduledExecutorService(),
            TestingUtils.defaultExecutor(),
            jobId,
            "some job",
            new Configuration(),
            new SerializedValue<>(new ExecutionConfig()),
            AkkaUtils.getDefaultTimeout(),
            new NoRestartStrategy(),
            scheduler);
    graph.setQueuedSchedulingAllowed(false);

    List<JobVertex> topology = Arrays.asList(v1, v2);
    graph.attachJobGraph(topology);
    assertEquals(totalSubtasks, scheduler.getNumberOfAvailableSlots());

    // schedule, this triggers mock deployment
    graph.scheduleForExecution();

    Map<ExecutionAttemptID, Execution> registeredExecutions = graph.getRegisteredExecutions();
    assertEquals(totalSubtasks, registeredExecutions.size());

    return new Tuple2<>(graph, registeredExecutions);
}
Also used : Configuration(org.apache.flink.configuration.Configuration) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) DirectScheduledExecutorService(org.apache.flink.runtime.testutils.DirectScheduledExecutorService) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) NoRestartStrategy(org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy) ActorTaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobID(org.apache.flink.api.common.JobID)

Example 29 with Scheduler

use of org.apache.flink.runtime.jobmanager.scheduler.Scheduler in project flink by apache.

the class ExecutionGraphRestartTest method testFailWhileRestarting.

/**
 * Checks how a job in state RESTARTING reacts to failures: an ordinary exception leaves it in
 * RESTARTING, a {@link SuppressRestartsException} moves it to FAILED, and a subsequent
 * {@code restart()} call leaves it in FAILED.
 */
@Test
public void testFailWhileRestarting() throws Exception {
    Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());

    ActorTaskManagerGateway gateway =
            new ActorTaskManagerGateway(new SimpleActorGateway(TestingUtils.directExecutionContext()));
    Instance taskManager = ExecutionGraphTestUtils.getInstance(gateway, NUM_TASKS);
    scheduler.newInstanceAvailable(taskManager);

    // Blocking program
    ExecutionGraph graph = new ExecutionGraph(
            TestingUtils.defaultExecutor(),
            TestingUtils.defaultExecutor(),
            new JobID(),
            "TestJob",
            new Configuration(),
            new SerializedValue<>(new ExecutionConfig()),
            AkkaUtils.getDefaultTimeout(),
            // We want to manually control the restart and delay
            new InfiniteDelayRestartStrategy(),
            scheduler);

    JobVertex vertex = new JobVertex("NoOpInvokable");
    vertex.setInvokableClass(NoOpInvokable.class);
    vertex.setParallelism(NUM_TASKS);

    JobGraph jobGraph = new JobGraph("TestJob", vertex);
    graph.attachJobGraph(jobGraph.getVerticesSortedTopologicallyFromSources());
    assertEquals(JobStatus.CREATED, graph.getState());

    graph.scheduleForExecution();
    assertEquals(JobStatus.RUNNING, graph.getState());

    // Kill the instance and wait for the job to restart
    taskManager.markDead();
    Deadline restartDeadline = TestingUtils.TESTING_DURATION().fromNow();
    for (;;) {
        // stop polling once the deadline has passed or the job is restarting
        if (!restartDeadline.hasTimeLeft() || graph.getState() == JobStatus.RESTARTING) {
            break;
        }
        Thread.sleep(100);
    }
    assertEquals(JobStatus.RESTARTING, graph.getState());

    // The restarting should not fail with an ordinary exception
    graph.fail(new Exception("Test exception"));
    assertEquals(JobStatus.RESTARTING, graph.getState());

    // but it should fail when sending a SuppressRestartsException
    graph.fail(new SuppressRestartsException(new Exception("Test exception")));
    assertEquals(JobStatus.FAILED, graph.getState());

    // The restart has been aborted
    graph.restart();
    assertEquals(JobStatus.FAILED, graph.getState());
}
Also used : Configuration(org.apache.flink.configuration.Configuration) Instance(org.apache.flink.runtime.instance.Instance) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) Deadline(scala.concurrent.duration.Deadline) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) SimpleActorGateway(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway) SuppressRestartsException(org.apache.flink.runtime.execution.SuppressRestartsException) IOException(java.io.IOException) ActorTaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) SuppressRestartsException(org.apache.flink.runtime.execution.SuppressRestartsException) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) InfiniteDelayRestartStrategy(org.apache.flink.runtime.executiongraph.restart.InfiniteDelayRestartStrategy) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 30 with Scheduler

use of org.apache.flink.runtime.jobmanager.scheduler.Scheduler in project flink by apache.

the class ExecutionGraphRestartTest method testSuspendWhileRestarting.

/**
 * Tests that a suspend call while restarting a job will abort the restarting.
 *
 * <p>The job is brought into RESTARTING by marking its only instance dead, then suspended.
 * After the restart strategy is unlocked and reports the restart as done, the job must still
 * be SUSPENDED.
 *
 * @throws Exception if setting up or driving the execution graph fails, or if waiting on the
 *         restart strategy's futures fails
 */
@Test
public void testSuspendWhileRestarting() throws Exception {
    FiniteDuration timeout = new FiniteDuration(1, TimeUnit.MINUTES);
    Deadline deadline = timeout.fromNow();

    ActorTaskManagerGateway gateway =
            new ActorTaskManagerGateway(new SimpleActorGateway(TestingUtils.directExecutionContext()));
    Instance taskManager = ExecutionGraphTestUtils.getInstance(gateway, NUM_TASKS);

    Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());
    scheduler.newInstanceAvailable(taskManager);

    JobVertex task = new JobVertex("Task");
    task.setInvokableClass(NoOpInvokable.class);
    task.setParallelism(NUM_TASKS);
    JobGraph jobGraph = new JobGraph("Pointwise job", task);

    ControllableRestartStrategy restartStrategy = new ControllableRestartStrategy(timeout);

    ExecutionGraph graph = new ExecutionGraph(
            TestingUtils.defaultExecutor(),
            TestingUtils.defaultExecutor(),
            new JobID(),
            "Test job",
            new Configuration(),
            new SerializedValue<>(new ExecutionConfig()),
            AkkaUtils.getDefaultTimeout(),
            restartStrategy,
            scheduler);
    graph.attachJobGraph(jobGraph.getVerticesSortedTopologicallyFromSources());
    assertEquals(JobStatus.CREATED, graph.getState());

    graph.scheduleForExecution();
    assertEquals(JobStatus.RUNNING, graph.getState());

    // losing the instance pushes the job into a restart attempt
    taskManager.markDead();
    Await.ready(restartStrategy.getReachedCanRestart(), deadline.timeLeft());
    assertEquals(JobStatus.RESTARTING, graph.getState());

    // suspend while the restart is still pending
    graph.suspend(new Exception("Test exception"));
    assertEquals(JobStatus.SUSPENDED, graph.getState());

    // let the restart proceed; it must not leave the SUSPENDED state
    restartStrategy.unlockRestart();
    Await.ready(restartStrategy.getRestartDone(), deadline.timeLeft());
    assertEquals(JobStatus.SUSPENDED, graph.getState());
}
Also used : Configuration(org.apache.flink.configuration.Configuration) Instance(org.apache.flink.runtime.instance.Instance) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) Deadline(scala.concurrent.duration.Deadline) FiniteDuration(scala.concurrent.duration.FiniteDuration) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) SimpleActorGateway(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway) SuppressRestartsException(org.apache.flink.runtime.execution.SuppressRestartsException) IOException(java.io.IOException) ActorTaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

Scheduler (org.apache.flink.runtime.jobmanager.scheduler.Scheduler)40 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)34 Test (org.junit.Test)32 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)29 NoRestartStrategy (org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy)25 JobID (org.apache.flink.api.common.JobID)22 Configuration (org.apache.flink.configuration.Configuration)21 ArrayList (java.util.ArrayList)17 JobException (org.apache.flink.runtime.JobException)17 ActorTaskManagerGateway (org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway)16 Instance (org.apache.flink.runtime.instance.Instance)14 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)11 IOException (java.io.IOException)9 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)9 SimpleSlot (org.apache.flink.runtime.instance.SimpleSlot)8 SimpleActorGateway (org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway)7 SuppressRestartsException (org.apache.flink.runtime.execution.SuppressRestartsException)6 FlinkCompletableFuture (org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture)4 ExecutionGraphTestUtils.getInstance (org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.getInstance)4 IntermediateDataSet (org.apache.flink.runtime.jobgraph.IntermediateDataSet)4