Search in sources :

Example 1 with Slot

use of org.apache.flink.runtime.instance.Slot in project flink by apache.

the class ExecutionGraphMetricsTest method testExecutionGraphRestartTimeMetric.

/**
 * Tests that the restarting time metric of the {@link ExecutionGraph} correctly reports restarting times.
 *
 * <p>A single-vertex job is driven through RUNNING, RESTARTING, RUNNING, RESTARTING and finally FAILED.
 * The gauge must report 0 until the first failure, report a positive, non-decreasing value while the job
 * is in RESTARTING, and stop changing once the job is RUNNING again or has FAILED.
 */
@Test
public void testExecutionGraphRestartTimeMetric() throws JobException, IOException, InterruptedException {
    final ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor();
    try {
        // setup execution graph with mocked scheduling logic
        int parallelism = 1;
        JobVertex jobVertex = new JobVertex("TestVertex");
        jobVertex.setParallelism(parallelism);
        jobVertex.setInvokableClass(NoOpInvokable.class);
        JobGraph jobGraph = new JobGraph("Test Job", jobVertex);

        // register the TestingReporter so that the metrics of the execution graph can be looked up by name
        Configuration config = new Configuration();
        config.setString(ConfigConstants.METRICS_REPORTERS_LIST, "test");
        config.setString(ConfigConstants.METRICS_REPORTER_PREFIX + "test." + ConfigConstants.METRICS_REPORTER_CLASS_SUFFIX, TestingReporter.class.getName());
        Configuration jobConfig = new Configuration();
        Time timeout = Time.seconds(10L);
        MetricRegistry metricRegistry = new MetricRegistry(MetricRegistryConfiguration.fromConfiguration(config));
        assertEquals(1, metricRegistry.getReporters().size());
        MetricReporter reporter = metricRegistry.getReporters().get(0);
        assertTrue(reporter instanceof TestingReporter);
        TestingReporter testingReporter = (TestingReporter) reporter;
        MetricGroup metricGroup = new JobManagerMetricGroup(metricRegistry, "localhost");

        // mocked task manager that acknowledges every task submission
        ResourceID taskManagerId = ResourceID.generate();
        TaskManagerLocation taskManagerLocation = mock(TaskManagerLocation.class);
        when(taskManagerLocation.getResourceID()).thenReturn(taskManagerId);
        when(taskManagerLocation.getHostname()).thenReturn("localhost");
        TaskManagerGateway taskManagerGateway = mock(TaskManagerGateway.class);
        when(taskManagerGateway.submitTask(any(TaskDeploymentDescriptor.class), any(Time.class))).thenReturn(FlinkCompletableFuture.completed(Acknowledge.get()));

        // mocked slot that is always alive and accepts every execution
        Slot rootSlot = mock(Slot.class);
        when(rootSlot.getSlotNumber()).thenReturn(0);
        AllocatedSlot mockAllocatedSlot = mock(AllocatedSlot.class);
        when(mockAllocatedSlot.getSlotAllocationId()).thenReturn(new AllocationID());
        SimpleSlot simpleSlot = mock(SimpleSlot.class);
        when(simpleSlot.isAlive()).thenReturn(true);
        when(simpleSlot.getTaskManagerLocation()).thenReturn(taskManagerLocation);
        when(simpleSlot.getTaskManagerID()).thenReturn(taskManagerId);
        when(simpleSlot.getTaskManagerGateway()).thenReturn(taskManagerGateway);
        when(simpleSlot.setExecutedVertex(Matchers.any(Execution.class))).thenReturn(true);
        when(simpleSlot.getRoot()).thenReturn(rootSlot);
        when(simpleSlot.getAllocatedSlot()).thenReturn(mockAllocatedSlot);

        // mocked scheduler that hands out the already completed slot future for every request
        Scheduler scheduler = mock(Scheduler.class);
        FlinkCompletableFuture<SimpleSlot> future = new FlinkCompletableFuture<>();
        future.complete(simpleSlot);
        when(scheduler.allocateSlot(any(ScheduledUnit.class), anyBoolean())).thenReturn(future);

        TestingRestartStrategy testingRestartStrategy = new TestingRestartStrategy();
        ExecutionGraph executionGraph = new ExecutionGraph(executor, executor, jobGraph.getJobID(), jobGraph.getName(), jobConfig, new SerializedValue<ExecutionConfig>(null), timeout, testingRestartStrategy, Collections.<BlobKey>emptyList(), Collections.<URL>emptyList(), scheduler, getClass().getClassLoader(), metricGroup);

        // get restarting time metric
        Metric metric = testingReporter.getMetric(ExecutionGraph.RESTARTING_TIME_METRIC_NAME);
        assertNotNull(metric);
        assertTrue(metric instanceof Gauge);
        @SuppressWarnings("unchecked") Gauge<Long> restartingTime = (Gauge<Long>) metric;

        // check that the restarting time is 0 since it's the initial start
        assertEquals(0L, restartingTime.getValue().longValue());
        executionGraph.attachJobGraph(jobGraph.getVerticesSortedTopologicallyFromSources());

        // start execution
        executionGraph.scheduleForExecution();
        assertEquals(0L, restartingTime.getValue().longValue());

        // tell execution graph that the tasks are in state running --> job status switches to state running
        List<ExecutionAttemptID> executionIDs = currentExecutionAttemptIds(executionGraph);
        reportExecutionsRunning(executionGraph, jobGraph, executionIDs);
        assertEquals(JobStatus.RUNNING, executionGraph.getState());
        assertEquals(0L, restartingTime.getValue().longValue());

        // fail the job so that it goes into state restarting
        reportExecutionsFailed(executionGraph, jobGraph, executionIDs);
        assertEquals(JobStatus.RESTARTING, executionGraph.getState());
        long firstRestartingTimestamp = executionGraph.getStatusTimestamp(JobStatus.RESTARTING);

        // wait some time so that the restarting time gauge shows a value different from 0
        Thread.sleep(50);

        // check that the restarting time is monotonically increasing and that some time has been measured
        long previousRestartingTime = assertRestartingTimeNotDecreasing(restartingTime);
        assertTrue(previousRestartingTime > 0);

        // restart job; the restart creates new execution attempts, so the attempt IDs have to be re-read
        testingRestartStrategy.restartExecutionGraph();
        executionIDs = currentExecutionAttemptIds(executionGraph);
        reportExecutionsRunning(executionGraph, jobGraph, executionIDs);
        assertEquals(JobStatus.RUNNING, executionGraph.getState());
        assertTrue(firstRestartingTimestamp != 0);

        // check that the restarting time does not increase after we've reached the running state
        assertRestartingTimeConstant(restartingTime);

        // fail job again
        reportExecutionsFailed(executionGraph, jobGraph, executionIDs);
        assertEquals(JobStatus.RESTARTING, executionGraph.getState());
        long secondRestartingTimestamp = executionGraph.getStatusTimestamp(JobStatus.RESTARTING);
        assertTrue(firstRestartingTimestamp != secondRestartingTimestamp);
        Thread.sleep(50);

        // check that the restarting time is increasing again
        previousRestartingTime = assertRestartingTimeNotDecreasing(restartingTime);
        assertTrue(previousRestartingTime > 0);

        // now lets fail the job while it is in restarting and see whether the restarting time then stops to increase
        // for this to work, we have to use a SuppressRestartException
        executionGraph.fail(new SuppressRestartsException(new Exception()));
        assertEquals(JobStatus.FAILED, executionGraph.getState());
        assertRestartingTimeConstant(restartingTime);
    } finally {
        executor.shutdownNow();
    }
}

/**
 * Collects the attempt ID of the current execution attempt of every execution vertex of the given graph.
 */
private static List<ExecutionAttemptID> currentExecutionAttemptIds(ExecutionGraph executionGraph) {
    List<ExecutionAttemptID> attemptIds = new ArrayList<>();
    for (ExecutionVertex executionVertex : executionGraph.getAllExecutionVertices()) {
        attemptIds.add(executionVertex.getCurrentExecutionAttempt().getAttemptId());
    }
    return attemptIds;
}

/**
 * Reports the state RUNNING to the execution graph for each of the given execution attempts.
 */
private static void reportExecutionsRunning(ExecutionGraph executionGraph, JobGraph jobGraph, List<ExecutionAttemptID> attemptIds) {
    for (ExecutionAttemptID attemptId : attemptIds) {
        executionGraph.updateState(new TaskExecutionState(jobGraph.getJobID(), attemptId, ExecutionState.RUNNING));
    }
}

/**
 * Reports the state FAILED, each with a fresh exception as the cause, to the execution graph for each of
 * the given execution attempts.
 */
private static void reportExecutionsFailed(ExecutionGraph executionGraph, JobGraph jobGraph, List<ExecutionAttemptID> attemptIds) {
    for (ExecutionAttemptID attemptId : attemptIds) {
        executionGraph.updateState(new TaskExecutionState(jobGraph.getJobID(), attemptId, ExecutionState.FAILED, new Exception()));
    }
}

/**
 * Polls the gauge repeatedly and asserts that its value never decreases between two consecutive reads.
 *
 * @return the last value that was read from the gauge
 */
private static long assertRestartingTimeNotDecreasing(Gauge<Long> restartingTime) {
    long previousValue = restartingTime.getValue();
    for (int i = 0; i < 10; i++) {
        long currentValue = restartingTime.getValue();
        assertTrue(currentValue >= previousValue);
        previousValue = currentValue;
    }
    return previousValue;
}

/**
 * Polls the gauge repeatedly and asserts that its value does not change between two consecutive reads.
 */
private static void assertRestartingTimeConstant(Gauge<Long> restartingTime) {
    long previousValue = restartingTime.getValue();
    for (int i = 0; i < 10; i++) {
        long currentValue = restartingTime.getValue();
        assertEquals(previousValue, currentValue);
        previousValue = currentValue;
    }
}
Also used : JobManagerMetricGroup(org.apache.flink.runtime.metrics.groups.JobManagerMetricGroup) MetricRegistryConfiguration(org.apache.flink.runtime.metrics.MetricRegistryConfiguration) Configuration(org.apache.flink.configuration.Configuration) Instance(org.apache.flink.runtime.instance.Instance) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) MetricGroup(org.apache.flink.metrics.MetricGroup) JobManagerMetricGroup(org.apache.flink.runtime.metrics.groups.JobManagerMetricGroup) TaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway) ArrayList(java.util.ArrayList) Time(org.apache.flink.api.common.time.Time) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) FlinkCompletableFuture(org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture) Gauge(org.apache.flink.metrics.Gauge) SuppressRestartsException(org.apache.flink.runtime.execution.SuppressRestartsException) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) AllocatedSlot(org.apache.flink.runtime.jobmanager.slots.AllocatedSlot) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) ScheduledUnit(org.apache.flink.runtime.jobmanager.scheduler.ScheduledUnit) MetricReporter(org.apache.flink.metrics.reporter.MetricReporter) TaskExecutionState(org.apache.flink.runtime.taskmanager.TaskExecutionState) SuppressRestartsException(org.apache.flink.runtime.execution.SuppressRestartsException) JobException(org.apache.flink.runtime.JobException) IOException(java.io.IOException) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) 
JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) Slot(org.apache.flink.runtime.instance.Slot) AllocatedSlot(org.apache.flink.runtime.jobmanager.slots.AllocatedSlot) Metric(org.apache.flink.metrics.Metric) Test(org.junit.Test)

Example 2 with Slot

use of org.apache.flink.runtime.instance.Slot in project flink by apache.

the class ExecutionVertexDeploymentTest method testTddProducedPartitionsLazyScheduling.

/**
 * Checks, for every {@link ScheduleMode}, that the deployment descriptor created for an execution vertex
 * contains exactly one produced partition descriptor and that its "send schedule or update consumers"
 * flag equals the mode's lazy-deployment setting.
 */
@Test
public void testTddProducedPartitionsLazyScheduling() throws Exception {
    // execution vertex that produces a single pipelined result
    TestingUtils.QueuedActionExecutionContext executionContext = TestingUtils.queuedActionExecutionContext();
    ExecutionJobVertex jobVertex = getExecutionVertex(new JobVertexID(), executionContext);
    IntermediateResult result = new IntermediateResult(new IntermediateDataSetID(), jobVertex, 1, ResultPartitionType.PIPELINED);
    ExecutionVertex vertex = new ExecutionVertex(jobVertex, 0, new IntermediateResult[] { result }, Time.minutes(1));

    // attach one consumer to the only partition of the result
    ExecutionEdge consumerEdge = createMockExecutionEdge(1);
    result.getPartitions()[0].addConsumerGroup();
    result.getPartitions()[0].addConsumer(consumerEdge, 0);

    // mocked slot hierarchy: the simple slot and its root share the same allocated slot
    AllocatedSlot allocatedSlot = mock(AllocatedSlot.class);
    when(allocatedSlot.getSlotAllocationId()).thenReturn(new AllocationID());

    Slot rootSlot = mock(Slot.class);
    when(rootSlot.getSlotNumber()).thenReturn(1);

    SimpleSlot simpleSlot = mock(SimpleSlot.class);
    when(simpleSlot.getRoot()).thenReturn(rootSlot);
    when(simpleSlot.getAllocatedSlot()).thenReturn(allocatedSlot);
    when(rootSlot.getAllocatedSlot()).thenReturn(allocatedSlot);

    for (ScheduleMode scheduleMode : ScheduleMode.values()) {
        vertex.getExecutionGraph().setScheduleMode(scheduleMode);

        Collection<ResultPartitionDeploymentDescriptor> partitionDescriptors =
            vertex.createDeploymentDescriptor(new ExecutionAttemptID(), simpleSlot, null, 1).getProducedPartitions();
        assertEquals(1, partitionDescriptors.size());

        ResultPartitionDeploymentDescriptor onlyDescriptor = partitionDescriptors.iterator().next();
        assertEquals(scheduleMode.allowLazyDeployment(), onlyDescriptor.sendScheduleOrUpdateConsumersMessage());
    }
}
Also used : AllocatedSlot(org.apache.flink.runtime.jobmanager.slots.AllocatedSlot) ResultPartitionDeploymentDescriptor(org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) ExecutionGraphTestUtils.getExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.getExecutionVertex) TestingUtils(org.apache.flink.runtime.testingUtils.TestingUtils) ScheduleMode(org.apache.flink.runtime.jobgraph.ScheduleMode) Slot(org.apache.flink.runtime.instance.Slot) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) AllocatedSlot(org.apache.flink.runtime.jobmanager.slots.AllocatedSlot) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) Test(org.junit.Test)

Example 3 with Slot

use of org.apache.flink.runtime.instance.Slot in project flink by apache.

the class ExecutionGraphSchedulingTest method testExecutionJobVertexAllocateResourcesReleasesOnException.

/**
 * Verifies that {@link ExecutionJobVertex#allocateResourcesForAll(SlotProvider, boolean)} hands every
 * slot it already obtained back to the slot owner when a later slot request throws.
 */
@Test
public void testExecutionJobVertexAllocateResourcesReleasesOnException() throws Exception {
    final int parallelism = 8;

    final JobVertex vertex = new JobVertex("vertex");
    vertex.setParallelism(parallelism);
    vertex.setInvokableClass(NoOpInvokable.class);

    final JobID jobId = new JobID();
    final JobGraph jobGraph = new JobGraph(jobId, "test", vertex);

    // slot owner that records every slot that is given back to it
    final List<SimpleSlot> returnedSlots = new ArrayList<>();
    final SlotOwner recycler = new SlotOwner() {

        @Override
        public boolean returnAllocatedSlot(Slot slot) {
            returnedSlots.add((SimpleSlot) slot);
            return true;
        }
    };

    final SlotProvider slotProvider = mock(SlotProvider.class);
    final TaskManagerGateway taskManager = mock(TaskManagerGateway.class);

    // three slots are available, fewer than the parallelism of the vertex
    final List<SimpleSlot> availableSlots = new ArrayList<>();
    for (int i = 0; i < 3; i++) {
        availableSlots.add(createSlot(taskManager, jobId, recycler));
    }

    // the slot provider hands out the available slots one by one and throws once they are used up
    when(slotProvider.allocateSlot(any(ScheduledUnit.class), anyBoolean())).then(new Answer<Future<SimpleSlot>>() {

        @Override
        public Future<SimpleSlot> answer(InvocationOnMock invocation) {
            if (!availableSlots.isEmpty()) {
                return FlinkCompletableFuture.completed(availableSlots.remove(0));
            }
            throw new TestRuntimeException();
        }
    });

    final ExecutionGraph eg = createExecutionGraph(jobGraph, slotProvider);
    final ExecutionJobVertex ejv = eg.getJobVertex(vertex.getID());

    // remember how many slots can be handed out before the allocation drains the list
    final int numSlotsToExpectBack = availableSlots.size();

    try {
        ejv.allocateResourcesForAll(slotProvider, false);
        fail("should have failed with an exception");
    } catch (TestRuntimeException e) {
        // expected: the slot provider ran out of slots
    }

    // every slot that was handed out must have been returned to the owner
    assertEquals(numSlotsToExpectBack, returnedSlots.size());
}
Also used : SlotProvider(org.apache.flink.runtime.instance.SlotProvider) ArrayList(java.util.ArrayList) TaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway) ScheduledUnit(org.apache.flink.runtime.jobmanager.scheduler.ScheduledUnit) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) SlotOwner(org.apache.flink.runtime.jobmanager.slots.SlotOwner) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) InvocationOnMock(org.mockito.invocation.InvocationOnMock) Slot(org.apache.flink.runtime.instance.Slot) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) AllocatedSlot(org.apache.flink.runtime.jobmanager.slots.AllocatedSlot) Future(org.apache.flink.runtime.concurrent.Future) FlinkCompletableFuture(org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 4 with Slot

use of org.apache.flink.runtime.instance.Slot in project flink by apache.

the class ExecutionGraphSchedulingTest method testExecutionGraphScheduleReleasesResourcesOnException.

/**
 * Verifies that {@link ExecutionGraph#scheduleForExecution()} hands every slot it already obtained
 * back to the slot owner when a later slot request throws.
 */
@Test
public void testExecutionGraphScheduleReleasesResourcesOnException() throws Exception {
    // job graph under test:
    //
    //              [pipelined, all-to-all]
    //   (source) --------------------------> (target)
    final int parallelism = 3;

    final JobVertex sourceVertex = new JobVertex("source");
    sourceVertex.setParallelism(parallelism);
    sourceVertex.setInvokableClass(NoOpInvokable.class);

    final JobVertex targetVertex = new JobVertex("target");
    targetVertex.setParallelism(parallelism);
    targetVertex.setInvokableClass(NoOpInvokable.class);
    targetVertex.connectNewDataSetAsInput(sourceVertex, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);

    final JobID jobId = new JobID();
    final JobGraph jobGraph = new JobGraph(jobId, "test", sourceVertex, targetVertex);

    // slot owner that records every slot that is given back to it
    final List<SimpleSlot> returnedSlots = new ArrayList<>();
    final SlotOwner recycler = new SlotOwner() {

        @Override
        public boolean returnAllocatedSlot(Slot slot) {
            returnedSlots.add((SimpleSlot) slot);
            return true;
        }
    };

    final TaskManagerGateway taskManager = mock(TaskManagerGateway.class);

    // five slots are available, one fewer than the two vertices need in total
    final List<SimpleSlot> availableSlots = new ArrayList<>();
    for (int i = 0; i < 5; i++) {
        availableSlots.add(createSlot(taskManager, jobId, recycler));
    }

    // the slot provider hands out the available slots one by one and throws once they are used up
    final SlotProvider slotProvider = mock(SlotProvider.class);
    when(slotProvider.allocateSlot(any(ScheduledUnit.class), anyBoolean())).then(new Answer<Future<SimpleSlot>>() {

        @Override
        public Future<SimpleSlot> answer(InvocationOnMock invocation) {
            if (!availableSlots.isEmpty()) {
                return FlinkCompletableFuture.completed(availableSlots.remove(0));
            }
            throw new TestRuntimeException();
        }
    });

    final ExecutionGraph eg = createExecutionGraph(jobGraph, slotProvider);

    // remember how many slots can be handed out before scheduling drains the list
    final int numSlotsToExpectBack = availableSlots.size();

    try {
        eg.setScheduleMode(ScheduleMode.EAGER);
        eg.scheduleForExecution();
        fail("should have failed with an exception");
    } catch (TestRuntimeException e) {
        // expected: the slot provider ran out of slots
    }

    // every slot that was handed out must have been returned to the owner
    assertEquals(numSlotsToExpectBack, returnedSlots.size());
}
Also used : SlotProvider(org.apache.flink.runtime.instance.SlotProvider) ArrayList(java.util.ArrayList) TaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway) ScheduledUnit(org.apache.flink.runtime.jobmanager.scheduler.ScheduledUnit) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) SlotOwner(org.apache.flink.runtime.jobmanager.slots.SlotOwner) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) InvocationOnMock(org.mockito.invocation.InvocationOnMock) Slot(org.apache.flink.runtime.instance.Slot) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) AllocatedSlot(org.apache.flink.runtime.jobmanager.slots.AllocatedSlot) Future(org.apache.flink.runtime.concurrent.Future) FlinkCompletableFuture(org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 5 with Slot

use of org.apache.flink.runtime.instance.Slot in project flink by apache.

the class ExecutionGraphSchedulingTest method testOneSlotFailureAbortsDeploy.

/**
 * This test verifies that if one slot future fails, the deployment will be aborted.
 *
 * <p>Half of the slot futures are completed before scheduling starts. After one of the remaining futures
 * is completed exceptionally, the test expects the job to reach a terminal state, every slot that was
 * handed out to be given back to the slot owner, and no task to have been submitted to the task manager.
 *
 * <p>Note: the completion pattern below pairs index {@code i} of the sources with index {@code i + 1} of
 * the targets, so {@code parallelism} has to be an even number.
 */
@Test
public void testOneSlotFailureAbortsDeploy() throws Exception {
    //                                            [pipelined]
    //  we construct a simple graph    (source) ----------------> (target)
    final int parallelism = 6;
    final JobVertex sourceVertex = new JobVertex("source");
    sourceVertex.setParallelism(parallelism);
    sourceVertex.setInvokableClass(NoOpInvokable.class);
    final JobVertex targetVertex = new JobVertex("target");
    targetVertex.setParallelism(parallelism);
    targetVertex.setInvokableClass(NoOpInvokable.class);
    targetVertex.connectNewDataSetAsInput(sourceVertex, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
    final JobID jobId = new JobID();
    final JobGraph jobGraph = new JobGraph(jobId, "test", sourceVertex, targetVertex);
    //
    //  Create the slots, futures, and the slot provider
    final TaskManagerGateway taskManager = mock(TaskManagerGateway.class);
    final SlotOwner slotOwner = mock(SlotOwner.class);
    final SimpleSlot[] sourceSlots = new SimpleSlot[parallelism];
    final SimpleSlot[] targetSlots = new SimpleSlot[parallelism];
    @SuppressWarnings({ "unchecked", "rawtypes" }) final FlinkCompletableFuture<SimpleSlot>[] sourceFutures = new FlinkCompletableFuture[parallelism];
    @SuppressWarnings({ "unchecked", "rawtypes" }) final FlinkCompletableFuture<SimpleSlot>[] targetFutures = new FlinkCompletableFuture[parallelism];
    for (int i = 0; i < parallelism; i++) {
        sourceSlots[i] = createSlot(taskManager, jobId, slotOwner);
        targetSlots[i] = createSlot(taskManager, jobId, slotOwner);
        sourceFutures[i] = new FlinkCompletableFuture<>();
        targetFutures[i] = new FlinkCompletableFuture<>();
    }
    ProgrammedSlotProvider slotProvider = new ProgrammedSlotProvider(parallelism);
    slotProvider.addSlots(sourceVertex.getID(), sourceFutures);
    slotProvider.addSlots(targetVertex.getID(), targetFutures);
    final ExecutionGraph eg = createExecutionGraph(jobGraph, slotProvider);
    TerminalJobStatusListener testListener = new TerminalJobStatusListener();
    eg.registerJobStatusListener(testListener);
    // complete the even-indexed source futures and the odd-indexed target futures up front and count
    // them, so that the expected number of returned slots below follows the parallelism automatically
    int numCompletedSlots = 0;
    for (int i = 0; i < parallelism; i += 2) {
        sourceFutures[i].complete(sourceSlots[i]);
        targetFutures[i + 1].complete(targetSlots[i + 1]);
        numCompletedSlots += 2;
    }
    //
    //  kick off the scheduling
    eg.setScheduleMode(ScheduleMode.EAGER);
    eg.setQueuedSchedulingAllowed(true);
    eg.scheduleForExecution();
    // fail one slot whose future has not been completed yet
    sourceFutures[1].completeExceptionally(new TestRuntimeException());
    // wait until the job failed as a whole
    testListener.waitForTerminalState(2000);
    // wait until all slots are back
    verify(slotOwner, new Timeout(2000, times(numCompletedSlots))).returnAllocatedSlot(any(Slot.class));
    // no deployment calls must have happened
    verify(taskManager, times(0)).submitTask(any(TaskDeploymentDescriptor.class), any(Time.class));
    // every slot that was handed out through a completed future must have been canceled
    for (int i = 0; i < parallelism; i += 2) {
        assertTrue(sourceSlots[i].isCanceled());
        assertTrue(targetSlots[i + 1].isCanceled());
    }
}
Also used : Timeout(org.mockito.verification.Timeout) TaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway) Time(org.apache.flink.api.common.time.Time) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) FlinkCompletableFuture(org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture) SlotOwner(org.apache.flink.runtime.jobmanager.slots.SlotOwner) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) Slot(org.apache.flink.runtime.instance.Slot) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) AllocatedSlot(org.apache.flink.runtime.jobmanager.slots.AllocatedSlot) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

Slot (org.apache.flink.runtime.instance.Slot)7 AllocatedSlot (org.apache.flink.runtime.jobmanager.slots.AllocatedSlot)7 SimpleSlot (org.apache.flink.runtime.instance.SimpleSlot)6 Test (org.junit.Test)6 FlinkCompletableFuture (org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture)5 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)5 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)5 TaskManagerGateway (org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway)5 JobID (org.apache.flink.api.common.JobID)4 TaskDeploymentDescriptor (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor)4 SlotOwner (org.apache.flink.runtime.jobmanager.slots.SlotOwner)4 ArrayList (java.util.ArrayList)3 Time (org.apache.flink.api.common.time.Time)3 ScheduledUnit (org.apache.flink.runtime.jobmanager.scheduler.ScheduledUnit)3 IOException (java.io.IOException)2 AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID)2 Future (org.apache.flink.runtime.concurrent.Future)2 SlotProvider (org.apache.flink.runtime.instance.SlotProvider)2 InvocationOnMock (org.mockito.invocation.InvocationOnMock)2 ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService)1