Search in sources :

Example 6 with Slot

use of org.apache.flink.runtime.instance.Slot in project flink by apache.

the class JobMaster method requestNextInputSplit.

/**
 * Hands out the next input split for an execution attempt, in serialized form.
 *
 * <p>The split comes from the {@code InputSplitAssigner} of the job vertex identified by
 * {@code vertexID}. The assigner receives the hostname of the slot assigned to the attempt
 * ({@code null} when no slot is assigned) and the subtask index of the attempt's vertex.
 *
 * @param leaderSessionID leader session id, checked through {@code validateLeaderSessionId}
 * @param vertexID id of the job vertex whose split assigner is consulted
 * @param executionAttempt id of the execution attempt that asks for a split
 * @return the serialized form of whatever the assigner returned
 * @throws Exception if no execution is registered for the attempt, if the job vertex or its
 *     split assigner cannot be found, or if the split cannot be serialized (an
 *     {@code IOException}; the job vertex is failed with it before it is rethrown). Anything
 *     thrown by {@code validateLeaderSessionId} is propagated as well.
 */
@RpcMethod
public SerializedInputSplit requestNextInputSplit(final UUID leaderSessionID, final JobVertexID vertexID, final ExecutionAttemptID executionAttempt) throws Exception {
    validateLeaderSessionId(leaderSessionID);
    final Execution execution = executionGraph.getRegisteredExecutions().get(executionAttempt);
    if (execution == null) {
        // No execution is registered under this attempt id.
        // NOTE(review): the original (truncated) comment indicates this can happen when the
        // attempt was already unregistered but the TaskManager has not noticed yet - confirm.
        // Parameterized logging makes an explicit isDebugEnabled() guard unnecessary.
        log.debug("Can not find Execution for attempt {}.", executionAttempt);
        // Fail the call so that the requesting side learns that the attempt is unknown here.
        throw new Exception("Can not find Execution for attempt " + executionAttempt);
    }
    final ExecutionJobVertex vertex = executionGraph.getJobVertex(vertexID);
    if (vertex == null) {
        log.error("Cannot find execution vertex for vertex ID {}.", vertexID);
        throw new Exception("Cannot find execution vertex for vertex ID " + vertexID);
    }
    final InputSplitAssigner splitAssigner = vertex.getSplitAssigner();
    if (splitAssigner == null) {
        log.error("No InputSplitAssigner for vertex ID {}.", vertexID);
        throw new Exception("No InputSplitAssigner for vertex ID " + vertexID);
    }
    final Slot slot = execution.getAssignedResource();
    final int taskId = execution.getVertex().getParallelSubtaskIndex();
    // The hostname is handed to the assigner; it is null when no slot is assigned.
    final String host = slot != null ? slot.getTaskManagerLocation().getHostname() : null;
    final InputSplit nextInputSplit = splitAssigner.getNextInputSplit(host, taskId);
    log.debug("Send next input split {}.", nextInputSplit);
    try {
        final byte[] serializedInputSplit = InstantiationUtil.serializeObject(nextInputSplit);
        return new SerializedInputSplit(serializedInputSplit);
    } catch (Exception ex) {
        // The assigner may return null (see the InputSplitAssigner contract), so do not
        // dereference the split here: a NullPointerException would hide the real failure.
        final Class<?> splitClass = nextInputSplit != null ? nextInputSplit.getClass() : null;
        log.error("Could not serialize the next input split of class {}.", splitClass, ex);
        IOException reason = new IOException("Could not serialize the next input split of class " + splitClass + ".", ex);
        // Fail the job vertex with the same cause that is reported to the caller.
        vertex.fail(reason);
        throw reason;
    }
}
Also used : InputSplitAssigner(org.apache.flink.core.io.InputSplitAssigner) Execution(org.apache.flink.runtime.executiongraph.Execution) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) Slot(org.apache.flink.runtime.instance.Slot) AllocatedSlot(org.apache.flink.runtime.jobmanager.slots.AllocatedSlot) IOException(java.io.IOException) InputSplit(org.apache.flink.core.io.InputSplit) TimeoutException(java.util.concurrent.TimeoutException) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) LeaderIdMismatchException(org.apache.flink.runtime.highavailability.LeaderIdMismatchException) PartitionProducerDisposedException(org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) IOException(java.io.IOException) RpcEndpoint(org.apache.flink.runtime.rpc.RpcEndpoint) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) RpcMethod(org.apache.flink.runtime.rpc.RpcMethod)

Example 7 with Slot

use of org.apache.flink.runtime.instance.Slot in project flink by apache.

the class ExecutionGraphSchedulingTest method testTimeoutForSlotAllocation.

/**
 * Checks that slot allocation gives up after a timeout and that the slots which were
 * already handed out are given back when that happens.
 */
@Test
public void testTimeoutForSlotAllocation() throws Exception {
    //  the job graph is a single vertex:    (task)
    final int parallelism = 3;
    final JobID jobId = new JobID();
    final JobVertex vertex = new JobVertex("task");
    vertex.setInvokableClass(NoOpInvokable.class);
    vertex.setParallelism(parallelism);
    final JobGraph jobGraph = new JobGraph(jobId, "test", vertex);

    final TaskManagerGateway taskManager = mock(TaskManagerGateway.class);
    final SlotOwner slotOwner = mock(SlotOwner.class);

    //  one slot and one (initially incomplete) future per parallel subtask
    final SimpleSlot[] slots = new SimpleSlot[parallelism];
    @SuppressWarnings({ "unchecked", "rawtypes" }) final FlinkCompletableFuture<SimpleSlot>[] slotFutures = new FlinkCompletableFuture[parallelism];
    for (int idx = 0; idx < parallelism; idx++) {
        slotFutures[idx] = new FlinkCompletableFuture<>();
        slots[idx] = createSlot(taskManager, jobId, slotOwner);
    }

    final ProgrammedSlotProvider slotProvider = new ProgrammedSlotProvider(parallelism);
    slotProvider.addSlots(vertex.getID(), slotFutures);

    //  NOTE(review): the 20 ms value is presumably the slot allocation timeout - confirm
    //  against createExecutionGraph
    final ExecutionGraph eg = createExecutionGraph(jobGraph, slotProvider, Time.milliseconds(20));
    final TerminalJobStatusListener statusListener = new TerminalJobStatusListener();
    eg.registerJobStatusListener(statusListener);

    //  one slot is available before scheduling starts
    slotFutures[1].complete(slots[1]);

    //  start scheduling
    eg.setScheduleMode(ScheduleMode.EAGER);
    eg.setQueuedSchedulingAllowed(true);
    eg.scheduleForExecution();

    //  a second slot becomes available after scheduling has started
    slotFutures[2].complete(slots[2]);

    //  future[0] is never completed, so the whole operation must time out;
    //  no restarts are allowed, hence the job is expected to reach a terminal state
    statusListener.waitForTerminalState(2000);

    //  both handed-out slots must be returned to their owner
    verify(slotOwner, new Timeout(2000, times(2))).returnAllocatedSlot(any(Slot.class));

    //  nothing may have been deployed
    verify(taskManager, times(0)).submitTask(any(TaskDeploymentDescriptor.class), any(Time.class));

    //  every slot whose future completed must be canceled by now
    for (int idx = 0; idx < slotFutures.length; idx++) {
        final Future<SimpleSlot> slotFuture = slotFutures[idx];
        if (!slotFuture.isDone()) {
            continue;
        }
        assertTrue(slotFuture.get().isCanceled());
    }
}
Also used : Timeout(org.mockito.verification.Timeout) TaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway) Time(org.apache.flink.api.common.time.Time) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) FlinkCompletableFuture(org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture) SlotOwner(org.apache.flink.runtime.jobmanager.slots.SlotOwner) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) Slot(org.apache.flink.runtime.instance.Slot) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) AllocatedSlot(org.apache.flink.runtime.jobmanager.slots.AllocatedSlot) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

Slot (org.apache.flink.runtime.instance.Slot)7 AllocatedSlot (org.apache.flink.runtime.jobmanager.slots.AllocatedSlot)7 SimpleSlot (org.apache.flink.runtime.instance.SimpleSlot)6 Test (org.junit.Test)6 FlinkCompletableFuture (org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture)5 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)5 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)5 TaskManagerGateway (org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway)5 JobID (org.apache.flink.api.common.JobID)4 TaskDeploymentDescriptor (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor)4 SlotOwner (org.apache.flink.runtime.jobmanager.slots.SlotOwner)4 ArrayList (java.util.ArrayList)3 Time (org.apache.flink.api.common.time.Time)3 ScheduledUnit (org.apache.flink.runtime.jobmanager.scheduler.ScheduledUnit)3 IOException (java.io.IOException)2 AllocationID (org.apache.flink.runtime.clusterframework.types.AllocationID)2 Future (org.apache.flink.runtime.concurrent.Future)2 SlotProvider (org.apache.flink.runtime.instance.SlotProvider)2 InvocationOnMock (org.mockito.invocation.InvocationOnMock)2 ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService)1