use of org.apache.flink.runtime.instance.SimpleSlot in project flink by apache.
the class Scheduler method getFreeSlotForTask.
/**
 * Looks up an instance via {@code findInstance} and allocates a simple slot on it for the
 * given vertex.
 *
 * <p>NOTE: This method is not thread-safe; the caller has to take care of synchronization.
 *
 * @param vertex The task to run; its job ID is used for the slot allocation.
 * @param requestedLocations The locations handed to {@code findInstance} when looking for an instance.
 * @param localOnly Flag handed to {@code findInstance} together with the requested locations.
 * @return The allocated slot, tagged with the locality reported by {@code findInstance}, or
 *         {@code null} if {@code findInstance} returns no instance.
 */
protected SimpleSlot getFreeSlotForTask(ExecutionVertex vertex, Iterable<TaskManagerLocation> requestedLocations, boolean localOnly) {
    // Several rounds may be needed: a candidate instance can turn out to have no slot
    // to hand out, or to have died, in which case the next candidate is looked up.
    for (;;) {
        final Pair<Instance, Locality> candidate = findInstance(requestedLocations, localOnly);
        if (candidate == null) {
            return null;
        }

        final Instance target = candidate.getLeft();
        final Locality targetLocality = candidate.getRight();

        try {
            final SimpleSlot allocated = target.allocateSimpleSlot(vertex.getJobId());

            // an instance that still has capacity goes back into the set of available resources
            if (target.hasResourcesAvailable()) {
                instancesWithAvailableResources.put(target.getTaskManagerID(), target);
            }

            if (allocated != null) {
                allocated.setLocality(targetLocality);
                return allocated;
            }
            // no slot was obtained: go around the loop again
        } catch (InstanceDiedException instanceDied) {
            // the instance died, but that has not been propagated to this scheduler yet;
            // take it out of the set of available instances
            removeInstance(target);
        }
    }
}
use of org.apache.flink.runtime.instance.SimpleSlot in project flink by apache.
the class ExecutionGraphSchedulingTest method createSlot.
/**
 * Builds a {@link SimpleSlot} on top of a newly created {@link AllocatedSlot} whose
 * task manager location uses a generated resource ID and the loopback address.
 *
 * @param taskManager gateway handed to the underlying {@link AllocatedSlot}
 * @param jobId job ID the allocated slot is created for
 * @param slotOwner owner handed to the resulting {@link SimpleSlot}
 * @return the newly created simple slot
 */
private SimpleSlot createSlot(TaskManagerGateway taskManager, JobID jobId, SlotOwner slotOwner) {
    final ResourceID resourceId = ResourceID.generate();
    final InetAddress loopback = InetAddress.getLoopbackAddress();
    final TaskManagerLocation tmLocation = new TaskManagerLocation(resourceId, loopback, 12345);

    final AllocationID allocationId = new AllocationID();
    final AllocatedSlot allocated =
            new AllocatedSlot(allocationId, jobId, tmLocation, 0, ResourceProfile.UNKNOWN, taskManager);

    return new SimpleSlot(allocated, slotOwner, 0);
}
use of org.apache.flink.runtime.instance.SimpleSlot in project flink by apache.
the class ExecutionGraphSchedulingTest method testTimeoutForSlotAllocation.
/**
 * Checks that the slot allocation times out after a certain time and that all slots
 * are released when that happens.
 */
@Test
public void testTimeoutForSlotAllocation() throws Exception {
    final int parallelism = 3;

    // a simple graph consisting of a single vertex: (task)
    final JobVertex taskVertex = new JobVertex("task");
    taskVertex.setParallelism(parallelism);
    taskVertex.setInvokableClass(NoOpInvokable.class);

    final JobID jobId = new JobID();
    final JobGraph jobGraph = new JobGraph(jobId, "test", taskVertex);

    final SlotOwner slotOwner = mock(SlotOwner.class);
    final TaskManagerGateway taskManager = mock(TaskManagerGateway.class);

    // one slot and one not-yet-completed future per parallel subtask
    final SimpleSlot[] availableSlots = new SimpleSlot[parallelism];
    @SuppressWarnings({ "unchecked", "rawtypes" })
    final FlinkCompletableFuture<SimpleSlot>[] pendingSlots = new FlinkCompletableFuture[parallelism];
    for (int idx = 0; idx < parallelism; idx++) {
        availableSlots[idx] = createSlot(taskManager, jobId, slotOwner);
        pendingSlots[idx] = new FlinkCompletableFuture<>();
    }

    final ProgrammedSlotProvider programmedProvider = new ProgrammedSlotProvider(parallelism);
    programmedProvider.addSlots(taskVertex.getID(), pendingSlots);

    final ExecutionGraph graph = createExecutionGraph(jobGraph, programmedProvider, Time.milliseconds(20));
    final TerminalJobStatusListener terminalListener = new TerminalJobStatusListener();
    graph.registerJobStatusListener(terminalListener);

    // one future is completed before scheduling starts
    pendingSlots[1].complete(availableSlots[1]);

    // start the scheduling
    graph.setScheduleMode(ScheduleMode.EAGER);
    graph.setQueuedSchedulingAllowed(true);
    graph.scheduleForExecution();

    // a second future is completed after scheduling has started
    pendingSlots[2].complete(availableSlots[2]);

    // the future at index 0 is never completed, so the whole operation must time out;
    // no restarts are allowed, so the job goes to a terminal state
    terminalListener.waitForTerminalState(2000);

    // wait until the two handed-out slots have been returned
    verify(slotOwner, new Timeout(2000, times(2))).returnAllocatedSlot(any(Slot.class));

    // no deployment may have happened
    verify(taskManager, times(0)).submitTask(any(TaskDeploymentDescriptor.class), any(Time.class));

    // every slot whose future was completed must be canceled
    for (int idx = 0; idx < pendingSlots.length; idx++) {
        final Future<SimpleSlot> handedOut = pendingSlots[idx];
        if (!handedOut.isDone()) {
            continue;
        }
        assertTrue(handedOut.get().isCanceled());
    }
}
use of org.apache.flink.runtime.instance.SimpleSlot in project flink by apache.
the class ExecutionVertexCancelTest method testCancelConcurrentlyToDeploying_CallsOvertaking.
/**
 * Tests canceling a vertex while its deployment is still in flight, for the case where the
 * cancel call is executed before the deploy call.
 * <p>
 * The queued actions are run by hand: cancel first, then deploy. The vertex is expected to
 * stay in CANCELING until canceling is explicitly completed; afterwards it must be CANCELED,
 * its slot released, and no failure cause recorded.
 */
@Test
public void testCancelConcurrentlyToDeploying_CallsOvertaking() {
try {
final JobVertexID jid = new JobVertexID();
// queued execution context: actions are collected in a queue and run explicitly by this test
final TestingUtils.QueuedActionExecutionContext executionContext = TestingUtils.queuedActionExecutionContext();
final TestingUtils.ActionQueue actions = executionContext.actionQueue();
final ExecutionJobVertex ejv = getExecutionVertex(jid, executionContext);
final ExecutionVertex vertex = new ExecutionVertex(ejv, 0, new IntermediateResult[0], AkkaUtils.getDefaultTimeout());
// start from SCHEDULED
setVertexState(vertex, ExecutionState.SCHEDULED);
assertEquals(ExecutionState.SCHEDULED, vertex.getExecutionState());
// task manager cancel sequence mock actor
// first return NOT SUCCESS (task not found, cancel call overtook deploy call), then success (cancel call after deploy call)
ActorGateway actorGateway = new CancelSequenceActorGateway(executionContext, 2);
Instance instance = getInstance(new ActorTaskManagerGateway(actorGateway));
SimpleSlot slot = instance.allocateSimpleSlot(new JobID());
// deploying moves the vertex to DEPLOYING, canceling right afterwards moves it to CANCELING
vertex.deployToSlot(slot);
assertEquals(ExecutionState.DEPLOYING, vertex.getExecutionState());
vertex.cancel();
assertEquals(ExecutionState.CANCELING, vertex.getExecutionState());
// take both pending actions off the queue without running them yet:
// the deploy action was queued first, the cancel action second
Runnable deployAction = actions.popNextAction();
Runnable cancelAction = actions.popNextAction();
// cancel call first
cancelAction.run();
// process onComplete callback
actions.triggerNextAction();
// did not find the task, not properly cancelled, stay in canceling
assertEquals(ExecutionState.CANCELING, vertex.getExecutionState());
// deploy action next
deployAction.run();
// the deploy call found itself in canceling after it returned and needs to send a cancel call
// the call did not yet execute, so it is still in canceling
assertEquals(ExecutionState.CANCELING, vertex.getExecutionState());
// explicitly complete the canceling on the current execution attempt
vertex.getCurrentExecutionAttempt().cancelingComplete();
assertEquals(ExecutionState.CANCELED, vertex.getExecutionState());
// the slot must have been released and no failure cause may be recorded
assertTrue(slot.isReleased());
assertNull(vertex.getFailureCause());
// timestamps must have been recorded for the CREATED, CANCELING and CANCELED states
assertTrue(vertex.getStateTimestamp(ExecutionState.CREATED) > 0);
assertTrue(vertex.getStateTimestamp(ExecutionState.CANCELING) > 0);
assertTrue(vertex.getStateTimestamp(ExecutionState.CANCELED) > 0);
} catch (Exception e) {
// any unexpected exception fails the test
e.printStackTrace();
fail(e.getMessage());
}
}
use of org.apache.flink.runtime.instance.SimpleSlot in project flink by apache.
the class ExecutionVertexCancelTest method testActionsWhileCancelling.
/**
 * Exercises schedule, deploy and fail calls on vertices that are in the CANCELING state:
 * scheduling must not throw, deploying must throw an {@link IllegalStateException}, and
 * failing must move the vertex to CANCELED and release its slot.
 */
@Test
public void testActionsWhileCancelling() {
    try {
        final JobVertexID vertexId = new JobVertexID();
        final ExecutionJobVertex jobVertex = getExecutionVertex(vertexId);

        // scheduling a vertex that is in CANCELING must not throw
        try {
            ExecutionVertex toSchedule = new ExecutionVertex(jobVertex, 0, new IntermediateResult[0], AkkaUtils.getDefaultTimeout());
            setVertexState(toSchedule, ExecutionState.CANCELING);
            Scheduler mockedScheduler = mock(Scheduler.class);
            toSchedule.scheduleForExecution(mockedScheduler, false);
        } catch (Exception e) {
            fail("should not throw an exception");
        }

        // deploying a vertex that is in CANCELING is illegal and must be rejected
        try {
            ExecutionVertex toDeploy = new ExecutionVertex(jobVertex, 0, new IntermediateResult[0], AkkaUtils.getDefaultTimeout());
            setVertexState(toDeploy, ExecutionState.CANCELING);
            Instance deployInstance = getInstance(new ActorTaskManagerGateway(DummyActorGateway.INSTANCE));
            SimpleSlot deploySlot = deployInstance.allocateSimpleSlot(new JobID());
            toDeploy.deployToSlot(deploySlot);
            fail("Method should throw an exception");
        } catch (IllegalStateException expected) {
            // this is the expected outcome
        }

        // failing a vertex that is in CANCELING ends in CANCELED and frees the slot
        ExecutionVertex toFail = new ExecutionVertex(jobVertex, 0, new IntermediateResult[0], AkkaUtils.getDefaultTimeout());
        Instance failInstance = getInstance(new ActorTaskManagerGateway(DummyActorGateway.INSTANCE));
        SimpleSlot failSlot = failInstance.allocateSimpleSlot(new JobID());
        setVertexResource(toFail, failSlot);
        setVertexState(toFail, ExecutionState.CANCELING);
        toFail.fail(new Exception("test exception"));
        assertEquals(ExecutionState.CANCELED, toFail.getExecutionState());
        assertTrue(failSlot.isReleased());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Aggregations