use of org.apache.flink.runtime.instance.SimpleSlot in project flink by apache.
the class SchedulerIsolatedTasksTest method testScheduleWithDyingInstances.
/**
 * Checks how the scheduler reacts when instances die while their slots are in use:
 * slots owned by the dead instance must be canceled, slots of live instances must not,
 * and once every instance is dead no further slot may be handed out.
 */
@Test
public void testScheduleWithDyingInstances() {
    try {
        final Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());

        final Instance first = getRandomInstance(2);
        final Instance second = getRandomInstance(2);
        final Instance third = getRandomInstance(1);

        scheduler.newInstanceAvailable(first);
        scheduler.newInstanceAvailable(second);
        scheduler.newInstanceAvailable(third);

        // allocate five slots, one after the other
        final List<SimpleSlot> allocated = new ArrayList<SimpleSlot>();
        for (int n = 0; n < 5; n++) {
            allocated.add(scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get());
        }

        second.markDead();

        // only the slots of the dead instance may be canceled; release every slot afterwards
        for (SimpleSlot current : allocated) {
            if (current.getOwner() != second) {
                assertFalse(current.isCanceled());
            } else {
                assertTrue(current.isCanceled());
            }
            current.releaseSlot();
        }

        assertEquals(3, scheduler.getNumberOfAvailableSlots());

        first.markDead();
        third.markDead();

        // every instance is dead now, so an allocation request must fail
        try {
            scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
            fail("Scheduler served a slot from a dead instance");
        } catch (ExecutionException expected) {
            assertTrue(expected.getCause() instanceof NoResourceAvailableException);
        } catch (Exception unexpected) {
            fail("Wrong exception type.");
        }

        // at the latest by now the scheduler must have noticed that no instance is left
        assertEquals(0, scheduler.getNumberOfInstancesWithAvailableSlots());
        assertEquals(0, scheduler.getNumberOfAvailableSlots());
    } catch (Exception failure) {
        failure.printStackTrace();
        fail(failure.getMessage());
    }
}
use of org.apache.flink.runtime.instance.SimpleSlot in project flink by apache.
the class SchedulerIsolatedTasksTest method testScheduleImmediately.
/**
 * Exercises immediate (non-queued) slot allocation: filling all available slots,
 * rejecting a request when nothing is free, re-allocating after releases, and
 * tolerating slots that are released more than once.
 */
@Test
public void testScheduleImmediately() {
    try {
        final Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());
        assertEquals(0, scheduler.getNumberOfAvailableSlots());

        scheduler.newInstanceAvailable(getRandomInstance(2));
        scheduler.newInstanceAvailable(getRandomInstance(1));
        scheduler.newInstanceAvailable(getRandomInstance(2));
        assertEquals(5, scheduler.getNumberOfAvailableSlots());

        // occupy every slot
        final SimpleSlot slotA = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
        final SimpleSlot slotB = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
        final SimpleSlot slotC = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
        final SimpleSlot slotD = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
        final SimpleSlot slotE = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();

        // no slot may have been handed out twice
        assertTrue(areAllDistinct(slotA, slotB, slotC, slotD, slotE));

        // a further request must be rejected
        try {
            scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
            fail("Scheduler accepted scheduling request without available resource.");
        } catch (ExecutionException expected) {
            assertTrue(expected.getCause() instanceof NoResourceAvailableException);
        }

        // free two slots
        slotC.releaseSlot();
        slotD.releaseSlot();
        assertEquals(2, scheduler.getNumberOfAvailableSlots());

        // the freed capacity can be allocated again
        final SimpleSlot slotF = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
        final SimpleSlot slotG = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
        assertTrue(areAllDistinct(slotA, slotB, slotC, slotD, slotE, slotF, slotG));

        final SimpleSlot[] stillHeld = {slotA, slotB, slotE, slotF, slotG};

        // release everything that is still held
        for (SimpleSlot held : stillHeld) {
            held.releaseSlot();
        }
        assertEquals(5, scheduler.getNumberOfAvailableSlots());

        // releasing the same slots a second time (accidentally) must not change the count
        for (SimpleSlot held : stillHeld) {
            held.releaseSlot();
        }
        assertEquals(5, scheduler.getNumberOfAvailableSlots());
    } catch (Exception failure) {
        failure.printStackTrace();
        fail(failure.getMessage());
    }
}
use of org.apache.flink.runtime.instance.SimpleSlot in project flink by apache.
the class SchedulerSlotSharingTest method testSequentialAllocateAndRelease.
/**
 * Allocates and releases slots for four job vertices that belong to one slot sharing group,
 * in sequence, on a scheduler backed by a single instance. The test passes when none of
 * the allocations or releases throws.
 */
@Test
public void testSequentialAllocateAndRelease() {
    try {
        final JobVertexID vertexA = new JobVertexID();
        final JobVertexID vertexB = new JobVertexID();
        final JobVertexID vertexC = new JobVertexID();
        final JobVertexID vertexD = new JobVertexID();

        final SlotSharingGroup group = new SlotSharingGroup(vertexA, vertexB, vertexC, vertexD);

        final Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());
        scheduler.newInstanceAvailable(getRandomInstance(4));

        // vertices A and B are allocated interleaved, with the single subtask of C in between
        final SimpleSlot a0 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(vertexA, 0, 4), group), false).get();
        final SimpleSlot a1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(vertexA, 1, 4), group), false).get();
        final SimpleSlot b0 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(vertexB, 0, 4), group), false).get();
        final SimpleSlot b1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(vertexB, 1, 4), group), false).get();
        final SimpleSlot c0 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(vertexC, 0, 1), group), false).get();
        final SimpleSlot a2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(vertexA, 2, 4), group), false).get();
        final SimpleSlot a3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(vertexA, 3, 4), group), false).get();
        final SimpleSlot b2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(vertexB, 2, 4), group), false).get();
        final SimpleSlot b3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(vertexB, 3, 4), group), false).get();

        // give back everything held for A, then everything held for B
        for (SimpleSlot held : new SimpleSlot[] {a0, a1, a2, a3, b0, b1, b2, b3}) {
            held.releaseSlot();
        }

        // now vertex D gets its four subtasks scheduled
        final SimpleSlot d0 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(vertexD, 0, 4), group), false).get();
        final SimpleSlot d1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(vertexD, 1, 4), group), false).get();
        final SimpleSlot d2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(vertexD, 2, 4), group), false).get();
        final SimpleSlot d3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(vertexD, 3, 4), group), false).get();

        // finally release the slot of C and the slots of D
        for (SimpleSlot held : new SimpleSlot[] {c0, d0, d1, d2, d3}) {
            held.releaseSlot();
        }
    } catch (Exception failure) {
        failure.printStackTrace();
        fail(failure.getMessage());
    }
}
use of org.apache.flink.runtime.instance.SimpleSlot in project flink by apache.
the class ExecutionGraph method scheduleEager.
/**
 * Schedules all execution job vertices eagerly: slot futures for every vertex are requested
 * up front, and the executions are deployed only once all of those futures have completed.
 *
 * <p>If any slot future fails, or the timeout fires before all of them complete, the
 * combined future completes exceptionally; the handler then calls {@code fail(Throwable)}
 * and releases all slots collected so far.
 *
 * <p>The job state must be {@code JobStatus.RUNNING} when this method is called
 * (enforced via {@code checkState}).
 *
 * @param slotProvider  The resource provider from which the slots are allocated
 * @param timeout       The maximum time that the deployment may take, before a
 *                      TimeoutException is thrown.
 */
private void scheduleEager(SlotProvider slotProvider, final Time timeout) {
    checkState(state == JobStatus.RUNNING, "job is not running currently");

    // Important: reserve all the space we need up front.
    // that way we do not have any operation that can fail between allocating the slots
    // and adding them to the list. If we had a failure in between there, that would
    // cause the slots to get lost
    final ArrayList<ExecutionAndSlot[]> resources = new ArrayList<>(getNumberOfExecutionJobVertices());
    final boolean queued = allowQueuedScheduling;

    // we use this flag to handle failures in a 'finally' clause
    // that allows us to not go through clumsy cast-and-rethrow logic
    boolean successful = false;

    try {
        // collecting all the slots may resize and fail in that operation without slots getting lost
        final ArrayList<Future<SimpleSlot>> slotFutures = new ArrayList<>(getNumberOfExecutionJobVertices());

        // allocate the slots (obtain all their futures)
        for (ExecutionJobVertex ejv : getVerticesTopologically()) {
            // these calls are not blocking, they only return futures
            ExecutionAndSlot[] slots = ejv.allocateResourcesForAll(slotProvider, queued);

            // we need to first add the slots to this list, to be safe on release
            resources.add(slots);

            for (ExecutionAndSlot ens : slots) {
                slotFutures.add(ens.slotFuture);
            }
        }

        // this future is complete once all slot futures are complete.
        // the future fails once one slot future fails.
        final ConjunctFuture allAllocationsComplete = FutureUtils.combineAll(slotFutures);

        // make sure that we fail if the allocation timeout was exceeded
        final ScheduledFuture<?> timeoutCancelHandle = futureExecutor.schedule(new Runnable() {
            @Override
            public void run() {
                // When the timeout triggers, we try to complete the conjunct future with an exception.
                // Note that this is a no-op if the future is already completed
                int numTotal = allAllocationsComplete.getNumFuturesTotal();
                int numComplete = allAllocationsComplete.getNumFuturesCompleted();
                String message = "Could not allocate all requires slots within timeout of " + timeout + ". Slots required: " + numTotal + ", slots allocated: " + numComplete;

                allAllocationsComplete.completeExceptionally(new NoResourceAvailableException(message));
            }
        }, timeout.getSize(), timeout.getUnit());

        allAllocationsComplete.handleAsync(new BiFunction<Void, Throwable, Void>() {
            @Override
            public Void apply(Void ignored, Throwable throwable) {
                try {
                    // we do not need the cancellation timeout any more
                    timeoutCancelHandle.cancel(false);

                    if (throwable == null) {
                        for (ExecutionAndSlot[] jobVertexTasks : resources) {
                            for (ExecutionAndSlot execAndSlot : jobVertexTasks) {
                                // the futures must all be ready - this is simply a sanity check
                                final SimpleSlot slot;
                                try {
                                    slot = execAndSlot.slotFuture.getNow(null);
                                    checkNotNull(slot);
                                } catch (ExecutionException | NullPointerException e) {
                                    // keep the original failure as the cause so that it is not lost
                                    throw new IllegalStateException("SlotFuture is incomplete or erroneous even though all futures completed", e);
                                }

                                // actual deployment
                                execAndSlot.executionAttempt.deployToSlot(slot);
                            }
                        }
                    } else {
                        // let the exception handler deal with this
                        throw throwable;
                    }
                } catch (Throwable t) {
                    // we need to go into recovery and make sure to release all slots
                    try {
                        fail(t);
                    } finally {
                        ExecutionGraphUtils.releaseAllSlotsSilently(resources);
                    }
                }

                // the result type is Void, so there is nothing meaningful to return
                return null;
            }
        }, futureExecutor);

        // from now on, slots will be rescued by the futures and their completion, or by the timeout
        successful = true;
    } finally {
        if (!successful) {
            // we come here only if the 'try' block finished with an exception
            // we release the slots (possibly failing some executions on the way) and
            // let the exception bubble up
            ExecutionGraphUtils.releaseAllSlotsSilently(resources);
        }
    }
}
use of org.apache.flink.runtime.instance.SimpleSlot in project flink by apache.
the class ExecutionVertex method getPreferredLocationsBasedOnInputs.
/**
 * Derives the preferred task manager locations of this vertex from the slots that are
 * currently assigned to the producers of its inputs.
 *
 * <p>For every input, the distinct locations of those producers that already have a slot
 * are gathered. An input with more than MAX_DISTINCT_LOCATIONS_TO_CONSIDER distinct
 * locations contributes no preference. Across all inputs, the non-empty location set
 * with the fewest entries is kept.
 *
 * @return The preferred locations based on the inputs, or an empty iterable if there is
 *         no input-based preference. This method does not return {@code null}.
 */
public Iterable<TaskManagerLocation> getPreferredLocationsBasedOnInputs() {
    // without input connections there is nothing to base a preference on
    if (inputEdges == null) {
        return Collections.emptySet();
    }

    final Set<TaskManagerLocation> bestCandidates = new HashSet<>();
    final Set<TaskManagerLocation> candidatesOfInput = new HashSet<>();

    for (ExecutionEdge[] edgesOfInput : inputEdges) {
        candidatesOfInput.clear();

        if (edgesOfInput != null) {
            for (ExecutionEdge edge : edgesOfInput) {
                // slot currently assigned to the producer of this edge, if any
                final SimpleSlot producerSlot = edge.getSource().getProducer().getCurrentAssignedResource();
                if (producerSlot == null) {
                    continue;
                }

                candidatesOfInput.add(producerSlot.getTaskManagerLocation());

                // too many distinct locations: this input expresses no preference
                if (candidatesOfInput.size() > MAX_DISTINCT_LOCATIONS_TO_CONSIDER) {
                    candidatesOfInput.clear();
                    break;
                }
            }
        }

        // adopt this input's locations if nothing has been chosen yet, or if this input
        // is non-empty and narrows the choice down further
        final boolean nothingChosenYet = bestCandidates.isEmpty();
        final boolean inputIsMoreSelective =
                !candidatesOfInput.isEmpty() && candidatesOfInput.size() < bestCandidates.size();

        if (nothingChosenYet || inputIsMoreSelective) {
            bestCandidates.clear();
            bestCandidates.addAll(candidatesOfInput);
        }
    }

    if (bestCandidates.isEmpty()) {
        return Collections.<TaskManagerLocation>emptyList();
    }
    return bestCandidates;
}
Aggregations