Search in sources :

Example 41 with SimpleSlot

use of org.apache.flink.runtime.instance.SimpleSlot in project flink by apache.

the class SchedulerIsolatedTasksTest method testScheduleWithDyingInstances.

/**
 * Checks how the scheduler reacts to instances that are marked dead.
 *
 * <p>All slots of three instances are allocated, then one instance is marked dead. The test
 * expects exactly the slots owned by that instance to be canceled, and the released slots of
 * the two surviving instances to become available again. After the remaining instances are
 * marked dead as well, a further allocation must fail with a
 * {@link NoResourceAvailableException} and the scheduler must report no available slots.
 */
@Test
public void testScheduleWithDyingInstances() {
    try {
        Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());
        Instance i1 = getRandomInstance(2);
        Instance i2 = getRandomInstance(2);
        Instance i3 = getRandomInstance(1);
        scheduler.newInstanceAvailable(i1);
        scheduler.newInstanceAvailable(i2);
        scheduler.newInstanceAvailable(i3);
        // request five slots in total (the instances were created with 2 + 2 + 1)
        List<SimpleSlot> slots = new ArrayList<SimpleSlot>();
        for (int i = 0; i < 5; i++) {
            slots.add(scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get());
        }
        i2.markDead();
        for (SimpleSlot slot : slots) {
            // only the slots of the dead instance may be canceled at this point
            if (slot.getOwner() == i2) {
                assertTrue(slot.isCanceled());
            } else {
                assertFalse(slot.isCanceled());
            }
            slot.releaseSlot();
        }
        // the slots of i1 and i3 are back, the ones of i2 are gone
        assertEquals(3, scheduler.getNumberOfAvailableSlots());
        i1.markDead();
        i3.markDead();
        // cannot get another slot, since all instances are dead
        try {
            scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
            fail("Scheduler served a slot from a dead instance");
        } catch (ExecutionException e) {
            assertTrue(e.getCause() instanceof NoResourceAvailableException);
        } catch (Exception e) {
            // keep the offending exception attached so the test report shows what was thrown
            throw new AssertionError("Wrong exception type: " + e, e);
        }
        // now the latest, the scheduler should have noticed (through the lazy mechanisms)
        // that all instances have vanished
        assertEquals(0, scheduler.getNumberOfInstancesWithAvailableSlots());
        assertEquals(0, scheduler.getNumberOfAvailableSlots());
    } catch (Exception e) {
        // fail the test, but preserve the original exception and its stack trace as the cause
        throw new AssertionError("Unexpected exception: " + e, e);
    }
}
Also used : Instance(org.apache.flink.runtime.instance.Instance) SchedulerTestUtils.getRandomInstance(org.apache.flink.runtime.jobmanager.scheduler.SchedulerTestUtils.getRandomInstance) ArrayList(java.util.ArrayList) ExecutionException(java.util.concurrent.ExecutionException) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) Test(org.junit.Test)

Example 42 with SimpleSlot

use of org.apache.flink.runtime.instance.SimpleSlot in project flink by apache.

the class SchedulerIsolatedTasksTest method testScheduleImmediately.

/**
 * Checks immediate (non-queued) scheduling against a fixed set of slots.
 *
 * <p>The test fills all five slots, expects a further request to fail with a
 * {@link NoResourceAvailableException}, then releases and re-allocates slots while checking
 * the scheduler's available-slot count. It finally verifies that releasing a slot a second
 * time does not change that count.
 */
@Test
public void testScheduleImmediately() {
    try {
        Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());
        assertEquals(0, scheduler.getNumberOfAvailableSlots());
        scheduler.newInstanceAvailable(getRandomInstance(2));
        scheduler.newInstanceAvailable(getRandomInstance(1));
        scheduler.newInstanceAvailable(getRandomInstance(2));
        assertEquals(5, scheduler.getNumberOfAvailableSlots());
        // schedule something into all slots
        SimpleSlot s1 = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
        SimpleSlot s2 = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
        SimpleSlot s3 = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
        SimpleSlot s4 = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
        SimpleSlot s5 = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
        // the slots should all be different
        assertTrue(areAllDistinct(s1, s2, s3, s4, s5));
        // a sixth request must be rejected, since no slot is left
        try {
            scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
            fail("Scheduler accepted scheduling request without available resource.");
        } catch (ExecutionException e) {
            assertTrue(e.getCause() instanceof NoResourceAvailableException);
        }
        // release some slots again
        s3.releaseSlot();
        s4.releaseSlot();
        assertEquals(2, scheduler.getNumberOfAvailableSlots());
        // now we can schedule some more slots
        SimpleSlot s6 = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
        SimpleSlot s7 = scheduler.allocateSlot(new ScheduledUnit(getDummyTask()), false).get();
        assertTrue(areAllDistinct(s1, s2, s3, s4, s5, s6, s7));
        // release all
        s1.releaseSlot();
        s2.releaseSlot();
        s5.releaseSlot();
        s6.releaseSlot();
        s7.releaseSlot();
        assertEquals(5, scheduler.getNumberOfAvailableSlots());
        // check that slots that are released twice (accidentally) do not mess things up
        s1.releaseSlot();
        s2.releaseSlot();
        s5.releaseSlot();
        s6.releaseSlot();
        s7.releaseSlot();
        assertEquals(5, scheduler.getNumberOfAvailableSlots());
    } catch (Exception e) {
        // fail the test, but preserve the original exception and its stack trace as the cause
        throw new AssertionError("Unexpected exception: " + e, e);
    }
}
Also used : ExecutionException(java.util.concurrent.ExecutionException) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) Test(org.junit.Test)

Example 43 with SimpleSlot

use of org.apache.flink.runtime.instance.SimpleSlot in project flink by apache.

the class SchedulerSlotSharingTest method testSequentialAllocateAndRelease.

/**
 * Allocates and releases slots for four job vertices that belong to one
 * {@link SlotSharingGroup}, using a scheduler with a single instance.
 *
 * <p>The test contains no explicit assertions: it passes if every allocation request
 * completes without an exception.
 */
@Test
public void testSequentialAllocateAndRelease() {
    try {
        final JobVertexID jid1 = new JobVertexID();
        final JobVertexID jid2 = new JobVertexID();
        final JobVertexID jid3 = new JobVertexID();
        final JobVertexID jid4 = new JobVertexID();
        final SlotSharingGroup sharingGroup = new SlotSharingGroup(jid1, jid2, jid3, jid4);
        final Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());
        scheduler.newInstanceAvailable(getRandomInstance(4));
        // allocate something from group 1 and 2 interleaved with schedule for group 3
        SimpleSlot slot_1_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 4), sharingGroup), false).get();
        SimpleSlot slot_1_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 1, 4), sharingGroup), false).get();
        SimpleSlot slot_2_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 0, 4), sharingGroup), false).get();
        SimpleSlot slot_2_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 1, 4), sharingGroup), false).get();
        SimpleSlot slot_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 0, 1), sharingGroup), false).get();
        SimpleSlot slot_1_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 2, 4), sharingGroup), false).get();
        SimpleSlot slot_1_4 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 3, 4), sharingGroup), false).get();
        SimpleSlot slot_2_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 2, 4), sharingGroup), false).get();
        SimpleSlot slot_2_4 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 3, 4), sharingGroup), false).get();
        // release groups 1 and 2
        slot_1_1.releaseSlot();
        slot_1_2.releaseSlot();
        slot_1_3.releaseSlot();
        slot_1_4.releaseSlot();
        slot_2_1.releaseSlot();
        slot_2_2.releaseSlot();
        slot_2_3.releaseSlot();
        slot_2_4.releaseSlot();
        // allocate group 4
        SimpleSlot slot_4_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 0, 4), sharingGroup), false).get();
        SimpleSlot slot_4_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 1, 4), sharingGroup), false).get();
        SimpleSlot slot_4_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 2, 4), sharingGroup), false).get();
        SimpleSlot slot_4_4 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, 3, 4), sharingGroup), false).get();
        // release groups 3 and 4
        slot_3.releaseSlot();
        slot_4_1.releaseSlot();
        slot_4_2.releaseSlot();
        slot_4_3.releaseSlot();
        slot_4_4.releaseSlot();
    } catch (Exception e) {
        // fail the test, but preserve the original exception and its stack trace as the cause
        throw new AssertionError("Unexpected exception: " + e, e);
    }
}
Also used : JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) ExecutionException(java.util.concurrent.ExecutionException) Test(org.junit.Test)

Example 44 with SimpleSlot

use of org.apache.flink.runtime.instance.SimpleSlot in project flink by apache.

the class ExecutionGraph method scheduleEager.

/**
 * Schedules all vertices of this graph eagerly: a slot is requested for every execution up
 * front, and the executions are deployed only after all slot futures have completed.
 *
 * <p>If the combined allocation is not complete when {@code timeout} elapses, the combined
 * future is completed exceptionally with a {@link NoResourceAvailableException}. Any failure
 * observed in the completion handler is passed to {@code fail(Throwable)} and all slots
 * collected so far are released.
 *
 * <p>The call is rejected by {@code checkState} unless the job status is
 * {@code JobStatus.RUNNING}.
 *
 * @param slotProvider  The resource provider from which the slots are allocated
 * @param timeout       The maximum time that the slot allocation may take before the
 *                      scheduling attempt is failed with a
 *                      {@link NoResourceAvailableException}.
 */
private void scheduleEager(SlotProvider slotProvider, final Time timeout) {
    checkState(state == JobStatus.RUNNING, "job is not running currently");
    // Important: reserve all the space we need up front.
    // that way we do not have any operation that can fail between allocating the slots
    // and adding them to the list. If we had a failure in between there, that would
    // cause the slots to get lost
    final ArrayList<ExecutionAndSlot[]> resources = new ArrayList<>(getNumberOfExecutionJobVertices());
    final boolean queued = allowQueuedScheduling;
    // we use this flag to handle failures in a 'finally' clause
    // that allows us to not go through clumsy cast-and-rethrow logic
    boolean successful = false;
    try {
        // collecting all the slots may resize and fail in that operation without slots getting lost
        final ArrayList<Future<SimpleSlot>> slotFutures = new ArrayList<>(getNumberOfExecutionJobVertices());
        // allocate the slots (obtain all their futures)
        for (ExecutionJobVertex ejv : getVerticesTopologically()) {
            // these calls are not blocking, they only return futures
            ExecutionAndSlot[] slots = ejv.allocateResourcesForAll(slotProvider, queued);
            // we need to first add the slots to this list, to be safe on release
            resources.add(slots);
            for (ExecutionAndSlot ens : slots) {
                slotFutures.add(ens.slotFuture);
            }
        }
        // this future is complete once all slot futures are complete.
        // the future fails once one slot future fails.
        final ConjunctFuture allAllocationsComplete = FutureUtils.combineAll(slotFutures);
        // make sure that we fail if the allocation timeout was exceeded
        final ScheduledFuture<?> timeoutCancelHandle = futureExecutor.schedule(new Runnable() {

            @Override
            public void run() {
                // When the timeout triggers, we try to complete the conjunct future with an exception.
                // Note that this is a no-op if the future is already completed
                int numTotal = allAllocationsComplete.getNumFuturesTotal();
                int numComplete = allAllocationsComplete.getNumFuturesCompleted();
                String message = "Could not allocate all requires slots within timeout of " + timeout + ". Slots required: " + numTotal + ", slots allocated: " + numComplete;
                allAllocationsComplete.completeExceptionally(new NoResourceAvailableException(message));
            }
        }, timeout.getSize(), timeout.getUnit());
        allAllocationsComplete.handleAsync(new BiFunction<Void, Throwable, Void>() {

            @Override
            public Void apply(Void ignored, Throwable throwable) {
                try {
                    // we do not need the cancellation timeout any more
                    timeoutCancelHandle.cancel(false);
                    if (throwable == null) {
                        for (ExecutionAndSlot[] jobVertexTasks : resources) {
                            for (ExecutionAndSlot execAndSlot : jobVertexTasks) {
                                // the futures must all be ready - this is simply a sanity check
                                final SimpleSlot slot;
                                try {
                                    slot = execAndSlot.slotFuture.getNow(null);
                                    checkNotNull(slot);
                                } catch (ExecutionException | NullPointerException e) {
                                    // chain the cause so the reason for the inconsistent future is not lost
                                    throw new IllegalStateException("SlotFuture is incomplete or erroneous even though all futures completed", e);
                                }
                                // actual deployment
                                execAndSlot.executionAttempt.deployToSlot(slot);
                            }
                        }
                    } else {
                        // let the exception handler deal with this
                        throw throwable;
                    }
                } catch (Throwable t) {
                    // we need to go into recovery and make sure to release all slots
                    try {
                        fail(t);
                    } finally {
                        ExecutionGraphUtils.releaseAllSlotsSilently(resources);
                    }
                }
                // Void has no instances; null is the only value that can be returned here
                return null;
            }
        }, futureExecutor);
        // from now on, slots will be rescued by the futures and their completion, or by the timeout
        successful = true;
    } finally {
        if (!successful) {
            // we come here only if the 'try' block finished with an exception
            // we release the slots (possibly failing some executions on the way) and
            // let the exception bubble up
            ExecutionGraphUtils.releaseAllSlotsSilently(resources);
        }
    }
}
Also used : CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) ConjunctFuture(org.apache.flink.runtime.concurrent.FutureUtils.ConjunctFuture) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) ScheduledFuture(java.util.concurrent.ScheduledFuture) Future(org.apache.flink.runtime.concurrent.Future) SerializedThrowable(org.apache.flink.runtime.util.SerializedThrowable) NoResourceAvailableException(org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException)

Example 45 with SimpleSlot

use of org.apache.flink.runtime.instance.SimpleSlot in project flink by apache.

the class ExecutionVertex method getPreferredLocationsBasedOnInputs.

/**
 * Derives the preferred locations for this vertex from the slots currently assigned to the
 * producers of its inputs.
 *
 * <p>Each input is examined separately. The distinct locations of its producers' assigned
 * slots form a candidate set; producers without an assigned slot contribute nothing. An input
 * whose candidate set grows beyond MAX_DISTINCT_LOCATIONS_TO_CONSIDER entries is disregarded.
 * Among the remaining non-empty candidate sets, the one with the fewest locations is returned
 * (the earlier input wins on a tie).
 *
 * @return The preferred locations based on the inputs, or an empty iterable if there are no
 *         input edges or no input yields a usable candidate set. This method does not
 *         return {@code null}.
 */
public Iterable<TaskManagerLocation> getPreferredLocationsBasedOnInputs() {
    // without input connections there is nothing to base a preference on
    if (inputEdges == null) {
        return Collections.emptySet();
    }

    Set<TaskManagerLocation> preferred = new HashSet<>();
    // scratch set, reused for every input
    Set<TaskManagerLocation> candidates = new HashSet<>();

    for (ExecutionEdge[] edgesOfInput : inputEdges) {
        candidates.clear();

        if (edgesOfInput != null) {
            for (ExecutionEdge edge : edgesOfInput) {
                // slot currently assigned to the producer of this edge's source
                SimpleSlot producerSlot = edge.getSource().getProducer().getCurrentAssignedResource();
                if (producerSlot == null) {
                    continue;
                }

                candidates.add(producerSlot.getTaskManagerLocation());

                // an input spread over too many distinct locations gives no useful hint
                if (candidates.size() > MAX_DISTINCT_LOCATIONS_TO_CONSIDER) {
                    candidates.clear();
                    break;
                }
            }
        }

        // adopt this input's candidates if nothing has been chosen yet, or if they form
        // a non-empty set that is strictly smaller than the current choice
        boolean nothingChosenYet = preferred.isEmpty();
        boolean strictlySmaller = !candidates.isEmpty() && candidates.size() < preferred.size();
        if (nothingChosenYet || strictlySmaller) {
            preferred.clear();
            preferred.addAll(candidates);
        }
    }

    if (preferred.isEmpty()) {
        return Collections.<TaskManagerLocation>emptyList();
    }
    return preferred;
}
Also used : TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) CoLocationConstraint(org.apache.flink.runtime.jobmanager.scheduler.CoLocationConstraint) HashSet(java.util.HashSet)

Aggregations

SimpleSlot (org.apache.flink.runtime.instance.SimpleSlot): 78; Test (org.junit.Test): 59; JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID): 46; Instance (org.apache.flink.runtime.instance.Instance): 38; ExecutionException (java.util.concurrent.ExecutionException): 25; ActorTaskManagerGateway (org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway): 22; JobID (org.apache.flink.api.common.JobID): 20; TaskManagerLocation (org.apache.flink.runtime.taskmanager.TaskManagerLocation): 16; TaskManagerGateway (org.apache.flink.runtime.jobmanager.slots.TaskManagerGateway): 14; FlinkCompletableFuture (org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture): 13; SchedulerTestUtils.getRandomInstance (org.apache.flink.runtime.jobmanager.scheduler.SchedulerTestUtils.getRandomInstance): 13; ExecutionGraphTestUtils.getExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.getExecutionVertex): 12; ExecutionGraphTestUtils.getInstance (org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.getInstance): 12; IOException (java.io.IOException): 10; ActorGateway (org.apache.flink.runtime.instance.ActorGateway): 8; BaseTestingActorGateway (org.apache.flink.runtime.instance.BaseTestingActorGateway): 8; JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 8; Scheduler (org.apache.flink.runtime.jobmanager.scheduler.Scheduler): 8; AllocatedSlot (org.apache.flink.runtime.jobmanager.slots.AllocatedSlot): 8; Future (org.apache.flink.runtime.concurrent.Future): 7