Search in sources :

Example 1 with SlotSharingGroup

use of org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup in project flink by apache.

the class ExecutionGraphRestartTest method testConstraintsAfterRestart.

@Test
public void testConstraintsAfterRestart() throws Exception {
    //setting up
    Instance instance = ExecutionGraphTestUtils.getInstance(new ActorTaskManagerGateway(new SimpleActorGateway(TestingUtils.directExecutionContext())), NUM_TASKS);
    Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());
    scheduler.newInstanceAvailable(instance);
    JobVertex groupVertex = newJobVertex("Task1", NUM_TASKS, NoOpInvokable.class);
    JobVertex groupVertex2 = newJobVertex("Task2", NUM_TASKS, NoOpInvokable.class);
    SlotSharingGroup sharingGroup = new SlotSharingGroup();
    groupVertex.setSlotSharingGroup(sharingGroup);
    groupVertex2.setSlotSharingGroup(sharingGroup);
    groupVertex.setStrictlyCoLocatedWith(groupVertex2);
    //initiate and schedule job
    JobGraph jobGraph = new JobGraph("Pointwise job", groupVertex, groupVertex2);
    ExecutionGraph eg = newExecutionGraph(new FixedDelayRestartStrategy(1, 0L), scheduler);
    eg.attachJobGraph(jobGraph.getVerticesSortedTopologicallyFromSources());
    assertEquals(JobStatus.CREATED, eg.getState());
    eg.scheduleForExecution();
    assertEquals(JobStatus.RUNNING, eg.getState());
    //sanity checks
    validateConstraints(eg);
    //restart automatically
    restartAfterFailure(eg, new FiniteDuration(2, TimeUnit.MINUTES), false);
    //checking execution vertex properties
    validateConstraints(eg);
    haltExecution(eg);
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) Instance(org.apache.flink.runtime.instance.Instance) FixedDelayRestartStrategy(org.apache.flink.runtime.executiongraph.restart.FixedDelayRestartStrategy) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) FiniteDuration(scala.concurrent.duration.FiniteDuration) SimpleActorGateway(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway) SlotSharingGroup(org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup) ActorTaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway) Test(org.junit.Test)

Example 2 with SlotSharingGroup

use of org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup in project flink by apache.

the class SharedSlotsTest method testReleaseTwoLevelsFromRoot.

/**
	 * We allocate and the structure below and release it from the root.
	 *
	 * <pre>
	 *     Shared(0)(root)
	 *        |
	 *        +-- Simple(2)(sink)
	 *        |
	 *        +-- Shared(1)(co-location-group)
	 *        |      |
	 *        |      +-- Simple(0)(tail)
	 *        |      +-- Simple(1)(head)
	 *        |
	 *        +-- Simple(0)(source)
	 * </pre>
	 */
@Test
public void testReleaseTwoLevelsFromRoot() {
    try {
        JobVertexID sourceId = new JobVertexID();
        JobVertexID headId = new JobVertexID();
        JobVertexID tailId = new JobVertexID();
        JobVertexID sinkId = new JobVertexID();
        JobVertex headVertex = new JobVertex("head", headId);
        JobVertex tailVertex = new JobVertex("tail", tailId);
        SlotSharingGroup sharingGroup = new SlotSharingGroup(sourceId, headId, tailId, sinkId);
        SlotSharingGroupAssignment assignment = sharingGroup.getTaskAssignment();
        assertEquals(0, assignment.getNumberOfSlots());
        CoLocationGroup coLocationGroup = new CoLocationGroup(headVertex, tailVertex);
        CoLocationConstraint constraint = coLocationGroup.getLocationConstraint(0);
        assertFalse(constraint.isAssigned());
        Instance instance = SchedulerTestUtils.getRandomInstance(1);
        // allocate a shared slot
        SharedSlot sharedSlot = instance.allocateSharedSlot(new JobID(), assignment);
        // get the first simple slot
        SimpleSlot sourceSlot = assignment.addSharedSlotAndAllocateSubSlot(sharedSlot, Locality.LOCAL, sourceId);
        SimpleSlot headSlot = assignment.getSlotForTask(constraint, NO_LOCATION);
        constraint.lockLocation();
        SimpleSlot tailSlot = assignment.getSlotForTask(constraint, NO_LOCATION);
        SimpleSlot sinkSlot = assignment.getSlotForTask(sinkId, NO_LOCATION);
        assertEquals(4, sharedSlot.getNumberLeaves());
        // release all
        sourceSlot.releaseSlot();
        headSlot.releaseSlot();
        tailSlot.releaseSlot();
        sinkSlot.releaseSlot();
        assertTrue(sharedSlot.isReleased());
        assertTrue(sourceSlot.isReleased());
        assertTrue(headSlot.isReleased());
        assertTrue(tailSlot.isReleased());
        assertTrue(sinkSlot.isReleased());
        assertTrue(constraint.getSharedSlot().isReleased());
        assertTrue(constraint.isAssigned());
        assertFalse(constraint.isAssignedAndAlive());
        assertEquals(1, instance.getNumberOfAvailableSlots());
        assertEquals(0, instance.getNumberOfAllocatedSlots());
        assertEquals(0, assignment.getNumberOfSlots());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : CoLocationConstraint(org.apache.flink.runtime.jobmanager.scheduler.CoLocationConstraint) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) CoLocationGroup(org.apache.flink.runtime.jobmanager.scheduler.CoLocationGroup) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) SlotSharingGroup(org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 3 with SlotSharingGroup

use of org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup in project flink by apache.

the class SharedSlotsTest method allocateAndReleaseEmptySlot.

@Test
public void allocateAndReleaseEmptySlot() {
    try {
        JobID jobId = new JobID();
        JobVertexID vertexId = new JobVertexID();
        SlotSharingGroup sharingGroup = new SlotSharingGroup(vertexId);
        SlotSharingGroupAssignment assignment = sharingGroup.getTaskAssignment();
        assertEquals(0, assignment.getNumberOfSlots());
        assertEquals(0, assignment.getNumberOfAvailableSlotsForGroup(vertexId));
        Instance instance = SchedulerTestUtils.getRandomInstance(2);
        assertEquals(2, instance.getTotalNumberOfSlots());
        assertEquals(0, instance.getNumberOfAllocatedSlots());
        assertEquals(2, instance.getNumberOfAvailableSlots());
        // allocate a shared slot
        SharedSlot slot = instance.allocateSharedSlot(jobId, assignment);
        assertEquals(2, instance.getTotalNumberOfSlots());
        assertEquals(1, instance.getNumberOfAllocatedSlots());
        assertEquals(1, instance.getNumberOfAvailableSlots());
        // check that the new slot is fresh
        assertTrue(slot.isAlive());
        assertFalse(slot.isCanceled());
        assertFalse(slot.isReleased());
        assertEquals(0, slot.getNumberLeaves());
        assertFalse(slot.hasChildren());
        assertTrue(slot.isRootAndEmpty());
        assertNotNull(slot.toString());
        assertTrue(slot.getSubSlots().isEmpty());
        assertEquals(0, slot.getSlotNumber());
        assertEquals(0, slot.getRootSlotNumber());
        // release the slot immediately.
        slot.releaseSlot();
        assertTrue(slot.isCanceled());
        assertTrue(slot.isReleased());
        // the slot sharing group and instance should not
        assertEquals(2, instance.getTotalNumberOfSlots());
        assertEquals(0, instance.getNumberOfAllocatedSlots());
        assertEquals(2, instance.getNumberOfAvailableSlots());
        assertEquals(0, assignment.getNumberOfSlots());
        assertEquals(0, assignment.getNumberOfAvailableSlotsForGroup(vertexId));
        // we should not be able to allocate any children from this released slot
        assertNull(slot.allocateSharedSlot(new AbstractID()));
        assertNull(slot.allocateSubSlot(new AbstractID()));
        // we cannot add this slot to the assignment group
        assertNull(assignment.addSharedSlotAndAllocateSubSlot(slot, Locality.NON_LOCAL, vertexId));
        assertEquals(0, assignment.getNumberOfSlots());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) SlotSharingGroup(org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup) AbstractID(org.apache.flink.util.AbstractID) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 4 with SlotSharingGroup

use of org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup in project flink by apache.

the class SharedSlotsTest method testAllocateAndReleaseTwoLevels.

/**
	 * We allocate and release the structure below, starting by allocating a simple slot in the
	 * shared slot and finishing by releasing a simple slot.
	 * 
	 * <pre>
	 *     Shared(0)(root)
	 *        |
	 *        +-- Simple(2)(sink)
	 *        |
	 *        +-- Shared(1)(co-location-group)
	 *        |      |
	 *        |      +-- Simple(0)(tail)
	 *        |      +-- Simple(1)(head)
	 *        |
	 *        +-- Simple(0)(source)
	 * </pre>
	 */
@Test
public void testAllocateAndReleaseTwoLevels() {
    try {
        JobVertexID sourceId = new JobVertexID();
        JobVertexID headId = new JobVertexID();
        JobVertexID tailId = new JobVertexID();
        JobVertexID sinkId = new JobVertexID();
        JobVertex headVertex = new JobVertex("head", headId);
        JobVertex tailVertex = new JobVertex("tail", tailId);
        SlotSharingGroup sharingGroup = new SlotSharingGroup(sourceId, headId, tailId, sinkId);
        SlotSharingGroupAssignment assignment = sharingGroup.getTaskAssignment();
        assertEquals(0, assignment.getNumberOfSlots());
        CoLocationGroup coLocationGroup = new CoLocationGroup(headVertex, tailVertex);
        CoLocationConstraint constraint = coLocationGroup.getLocationConstraint(0);
        assertFalse(constraint.isAssigned());
        Instance instance = SchedulerTestUtils.getRandomInstance(1);
        // allocate a shared slot
        SharedSlot sharedSlot = instance.allocateSharedSlot(new JobID(), assignment);
        // get the first simple slot
        SimpleSlot sourceSlot = assignment.addSharedSlotAndAllocateSubSlot(sharedSlot, Locality.LOCAL, sourceId);
        assertEquals(1, sharedSlot.getNumberLeaves());
        // get the first slot in the nested shared slot from the co-location constraint
        SimpleSlot headSlot = assignment.getSlotForTask(constraint, Collections.<TaskManagerLocation>emptySet());
        assertEquals(2, sharedSlot.getNumberLeaves());
        assertNotNull(constraint.getSharedSlot());
        assertTrue(constraint.getSharedSlot().isAlive());
        assertFalse(constraint.isAssigned());
        // we do not immediately lock the location
        headSlot.releaseSlot();
        assertEquals(1, sharedSlot.getNumberLeaves());
        assertNotNull(constraint.getSharedSlot());
        assertTrue(constraint.getSharedSlot().isReleased());
        assertFalse(constraint.isAssigned());
        // re-allocate the head slot
        headSlot = assignment.getSlotForTask(constraint, Collections.<TaskManagerLocation>emptySet());
        constraint.lockLocation();
        assertNotNull(constraint.getSharedSlot());
        assertTrue(constraint.isAssigned());
        assertTrue(constraint.isAssignedAndAlive());
        assertEquals(instance.getTaskManagerLocation(), constraint.getLocation());
        SimpleSlot tailSlot = assignment.getSlotForTask(constraint, Collections.<TaskManagerLocation>emptySet());
        assertEquals(constraint.getSharedSlot(), headSlot.getParent());
        assertEquals(constraint.getSharedSlot(), tailSlot.getParent());
        SimpleSlot sinkSlot = assignment.getSlotForTask(sinkId, Collections.<TaskManagerLocation>emptySet());
        assertEquals(4, sharedSlot.getNumberLeaves());
        // we release our co-location constraint tasks
        headSlot.releaseSlot();
        tailSlot.releaseSlot();
        assertEquals(2, sharedSlot.getNumberLeaves());
        assertTrue(headSlot.isReleased());
        assertTrue(tailSlot.isReleased());
        assertTrue(constraint.isAssigned());
        assertFalse(constraint.isAssignedAndAlive());
        assertEquals(instance.getTaskManagerLocation(), constraint.getLocation());
        // we should have resources again for the co-location constraint
        assertEquals(1, assignment.getNumberOfAvailableSlotsForGroup(constraint.getGroupId()));
        // re-allocate head and tail from the constraint
        headSlot = assignment.getSlotForTask(constraint, NO_LOCATION);
        tailSlot = assignment.getSlotForTask(constraint, NO_LOCATION);
        assertEquals(4, sharedSlot.getNumberLeaves());
        assertEquals(0, assignment.getNumberOfAvailableSlotsForGroup(constraint.getGroupId()));
        // verify some basic properties of the slots
        assertEquals(instance.getTaskManagerID(), sourceSlot.getTaskManagerID());
        assertEquals(instance.getTaskManagerID(), headSlot.getTaskManagerID());
        assertEquals(instance.getTaskManagerID(), tailSlot.getTaskManagerID());
        assertEquals(instance.getTaskManagerID(), sinkSlot.getTaskManagerID());
        assertEquals(sourceId, sourceSlot.getGroupID());
        assertEquals(sinkId, sinkSlot.getGroupID());
        assertNull(headSlot.getGroupID());
        assertNull(tailSlot.getGroupID());
        assertEquals(constraint.getGroupId(), constraint.getSharedSlot().getGroupID());
        // release all
        sourceSlot.releaseSlot();
        headSlot.releaseSlot();
        tailSlot.releaseSlot();
        sinkSlot.releaseSlot();
        assertTrue(sharedSlot.isReleased());
        assertTrue(sourceSlot.isReleased());
        assertTrue(headSlot.isReleased());
        assertTrue(tailSlot.isReleased());
        assertTrue(sinkSlot.isReleased());
        assertTrue(constraint.getSharedSlot().isReleased());
        assertTrue(constraint.isAssigned());
        assertFalse(constraint.isAssignedAndAlive());
        assertEquals(1, instance.getNumberOfAvailableSlots());
        assertEquals(0, assignment.getNumberOfSlots());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : CoLocationConstraint(org.apache.flink.runtime.jobmanager.scheduler.CoLocationConstraint) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) CoLocationGroup(org.apache.flink.runtime.jobmanager.scheduler.CoLocationGroup) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) SlotSharingGroup(org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 5 with SlotSharingGroup

use of org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup in project flink by apache.

the class ExecutionGraphConstructionTest method testCoLocationConstraintCreation.

@Test
public void testCoLocationConstraintCreation() {
    try {
        final JobID jobId = new JobID();
        final String jobName = "Co-Location Constraint Sample Job";
        final Configuration cfg = new Configuration();
        // simple group of two, cyclic
        JobVertex v1 = new JobVertex("vertex1");
        JobVertex v2 = new JobVertex("vertex2");
        v1.setParallelism(6);
        v2.setParallelism(4);
        v1.setInvokableClass(AbstractInvokable.class);
        v2.setInvokableClass(AbstractInvokable.class);
        SlotSharingGroup sl1 = new SlotSharingGroup();
        v1.setSlotSharingGroup(sl1);
        v2.setSlotSharingGroup(sl1);
        v2.setStrictlyCoLocatedWith(v1);
        v1.setStrictlyCoLocatedWith(v2);
        // complex forked dependency pattern
        JobVertex v3 = new JobVertex("vertex3");
        JobVertex v4 = new JobVertex("vertex4");
        JobVertex v5 = new JobVertex("vertex5");
        JobVertex v6 = new JobVertex("vertex6");
        JobVertex v7 = new JobVertex("vertex7");
        v3.setParallelism(3);
        v4.setParallelism(3);
        v5.setParallelism(3);
        v6.setParallelism(3);
        v7.setParallelism(3);
        v3.setInvokableClass(AbstractInvokable.class);
        v4.setInvokableClass(AbstractInvokable.class);
        v5.setInvokableClass(AbstractInvokable.class);
        v6.setInvokableClass(AbstractInvokable.class);
        v7.setInvokableClass(AbstractInvokable.class);
        SlotSharingGroup sl2 = new SlotSharingGroup();
        v3.setSlotSharingGroup(sl2);
        v4.setSlotSharingGroup(sl2);
        v5.setSlotSharingGroup(sl2);
        v6.setSlotSharingGroup(sl2);
        v7.setSlotSharingGroup(sl2);
        v4.setStrictlyCoLocatedWith(v3);
        v5.setStrictlyCoLocatedWith(v4);
        v6.setStrictlyCoLocatedWith(v3);
        v3.setStrictlyCoLocatedWith(v7);
        // isolated vertex
        JobVertex v8 = new JobVertex("vertex8");
        v8.setParallelism(2);
        v8.setInvokableClass(AbstractInvokable.class);
        JobGraph jg = new JobGraph(jobId, jobName, v1, v2, v3, v4, v5, v6, v7, v8);
        ExecutionGraph eg = new ExecutionGraph(TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), jobId, jobName, cfg, new SerializedValue<>(new ExecutionConfig()), AkkaUtils.getDefaultTimeout(), new NoRestartStrategy(), new Scheduler(TestingUtils.defaultExecutionContext()));
        eg.attachJobGraph(jg.getVerticesSortedTopologicallyFromSources());
        // check the v1 / v2 co location hints ( assumes parallelism(v1) >= parallelism(v2) )
        {
            ExecutionVertex[] v1s = eg.getJobVertex(v1.getID()).getTaskVertices();
            ExecutionVertex[] v2s = eg.getJobVertex(v2.getID()).getTaskVertices();
            Set<CoLocationConstraint> all = new HashSet<CoLocationConstraint>();
            for (int i = 0; i < v2.getParallelism(); i++) {
                assertNotNull(v1s[i].getLocationConstraint());
                assertNotNull(v2s[i].getLocationConstraint());
                assertTrue(v1s[i].getLocationConstraint() == v2s[i].getLocationConstraint());
                all.add(v1s[i].getLocationConstraint());
            }
            for (int i = v2.getParallelism(); i < v1.getParallelism(); i++) {
                assertNotNull(v1s[i].getLocationConstraint());
                all.add(v1s[i].getLocationConstraint());
            }
            assertEquals("not all co location constraints are distinct", v1.getParallelism(), all.size());
        }
        // check the v1 / v2 co location hints ( assumes parallelism(v1) >= parallelism(v2) )
        {
            ExecutionVertex[] v3s = eg.getJobVertex(v3.getID()).getTaskVertices();
            ExecutionVertex[] v4s = eg.getJobVertex(v4.getID()).getTaskVertices();
            ExecutionVertex[] v5s = eg.getJobVertex(v5.getID()).getTaskVertices();
            ExecutionVertex[] v6s = eg.getJobVertex(v6.getID()).getTaskVertices();
            ExecutionVertex[] v7s = eg.getJobVertex(v7.getID()).getTaskVertices();
            Set<CoLocationConstraint> all = new HashSet<CoLocationConstraint>();
            for (int i = 0; i < v3.getParallelism(); i++) {
                assertNotNull(v3s[i].getLocationConstraint());
                assertTrue(v3s[i].getLocationConstraint() == v4s[i].getLocationConstraint());
                assertTrue(v4s[i].getLocationConstraint() == v5s[i].getLocationConstraint());
                assertTrue(v5s[i].getLocationConstraint() == v6s[i].getLocationConstraint());
                assertTrue(v6s[i].getLocationConstraint() == v7s[i].getLocationConstraint());
                all.add(v3s[i].getLocationConstraint());
            }
            assertEquals("not all co location constraints are distinct", v3.getParallelism(), all.size());
        }
        // check the v8 has no co location hints
        {
            ExecutionVertex[] v8s = eg.getJobVertex(v8.getID()).getTaskVertices();
            for (int i = 0; i < v8.getParallelism(); i++) {
                assertNull(v8s[i].getLocationConstraint());
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : CoLocationConstraint(org.apache.flink.runtime.jobmanager.scheduler.CoLocationConstraint) HashSet(java.util.HashSet) Set(java.util.Set) IntermediateDataSet(org.apache.flink.runtime.jobgraph.IntermediateDataSet) Configuration(org.apache.flink.configuration.Configuration) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) NoRestartStrategy(org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy) JobException(org.apache.flink.runtime.JobException) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) SlotSharingGroup(org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

SlotSharingGroup (org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup)53 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)35 Test (org.junit.Test)30 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)18 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)14 JobID (org.apache.flink.api.common.JobID)11 HashMap (java.util.HashMap)8 Configuration (org.apache.flink.configuration.Configuration)8 ArrayList (java.util.ArrayList)7 HashSet (java.util.HashSet)6 Map (java.util.Map)6 Set (java.util.Set)6 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)6 ResultPartitionType (org.apache.flink.runtime.io.network.partition.ResultPartitionType)6 CoLocationGroup (org.apache.flink.runtime.jobmanager.scheduler.CoLocationGroup)6 IOException (java.io.IOException)5 Arrays (java.util.Arrays)5 IdentityHashMap (java.util.IdentityHashMap)5 Collections (java.util.Collections)4 Comparator (java.util.Comparator)4