Example 6 with JobVertexID

Use of org.apache.flink.runtime.jobgraph.JobVertexID in project flink by apache.

From the class JobManagerTest, method testKvStateMessages.

/**
 * Tests that the JobManager handles {@link org.apache.flink.runtime.query.KvStateMessage}
 * instances as expected.
 */
@Test
public void testKvStateMessages() throws Exception {
    Deadline deadline = new FiniteDuration(100, TimeUnit.SECONDS).fromNow();
    Configuration config = new Configuration();
    config.setString(ConfigConstants.AKKA_ASK_TIMEOUT, "100ms");
    UUID leaderSessionId = null;
    ActorGateway jobManager = new AkkaActorGateway(
            JobManager.startJobManagerActors(
                    config,
                    system,
                    TestingUtils.defaultExecutor(),
                    TestingUtils.defaultExecutor(),
                    TestingJobManager.class,
                    MemoryArchivist.class)._1(),
            leaderSessionId);
    LeaderRetrievalService leaderRetrievalService = new StandaloneLeaderRetrievalService(AkkaUtils.getAkkaURL(system, jobManager.actor()));
    Configuration tmConfig = new Configuration();
    tmConfig.setInteger(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, 4);
    tmConfig.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 8);
    ActorRef taskManager = TaskManager.startTaskManagerComponentsAndActor(
            tmConfig,
            ResourceID.generate(),
            system,
            "localhost",
            scala.Option.<String>empty(),
            scala.Option.apply(leaderRetrievalService),
            true,
            TestingTaskManager.class);
    Future<Object> registrationFuture = jobManager.ask(new NotifyWhenAtLeastNumTaskManagerAreRegistered(1), deadline.timeLeft());
    Await.ready(registrationFuture, deadline.timeLeft());
    //
    // Location lookup
    //
    LookupKvStateLocation lookupNonExistingJob = new LookupKvStateLocation(new JobID(), "any-name");
    Future<KvStateLocation> lookupFuture = jobManager.ask(lookupNonExistingJob, deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<KvStateLocation>apply(KvStateLocation.class));
    try {
        Await.result(lookupFuture, deadline.timeLeft());
        fail("Did not throw expected Exception");
    } catch (IllegalStateException ignored) {
    // Expected
    }
    JobGraph jobGraph = new JobGraph("croissant");
    JobVertex jobVertex1 = new JobVertex("cappuccino");
    jobVertex1.setParallelism(4);
    jobVertex1.setMaxParallelism(16);
    jobVertex1.setInvokableClass(BlockingNoOpInvokable.class);
    JobVertex jobVertex2 = new JobVertex("americano");
    jobVertex2.setParallelism(4);
    jobVertex2.setMaxParallelism(16);
    jobVertex2.setInvokableClass(BlockingNoOpInvokable.class);
    jobGraph.addVertex(jobVertex1);
    jobGraph.addVertex(jobVertex2);
    Future<JobSubmitSuccess> submitFuture = jobManager.ask(new SubmitJob(jobGraph, ListeningBehaviour.DETACHED), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<JobSubmitSuccess>apply(JobSubmitSuccess.class));
    Await.result(submitFuture, deadline.timeLeft());
    Object lookupUnknownRegistrationName = new LookupKvStateLocation(jobGraph.getJobID(), "unknown");
    lookupFuture = jobManager.ask(lookupUnknownRegistrationName, deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<KvStateLocation>apply(KvStateLocation.class));
    try {
        Await.result(lookupFuture, deadline.timeLeft());
        fail("Did not throw expected Exception");
    } catch (UnknownKvStateLocation ignored) {
    // Expected
    }
    //
    // Registration
    //
    NotifyKvStateRegistered registerNonExistingJob = new NotifyKvStateRegistered(new JobID(), new JobVertexID(), new KeyGroupRange(0, 0), "any-name", new KvStateID(), new KvStateServerAddress(InetAddress.getLocalHost(), 1233));
    jobManager.tell(registerNonExistingJob);
    LookupKvStateLocation lookupAfterRegistration = new LookupKvStateLocation(registerNonExistingJob.getJobId(), registerNonExistingJob.getRegistrationName());
    lookupFuture = jobManager.ask(lookupAfterRegistration, deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<KvStateLocation>apply(KvStateLocation.class));
    try {
        Await.result(lookupFuture, deadline.timeLeft());
        fail("Did not throw expected Exception");
    } catch (IllegalStateException ignored) {
    // Expected
    }
    NotifyKvStateRegistered registerForExistingJob = new NotifyKvStateRegistered(jobGraph.getJobID(), jobVertex1.getID(), new KeyGroupRange(0, 0), "register-me", new KvStateID(), new KvStateServerAddress(InetAddress.getLocalHost(), 1293));
    jobManager.tell(registerForExistingJob);
    lookupAfterRegistration = new LookupKvStateLocation(registerForExistingJob.getJobId(), registerForExistingJob.getRegistrationName());
    lookupFuture = jobManager.ask(lookupAfterRegistration, deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<KvStateLocation>apply(KvStateLocation.class));
    KvStateLocation location = Await.result(lookupFuture, deadline.timeLeft());
    assertNotNull(location);
    assertEquals(jobGraph.getJobID(), location.getJobId());
    assertEquals(jobVertex1.getID(), location.getJobVertexId());
    assertEquals(jobVertex1.getMaxParallelism(), location.getNumKeyGroups());
    assertEquals(1, location.getNumRegisteredKeyGroups());
    KeyGroupRange keyGroupRange = registerForExistingJob.getKeyGroupRange();
    assertEquals(1, keyGroupRange.getNumberOfKeyGroups());
    assertEquals(registerForExistingJob.getKvStateId(), location.getKvStateID(keyGroupRange.getStartKeyGroup()));
    assertEquals(registerForExistingJob.getKvStateServerAddress(), location.getKvStateServerAddress(keyGroupRange.getStartKeyGroup()));
    //
    // Unregistration
    //
    NotifyKvStateUnregistered unregister = new NotifyKvStateUnregistered(registerForExistingJob.getJobId(), registerForExistingJob.getJobVertexId(), registerForExistingJob.getKeyGroupRange(), registerForExistingJob.getRegistrationName());
    jobManager.tell(unregister);
    lookupFuture = jobManager.ask(lookupAfterRegistration, deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<KvStateLocation>apply(KvStateLocation.class));
    try {
        Await.result(lookupFuture, deadline.timeLeft());
        fail("Did not throw expected Exception");
    } catch (UnknownKvStateLocation ignored) {
    // Expected
    }
    //
    // Duplicate registration fails task
    //
    NotifyKvStateRegistered register = new NotifyKvStateRegistered(jobGraph.getJobID(), jobVertex1.getID(), new KeyGroupRange(0, 0), "duplicate-me", new KvStateID(), new KvStateServerAddress(InetAddress.getLocalHost(), 1293));
    NotifyKvStateRegistered duplicate = new NotifyKvStateRegistered(
            jobGraph.getJobID(),
            jobVertex2.getID(),       // <--- different operator, but...
            new KeyGroupRange(0, 0),
            "duplicate-me",           // ...same name
            new KvStateID(),
            new KvStateServerAddress(InetAddress.getLocalHost(), 1293));
    Future<TestingJobManagerMessages.JobStatusIs> failedFuture = jobManager.ask(new NotifyWhenJobStatus(jobGraph.getJobID(), JobStatus.FAILED), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<JobStatusIs>apply(JobStatusIs.class));
    jobManager.tell(register);
    jobManager.tell(duplicate);
    // Wait for failure
    JobStatusIs jobStatus = Await.result(failedFuture, deadline.timeLeft());
    assertEquals(JobStatus.FAILED, jobStatus.state());
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) Configuration(org.apache.flink.configuration.Configuration) UnknownKvStateLocation(org.apache.flink.runtime.query.UnknownKvStateLocation) ActorRef(akka.actor.ActorRef) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) KvStateServerAddress(org.apache.flink.runtime.query.KvStateServerAddress) LookupKvStateLocation(org.apache.flink.runtime.query.KvStateMessage.LookupKvStateLocation) KvStateLocation(org.apache.flink.runtime.query.KvStateLocation) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) JobSubmitSuccess(org.apache.flink.runtime.messages.JobManagerMessages.JobSubmitSuccess) KvStateID(org.apache.flink.runtime.query.KvStateID) UUID(java.util.UUID) SubmitJob(org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob) NotifyKvStateRegistered(org.apache.flink.runtime.query.KvStateMessage.NotifyKvStateRegistered) NotifyKvStateUnregistered(org.apache.flink.runtime.query.KvStateMessage.NotifyKvStateUnregistered) JobStatusIs(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.JobStatusIs) Deadline(scala.concurrent.duration.Deadline) FiniteDuration(scala.concurrent.duration.FiniteDuration) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) StandaloneLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.StandaloneLeaderRetrievalService) LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) NotifyWhenAtLeastNumTaskManagerAreRegistered(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.NotifyWhenAtLeastNumTaskManagerAreRegistered) JobID(org.apache.flink.api.common.JobID) NotifyWhenJobStatus(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.NotifyWhenJobStatus) Test(org.junit.Test)
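
A minimal sketch of the registration/lookup round trip the test exercises, assuming the same runtime classes and the jobManager gateway, jobGraph, jobVertex1 and deadline set up as above; the registration name "my-state" and port 9999 are purely illustrative:

NotifyKvStateRegistered register = new NotifyKvStateRegistered(
        jobGraph.getJobID(),
        jobVertex1.getID(),             // the JobVertexID ties the registration to an operator
        new KeyGroupRange(0, 0),        // a single key group
        "my-state",                     // illustrative name, used as the lookup key later
        new KvStateID(),
        new KvStateServerAddress(InetAddress.getLocalHost(), 9999)); // illustrative port
jobManager.tell(register);              // registration is a fire-and-forget notification

// Lookups are keyed by (JobID, registration name) and answered with a KvStateLocation.
Future<KvStateLocation> lookup = jobManager
        .ask(new LookupKvStateLocation(jobGraph.getJobID(), "my-state"), deadline.timeLeft())
        .mapTo(ClassTag$.MODULE$.<KvStateLocation>apply(KvStateLocation.class));
KvStateLocation location = Await.result(lookup, deadline.timeLeft());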

Example 7 with JobVertexID

Use of org.apache.flink.runtime.jobgraph.JobVertexID in project flink by apache.

From the class SchedulerSlotSharingTest, method scheduleSingleVertexType.

@Test
public void scheduleSingleVertexType() {
    try {
        JobVertexID jid1 = new JobVertexID();
        SlotSharingGroup sharingGroup = new SlotSharingGroup(jid1);
        Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext());
        Instance i1 = getRandomInstance(2);
        Instance i2 = getRandomInstance(2);
        scheduler.newInstanceAvailable(i1);
        scheduler.newInstanceAvailable(i2);
        // schedule 4 tasks from the first vertex group
        SimpleSlot s1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 8), sharingGroup), false).get();
        SimpleSlot s2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 1, 8), sharingGroup), false).get();
        SimpleSlot s3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 2, 8), sharingGroup), false).get();
        SimpleSlot s4 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 3, 8), sharingGroup), false).get();
        assertNotNull(s1);
        assertNotNull(s2);
        assertNotNull(s3);
        assertNotNull(s4);
        assertTrue(areAllDistinct(s1, s2, s3, s4));
        // we cannot schedule another task from the first vertex group
        try {
            scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 4, 8), sharingGroup), false).get();
            fail("Scheduler accepted too many tasks at the same time");
        } catch (ExecutionException e) {
            assertTrue(e.getCause() instanceof NoResourceAvailableException);
        } catch (Exception e) {
            fail("Wrong exception.");
        }
        // release something
        s3.releaseSlot();
        // allocate another slot from that group
        SimpleSlot s5 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 4, 8), sharingGroup), false).get();
        assertNotNull(s5);
        // release all old slots
        s1.releaseSlot();
        s2.releaseSlot();
        s4.releaseSlot();
        SimpleSlot s6 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 5, 8), sharingGroup), false).get();
        SimpleSlot s7 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 6, 8), sharingGroup), false).get();
        SimpleSlot s8 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 7, 8), sharingGroup), false).get();
        assertNotNull(s6);
        assertNotNull(s7);
        assertNotNull(s8);
        // make sure we have two slots on the first instance, and two on the second
        int c = 0;
        c += (s5.getTaskManagerID().equals(i1.getTaskManagerID())) ? 1 : -1;
        c += (s6.getTaskManagerID().equals(i1.getTaskManagerID())) ? 1 : -1;
        c += (s7.getTaskManagerID().equals(i1.getTaskManagerID())) ? 1 : -1;
        c += (s8.getTaskManagerID().equals(i1.getTaskManagerID())) ? 1 : -1;
        assertEquals(0, c);
        // release all
        s5.releaseSlot();
        s6.releaseSlot();
        s7.releaseSlot();
        s8.releaseSlot();
        // test that everything is released
        assertEquals(4, scheduler.getNumberOfAvailableSlots());
        // check the scheduler's bookkeeping
        assertEquals(0, scheduler.getNumberOfLocalizedAssignments());
        assertEquals(0, scheduler.getNumberOfNonLocalizedAssignments());
        assertEquals(8, scheduler.getNumberOfUnconstrainedAssignments());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : Instance(org.apache.flink.runtime.instance.Instance) SchedulerTestUtils.getRandomInstance(org.apache.flink.runtime.jobmanager.scheduler.SchedulerTestUtils.getRandomInstance) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionException(java.util.concurrent.ExecutionException) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) Test(org.junit.Test)
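
The allocation pattern above condenses to the following sketch, assuming the same flink-runtime test harness the test itself uses (SchedulerTestUtils.getTestVertex, SchedulerTestUtils.getRandomInstance and TestingUtils):

JobVertexID jid = new JobVertexID();
SlotSharingGroup group = new SlotSharingGroup(jid);    // subtasks of jid may share slots
Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext());
scheduler.newInstanceAvailable(getRandomInstance(2));  // one instance offering 2 slots

// allocateSlot takes a ScheduledUnit of (test vertex, sharing group); get() either
// yields a SimpleSlot or throws an ExecutionException wrapping
// NoResourceAvailableException once the group's capacity is exhausted.
SimpleSlot slot = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid, 0, 2), group), false).get();
slot.releaseSlot();                                    // frees the sub-slot for the group again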

Example 8 with JobVertexID

Use of org.apache.flink.runtime.jobgraph.JobVertexID in project flink by apache.

From the class SchedulerSlotSharingTest, method testConcurrentAllocateAndRelease.

@Test
public void testConcurrentAllocateAndRelease() {
    final ExecutorService executor = Executors.newFixedThreadPool(20);
    try {
        for (int run = 0; run < 50; run++) {
            final JobVertexID jid1 = new JobVertexID();
            final JobVertexID jid2 = new JobVertexID();
            final JobVertexID jid3 = new JobVertexID();
            final JobVertexID jid4 = new JobVertexID();
            final SlotSharingGroup sharingGroup = new SlotSharingGroup(jid1, jid2, jid3, jid4);
            final Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());
            scheduler.newInstanceAvailable(getRandomInstance(4));
            final AtomicInteger enumerator1 = new AtomicInteger();
            final AtomicInteger enumerator2 = new AtomicInteger();
            final AtomicBoolean flag3 = new AtomicBoolean();
            final AtomicInteger enumerator4 = new AtomicInteger();
            final Random rnd = new Random();
            // use atomic boolean as a mutable boolean reference
            final AtomicBoolean failed = new AtomicBoolean(false);
            // use atomic integer as a mutable integer reference
            final AtomicInteger completed = new AtomicInteger();
            final Runnable deploy4 = new Runnable() {

                @Override
                public void run() {
                    try {
                        SimpleSlot slot = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid4, enumerator4.getAndIncrement(), 4), sharingGroup), false).get();
                        sleepUninterruptibly(rnd.nextInt(5));
                        slot.releaseSlot();
                        if (completed.incrementAndGet() == 13) {
                            synchronized (completed) {
                                completed.notifyAll();
                            }
                        }
                    } catch (Throwable t) {
                        t.printStackTrace();
                        failed.set(true);
                    }
                }
            };
            final Runnable deploy3 = new Runnable() {

                @Override
                public void run() {
                    try {
                        if (flag3.compareAndSet(false, true)) {
                            SimpleSlot slot = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 0, 1), sharingGroup), false).get();
                            sleepUninterruptibly(5);
                            executor.execute(deploy4);
                            executor.execute(deploy4);
                            executor.execute(deploy4);
                            executor.execute(deploy4);
                            slot.releaseSlot();
                            if (completed.incrementAndGet() == 13) {
                                synchronized (completed) {
                                    completed.notifyAll();
                                }
                            }
                        }
                    } catch (Throwable t) {
                        t.printStackTrace();
                        failed.set(true);
                    }
                }
            };
            final Runnable deploy2 = new Runnable() {

                @Override
                public void run() {
                    try {
                        SimpleSlot slot = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, enumerator2.getAndIncrement(), 4), sharingGroup), false).get();
                        // wait a bit till scheduling the successor
                        sleepUninterruptibly(rnd.nextInt(5));
                        executor.execute(deploy3);
                        // wait a bit until release
                        sleepUninterruptibly(rnd.nextInt(5));
                        slot.releaseSlot();
                        if (completed.incrementAndGet() == 13) {
                            synchronized (completed) {
                                completed.notifyAll();
                            }
                        }
                    } catch (Throwable t) {
                        t.printStackTrace();
                        failed.set(true);
                    }
                }
            };
            final Runnable deploy1 = new Runnable() {

                @Override
                public void run() {
                    try {
                        SimpleSlot slot = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, enumerator1.getAndIncrement(), 4), sharingGroup), false).get();
                        // wait a bit till scheduling the successor
                        sleepUninterruptibly(rnd.nextInt(5));
                        executor.execute(deploy2);
                        // wait a bit until release
                        sleepUninterruptibly(rnd.nextInt(5));
                        slot.releaseSlot();
                        if (completed.incrementAndGet() == 13) {
                            synchronized (completed) {
                                completed.notifyAll();
                            }
                        }
                    } catch (Throwable t) {
                        t.printStackTrace();
                        failed.set(true);
                    }
                }
            };
            final Runnable deploy0 = new Runnable() {

                @Override
                public void run() {
                    sleepUninterruptibly(rnd.nextInt(10));
                    executor.execute(deploy1);
                }
            };
            executor.execute(deploy0);
            executor.execute(deploy0);
            executor.execute(deploy0);
            executor.execute(deploy0);
            //noinspection SynchronizationOnLocalVariableOrMethodParameter
            synchronized (completed) {
                while (!failed.get() && completed.get() < 13) {
                    completed.wait(1000);
                }
            }
            assertFalse("Thread failed", failed.get());
            while (scheduler.getNumberOfAvailableSlots() < 4) {
                sleepUninterruptibly(5);
            }
            assertEquals(1, scheduler.getNumberOfAvailableInstances());
            assertEquals(1, scheduler.getNumberOfInstancesWithAvailableSlots());
            assertEquals(4, scheduler.getNumberOfAvailableSlots());
            assertEquals(13, scheduler.getNumberOfUnconstrainedAssignments());
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) ExecutionException(java.util.concurrent.ExecutionException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Random(java.util.Random) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ExecutorService(java.util.concurrent.ExecutorService) Test(org.junit.Test)
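
Each round of this test coordinates 13 completing worker Runnables (4 deploy1 + 4 deploy2 + 1 deploy3 + 4 deploy4; deploy0 only schedules work) with a hand-rolled completion latch: an AtomicInteger counted up by the workers plus wait/notifyAll on the counter object itself. A self-contained sketch of just that pattern; the class name and pool size are illustrative:

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

public class CompletionLatchSketch {

    public static void main(String[] args) throws InterruptedException {
        final int expected = 13;                          // workers that must complete
        final AtomicBoolean failed = new AtomicBoolean(false);
        final AtomicInteger completed = new AtomicInteger();
        ExecutorService executor = Executors.newFixedThreadPool(4);

        for (int i = 0; i < expected; i++) {
            executor.execute(new Runnable() {
                @Override
                public void run() {
                    try {
                        // ... allocate a slot, hold it briefly, release it ...
                        if (completed.incrementAndGet() == expected) {
                            synchronized (completed) {
                                completed.notifyAll();    // wake the waiting driver
                            }
                        }
                    } catch (Throwable t) {
                        failed.set(true);                 // recorded, checked by the driver
                    }
                }
            });
        }

        synchronized (completed) {
            // The bounded wait re-checks the counter each second, so a notifyAll
            // that fires before the driver starts waiting cannot hang the test.
            while (!failed.get() && completed.get() < expected) {
                completed.wait(1000);
            }
        }
        executor.shutdown();
    }
}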

Example 9 with JobVertexID

Use of org.apache.flink.runtime.jobgraph.JobVertexID in project flink by apache.

From the class SchedulerSlotSharingTest, method allocateSlotWithTemprarilyEmptyVertexGroup.

@Test
public void allocateSlotWithTemprarilyEmptyVertexGroup() {
    try {
        JobVertexID jid1 = new JobVertexID();
        JobVertexID jid2 = new JobVertexID();
        JobVertexID jid3 = new JobVertexID();
        SlotSharingGroup sharingGroup = new SlotSharingGroup(jid1, jid2, jid3);
        Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext());
        scheduler.newInstanceAvailable(getRandomInstance(2));
        scheduler.newInstanceAvailable(getRandomInstance(2));
        // schedule 4 tasks from the first vertex group
        SimpleSlot s1_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 4), sharingGroup), false).get();
        SimpleSlot s2_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 1, 4), sharingGroup), false).get();
        SimpleSlot s3_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 2, 4), sharingGroup), false).get();
        SimpleSlot s4_1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 3, 4), sharingGroup), false).get();
        assertNotNull(s1_1);
        assertNotNull(s2_1);
        assertNotNull(s3_1);
        assertNotNull(s4_1);
        assertTrue(areAllDistinct(s1_1, s2_1, s3_1, s4_1));
        // schedule 4 tasks from the second vertex group
        SimpleSlot s1_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 0, 7), sharingGroup), false).get();
        SimpleSlot s2_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 1, 7), sharingGroup), false).get();
        SimpleSlot s3_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 2, 7), sharingGroup), false).get();
        SimpleSlot s4_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 3, 7), sharingGroup), false).get();
        assertNotNull(s1_2);
        assertNotNull(s2_2);
        assertNotNull(s3_2);
        assertNotNull(s4_2);
        assertTrue(areAllDistinct(s1_2, s2_2, s3_2, s4_2));
        // schedule 4 tasks from the third vertex group
        SimpleSlot s1_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 0, 4), sharingGroup), false).get();
        SimpleSlot s2_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 1, 4), sharingGroup), false).get();
        SimpleSlot s3_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 2, 4), sharingGroup), false).get();
        SimpleSlot s4_3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid3, 3, 4), sharingGroup), false).get();
        assertNotNull(s1_3);
        assertNotNull(s2_3);
        assertNotNull(s3_3);
        assertNotNull(s4_3);
        assertTrue(areAllDistinct(s1_3, s2_3, s3_3, s4_3));
        // all shared slots are occupied, so we cannot schedule another task
        try {
            scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 4, 5), sharingGroup), false).get();
            fail("Scheduler accepted too many tasks at the same time");
        } catch (ExecutionException e) {
            assertTrue(e.getCause() instanceof NoResourceAvailableException);
        } catch (Exception e) {
            fail("Wrong exception.");
        }
        // release the second vertex group
        s1_2.releaseSlot();
        s2_2.releaseSlot();
        s3_2.releaseSlot();
        s4_2.releaseSlot();
        SimpleSlot s5_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 5, 7), sharingGroup), false).get();
        SimpleSlot s6_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 6, 7), sharingGroup), false).get();
        SimpleSlot s7_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 7, 7), sharingGroup), false).get();
        assertNotNull(s5_2);
        assertNotNull(s6_2);
        assertNotNull(s7_2);
        // release the slots
        s1_1.releaseSlot();
        s2_1.releaseSlot();
        s3_1.releaseSlot();
        s4_1.releaseSlot();
        s5_2.releaseSlot();
        s6_2.releaseSlot();
        s7_2.releaseSlot();
        // the shared slots are still pinned by the third vertex group, so none return to the scheduler
        assertEquals(0, scheduler.getNumberOfAvailableSlots());
        s1_3.releaseSlot();
        s2_3.releaseSlot();
        s3_3.releaseSlot();
        s4_3.releaseSlot();
        // test that everything is released
        assertEquals(4, scheduler.getNumberOfAvailableSlots());
        // check the scheduler's bookkeeping
        assertEquals(0, scheduler.getNumberOfLocalizedAssignments());
        assertEquals(0, scheduler.getNumberOfNonLocalizedAssignments());
        assertEquals(15, scheduler.getNumberOfUnconstrainedAssignments());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionException(java.util.concurrent.ExecutionException) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) Test(org.junit.Test)
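
The behaviour being verified condenses to this: a shared slot goes back to the scheduler only when its last sub-slot is released, so a vertex group may become empty temporarily without the group losing its slots. A sketch using the same variables and harness as the test above:

// Releasing every jid2 sub-slot leaves jid2 temporarily empty, yet no slot
// returns to the scheduler: each shared slot is still pinned by its jid1
// and jid3 sub-slots.
s1_2.releaseSlot();
s2_2.releaseSlot();
s3_2.releaseSlot();
s4_2.releaseSlot();
assertEquals(0, scheduler.getNumberOfAvailableSlots());

// New jid2 subtasks therefore land in the already-held shared slots.
SimpleSlot s5_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 5, 7), sharingGroup), false).get();
assertNotNull(s5_2);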

Example 10 with JobVertexID

Use of org.apache.flink.runtime.jobgraph.JobVertexID in project flink by apache.

From the class SchedulerSlotSharingTest, method allocateSlotWithSharing.

@Test
public void allocateSlotWithSharing() {
    try {
        JobVertexID jid1 = new JobVertexID();
        JobVertexID jid2 = new JobVertexID();
        SlotSharingGroup sharingGroup = new SlotSharingGroup(jid1, jid2);
        Scheduler scheduler = new Scheduler(TestingUtils.directExecutionContext());
        scheduler.newInstanceAvailable(getRandomInstance(2));
        scheduler.newInstanceAvailable(getRandomInstance(2));
        // schedule 4 tasks from the first vertex group
        SimpleSlot s1 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 0, 5), sharingGroup), false).get();
        SimpleSlot s2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 1, 5), sharingGroup), false).get();
        SimpleSlot s3 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 2, 5), sharingGroup), false).get();
        SimpleSlot s4 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 3, 5), sharingGroup), false).get();
        assertNotNull(s1);
        assertNotNull(s2);
        assertNotNull(s3);
        assertNotNull(s4);
        assertTrue(areAllDistinct(s1, s2, s3, s4));
        // we cannot schedule another task from the first vertex group
        try {
            scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 4, 5), sharingGroup), false).get();
            fail("Scheduler accepted too many tasks at the same time");
        } catch (ExecutionException e) {
            assertTrue(e.getCause() instanceof NoResourceAvailableException);
        } catch (Exception e) {
            fail("Wrong exception.");
        }
        // schedule some tasks from the second ID group
        SimpleSlot s1_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 0, 5), sharingGroup), false).get();
        SimpleSlot s2_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 1, 5), sharingGroup), false).get();
        SimpleSlot s3_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 2, 5), sharingGroup), false).get();
        SimpleSlot s4_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 3, 5), sharingGroup), false).get();
        assertNotNull(s1_2);
        assertNotNull(s2_2);
        assertNotNull(s3_2);
        assertNotNull(s4_2);
        // we cannot schedule another task from the second vertex group
        try {
            scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 4, 5), sharingGroup), false).get();
            fail("Scheduler accepted too many tasks at the same time");
        } catch (ExecutionException e) {
            assertTrue(e.getCause() instanceof NoResourceAvailableException);
        } catch (Exception e) {
            fail("Wrong exception.");
        }
        // now, we release some vertices (sub-slots) from the first group.
        // that should allow us to schedule more vertices from the first group
        s1.releaseSlot();
        s4.releaseSlot();
        assertEquals(4, sharingGroup.getTaskAssignment().getNumberOfSlots());
        assertEquals(2, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid1));
        assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid2));
        // we still cannot schedule anything from the second group of vertices
        try {
            scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 4, 5), sharingGroup), false).get();
            fail("Scheduler accepted too many tasks at the same time");
        } catch (ExecutionException e) {
            assertTrue(e.getCause() instanceof NoResourceAvailableException);
        } catch (Exception e) {
            fail("Wrong exception.");
        }
        // we can schedule something from the first vertex group
        SimpleSlot s5 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid1, 4, 5), sharingGroup), false).get();
        assertNotNull(s5);
        assertEquals(4, sharingGroup.getTaskAssignment().getNumberOfSlots());
        assertEquals(1, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid1));
        assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfAvailableSlotsForGroup(jid2));
        // now we release a slot from the second vertex group and schedule another task from that group
        s2_2.releaseSlot();
        SimpleSlot s5_2 = scheduler.allocateSlot(new ScheduledUnit(getTestVertex(jid2, 4, 5), sharingGroup), false).get();
        assertNotNull(s5_2);
        // release all slots
        s2.releaseSlot();
        s3.releaseSlot();
        s5.releaseSlot();
        s1_2.releaseSlot();
        s3_2.releaseSlot();
        s4_2.releaseSlot();
        s5_2.releaseSlot();
        // test that everything is released
        assertEquals(0, sharingGroup.getTaskAssignment().getNumberOfSlots());
        assertEquals(4, scheduler.getNumberOfAvailableSlots());
        // check the scheduler's bookkeeping
        assertEquals(0, scheduler.getNumberOfLocalizedAssignments());
        assertEquals(0, scheduler.getNumberOfNonLocalizedAssignments());
        assertEquals(10, scheduler.getNumberOfUnconstrainedAssignments());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionException(java.util.concurrent.ExecutionException) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) Test(org.junit.Test)
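
For reference, the bookkeeping assertions above read the sharing group's per-JobVertexID assignment state. A short sketch assuming the same sharingGroup, scheduler, jid1 and jid2 as in the test; the SlotSharingGroupAssignment type name is an assumption about what getTaskAssignment() returns in this Flink version:

SlotSharingGroupAssignment assignment = sharingGroup.getTaskAssignment(); // assumed type name
int heldSlots = assignment.getNumberOfSlots();                            // shared slots the group currently holds
int freeForJid1 = assignment.getNumberOfAvailableSlotsForGroup(jid1);     // held slots with no jid1 sub-slot yet
int freeForJid2 = assignment.getNumberOfAvailableSlotsForGroup(jid2);     // same, for jid2
int unassigned = scheduler.getNumberOfAvailableSlots();                   // slots not handed to any group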

Aggregations

JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID): 191 usages
Test (org.junit.Test): 145 usages
JobID (org.apache.flink.api.common.JobID): 88 usages
SimpleSlot (org.apache.flink.runtime.instance.SimpleSlot): 46 usages
HashMap (java.util.HashMap): 38 usages
Configuration (org.apache.flink.configuration.Configuration): 33 usages
Instance (org.apache.flink.runtime.instance.Instance): 33 usages
ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID): 30 usages
JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 30 usages
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 28 usages
ActorGateway (org.apache.flink.runtime.instance.ActorGateway): 27 usages
ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex): 25 usages
IOException (java.io.IOException): 24 usages
KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange): 24 usages
ExecutionException (java.util.concurrent.ExecutionException): 23 usages
ActorTaskManagerGateway (org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway): 22 usages
ArrayList (java.util.ArrayList): 20 usages
ActorRef (akka.actor.ActorRef): 18 usages
TaskDeploymentDescriptor (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor): 18 usages
ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex): 15 usages