Search in sources :

Example 16 with TestingLeaderRetrievalService

use of org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService in project flink by apache.

the class UtilsTest method testYarnFlinkResourceManagerJobManagerLostLeadership.

/**
 * Checks that the YARN resource manager actor registers again with the job manager, and ends
 * up with the same number of registered resources, after the leader has been reported lost
 * and is then announced a second time.
 */
@Test
public void testYarnFlinkResourceManagerJobManagerLostLeadership() throws Exception {
    new JavaTestKit(system) {

        {
            // Single deadline shared by every wait in this test (3 minutes from now).
            final Deadline deadline = new FiniteDuration(3, TimeUnit.MINUTES).fromNow();
            Configuration flinkConfig = new Configuration();
            YarnConfiguration yarnConfig = new YarnConfiguration();
            TestingLeaderRetrievalService leaderRetrievalService = new TestingLeaderRetrievalService();
            String applicationMasterHostName = "localhost";
            String webInterfaceURL = "foobar";
            // NOTE(review): the lowercase `1l` literals are easy to misread as `11`; `1L` would be clearer.
            ContaineredTaskManagerParameters taskManagerParameters = new ContaineredTaskManagerParameters(1l, 1l, 1l, 1, new HashMap<String, String>());
            ContainerLaunchContext taskManagerLaunchContext = mock(ContainerLaunchContext.class);
            int yarnHeartbeatIntervalMillis = 1000;
            int maxFailedContainers = 10;
            int numInitialTaskManagers = 5;
            final YarnResourceManagerCallbackHandler callbackHandler = new YarnResourceManagerCallbackHandler();
            AMRMClientAsync<AMRMClient.ContainerRequest> resourceManagerClient = mock(AMRMClientAsync.class);
            NMClient nodeManagerClient = mock(NMClient.class);
            UUID leaderSessionID = UUID.randomUUID();
            // One fake container per initial task manager.
            final List<Container> containerList = new ArrayList<>();
            for (int i = 0; i < numInitialTaskManagers; i++) {
                containerList.add(new TestingContainer("container_" + i, "localhost"));
            }
            // Every container request made on the mocked client is answered by handing the next
            // prepared container to the callback handler, until the list is exhausted.
            doAnswer(new Answer() {

                int counter = 0;

                @Override
                public Object answer(InvocationOnMock invocation) throws Throwable {
                    if (counter < containerList.size()) {
                        callbackHandler.onContainersAllocated(Collections.singletonList(containerList.get(counter++)));
                    }
                    return null;
                }
            }).when(resourceManagerClient).addContainerRequest(Matchers.any(AMRMClient.ContainerRequest.class));
            ActorRef resourceManager = null;
            ActorRef leader1;
            try {
                // presumably ForwardingActor relays what it receives to this test kit's probe (getRef()) — confirm
                leader1 = system.actorOf(Props.create(TestingUtils.ForwardingActor.class, getRef(), Option.apply(leaderSessionID)));
                resourceManager = system.actorOf(Props.create(TestingYarnFlinkResourceManager.class, flinkConfig, yarnConfig, leaderRetrievalService, applicationMasterHostName, webInterfaceURL, taskManagerParameters, taskManagerLaunchContext, yarnHeartbeatIntervalMillis, maxFailedContainers, numInitialTaskManagers, callbackHandler, resourceManagerClient, nodeManagerClient));
                // Announce leader1 as the current leader.
                leaderRetrievalService.notifyListener(leader1.path().toString(), leaderSessionID);
                final AkkaActorGateway leader1Gateway = new AkkaActorGateway(leader1, leaderSessionID);
                final AkkaActorGateway resourceManagerGateway = new AkkaActorGateway(resourceManager, leaderSessionID);
                // Starting a container on the mocked node manager client reports the resource as
                // started to the resource manager, with leader1's gateway as the sender.
                doAnswer(new Answer() {

                    @Override
                    public Object answer(InvocationOnMock invocation) throws Throwable {
                        Container container = (Container) invocation.getArguments()[0];
                        resourceManagerGateway.tell(new NotifyResourceStarted(YarnFlinkResourceManager.extractResourceID(container)), leader1Gateway);
                        return null;
                    }
                }).when(nodeManagerClient).startContainer(Matchers.any(Container.class), Matchers.any(ContainerLaunchContext.class));
                // First registration round: wait for the registration request, then confirm it.
                expectMsgClass(deadline.timeLeft(), RegisterResourceManager.class);
                // NOTE(review): Collections.EMPTY_LIST is a raw type; a typed emptyList() may be preferable.
                resourceManagerGateway.tell(new RegisterResourceManagerSuccessful(leader1, Collections.EMPTY_LIST));
                // One Acknowledge is expected per container.
                for (int i = 0; i < containerList.size(); i++) {
                    expectMsgClass(deadline.timeLeft(), Acknowledge.class);
                }
                // Block until the resource manager answers the NotifyWhenResourcesRegistered request.
                Future<Object> taskManagerRegisteredFuture = resourceManagerGateway.ask(new NotifyWhenResourcesRegistered(numInitialTaskManagers), deadline.timeLeft());
                Await.ready(taskManagerRegisteredFuture, deadline.timeLeft());
                // Report leader loss (null address / session id), then announce the same leader again.
                leaderRetrievalService.notifyListener(null, null);
                leaderRetrievalService.notifyListener(leader1.path().toString(), leaderSessionID);
                // Second registration round after leadership was regained.
                expectMsgClass(deadline.timeLeft(), RegisterResourceManager.class);
                resourceManagerGateway.tell(new RegisterResourceManagerSuccessful(leader1, Collections.EMPTY_LIST));
                // Report every container as started once more, this time directly from the test.
                for (Container container : containerList) {
                    resourceManagerGateway.tell(new NotifyResourceStarted(YarnFlinkResourceManager.extractResourceID(container)), leader1Gateway);
                }
                for (int i = 0; i < containerList.size(); i++) {
                    expectMsgClass(deadline.timeLeft(), Acknowledge.class);
                }
                // The number of registered resources must equal the initial number of task managers.
                Future<Object> numberOfRegisteredResourcesFuture = resourceManagerGateway.ask(RequestNumberOfRegisteredResources.Instance, deadline.timeLeft());
                int numberOfRegisteredResources = (Integer) Await.result(numberOfRegisteredResourcesFuture, deadline.timeLeft());
                assertEquals(numInitialTaskManagers, numberOfRegisteredResources);
            } finally {
                // Stop the resource manager actor if it was created; leader1 is not stopped here.
                if (resourceManager != null) {
                    resourceManager.tell(PoisonPill.getInstance(), ActorRef.noSender());
                }
            }
        }
    };
}
Also used : AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Configuration(org.apache.flink.configuration.Configuration) TestingLeaderRetrievalService(org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService) ActorRef(akka.actor.ActorRef) ArrayList(java.util.ArrayList) ContaineredTaskManagerParameters(org.apache.flink.runtime.clusterframework.ContaineredTaskManagerParameters) Container(org.apache.hadoop.yarn.api.records.Container) TestingUtils(org.apache.flink.runtime.testingUtils.TestingUtils) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) RegisterResourceManagerSuccessful(org.apache.flink.runtime.clusterframework.messages.RegisterResourceManagerSuccessful) UUID(java.util.UUID) Deadline(scala.concurrent.duration.Deadline) FiniteDuration(scala.concurrent.duration.FiniteDuration) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) NotifyResourceStarted(org.apache.flink.runtime.clusterframework.messages.NotifyResourceStarted) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Answer(org.mockito.stubbing.Answer) InvocationOnMock(org.mockito.invocation.InvocationOnMock) NMClient(org.apache.hadoop.yarn.client.api.NMClient) JavaTestKit(akka.testkit.JavaTestKit) NotifyWhenResourcesRegistered(org.apache.flink.yarn.messages.NotifyWhenResourcesRegistered) Test(org.junit.Test)

Example 17 with TestingLeaderRetrievalService

use of org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService in project flink by apache.

the class AkkaKvStateLocationLookupServiceTest method testNoJobManagerRegistered.

/**
 * Walks the lookup service through the leader lifecycle: a lookup fails with
 * {@code UnknownJobManager} while no leader has been announced, is answered by
 * the leader once one is known, and fails again after the leader address has
 * been reset to <code>null</code>.
 */
@Test
public void testNoJobManagerRegistered() throws Exception {
    final TestingLeaderRetrievalService retrievalService = new TestingLeaderRetrievalService();
    final Queue<LookupKvStateLocation> lookupMessages = new LinkedBlockingQueue<>();
    final AkkaKvStateLocationLookupService service = new AkkaKvStateLocationLookupService(
        retrievalService,
        testActorSystem,
        TIMEOUT,
        new AkkaKvStateLocationLookupService.DisabledLookupRetryStrategyFactory());
    service.start();

    // Phase 1: no leader announced yet, so the lookup has to fail.
    try {
        Await.result(service.getKvStateLookupInfo(new JobID(), "coffee"), TIMEOUT);
        fail("Did not throw expected Exception");
    } catch (UnknownJobManager ignored) {
        // Expected
    }
    assertEquals("Received unexpected lookup", 0, lookupMessages.size());

    // Phase 2: announce a leader and make sure the request reaches it.
    final UUID noSessionId = null;
    final KvStateLocation expectedLocation = new KvStateLocation(new JobID(), new JobVertexID(), 8282, "tea");
    final ActorRef leaderActor = LookupResponseActor.create(lookupMessages, noSessionId, expectedLocation);
    final String leaderAddress = AkkaUtils.getAkkaURL(testActorSystem, leaderActor);
    retrievalService.notifyListener(leaderAddress, noSessionId);

    final JobID requestedJob = new JobID();
    final String requestedName = "tea";
    final Future<KvStateLocation> answer = service.getKvStateLookupInfo(requestedJob, requestedName);
    final KvStateLocation actualLocation = Await.result(answer, TIMEOUT);
    // The reply produced by the leader is handed back unchanged ...
    assertEquals(expectedLocation, actualLocation);
    // ... and exactly one matching lookup message arrived at the leader.
    assertEquals(1, lookupMessages.size());
    verifyLookupMsg(lookupMessages.poll(), requestedJob, requestedName);

    // Phase 3: the leader is gone again, so lookups fail as in phase 1.
    retrievalService.notifyListener(null, null);
    try {
        Await.result(service.getKvStateLookupInfo(new JobID(), "coffee"), TIMEOUT);
        fail("Did not throw expected Exception");
    } catch (UnknownJobManager ignored) {
        // Expected
    }
    // Nothing further may have been sent to the former leader.
    assertEquals(0, lookupMessages.size());
}
Also used : TestingLeaderRetrievalService(org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService) ActorRef(akka.actor.ActorRef) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) LookupKvStateLocation(org.apache.flink.runtime.query.KvStateMessage.LookupKvStateLocation) LookupKvStateLocation(org.apache.flink.runtime.query.KvStateMessage.LookupKvStateLocation) UUID(java.util.UUID) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 18 with TestingLeaderRetrievalService

use of org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService in project flink by apache.

the class JobLeaderIdServiceTest method jobTimeoutAfterLostLeadership.

/**
 * Checks the timeout handling around a leadership change: the timeout scheduled when the job
 * is added must be cancelled and become invalid as soon as a job leader is announced, and a
 * new, valid timeout must be scheduled once that leader is reported lost.
 */
@Test(timeout = 10000)
public void jobTimeoutAfterLostLeadership() throws Exception {
    final JobID jobId = new JobID();
    final String leaderAddress = "foobar";
    final UUID leaderSessionId = UUID.randomUUID();

    final TestingLeaderRetrievalService retrievalService = new TestingLeaderRetrievalService();
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    haServices.setJobMasterLeaderRetriever(jobId, retrievalService);

    // The mocked executor hands out these two futures, in order, for successive schedule calls.
    final ScheduledFuture<?> firstTimeout = mock(ScheduledFuture.class);
    final ScheduledFuture<?> secondTimeout = mock(ScheduledFuture.class);
    final Queue<ScheduledFuture<?>> pendingTimeouts = new ArrayDeque<>();
    pendingTimeouts.add(firstTimeout);
    pendingTimeouts.add(secondTimeout);

    // Remember the most recently scheduled runnable so the test can trigger it by hand.
    final ScheduledExecutor executor = mock(ScheduledExecutor.class);
    final AtomicReference<Runnable> scheduledRunnable = new AtomicReference<>();
    doAnswer(new Answer() {

        @Override
        public Object answer(InvocationOnMock invocation) throws Throwable {
            final Runnable task = (Runnable) invocation.getArguments()[0];
            scheduledRunnable.set(task);
            return pendingTimeouts.poll();
        }
    }).when(executor).schedule(any(Runnable.class), anyLong(), any(TimeUnit.class));

    // Remember the timeout id passed along with the latest timeout notification.
    final JobLeaderIdActions actions = mock(JobLeaderIdActions.class);
    final AtomicReference<UUID> reportedTimeoutId = new AtomicReference<>();
    doAnswer(new Answer() {

        @Override
        public Object answer(InvocationOnMock invocation) throws Throwable {
            final UUID timeoutId = (UUID) invocation.getArguments()[1];
            reportedTimeoutId.set(timeoutId);
            return null;
        }
    }).when(actions).notifyJobTimeout(eq(jobId), any(UUID.class));

    final JobLeaderIdService service = new JobLeaderIdService(haServices, executor, Time.milliseconds(5000L));
    service.start(actions);
    service.addJob(jobId);
    final Future<UUID> leaderIdFuture = service.getLeaderId(jobId);

    // Announce a leader for the job.
    retrievalService.notifyListener(leaderAddress, leaderSessionId);
    assertEquals(leaderSessionId, leaderIdFuture.get());
    assertTrue(service.containsJob(jobId));

    // The initial timeout was cancelled and nothing else has been scheduled so far.
    verify(firstTimeout).cancel(anyBoolean());
    verify(executor).schedule(any(Runnable.class), anyLong(), any(TimeUnit.class));

    // Running the stale runnable still notifies, but with an id that is no longer valid.
    final Runnable staleRunnable = scheduledRunnable.get();
    assertNotNull(staleRunnable);
    staleRunnable.run();
    verify(actions).notifyJobTimeout(eq(jobId), any(UUID.class));
    assertFalse(service.isValidTimeout(jobId, reportedTimeoutId.get()));

    // Report the loss of leadership, which has to schedule a second timeout.
    retrievalService.notifyListener("", null);
    verify(executor, times(2)).schedule(any(Runnable.class), anyLong(), any(TimeUnit.class));

    // The runnable captured now belongs to the new timeout and reports a valid id.
    final Runnable freshRunnable = scheduledRunnable.get();
    assertNotNull(freshRunnable);
    freshRunnable.run();
    verify(actions, times(2)).notifyJobTimeout(eq(jobId), any(UUID.class));
    assertTrue(service.isValidTimeout(jobId, reportedTimeoutId.get()));
}
Also used : TestingLeaderRetrievalService(org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService) AtomicReference(java.util.concurrent.atomic.AtomicReference) Time(org.apache.flink.api.common.time.Time) ArrayDeque(java.util.ArrayDeque) ScheduledFuture(java.util.concurrent.ScheduledFuture) ScheduledExecutor(org.apache.flink.runtime.concurrent.ScheduledExecutor) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) Answer(org.mockito.stubbing.Answer) Mockito.doAnswer(org.mockito.Mockito.doAnswer) InvocationOnMock(org.mockito.invocation.InvocationOnMock) TimeUnit(java.util.concurrent.TimeUnit) UUID(java.util.UUID) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 19 with TestingLeaderRetrievalService

use of org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService in project flink by apache.

the class JobLeaderIdServiceTest method testInitialJobTimeout.

/**
 * Checks that adding a job schedules a timeout runnable which, once executed, invokes
 * {@link JobLeaderIdActions#notifyJobTimeout(JobID, UUID)} with a timeout id that the
 * service still considers valid.
 */
@Test
public void testInitialJobTimeout() throws Exception {
    final JobID jobId = new JobID();

    final TestingLeaderRetrievalService retrievalService = new TestingLeaderRetrievalService();
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    haServices.setJobMasterLeaderRetriever(jobId, retrievalService);

    final ScheduledExecutor executor = mock(ScheduledExecutor.class);
    final JobLeaderIdActions actions = mock(JobLeaderIdActions.class);

    final JobLeaderIdService service = new JobLeaderIdService(haServices, executor, Time.milliseconds(5000L));
    service.start(actions);
    service.addJob(jobId);
    assertTrue(service.containsJob(jobId));

    // Grab the runnable that was scheduled for the job and execute it manually.
    final ArgumentCaptor<Runnable> scheduledRunnable = ArgumentCaptor.forClass(Runnable.class);
    verify(executor).schedule(scheduledRunnable.capture(), anyLong(), any(TimeUnit.class));
    scheduledRunnable.getValue().run();

    // Exactly one timeout notification was issued, and its id is valid for the job.
    final ArgumentCaptor<UUID> reportedTimeoutId = ArgumentCaptor.forClass(UUID.class);
    verify(actions).notifyJobTimeout(eq(jobId), reportedTimeoutId.capture());
    final UUID timeoutId = reportedTimeoutId.getValue();
    assertTrue(service.isValidTimeout(jobId, timeoutId));
}
Also used : TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) TestingLeaderRetrievalService(org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService) TimeUnit(java.util.concurrent.TimeUnit) Time(org.apache.flink.api.common.time.Time) UUID(java.util.UUID) JobID(org.apache.flink.api.common.JobID) ScheduledExecutor(org.apache.flink.runtime.concurrent.ScheduledExecutor) Test(org.junit.Test)

Example 20 with TestingLeaderRetrievalService

use of org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService in project flink by apache.

the class JobLeaderIdServiceTest method testRemovingJob.

/**
 * Checks that a pending job leader id future is completed exceptionally when the job is
 * removed before any leader has been announced.
 */
@Test(timeout = 10000)
public void testRemovingJob() throws Exception {
    final JobID jobId = new JobID();

    final TestingLeaderRetrievalService retrievalService = new TestingLeaderRetrievalService();
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    haServices.setJobMasterLeaderRetriever(jobId, retrievalService);

    final ScheduledExecutor executor = mock(ScheduledExecutor.class);
    final JobLeaderIdActions actions = mock(JobLeaderIdActions.class);

    final JobLeaderIdService service = new JobLeaderIdService(haServices, executor, Time.milliseconds(5000L));
    service.start(actions);
    service.addJob(jobId);

    final Future<UUID> pendingLeaderId = service.getLeaderId(jobId);

    // No leader was ever announced; drop the job while the future is still pending.
    service.removeJob(jobId);
    assertFalse(service.containsJob(jobId));

    try {
        pendingLeaderId.get();
        fail("The leader id future should be completed exceptionally.");
    } catch (ExecutionException ignored) {
        // expected exception
    }
}
Also used : TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) TestingLeaderRetrievalService(org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService) Time(org.apache.flink.api.common.time.Time) UUID(java.util.UUID) ExecutionException(java.util.concurrent.ExecutionException) JobID(org.apache.flink.api.common.JobID) ScheduledExecutor(org.apache.flink.runtime.concurrent.ScheduledExecutor) Test(org.junit.Test)

Aggregations

TestingLeaderRetrievalService (org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService)22 Test (org.junit.Test)21 UUID (java.util.UUID)17 ActorRef (akka.actor.ActorRef)13 JobID (org.apache.flink.api.common.JobID)11 FiniteDuration (scala.concurrent.duration.FiniteDuration)10 Props (akka.actor.Props)7 Timeout (akka.util.Timeout)7 TestingHighAvailabilityServices (org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices)7 Time (org.apache.flink.api.common.time.Time)6 ScheduledExecutor (org.apache.flink.runtime.concurrent.ScheduledExecutor)5 JobClientMessages (org.apache.flink.runtime.messages.JobClientMessages)5 LinkedBlockingQueue (java.util.concurrent.LinkedBlockingQueue)4 Configuration (org.apache.flink.configuration.Configuration)4 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)4 LookupKvStateLocation (org.apache.flink.runtime.query.KvStateMessage.LookupKvStateLocation)4 TestingFatalErrorHandler (org.apache.flink.runtime.util.TestingFatalErrorHandler)4 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)3 HeartbeatServices (org.apache.flink.runtime.heartbeat.HeartbeatServices)3 TestingLeaderElectionService (org.apache.flink.runtime.leaderelection.TestingLeaderElectionService)3