Use of org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService in the Apache Flink project.
Class UtilsTest, method testYarnFlinkResourceManagerJobManagerLostLeadership.
/**
 * Checks that the YARN resource manager still reports all initial task managers as registered
 * after the job manager leadership has been revoked and granted again.
 *
 * <p>The YARN clients are Mockito mocks: every container request is answered with one of the
 * prepared containers, and every container start is answered with a
 * {@code NotifyResourceStarted} message sent to the resource manager.
 */
@Test
public void testYarnFlinkResourceManagerJobManagerLostLeadership() throws Exception {
new JavaTestKit(system) {
// Instance initializer of the anonymous test kit: the whole test body runs here.
{
// One shared deadline bounds every wait in this test.
final Deadline deadline = new FiniteDuration(3, TimeUnit.MINUTES).fromNow();
Configuration flinkConfig = new Configuration();
YarnConfiguration yarnConfig = new YarnConfiguration();
TestingLeaderRetrievalService leaderRetrievalService = new TestingLeaderRetrievalService();
String applicationMasterHostName = "localhost";
String webInterfaceURL = "foobar";
ContaineredTaskManagerParameters taskManagerParameters = new ContaineredTaskManagerParameters(1l, 1l, 1l, 1, new HashMap<String, String>());
ContainerLaunchContext taskManagerLaunchContext = mock(ContainerLaunchContext.class);
int yarnHeartbeatIntervalMillis = 1000;
int maxFailedContainers = 10;
int numInitialTaskManagers = 5;
final YarnResourceManagerCallbackHandler callbackHandler = new YarnResourceManagerCallbackHandler();
AMRMClientAsync<AMRMClient.ContainerRequest> resourceManagerClient = mock(AMRMClientAsync.class);
NMClient nodeManagerClient = mock(NMClient.class);
UUID leaderSessionID = UUID.randomUUID();
// Prepare one container per initial task manager; the mocked client hands them out below.
final List<Container> containerList = new ArrayList<>();
for (int i = 0; i < numInitialTaskManagers; i++) {
containerList.add(new TestingContainer("container_" + i, "localhost"));
}
// Each container request is answered by passing the next prepared container to the
// callback handler; requests beyond the prepared containers are ignored.
doAnswer(new Answer() {
int counter = 0;
@Override
public Object answer(InvocationOnMock invocation) throws Throwable {
if (counter < containerList.size()) {
callbackHandler.onContainersAllocated(Collections.singletonList(containerList.get(counter++)));
}
return null;
}
}).when(resourceManagerClient).addContainerRequest(Matchers.any(AMRMClient.ContainerRequest.class));
ActorRef resourceManager = null;
ActorRef leader1;
try {
// leader1 plays the job manager; NOTE(review): presumably the ForwardingActor relays
// everything it receives to this test kit's getRef() - confirm against TestingUtils.
leader1 = system.actorOf(Props.create(TestingUtils.ForwardingActor.class, getRef(), Option.apply(leaderSessionID)));
resourceManager = system.actorOf(Props.create(TestingYarnFlinkResourceManager.class, flinkConfig, yarnConfig, leaderRetrievalService, applicationMasterHostName, webInterfaceURL, taskManagerParameters, taskManagerLaunchContext, yarnHeartbeatIntervalMillis, maxFailedContainers, numInitialTaskManagers, callbackHandler, resourceManagerClient, nodeManagerClient));
// Announce leader1 as the current leader to the resource manager's retrieval service.
leaderRetrievalService.notifyListener(leader1.path().toString(), leaderSessionID);
final AkkaActorGateway leader1Gateway = new AkkaActorGateway(leader1, leaderSessionID);
final AkkaActorGateway resourceManagerGateway = new AkkaActorGateway(resourceManager, leaderSessionID);
// Starting a container on the mocked node manager immediately reports the resource as
// started to the resource manager, with leader1 as the sender.
doAnswer(new Answer() {
@Override
public Object answer(InvocationOnMock invocation) throws Throwable {
Container container = (Container) invocation.getArguments()[0];
resourceManagerGateway.tell(new NotifyResourceStarted(YarnFlinkResourceManager.extractResourceID(container)), leader1Gateway);
return null;
}
}).when(nodeManagerClient).startContainer(Matchers.any(Container.class), Matchers.any(ContainerLaunchContext.class));
// First registration round: wait for the registration message, then confirm it.
expectMsgClass(deadline.timeLeft(), RegisterResourceManager.class);
resourceManagerGateway.tell(new RegisterResourceManagerSuccessful(leader1, Collections.EMPTY_LIST));
// One Acknowledge is expected per container; presumably these are the replies to the
// NotifyResourceStarted messages sent by the startContainer stub above.
for (int i = 0; i < containerList.size(); i++) {
expectMsgClass(deadline.timeLeft(), Acknowledge.class);
}
// Block until the resource manager has seen all initial task managers.
Future<Object> taskManagerRegisteredFuture = resourceManagerGateway.ask(new NotifyWhenResourcesRegistered(numInitialTaskManagers), deadline.timeLeft());
Await.ready(taskManagerRegisteredFuture, deadline.timeLeft());
// Revoke the leadership and grant it again to the same leader with the same session id.
leaderRetrievalService.notifyListener(null, null);
leaderRetrievalService.notifyListener(leader1.path().toString(), leaderSessionID);
// Second registration round after the leadership was regained.
expectMsgClass(deadline.timeLeft(), RegisterResourceManager.class);
resourceManagerGateway.tell(new RegisterResourceManagerSuccessful(leader1, Collections.EMPTY_LIST));
// Report every container as started again, this time directly from the test.
for (Container container : containerList) {
resourceManagerGateway.tell(new NotifyResourceStarted(YarnFlinkResourceManager.extractResourceID(container)), leader1Gateway);
}
for (int i = 0; i < containerList.size(); i++) {
expectMsgClass(deadline.timeLeft(), Acknowledge.class);
}
// The number of registered resources must equal the number of initial task managers.
Future<Object> numberOfRegisteredResourcesFuture = resourceManagerGateway.ask(RequestNumberOfRegisteredResources.Instance, deadline.timeLeft());
int numberOfRegisteredResources = (Integer) Await.result(numberOfRegisteredResourcesFuture, deadline.timeLeft());
assertEquals(numInitialTaskManagers, numberOfRegisteredResources);
} finally {
// Stop the resource manager actor even if the test failed half-way.
if (resourceManager != null) {
resourceManager.tell(PoisonPill.getInstance(), ActorRef.noSender());
}
}
}
};
}
Use of org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService in the Apache Flink project.
Class AkkaKvStateLocationLookupServiceTest, method testNoJobManagerRegistered.
/**
 * Walks the lookup service through three phases: before any leader has been announced, while
 * a leader is known, and after the leader has been revoked (leader address
 * <code>null</code>). Lookups in the first and last phase must fail with
 * {@code UnknownJobManager}; the lookup in the middle phase must be answered by the leader.
 */
@Test
public void testNoJobManagerRegistered() throws Exception {
    TestingLeaderRetrievalService leaderRetrievalService = new TestingLeaderRetrievalService();
    Queue<LookupKvStateLocation> receivedLookups = new LinkedBlockingQueue<>();
    AkkaKvStateLocationLookupService lookupService = new AkkaKvStateLocationLookupService(
            leaderRetrievalService,
            testActorSystem,
            TIMEOUT,
            new AkkaKvStateLocationLookupService.DisabledLookupRetryStrategyFactory());
    lookupService.start();

    // Phase 1: no leader has been announced yet, so the lookup has to fail.
    try {
        Await.result(lookupService.getKvStateLookupInfo(new JobID(), "coffee"), TIMEOUT);
        fail("Did not throw expected Exception");
    } catch (UnknownJobManager expectedFailure) {
        // This is the failure the test is waiting for.
    }
    assertEquals("Received unexpected lookup", 0, receivedLookups.size());

    // Phase 2: announce a leader; the lookup must now be sent to it and answered by it.
    UUID leaderSessionId = null;
    KvStateLocation expectedLocation = new KvStateLocation(new JobID(), new JobVertexID(), 8282, "tea");
    ActorRef leaderActor = LookupResponseActor.create(receivedLookups, leaderSessionId, expectedLocation);
    String leaderAddress = AkkaUtils.getAkkaURL(testActorSystem, leaderActor);
    leaderRetrievalService.notifyListener(leaderAddress, leaderSessionId);

    JobID queriedJobId = new JobID();
    String queriedName = "tea";
    Future<KvStateLocation> leaderAnswer = lookupService.getKvStateLookupInfo(queriedJobId, queriedName);
    KvStateLocation actualLocation = Await.result(leaderAnswer, TIMEOUT);
    assertEquals(expectedLocation, actualLocation);

    // Exactly one lookup message must have arrived, carrying the queried job id and name.
    assertEquals(1, receivedLookups.size());
    verifyLookupMsg(receivedLookups.poll(), queriedJobId, queriedName);

    // Phase 3: revoke the leader; lookups have to fail again.
    leaderRetrievalService.notifyListener(null, null);
    try {
        Future<KvStateLocation> lookupWithoutLeader = lookupService.getKvStateLookupInfo(new JobID(), "coffee");
        Await.result(lookupWithoutLeader, TIMEOUT);
        fail("Did not throw expected Exception");
    } catch (UnknownJobManager expectedFailure) {
        // This is the failure the test is waiting for.
    }

    // The failed lookup must not have produced another message.
    assertEquals(0, receivedLookups.size());
}
Use of org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService in the Apache Flink project.
Class JobLeaderIdServiceTest, method jobTimeoutAfterLostLeadership.
/**
 * Tests that the first scheduled timeout gets cancelled once a job leader has been found, and
 * that a second timeout is scheduled (and is the valid one) after the job manager has lost
 * its leadership.
 */
@Test(timeout = 10000)
public void jobTimeoutAfterLostLeadership() throws Exception {
    final JobID jobId = new JobID();
    final String address = "foobar";
    final UUID leaderId = UUID.randomUUID();

    TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    TestingLeaderRetrievalService leaderRetrievalService = new TestingLeaderRetrievalService();
    haServices.setJobMasterLeaderRetriever(jobId, leaderRetrievalService);

    // The mocked executor hands out these two futures, in order, for the scheduled timeouts.
    ScheduledFuture<?> firstTimeout = mock(ScheduledFuture.class);
    ScheduledFuture<?> secondTimeout = mock(ScheduledFuture.class);
    final Queue<ScheduledFuture<?>> pendingTimeouts = new ArrayDeque<>(Arrays.asList(firstTimeout, secondTimeout));

    ScheduledExecutor scheduledExecutor = mock(ScheduledExecutor.class);
    final AtomicReference<Runnable> latestRunnable = new AtomicReference<>();

    // Remember the most recently scheduled runnable so the test can trigger it by hand.
    Answer<Object> captureRunnable = new Answer<Object>() {
        @Override
        public Object answer(InvocationOnMock invocation) throws Throwable {
            Runnable scheduled = (Runnable) invocation.getArguments()[0];
            latestRunnable.set(scheduled);
            return pendingTimeouts.poll();
        }
    };
    doAnswer(captureRunnable).when(scheduledExecutor).schedule(any(Runnable.class), anyLong(), any(TimeUnit.class));

    Time timeout = Time.milliseconds(5000L);
    JobLeaderIdActions jobLeaderIdActions = mock(JobLeaderIdActions.class);
    final AtomicReference<UUID> latestTimeoutId = new AtomicReference<>();

    // Remember the timeout id passed to the most recent job timeout notification.
    Answer<Object> captureTimeoutId = new Answer<Object>() {
        @Override
        public Object answer(InvocationOnMock invocation) throws Throwable {
            UUID reportedId = (UUID) invocation.getArguments()[1];
            latestTimeoutId.set(reportedId);
            return null;
        }
    };
    doAnswer(captureTimeoutId).when(jobLeaderIdActions).notifyJobTimeout(eq(jobId), any(UUID.class));

    JobLeaderIdService jobLeaderIdService = new JobLeaderIdService(haServices, scheduledExecutor, timeout);
    jobLeaderIdService.start(jobLeaderIdActions);
    jobLeaderIdService.addJob(jobId);
    Future<UUID> leaderIdFuture = jobLeaderIdService.getLeaderId(jobId);

    // Announce the new leader to the leader id service.
    leaderRetrievalService.notifyListener(address, leaderId);
    assertEquals(leaderId, leaderIdFuture.get());
    assertTrue(jobLeaderIdService.containsJob(jobId));

    // The first timeout must have been cancelled, and only one timeout scheduled so far.
    verify(firstTimeout).cancel(anyBoolean());
    verify(scheduledExecutor).schedule(any(Runnable.class), anyLong(), any(TimeUnit.class));

    // Run the initial timeout runnable by hand; its timeout id must no longer be valid.
    Runnable staleTimeoutRunnable = latestRunnable.get();
    assertNotNull(staleTimeoutRunnable);
    staleTimeoutRunnable.run();
    verify(jobLeaderIdActions).notifyJobTimeout(eq(jobId), any(UUID.class));
    assertFalse(jobLeaderIdService.isValidTimeout(jobId, latestTimeoutId.get()));

    // Lose the leadership; this must schedule a second timeout.
    leaderRetrievalService.notifyListener("", null);
    verify(scheduledExecutor, times(2)).schedule(any(Runnable.class), anyLong(), any(TimeUnit.class));

    // The runnable captured last now belongs to the new timeout, whose id must be valid.
    Runnable freshTimeoutRunnable = latestRunnable.get();
    assertNotNull(freshTimeoutRunnable);
    freshTimeoutRunnable.run();
    verify(jobLeaderIdActions, times(2)).notifyJobTimeout(eq(jobId), any(UUID.class));
    assertTrue(jobLeaderIdService.isValidTimeout(jobId, latestTimeoutId.get()));
}
Use of org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService in the Apache Flink project.
Class JobLeaderIdServiceTest, method testInitialJobTimeout.
/**
 * Tests that adding a job schedules a timeout runnable which, when executed, calls
 * {@link JobLeaderIdActions#notifyJobTimeout(JobID, UUID)} with a timeout id that the
 * service considers valid.
 */
@Test
public void testInitialJobTimeout() throws Exception {
    final JobID jobId = new JobID();

    TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    TestingLeaderRetrievalService leaderRetrievalService = new TestingLeaderRetrievalService();
    haServices.setJobMasterLeaderRetriever(jobId, leaderRetrievalService);

    ScheduledExecutor scheduledExecutor = mock(ScheduledExecutor.class);
    Time timeout = Time.milliseconds(5000L);
    JobLeaderIdActions jobLeaderIdActions = mock(JobLeaderIdActions.class);

    JobLeaderIdService jobLeaderIdService = new JobLeaderIdService(haServices, scheduledExecutor, timeout);
    jobLeaderIdService.start(jobLeaderIdActions);
    jobLeaderIdService.addJob(jobId);
    assertTrue(jobLeaderIdService.containsJob(jobId));

    // Grab the runnable that was handed to the executor and trigger it by hand.
    ArgumentCaptor<Runnable> scheduledRunnable = ArgumentCaptor.forClass(Runnable.class);
    verify(scheduledExecutor).schedule(scheduledRunnable.capture(), anyLong(), any(TimeUnit.class));
    scheduledRunnable.getValue().run();

    // The timeout must have been reported exactly once, with an id that is still valid.
    ArgumentCaptor<UUID> reportedTimeoutId = ArgumentCaptor.forClass(UUID.class);
    verify(jobLeaderIdActions).notifyJobTimeout(eq(jobId), reportedTimeoutId.capture());
    UUID timeoutId = reportedTimeoutId.getValue();
    assertTrue(jobLeaderIdService.isValidTimeout(jobId, timeoutId));
}
Use of org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService in the Apache Flink project.
Class JobLeaderIdServiceTest, method testRemovingJob.
/**
 * Tests that a leader id future requested for a job is completed exceptionally when the job
 * is removed before a leader has been found.
 */
@Test(timeout = 10000)
public void testRemovingJob() throws Exception {
    final JobID jobId = new JobID();

    TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    TestingLeaderRetrievalService leaderRetrievalService = new TestingLeaderRetrievalService();
    haServices.setJobMasterLeaderRetriever(jobId, leaderRetrievalService);

    ScheduledExecutor scheduledExecutor = mock(ScheduledExecutor.class);
    Time timeout = Time.milliseconds(5000L);
    JobLeaderIdActions jobLeaderIdActions = mock(JobLeaderIdActions.class);

    JobLeaderIdService jobLeaderIdService = new JobLeaderIdService(haServices, scheduledExecutor, timeout);
    jobLeaderIdService.start(jobLeaderIdActions);
    jobLeaderIdService.addJob(jobId);
    Future<UUID> pendingLeaderId = jobLeaderIdService.getLeaderId(jobId);

    // Drop the job while the leader id is still unknown.
    jobLeaderIdService.removeJob(jobId);
    assertFalse(jobLeaderIdService.containsJob(jobId));

    // Waiting on the future must now end in an ExecutionException.
    boolean completedExceptionally = false;
    try {
        pendingLeaderId.get();
    } catch (ExecutionException expectedFailure) {
        completedExceptionally = true;
    }
    if (!completedExceptionally) {
        fail("The leader id future should be completed exceptionally.");
    }
}
Aggregations