Use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in the Apache Flink project.
Class ResourceManagerTest, method testJobMasterBecomesUnreachableTriggersDisconnect.
/**
 * Registers a job master gateway whose resource-manager heartbeat function always returns a
 * future failed with a {@link RecipientUnreachableException}, then expects the gateway's
 * disconnect-resource-manager callback to receive {@code resourceManagerId}.
 *
 * <p>The actual scenario is driven by {@code runHeartbeatTargetBecomesUnreachableTest}; this
 * method only supplies the registration step and the final verification.
 */
@Test
public void testJobMasterBecomesUnreachableTriggersDisconnect() throws Exception {
    // Completed by the gateway's disconnect callback with the id it was handed.
    final CompletableFuture<ResourceManagerId> disconnected = new CompletableFuture<>();
    final ResourceID jobMasterResource = ResourceID.generate();
    final JobID expectedJobId = new JobID();

    final TestingJobMasterGateway gateway =
            new TestingJobMasterGatewayBuilder()
                    .setAddress(UUID.randomUUID().toString())
                    .setResourceManagerHeartbeatFunction(
                            origin ->
                                    FutureUtils.completedExceptionally(
                                            new RecipientUnreachableException(
                                                    "sender",
                                                    "recipient",
                                                    "task executor is unreachable")))
                    .setDisconnectResourceManagerConsumer(disconnected::complete)
                    .build();

    final String gatewayAddress = gateway.getAddress();
    rpcService.registerGateway(gatewayAddress, gateway);

    final LeaderRetrievalService leaderRetrieval =
            new SettableLeaderRetrievalService(
                    gatewayAddress, gateway.getFencingToken().toUUID());
    highAvailabilityServices.setJobMasterLeaderRetrieverFunction(
            requested -> {
                // Only the job registered below may be looked up.
                assertThat(requested, is(equalTo(expectedJobId)));
                return leaderRetrieval;
            });

    runHeartbeatTargetBecomesUnreachableTest(
            unused -> {},
            rmGateway -> {
                final RegistrationResponse response =
                        rmGateway
                                .registerJobMaster(
                                        gateway.getFencingToken(),
                                        jobMasterResource,
                                        gatewayAddress,
                                        expectedJobId,
                                        TIMEOUT)
                                .get();
                assertThat(response, instanceOf(RegistrationResponse.Success.class));
            },
            rmResourceId -> {
                final ResourceManagerId reported = disconnected.get();
                assertThat(reported, is(equalTo(resourceManagerId)));
            });
}
Use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in the Apache Flink project.
Class ResourceManagerTest, method testDisconnectJobManager.
/**
 * Registers a job master, disconnects it with the given {@code jobStatus} and checks whether
 * the job leader id service is asked to remove the job.
 *
 * <p>For a globally terminal status the removal must happen; otherwise the removal latch must
 * not fire within a short wait.
 *
 * @param jobStatus status passed to {@code disconnectJobManager}
 * @throws Exception if any of the awaited operations fails
 */
private void testDisconnectJobManager(JobStatus jobStatus) throws Exception {
    // Latches fired by the job leader id service's add/remove hooks.
    final OneShotLatch jobAddedLatch = new OneShotLatch();
    final OneShotLatch jobRemovedLatch = new OneShotLatch();

    final TestingJobMasterGateway gateway =
            new TestingJobMasterGatewayBuilder()
                    .setAddress(UUID.randomUUID().toString())
                    .build();
    rpcService.registerGateway(gateway.getAddress(), gateway);

    final JobLeaderIdService leaderIdService =
            TestingJobLeaderIdService.newBuilder()
                    .setAddJobConsumer(unused -> jobAddedLatch.trigger())
                    .setRemoveJobConsumer(unused -> jobRemovedLatch.trigger())
                    .build();
    resourceManager =
            new ResourceManagerBuilder()
                    .withJobLeaderIdService(leaderIdService)
                    .buildAndStart();

    highAvailabilityServices.setJobMasterLeaderRetrieverFunction(
            requested ->
                    new SettableLeaderRetrievalService(
                            gateway.getAddress(), gateway.getFencingToken().toUUID()));

    final JobID jobId = JobID.generate();
    final ResourceManagerGateway rmGateway =
            resourceManager.getSelfGateway(ResourceManagerGateway.class);

    rmGateway.registerJobMaster(
            gateway.getFencingToken(),
            ResourceID.generate(),
            gateway.getAddress(),
            jobId,
            TIMEOUT);
    jobAddedLatch.await();

    rmGateway.disconnectJobManager(jobId, jobStatus, new FlinkException("Test exception"));

    if (jobStatus.isGloballyTerminalState()) {
        jobRemovedLatch.await();
        return;
    }

    // Non-terminal status: the job must stay, so the bounded wait is expected to time out.
    try {
        jobRemovedLatch.await(10L, TimeUnit.MILLISECONDS);
    } catch (TimeoutException expected) {
        return;
    }
    fail("We should not have removed the job.");
}
Use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in the Apache Flink project.
Class ResourceManagerTest, method testHeartbeatTimeoutWithJobMaster.
/**
 * Registers a job master gateway that records the origin of the first heartbeat request it
 * sees, then expects the gateway's disconnect-resource-manager callback to receive
 * {@code resourceManagerId}.
 *
 * <p>The actual scenario is driven by {@code runHeartbeatTimeoutTest}; this method only
 * supplies the registration step and the final verification.
 */
@Test
public void testHeartbeatTimeoutWithJobMaster() throws Exception {
    final CompletableFuture<ResourceManagerId> disconnected = new CompletableFuture<>();
    final CompletableFuture<ResourceID> heartbeatOrigin = new CompletableFuture<>();

    final TestingJobMasterGateway gateway =
            new TestingJobMasterGatewayBuilder()
                    .setResourceManagerHeartbeatFunction(
                            origin -> {
                                heartbeatOrigin.complete(origin);
                                return FutureUtils.completedVoidFuture();
                            })
                    .setDisconnectResourceManagerConsumer(disconnected::complete)
                    .build();

    final String gatewayAddress = gateway.getAddress();
    rpcService.registerGateway(gatewayAddress, gateway);

    final ResourceID jobMasterResource = ResourceID.generate();
    final JobID expectedJobId = new JobID();

    final LeaderRetrievalService leaderRetrieval =
            new SettableLeaderRetrievalService(
                    gatewayAddress, gateway.getFencingToken().toUUID());
    highAvailabilityServices.setJobMasterLeaderRetrieverFunction(
            requested -> {
                // Only the job registered below may be looked up.
                assertThat(requested, is(equalTo(expectedJobId)));
                return leaderRetrieval;
            });

    runHeartbeatTimeoutTest(
            unused -> {},
            rmGateway -> {
                final RegistrationResponse response =
                        rmGateway
                                .registerJobMaster(
                                        gateway.getFencingToken(),
                                        jobMasterResource,
                                        gatewayAddress,
                                        expectedJobId,
                                        TIMEOUT)
                                .get();
                assertThat(response, instanceOf(RegistrationResponse.Success.class));
            },
            rmResourceId -> {
                // A heartbeat request may or may not have arrived before the timeout fired,
                // so both "origin is the resource manager" and "nothing recorded" are valid.
                final ResourceID recordedOrigin = heartbeatOrigin.getNow(null);
                assertThat(recordedOrigin, anyOf(is(rmResourceId), is(nullValue())));

                final ResourceManagerId reported = disconnected.get();
                assertThat(reported, is(equalTo(resourceManagerId)));
            });
}
Use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in the Apache Flink project.
Class DefaultJobLeaderIdServiceTest, method testLeaderFutureWaitsForValidLeader.
/**
 * Verifies that the leader id future stays incomplete while no leader is known and completes
 * only after a new leader has been announced. In particular, a revoked leadership without a
 * successor must not complete the future.
 */
@Test(timeout = 10000)
public void testLeaderFutureWaitsForValidLeader() throws Exception {
    final JobID jobId = new JobID();

    // Start without any leader information.
    final SettableLeaderRetrievalService retrievalService =
            new SettableLeaderRetrievalService(null, null);
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    haServices.setJobMasterLeaderRetriever(jobId, retrievalService);

    final JobLeaderIdService service =
            new DefaultJobLeaderIdService(
                    haServices,
                    new ManuallyTriggeredScheduledExecutor(),
                    Time.milliseconds(5000L));
    service.start(new NoOpJobLeaderIdActions());
    service.addJob(jobId);

    // Announce a first leader and immediately revoke it again.
    retrievalService.notifyListener("foo", UUID.randomUUID());
    retrievalService.notifyListener(null, null);

    // With the leadership revoked, the requested future must still be pending.
    final CompletableFuture<JobMasterId> pendingLeaderId = service.getLeaderId(jobId);
    assertThat(pendingLeaderId.isDone(), is(false));

    // Announcing a successor completes the future with that leader's id.
    final UUID successorId = UUID.randomUUID();
    retrievalService.notifyListener("foo", successorId);

    final JobMasterId resolved = pendingLeaderId.get();
    assertThat(resolved, is(JobMasterId.fromUuidOrNull(successorId)));
}
Use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in the Apache Flink project.
Class DefaultJobLeaderIdServiceTest, method testIsStarted.
/**
 * Verifies that {@code isStarted()} reports {@code false} before {@code start}, {@code true}
 * after {@code start}, and {@code false} again after {@code stop}.
 */
@Test
public void testIsStarted() throws Exception {
    final JobID jobId = new JobID();

    final SettableLeaderRetrievalService retrievalService =
            new SettableLeaderRetrievalService(null, null);
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    haServices.setJobMasterLeaderRetriever(jobId, retrievalService);

    final JobLeaderIdActions actions = mock(JobLeaderIdActions.class);
    final DefaultJobLeaderIdService service =
            new DefaultJobLeaderIdService(
                    haServices, mock(ScheduledExecutor.class), Time.milliseconds(5000L));

    // Freshly constructed: not started yet.
    assertFalse(service.isStarted());

    service.start(actions);
    assertTrue(service.isStarted());

    service.stop();
    assertFalse(service.isStarted());
}
Aggregations