Use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in the Apache Flink project.
Class DefaultJobLeaderIdServiceTest, method jobTimeoutAfterLostLeadership.
/**
 * Tests that a timeout gets cancelled once a job leader has been found. Furthermore, it tests
 * that a new timeout is registered after the JobManager has lost leadership.
 *
 * @throws Exception if waiting for the leader id future fails
 */
@Test(timeout = 10000)
public void jobTimeoutAfterLostLeadership() throws Exception {
    final JobID jobId = new JobID();
    final String address = "foobar";
    final JobMasterId leaderId = JobMasterId.generate();

    TestingHighAvailabilityServices highAvailabilityServices = new TestingHighAvailabilityServices();
    SettableLeaderRetrievalService leaderRetrievalService = new SettableLeaderRetrievalService(null, null);
    highAvailabilityServices.setJobMasterLeaderRetriever(jobId, leaderRetrievalService);

    // the mocked executor hands these futures out in order, one per schedule(...) call
    ScheduledFuture<?> timeout1 = mock(ScheduledFuture.class);
    ScheduledFuture<?> timeout2 = mock(ScheduledFuture.class);
    final Queue<ScheduledFuture<?>> timeoutQueue = new ArrayDeque<>(Arrays.asList(timeout1, timeout2));

    ScheduledExecutor scheduledExecutor = mock(ScheduledExecutor.class);
    final AtomicReference<Runnable> lastRunnable = new AtomicReference<>();
    // remember the most recently scheduled runnable so the test can trigger it by hand
    doAnswer(invocation -> {
        lastRunnable.set((Runnable) invocation.getArguments()[0]);
        return timeoutQueue.poll();
    }).when(scheduledExecutor).schedule(any(Runnable.class), anyLong(), any(TimeUnit.class));

    Time timeout = Time.milliseconds(5000L);

    JobLeaderIdActions jobLeaderIdActions = mock(JobLeaderIdActions.class);
    final AtomicReference<UUID> lastTimeoutId = new AtomicReference<>();
    // remember the timeout id passed with the latest job timeout notification
    doAnswer(invocation -> {
        lastTimeoutId.set((UUID) invocation.getArguments()[1]);
        return null;
    }).when(jobLeaderIdActions).notifyJobTimeout(eq(jobId), any(UUID.class));

    JobLeaderIdService jobLeaderIdService = new DefaultJobLeaderIdService(highAvailabilityServices, scheduledExecutor, timeout);
    jobLeaderIdService.start(jobLeaderIdActions);
    jobLeaderIdService.addJob(jobId);

    CompletableFuture<JobMasterId> leaderIdFuture = jobLeaderIdService.getLeaderId(jobId);

    // notify the leader id service about the new leader
    leaderRetrievalService.notifyListener(address, leaderId.toUUID());

    assertEquals(leaderId, leaderIdFuture.get());
    assertTrue(jobLeaderIdService.containsJob(jobId));

    // check that the first timeout got cancelled
    verify(timeout1, times(1)).cancel(anyBoolean());
    verify(scheduledExecutor, times(1)).schedule(any(Runnable.class), anyLong(), any(TimeUnit.class));

    // initial timeout runnable which should no longer have an effect
    Runnable runnable = lastRunnable.get();
    assertNotNull(runnable);
    runnable.run();
    verify(jobLeaderIdActions, times(1)).notifyJobTimeout(eq(jobId), any(UUID.class));

    // the timeout should no longer be valid
    assertFalse(jobLeaderIdService.isValidTimeout(jobId, lastTimeoutId.get()));

    // lose leadership
    leaderRetrievalService.notifyListener("", null);
    verify(scheduledExecutor, times(2)).schedule(any(Runnable.class), anyLong(), any(TimeUnit.class));

    // the second runnable should be the new timeout
    runnable = lastRunnable.get();
    assertNotNull(runnable);
    runnable.run();
    verify(jobLeaderIdActions, times(2)).notifyJobTimeout(eq(jobId), any(UUID.class));

    // the new timeout should be valid
    assertTrue(jobLeaderIdService.isValidTimeout(jobId, lastTimeoutId.get()));
}
Use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in the Apache Flink project.
Class LeaderGatewayRetrieverTest, method testGatewayRetrievalFailures.
/**
 * Checks that the gateway retrieval gets retried when an attempt fails.
 */
@Test
public void testGatewayRetrievalFailures() throws Exception {
    final UUID leaderSessionId = UUID.randomUUID();
    final String leaderAddress = "localhost";
    final RpcGateway expectedGateway = mock(RpcGateway.class);

    final TestingLeaderGatewayRetriever gatewayRetriever = new TestingLeaderGatewayRetriever(expectedGateway);
    final SettableLeaderRetrievalService retrievalService = new SettableLeaderRetrievalService();
    retrievalService.start(gatewayRetriever);

    // take hold of the future before any leader has been announced
    final CompletableFuture<RpcGateway> initialFuture = gatewayRetriever.getFuture();

    // announcing a leader kicks off the first gateway retrieval attempt
    retrievalService.notifyListener(leaderAddress, leaderSessionId);

    // the future obtained above is expected to have been failed
    try {
        initialFuture.get();
        fail("The first future should have been failed.");
    } catch (ExecutionException ignored) {
        // that's what we expect
    }

    // the second attempt is expected to fail too
    final boolean secondAttemptSucceeded = gatewayRetriever.getNow().isPresent();
    assertFalse(secondAttemptSucceeded);

    // the third attempt is expected to deliver the gateway
    final RpcGateway retrievedGateway = gatewayRetriever.getNow().get();
    assertEquals(expectedGateway, retrievedGateway);
}
Use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in the Apache Flink project.
Class AbstractDispatcherTest, method setUp.
/**
 * Creates fresh heartbeat services, high-availability services, configuration and blob server
 * before each test.
 */
@Before
public void setUp() throws Exception {
    configuration = new Configuration();
    heartbeatServices = new HeartbeatServices(1000L, 10000L);

    // testing HA services wired with standalone / embedded component implementations
    haServices = new TestingHighAvailabilityServices();
    haServices.setJobResultStore(new EmbeddedJobResultStore());
    haServices.setJobGraphStore(new StandaloneJobGraphStore());
    haServices.setResourceManagerLeaderRetriever(new SettableLeaderRetrievalService());
    haServices.setCheckpointRecoveryFactory(new StandaloneCheckpointRecoveryFactory());

    // the blob server is handed a newly created temporary folder
    blobServer =
            new BlobServer(
                    configuration,
                    temporaryFolder.newFolder(),
                    new VoidBlobStore());
}
Use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in the Apache Flink project.
Class JobMasterTest, method setup.
/**
 * Re-creates the ids, fatal error handler, high-availability services and configuration used by
 * each test.
 *
 * @throws IOException if the temporary blob storage folder cannot be created
 */
@Before
public void setup() throws IOException {
    jobMasterId = JobMasterId.generate();
    jmResourceId = ResourceID.generate();
    testingFatalErrorHandler = new TestingFatalErrorHandler();

    // HA services with a settable resource manager leader retriever that starts without a leader
    rmLeaderRetrievalService = new SettableLeaderRetrievalService(null, null);
    haServices = new TestingHighAvailabilityServices();
    haServices.setResourceManagerLeaderRetriever(rmLeaderRetrievalService);
    haServices.setCheckpointRecoveryFactory(new StandaloneCheckpointRecoveryFactory());

    // point the blob storage directory option at a newly created temporary folder
    configuration = new Configuration();
    final String blobStoragePath = temporaryFolder.newFolder().getAbsolutePath();
    configuration.setString(BlobServerOptions.STORAGE_DIRECTORY, blobStoragePath);
}
Use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in the Apache Flink project.
Class DefaultJobLeaderServiceTest, method handlesConcurrentJobAdditionsAndLeaderChanges.
/**
 * Verifies that jobs can be added to and removed from the JobLeaderService while leader
 * retrieval notifications are delivered concurrently. See FLINK-16373.
 */
@Test
public void handlesConcurrentJobAdditionsAndLeaderChanges() throws Exception {
    final int numberOperations = 20;

    // every retrieval service handed out by the HA services is recorded here
    final BlockingQueue<SettableLeaderRetrievalService> createdRetrievalServices =
            new ArrayBlockingQueue<>(numberOperations);
    final HighAvailabilityServices haServices =
            new TestingHighAvailabilityServicesBuilder()
                    .setJobMasterLeaderRetrieverFunction(
                            ignoredJobId -> {
                                final SettableLeaderRetrievalService retrievalService =
                                        new SettableLeaderRetrievalService();
                                createdRetrievalServices.offer(retrievalService);
                                return retrievalService;
                            })
                    .build();

    final TestingJobLeaderListener jobLeaderListener = new TestingJobLeaderListener();
    final JobLeaderService jobLeaderService =
            new DefaultJobLeaderService(
                    new LocalUnresolvedTaskManagerLocation(),
                    RetryingRegistrationConfiguration.defaultConfiguration());
    jobLeaderService.start(
            "foobar",
            rpcServiceResource.getTestingRpcService(),
            haServices,
            jobLeaderListener);

    // background thread that repeatedly adds a job and removes it again
    final CheckedThread addJobAction =
            new CheckedThread() {
                @Override
                public void go() throws Exception {
                    int completed = 0;
                    while (completed < numberOperations) {
                        final JobID jobId = JobID.generate();
                        jobLeaderService.addJob(jobId, "foobar");
                        Thread.yield();
                        jobLeaderService.removeJob(jobId);
                        completed++;
                    }
                }
            };
    addJobAction.start();

    // meanwhile announce a leader on each retrieval service as it becomes available
    for (int remaining = numberOperations; remaining > 0; remaining--) {
        final SettableLeaderRetrievalService retrievalService = createdRetrievalServices.take();
        retrievalService.notifyListener("foobar", UUID.randomUUID());
    }

    addJobAction.sync();
}
Aggregations