Example use of org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService in the Apache Flink project.
Class JobMasterTest, method testHeartbeatTimeoutWithTaskManager.
/**
 * Verifies that the JobMaster tells a registered TaskManager to disconnect once the heartbeat
 * timeout for that TaskManager fires.
 *
 * <p>The heartbeat scheduler is a Mockito mock, so nothing runs on its own: the test captures the
 * periodic heartbeat runnable and the timeout runnable and invokes them by hand.
 */
@Test
public void testHeartbeatTimeoutWithTaskManager() throws Exception {
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    final TestingLeaderRetrievalService rmLeaderRetrievalService = new TestingLeaderRetrievalService();
    haServices.setResourceManagerLeaderRetriever(rmLeaderRetrievalService);
    haServices.setCheckpointRecoveryFactory(mock(CheckpointRecoveryFactory.class));

    final TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();

    final String jobManagerAddress = "jm";
    final UUID jmLeaderId = UUID.randomUUID();
    final ResourceID jmResourceId = new ResourceID(jobManagerAddress);

    final String taskManagerAddress = "tm";
    final ResourceID tmResourceId = new ResourceID(taskManagerAddress);
    final TaskManagerLocation taskManagerLocation = new TaskManagerLocation(tmResourceId, InetAddress.getLoopbackAddress(), 1234);
    final TaskExecutorGateway taskExecutorGateway = mock(TaskExecutorGateway.class);

    final TestingSerialRpcService rpc = new TestingSerialRpcService();
    rpc.registerGateway(taskManagerAddress, taskExecutorGateway);

    final long heartbeatInterval = 1L;
    final long heartbeatTimeout = 5L;
    // mocked so that scheduled runnables can be captured instead of being executed
    final ScheduledExecutor scheduledExecutor = mock(ScheduledExecutor.class);
    final HeartbeatServices heartbeatServices = new TestingHeartbeatServices(heartbeatInterval, heartbeatTimeout, scheduledExecutor);

    final JobGraph jobGraph = new JobGraph();

    // Keep a handle on the pool handed to the JobMaster so it can be shut down below;
    // creating it inline would leave its worker thread alive after the test.
    final java.util.concurrent.ScheduledExecutorService jobMasterExecutor = Executors.newScheduledThreadPool(1);

    try {
        final JobMaster jobMaster = new JobMaster(jmResourceId, jobGraph, new Configuration(), rpc, haServices, heartbeatServices, jobMasterExecutor, mock(BlobLibraryCacheManager.class), mock(RestartStrategyFactory.class), Time.of(10, TimeUnit.SECONDS), null, mock(OnCompletionActions.class), testingFatalErrorHandler, new FlinkUserCodeClassLoader(new URL[0]));

        // also start the heartbeat manager in job manager
        jobMaster.start(jmLeaderId);

        // register task manager will trigger monitoring heartbeat target, schedule heartbeat request in interval time
        jobMaster.registerTaskManager(taskManagerAddress, taskManagerLocation, jmLeaderId);

        // capture the periodic heartbeat request that was scheduled on the mock
        ArgumentCaptor<Runnable> heartbeatRunnableCaptor = ArgumentCaptor.forClass(Runnable.class);
        verify(scheduledExecutor, times(1)).scheduleAtFixedRate(heartbeatRunnableCaptor.capture(), eq(0L), eq(heartbeatInterval), eq(TimeUnit.MILLISECONDS));
        Runnable heartbeatRunnable = heartbeatRunnableCaptor.getValue();

        // capture the one-shot timeout that was scheduled on the mock
        ArgumentCaptor<Runnable> timeoutRunnableCaptor = ArgumentCaptor.forClass(Runnable.class);
        verify(scheduledExecutor).schedule(timeoutRunnableCaptor.capture(), eq(heartbeatTimeout), eq(TimeUnit.MILLISECONDS));
        Runnable timeoutRunnable = timeoutRunnableCaptor.getValue();

        // run the first heartbeat request
        heartbeatRunnable.run();
        verify(taskExecutorGateway, times(1)).heartbeatFromJobManager(eq(jmResourceId));

        // run the timeout runnable to simulate a heartbeat timeout
        timeoutRunnable.run();
        verify(taskExecutorGateway).disconnectJobManager(eq(jobGraph.getJobID()), any(TimeoutException.class));

        // check if a concurrent error occurred
        testingFatalErrorHandler.rethrowError();
    } finally {
        try {
            rpc.stopService();
        } finally {
            // release the pool even if stopping the RPC service throws
            jobMasterExecutor.shutdownNow();
        }
    }
}
Example use of org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService in the Apache Flink project.
Class AkkaKvStateLocationLookupServiceTest, method testRetryOnUnknownJobManager.
/**
 * Checks that a lookup is retried while no leader notification has arrived, and that a lookup
 * succeeds once the leader is announced during a retry.
 */
@Test
public void testRetryOnUnknownJobManager() throws Exception {
    final Queue<LookupRetryStrategy> pendingStrategies = new LinkedBlockingQueue<>();

    // Hands out whichever strategy is queued next (null when the queue is empty).
    LookupRetryStrategyFactory strategyFactory = new LookupRetryStrategyFactory() {
        @Override
        public LookupRetryStrategy createRetryStrategy() {
            return pendingStrategies.poll();
        }
    };

    final TestingLeaderRetrievalService leaderService = new TestingLeaderRetrievalService();

    AkkaKvStateLocationLookupService lookupService =
        new AkkaKvStateLocationLookupService(leaderService, testActorSystem, TIMEOUT, strategyFactory);
    lookupService.start();

    // ---- Phase 1: a retry is attempted ----

    final AtomicBoolean hasRetried = new AtomicBoolean();

    pendingStrategies.add(new LookupRetryStrategy() {
        @Override
        public FiniteDuration getRetryDelay() {
            return FiniteDuration.Zero();
        }

        @Override
        public boolean tryRetry() {
            // grants exactly one retry: only the first call flips the flag and returns true
            return hasRetried.compareAndSet(false, true);
        }
    });

    Future<KvStateLocation> firstLookup = lookupService.getKvStateLookupInfo(new JobID(), "yessir");
    // only wait for completion here; the outcome of this lookup is not inspected
    Await.ready(firstLookup, TIMEOUT);

    assertTrue("Did not retry ", hasRetried.get());

    // ---- Phase 2: the leader is announced from within the retry ----

    Queue<LookupKvStateLocation> receivedLookups = new LinkedBlockingQueue<>();
    KvStateLocation expected = new KvStateLocation(new JobID(), new JobVertexID(), 12122, "garlic");

    ActorRef responder = LookupResponseActor.create(receivedLookups, null, expected);
    final String responderAddress = AkkaUtils.getAkkaURL(testActorSystem, responder);

    pendingStrategies.add(new LookupRetryStrategy() {
        @Override
        public FiniteDuration getRetryDelay() {
            return FiniteDuration.apply(100, TimeUnit.MILLISECONDS);
        }

        @Override
        public boolean tryRetry() {
            // publish the responder as leader before allowing the retry to proceed
            leaderService.notifyListener(responderAddress, null);
            return true;
        }
    });

    Future<KvStateLocation> secondLookup = lookupService.getKvStateLookupInfo(new JobID(), "yessir");
    KvStateLocation location = Await.result(secondLookup, TIMEOUT);

    assertEquals(expected, location);
}
Aggregations