Use of org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices in the Apache Flink project.
Class SlotProtocolTest, method testSlotsUnavailableRequest.
/**
 * Tests the slot request protocol when no slot is available at request time.
 *
 * <p>Checks that
 * 1) the SlotRequest is routed to the SlotManager,
 * 2) the SlotRequest is confirmed,
 * 3) the SlotRequest leads to a container allocation, and
 * 4) once a slot becomes available, the TaskExecutor gets a SlotRequest.
 *
 * @throws Exception if setting up or driving the resource manager fails
 */
@Test
public void testSlotsUnavailableRequest() throws Exception {
    final String rmAddress = "/rm1";
    final String jmAddress = "/jm1";
    final JobID jobID = new JobID();

    testRpcService.registerGateway(jmAddress, mock(JobMasterGateway.class));

    final TestingHighAvailabilityServices testingHaServices = new TestingHighAvailabilityServices();
    final UUID rmLeaderID = UUID.randomUUID();
    final UUID jmLeaderID = UUID.randomUUID();
    TestingLeaderElectionService rmLeaderElectionService =
        configureHA(testingHaServices, jobID, rmAddress, rmLeaderID, jmAddress, jmLeaderID);

    ResourceManagerConfiguration resourceManagerConfiguration =
        new ResourceManagerConfiguration(Time.seconds(5L), Time.seconds(5L), Time.minutes(5L));

    JobLeaderIdService jobLeaderIdService = new JobLeaderIdService(
        testingHaServices,
        testRpcService.getScheduledExecutor(),
        resourceManagerConfiguration.getJobTimeout());

    final TestingSlotManagerFactory slotManagerFactory = new TestingSlotManagerFactory();

    SpiedResourceManager resourceManager = new SpiedResourceManager(
        testRpcService,
        resourceManagerConfiguration,
        testingHaServices,
        slotManagerFactory,
        mock(MetricRegistry.class),
        jobLeaderIdService,
        mock(FatalErrorHandler.class));
    resourceManager.start();

    // grant leadership to the resource manager before registering the job manager
    rmLeaderElectionService.isLeader(rmLeaderID);

    Future<RegistrationResponse> registrationFuture =
        resourceManager.registerJobManager(rmLeaderID, jmLeaderID, jmAddress, jobID);
    try {
        registrationFuture.get(5, TimeUnit.SECONDS);
    } catch (Exception e) {
        // keep the original cause so that a failed registration can be diagnosed
        throw new AssertionError("JobManager registration Future didn't become ready.", e);
    }

    final SlotManager slotManager = slotManagerFactory.slotManager;
    final AllocationID allocationID = new AllocationID();
    final ResourceProfile resourceProfile = new ResourceProfile(1.0, 100);

    SlotRequest slotRequest = new SlotRequest(jobID, allocationID, resourceProfile);
    RMSlotRequestReply slotRequestReply =
        resourceManager.requestSlot(jmLeaderID, rmLeaderID, slotRequest);

    // 1) SlotRequest is routed to the SlotManager
    verify(slotManager).requestSlot(slotRequest);

    // 2) SlotRequest is confirmed (expected value first, actual value second)
    Assert.assertEquals(allocationID, slotRequestReply.getAllocationID());

    // 3) SlotRequest leads to a container allocation
    Assert.assertEquals(1, resourceManager.startNewWorkerCalled);
    Assert.assertFalse(slotManager.isAllocated(allocationID));

    // slot becomes available
    final String tmAddress = "/tm1";
    TaskExecutorGateway taskExecutorGateway = mock(TaskExecutorGateway.class);
    Mockito
        .when(taskExecutorGateway.requestSlot(
            any(SlotID.class),
            any(JobID.class),
            any(AllocationID.class),
            any(String.class),
            any(UUID.class),
            any(Time.class)))
        .thenReturn(new FlinkCompletableFuture<TMSlotRequestReply>());
    testRpcService.registerGateway(tmAddress, taskExecutorGateway);

    final ResourceID resourceID = ResourceID.generate();
    final SlotID slotID = new SlotID(resourceID, 0);
    final SlotStatus slotStatus = new SlotStatus(slotID, resourceProfile);
    final SlotReport slotReport = new SlotReport(Collections.singletonList(slotStatus));

    // register slot at SlotManager
    slotManager.registerTaskExecutor(
        resourceID, new TaskExecutorRegistration(taskExecutorGateway), slotReport);

    // 4) Slot becomes available and TaskExecutor gets a SlotRequest
    verify(taskExecutorGateway, timeout(5000)).requestSlot(
        eq(slotID),
        eq(jobID),
        eq(allocationID),
        any(String.class),
        any(UUID.class),
        any(Time.class));
}
Use of org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices in the Apache Flink project.
Class JobMasterTest, method testHeartbeatTimeoutWithTaskManager.
/**
 * Tests that the JobMaster disconnects from a TaskManager when the heartbeat to it times out.
 *
 * <p>The heartbeat and timeout runnables are captured from a mocked {@code ScheduledExecutor}
 * and executed manually, so the test does not wait for real time to pass.
 *
 * @throws Exception if the JobMaster cannot be set up or a fatal error was reported
 */
@Test
public void testHeartbeatTimeoutWithTaskManager() throws Exception {
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    final TestingLeaderRetrievalService rmLeaderRetrievalService = new TestingLeaderRetrievalService();
    haServices.setResourceManagerLeaderRetriever(rmLeaderRetrievalService);
    haServices.setCheckpointRecoveryFactory(mock(CheckpointRecoveryFactory.class));

    final TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();

    final String jobManagerAddress = "jm";
    final UUID jmLeaderId = UUID.randomUUID();
    final ResourceID jmResourceId = new ResourceID(jobManagerAddress);

    final String taskManagerAddress = "tm";
    final ResourceID tmResourceId = new ResourceID(taskManagerAddress);
    final TaskManagerLocation taskManagerLocation =
        new TaskManagerLocation(tmResourceId, InetAddress.getLoopbackAddress(), 1234);
    final TaskExecutorGateway taskExecutorGateway = mock(TaskExecutorGateway.class);

    final TestingSerialRpcService rpc = new TestingSerialRpcService();
    rpc.registerGateway(taskManagerAddress, taskExecutorGateway);

    final long heartbeatInterval = 1L;
    final long heartbeatTimeout = 5L;

    final ScheduledExecutor scheduledExecutor = mock(ScheduledExecutor.class);
    final HeartbeatServices heartbeatServices =
        new TestingHeartbeatServices(heartbeatInterval, heartbeatTimeout, scheduledExecutor);

    final JobGraph jobGraph = new JobGraph();

    // keep a handle on the executor handed to the JobMaster so it can be shut down afterwards
    final java.util.concurrent.ScheduledExecutorService jobMasterExecutor =
        Executors.newScheduledThreadPool(1);

    try {
        final JobMaster jobMaster = new JobMaster(
            jmResourceId,
            jobGraph,
            new Configuration(),
            rpc,
            haServices,
            heartbeatServices,
            jobMasterExecutor,
            mock(BlobLibraryCacheManager.class),
            mock(RestartStrategyFactory.class),
            Time.of(10, TimeUnit.SECONDS),
            null,
            mock(OnCompletionActions.class),
            testingFatalErrorHandler,
            new FlinkUserCodeClassLoader(new URL[0]));

        // also start the heartbeat manager in job manager
        jobMaster.start(jmLeaderId);

        // register task manager will trigger monitoring heartbeat target, schedule heartbeat request in interval time
        jobMaster.registerTaskManager(taskManagerAddress, taskManagerLocation, jmLeaderId);

        ArgumentCaptor<Runnable> heartbeatRunnableCaptor = ArgumentCaptor.forClass(Runnable.class);
        verify(scheduledExecutor, times(1)).scheduleAtFixedRate(
            heartbeatRunnableCaptor.capture(),
            eq(0L),
            eq(heartbeatInterval),
            eq(TimeUnit.MILLISECONDS));
        Runnable heartbeatRunnable = heartbeatRunnableCaptor.getValue();

        ArgumentCaptor<Runnable> timeoutRunnableCaptor = ArgumentCaptor.forClass(Runnable.class);
        verify(scheduledExecutor).schedule(
            timeoutRunnableCaptor.capture(),
            eq(heartbeatTimeout),
            eq(TimeUnit.MILLISECONDS));
        Runnable timeoutRunnable = timeoutRunnableCaptor.getValue();

        // run the first heartbeat request
        heartbeatRunnable.run();
        verify(taskExecutorGateway, times(1)).heartbeatFromJobManager(eq(jmResourceId));

        // run the timeout runnable to simulate a heartbeat timeout
        timeoutRunnable.run();
        verify(taskExecutorGateway).disconnectJobManager(eq(jobGraph.getJobID()), any(TimeoutException.class));

        // check if a concurrent error occurred
        testingFatalErrorHandler.rethrowError();
    } finally {
        try {
            rpc.stopService();
        } finally {
            // release the thread pool even if stopping the rpc service fails
            jobMasterExecutor.shutdownNow();
        }
    }
}
Use of org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices in the Apache Flink project.
Class DefaultJobLeaderIdServiceTest, method testInitialJobTimeout.
/**
 * Verifies that adding a job schedules a timeout runnable which, once executed, reports the
 * timeout via {@link JobLeaderIdActions#notifyJobTimeout(JobID, UUID)} with a timeout id that
 * the service still considers valid.
 */
@Test
public void testInitialJobTimeout() throws Exception {
    final JobID jid = new JobID();

    // HA services with a leader retriever for the job that has no leader set yet
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    haServices.setJobMasterLeaderRetriever(jid, new SettableLeaderRetrievalService(null, null));

    final ScheduledExecutor executorMock = mock(ScheduledExecutor.class);
    final JobLeaderIdActions actionsMock = mock(JobLeaderIdActions.class);

    final JobLeaderIdService service =
        new DefaultJobLeaderIdService(haServices, executorMock, Time.milliseconds(5000L));
    service.start(actionsMock);
    service.addJob(jid);

    assertTrue(service.containsJob(jid));

    // grab the timeout runnable that was handed to the executor and fire it by hand
    final ArgumentCaptor<Runnable> scheduledTimeout = ArgumentCaptor.forClass(Runnable.class);
    verify(executorMock, times(1))
        .schedule(scheduledTimeout.capture(), anyLong(), any(TimeUnit.class));
    scheduledTimeout.getValue().run();

    // the timeout must have been reported exactly once, carrying a valid timeout id
    final ArgumentCaptor<UUID> reportedTimeoutId = ArgumentCaptor.forClass(UUID.class);
    verify(actionsMock).notifyJobTimeout(eq(jid), reportedTimeoutId.capture());

    assertTrue(service.isValidTimeout(jid, reportedTimeoutId.getValue()));
}
Use of org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices in the Apache Flink project.
Class DefaultJobLeaderIdServiceTest, method jobTimeoutAfterLostLeadership.
/**
 * Tests that a timeout gets cancelled once a job leader has been found. Furthermore, it tests
 * that a new timeout is registered after the jobmanager has lost leadership.
 *
 * @throws Exception if waiting for the leader id future fails
 */
@Test(timeout = 10000)
public void jobTimeoutAfterLostLeadership() throws Exception {
    final JobID jobId = new JobID();
    final String address = "foobar";
    final JobMasterId leaderId = JobMasterId.generate();

    TestingHighAvailabilityServices highAvailabilityServices = new TestingHighAvailabilityServices();
    SettableLeaderRetrievalService leaderRetrievalService = new SettableLeaderRetrievalService(null, null);
    highAvailabilityServices.setJobMasterLeaderRetriever(jobId, leaderRetrievalService);

    // futures returned, in order, for the first and the second scheduled timeout
    ScheduledFuture<?> timeout1 = mock(ScheduledFuture.class);
    ScheduledFuture<?> timeout2 = mock(ScheduledFuture.class);
    final Queue<ScheduledFuture<?>> timeoutQueue = new ArrayDeque<>(Arrays.asList(timeout1, timeout2));

    ScheduledExecutor scheduledExecutor = mock(ScheduledExecutor.class);

    // remember the most recently scheduled runnable so the test can execute it manually
    final AtomicReference<Runnable> lastRunnable = new AtomicReference<>();
    doAnswer(invocation -> {
        lastRunnable.set((Runnable) invocation.getArguments()[0]);
        return timeoutQueue.poll();
    }).when(scheduledExecutor).schedule(any(Runnable.class), anyLong(), any(TimeUnit.class));

    Time timeout = Time.milliseconds(5000L);
    JobLeaderIdActions jobLeaderIdActions = mock(JobLeaderIdActions.class);

    // remember the timeout id passed to the most recent notifyJobTimeout call
    final AtomicReference<UUID> lastTimeoutId = new AtomicReference<>();
    doAnswer(invocation -> {
        lastTimeoutId.set((UUID) invocation.getArguments()[1]);
        return null;
    }).when(jobLeaderIdActions).notifyJobTimeout(eq(jobId), any(UUID.class));

    JobLeaderIdService jobLeaderIdService =
        new DefaultJobLeaderIdService(highAvailabilityServices, scheduledExecutor, timeout);
    jobLeaderIdService.start(jobLeaderIdActions);
    jobLeaderIdService.addJob(jobId);

    CompletableFuture<JobMasterId> leaderIdFuture = jobLeaderIdService.getLeaderId(jobId);

    // notify the leader id service about the new leader
    leaderRetrievalService.notifyListener(address, leaderId.toUUID());

    assertEquals(leaderId, leaderIdFuture.get());
    assertTrue(jobLeaderIdService.containsJob(jobId));

    // check that the first timeout got cancelled
    verify(timeout1, times(1)).cancel(anyBoolean());
    verify(scheduledExecutor, times(1)).schedule(any(Runnable.class), anyLong(), any(TimeUnit.class));

    // initial timeout runnable which should no longer have an effect
    Runnable runnable = lastRunnable.get();
    assertNotNull(runnable);
    runnable.run();

    verify(jobLeaderIdActions, times(1)).notifyJobTimeout(eq(jobId), any(UUID.class));

    // the timeout should no longer be valid
    assertFalse(jobLeaderIdService.isValidTimeout(jobId, lastTimeoutId.get()));

    // lose leadership
    leaderRetrievalService.notifyListener("", null);

    verify(scheduledExecutor, times(2)).schedule(any(Runnable.class), anyLong(), any(TimeUnit.class));

    // the second runnable should be the new timeout
    runnable = lastRunnable.get();
    assertNotNull(runnable);
    runnable.run();

    verify(jobLeaderIdActions, times(2)).notifyJobTimeout(eq(jobId), any(UUID.class));

    // the new timeout should be valid
    assertTrue(jobLeaderIdService.isValidTimeout(jobId, lastTimeoutId.get()));
}
Use of org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices in the Apache Flink project.
Class ResourceManagerTest, method setup.
/**
 * Re-creates the test fixtures before each test: HA services equipped with a testing
 * resource manager leader election service, a fatal error handler and a generated resource id.
 */
@Before
public void setup() throws Exception {
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    haServices.setResourceManagerLeaderElectionService(new TestingLeaderElectionService());
    highAvailabilityServices = haServices;

    testingFatalErrorHandler = new TestingFatalErrorHandler();
    resourceManagerResourceId = ResourceID.generate();
}
Aggregations