Use of org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException in the Apache Flink project.
From the class TaskExecutorTest, method testJobManagerBecomesUnreachableTriggersDisconnect.
/**
 * Runs the shared job-manager heartbeat scenario with a job master gateway whose task-manager
 * heartbeat function always fails with a {@link RecipientUnreachableException}. The expected
 * outcome is checked inside {@code runJobManagerHeartbeatTest}.
 */
@Test
public void testJobManagerBecomesUnreachableTriggersDisconnect() throws Exception {
    final ResourceID unreachableJobManagerId = ResourceID.generate();

    runJobManagerHeartbeatTest(
            unreachableJobManagerId,
            failedRpcEnabledHeartbeatServices,
            // configure the job master gateway so that its task-manager heartbeat function
            // completes exceptionally with "recipient unreachable"
            gatewayBuilder ->
                    gatewayBuilder.setTaskManagerHeartbeatFunction(
                            (ignoredResourceId, ignoredPayload) ->
                                    FutureUtils.completedExceptionally(
                                            new RecipientUnreachableException(
                                                    "sender",
                                                    "recipient",
                                                    "job manager is unreachable."))),
            // invoke heartbeatFromJobManager on the task executor gateway with a report that
            // contains a single allocated slot
            (ignoredTaskExecutorResourceId, gateway, slotAllocationId) ->
                    gateway.heartbeatFromJobManager(
                            unreachableJobManagerId,
                            new AllocatedSlotReport(
                                    jobId,
                                    Collections.singleton(
                                            new AllocatedSlotInfo(0, slotAllocationId)))));
}
Use of org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException in the Apache Flink project.
From the class HeartbeatManagerTest, method testHeartbeatManagerIgnoresRecipientUnreachableExceptionIfDisabled.
/**
 * Checks that a {@code HeartbeatManagerImpl} created with {@code -1} (rpc request checking
 * disabled) does not notify its listener that the target is unreachable, even though every
 * heartbeat sent to the target fails with a {@link RecipientUnreachableException}.
 */
@Test
public void testHeartbeatManagerIgnoresRecipientUnreachableExceptionIfDisabled() throws Exception {
    final long timeout = 10000L;
    final ResourceID monitoredTargetId = ResourceID.generate();

    // target whose receive-heartbeat function always completes exceptionally
    final HeartbeatTarget<Object> failingTarget =
            new TestingHeartbeatTargetBuilder<>()
                    .setReceiveHeartbeatFunction(
                            (ignoredId, ignoredPayload) ->
                                    FutureUtils.completedExceptionally(
                                            new RecipientUnreachableException(
                                                    "sender",
                                                    "recipient",
                                                    "could not receive heartbeat")))
                    .createTestingHeartbeatTarget();

    // completed by the listener if the manager ever reports the target as unreachable
    final CompletableFuture<ResourceID> unreachableNotification = new CompletableFuture<>();
    final HeartbeatListener<Object, Object> listener =
            new TestingHeartbeatListenerBuilder<>()
                    .setNotifyTargetUnreachableConsumer(unreachableNotification::complete)
                    .createNewTestingHeartbeatListener();

    final HeartbeatManager<Object, Object> manager =
            new HeartbeatManagerImpl<>(
                    timeout,
                    // disable rpc request checking
                    -1,
                    ResourceID.generate(),
                    listener,
                    TestingUtils.defaultScheduledExecutor(),
                    LOG);

    try {
        manager.monitorTarget(monitoredTargetId, failingTarget);

        // issue ten heartbeat requests, each of which hits the failing target
        int remainingRequests = 10;
        while (remainingRequests-- > 0) {
            manager.requestHeartbeat(monitoredTargetId, null);
        }

        assertThat(unreachableNotification, FlinkMatchers.willNotComplete(willNotCompleteWithin));
    } finally {
        manager.stop();
    }
}
Use of org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException in the Apache Flink project.
From the class HeartbeatManagerTest, method testHeartbeatManagerSenderMarksTargetUnreachableOnRecipientUnreachableException.
/**
 * Checks that a {@code HeartbeatManagerSenderImpl} notifies its listener that the monitored
 * target is unreachable when the target's request-heartbeat function fails with a
 * {@link RecipientUnreachableException}.
 */
@Test
public void testHeartbeatManagerSenderMarksTargetUnreachableOnRecipientUnreachableException() {
    final long period = 20;
    final long timeout = 10000L;
    final ResourceID monitoredTargetId = ResourceID.generate();

    // target whose request-heartbeat function always completes exceptionally
    final HeartbeatTarget<Object> failingTarget =
            new TestingHeartbeatTargetBuilder<>()
                    .setRequestHeartbeatFunction(
                            (ignoredId, ignoredPayload) ->
                                    FutureUtils.completedExceptionally(
                                            new RecipientUnreachableException(
                                                    "sender",
                                                    "recipient",
                                                    "could not receive heartbeat")))
                    .createTestingHeartbeatTarget();

    // completed by the listener once the manager reports the target as unreachable
    final CompletableFuture<ResourceID> unreachableNotification = new CompletableFuture<>();
    final HeartbeatListener<Object, Object> listener =
            new TestingHeartbeatListenerBuilder<>()
                    .setNotifyTargetUnreachableConsumer(unreachableNotification::complete)
                    .createNewTestingHeartbeatListener();

    final HeartbeatManager<Object, Object> senderManager =
            new HeartbeatManagerSenderImpl<>(
                    period,
                    timeout,
                    // NOTE(review): presumably the failed-rpc threshold - confirm against the
                    // HeartbeatManagerSenderImpl constructor
                    1,
                    ResourceID.generate(),
                    listener,
                    TestingUtils.defaultScheduledExecutor(),
                    LOG);

    try {
        senderManager.monitorTarget(monitoredTargetId, failingTarget);

        // blocks until the listener has been told that the target is unreachable
        unreachableNotification.join();
    } finally {
        senderManager.stop();
    }
}
Use of org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException in the Apache Flink project.
From the class JobMasterTest, method testResourceManagerBecomesUnreachableTriggersDisconnect.
/**
 * Runs a JobMaster against a ResourceManager gateway whose job-master heartbeat function fails
 * with a {@link RecipientUnreachableException}. Expects the gateway's disconnect callback to be
 * invoked with the job's id, and a second registration attempt to be made afterwards.
 */
@Test
public void testResourceManagerBecomesUnreachableTriggersDisconnect() throws Exception {
    final String rmAddress = "rm";
    final ResourceManagerId rmLeaderId = ResourceManagerId.generate();
    final ResourceID rmResourceId = new ResourceID(rmAddress);
    final TestingResourceManagerGateway rmGateway =
            new TestingResourceManagerGateway(rmLeaderId, rmResourceId, rmAddress, "localhost");

    final CompletableFuture<JobID> disconnectedJobIdFuture = new CompletableFuture<>();
    final CountDownLatch registrationLatch = new CountDownLatch(2);

    // first registration succeeds immediately, the second one is left pending
    final Queue<CompletableFuture<RegistrationResponse>> pendingResponses = new ArrayDeque<>(2);
    pendingResponses.add(
            CompletableFuture.completedFuture(rmGateway.getJobMasterRegistrationSuccess()));
    pendingResponses.add(new CompletableFuture<>());

    rmGateway.setRegisterJobManagerFunction(
            (unusedJobMasterId, unusedResourceId, unusedAddress, unusedJobId) -> {
                registrationLatch.countDown();
                return pendingResponses.poll();
            });
    rmGateway.setDisconnectJobManagerConsumer(
            disconnectInfo -> disconnectedJobIdFuture.complete(disconnectInfo.f0));
    // every heartbeat sent to the resource manager fails as "recipient unreachable"
    rmGateway.setJobMasterHeartbeatFunction(
            unused ->
                    FutureUtils.completedExceptionally(
                            new RecipientUnreachableException(
                                    "sender", "recipient", "resource manager is unreachable")));

    rpcService.registerGateway(rmAddress, rmGateway);

    final JobMaster master =
            new JobMasterBuilder(jobGraph, rpcService)
                    .withJobMasterId(jobMasterId)
                    .withResourceId(jmResourceId)
                    .withConfiguration(configuration)
                    .withHighAvailabilityServices(haServices)
                    .withHeartbeatServices(heartbeatServices)
                    .createJobMaster();
    master.start();

    try {
        // announce the resource manager as leader so that the JobMaster registers with it
        rmLeaderRetrievalService.notifyListener(rmAddress, rmLeaderId.toUUID());

        final JobMasterGateway selfGateway = master.getSelfGateway(JobMasterGateway.class);

        // keep sending resource manager heartbeats until the disconnect callback has fired
        CommonTestUtils.waitUntilCondition(
                () -> {
                    selfGateway.heartbeatFromResourceManager(rmResourceId);
                    return disconnectedJobIdFuture.isDone();
                },
                Deadline.fromNow(TimeUtils.toDuration(testingTimeout)),
                50L);

        // the unreachable ResourceManager should have been asked to disconnect this job
        final JobID disconnectedJobId = disconnectedJobIdFuture.join();
        assertThat(disconnectedJobId, equalTo(jobGraph.getJobID()));

        // the JobMaster should try to reconnect to the RM (second registration attempt)
        registrationLatch.await();
    } finally {
        RpcUtils.terminateRpcEndpoint(master, testingTimeout);
    }
}
Aggregations