Use of org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException in the Apache Flink project.
Class HeartbeatManagerTest, method testHeartbeatManagerMarksTargetUnreachableOnRecipientUnreachableException.
/**
 * Checks that the listener's target-unreachable consumer is only invoked after the configured
 * number of heartbeat requests have been answered with a {@link RecipientUnreachableException}:
 * the first {@code failedRpcRequestsUntilUnreachable - 1} requests must leave the future
 * incomplete, the next one must complete it.
 */
@Test
public void testHeartbeatManagerMarksTargetUnreachableOnRecipientUnreachableException() {
    final long heartbeatTimeout = 10000L;
    final ResourceID targetId = ResourceID.generate();

    // every heartbeat handed to this target fails with a RecipientUnreachableException
    final HeartbeatTarget<Object> unreachableTarget =
            new TestingHeartbeatTargetBuilder<>()
                    .setReceiveHeartbeatFunction(
                            (ignoredA, ignoredB) ->
                                    FutureUtils.completedExceptionally(
                                            new RecipientUnreachableException(
                                                    "sender",
                                                    "recipient",
                                                    "could not receive heartbeat")))
                    .createTestingHeartbeatTarget();

    // completed by the listener with the id of the target that was declared unreachable
    final CompletableFuture<ResourceID> targetUnreachable = new CompletableFuture<>();
    final HeartbeatListener<Object, Object> listener =
            new TestingHeartbeatListenerBuilder<>()
                    .setNotifyTargetUnreachableConsumer(targetUnreachable::complete)
                    .createNewTestingHeartbeatListener();

    final int failedRpcRequestsUntilUnreachable = 5;
    final HeartbeatManager<Object, Object> manager =
            new HeartbeatManagerImpl<>(
                    heartbeatTimeout,
                    failedRpcRequestsUntilUnreachable,
                    ResourceID.generate(),
                    listener,
                    TestingUtils.defaultScheduledExecutor(),
                    LOG);

    try {
        manager.monitorTarget(targetId, unreachableTarget);

        // stay one failure below the threshold: no notification may be emitted yet
        for (int remaining = failedRpcRequestsUntilUnreachable - 1; remaining > 0; remaining--) {
            manager.requestHeartbeat(targetId, null);
            assertThat(targetUnreachable, FlinkMatchers.willNotComplete(willNotCompleteWithin));
        }

        // this failure reaches the threshold, so the target should be unreachable now
        manager.requestHeartbeat(targetId, null);
        targetUnreachable.join();
    } finally {
        manager.stop();
    }
}
Use of org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException in the Apache Flink project.
Class HeartbeatManagerTest, method testHeartbeatManagerResetsFailedRpcCountOnSuccessfulRpc.
/**
 * Checks that a successfully answered heartbeat resets the failed-RPC counter: with a threshold
 * of 3, the response sequence "fail, fail, success, fail" must not cause the listener's
 * target-unreachable consumer to be invoked.
 */
@Test
public void testHeartbeatManagerResetsFailedRpcCountOnSuccessfulRpc() throws Exception {
    final long heartbeatTimeout = 10000L;
    final ResourceID someTargetId = ResourceID.generate();
    final RecipientUnreachableException unreachableException =
            new RecipientUnreachableException(
                    "sender", "recipient", "could not receive heartbeat");

    // responses handed out one by one: two failures, one success, one more failure
    final Queue<CompletableFuture<Void>> heartbeatResponses =
            new ArrayDeque<>(
                    Arrays.asList(
                            FutureUtils.completedExceptionally(unreachableException),
                            FutureUtils.completedExceptionally(unreachableException),
                            CompletableFuture.completedFuture(null),
                            FutureUtils.completedExceptionally(unreachableException)));
    final HeartbeatTarget<Object> someHeartbeatTarget =
            new TestingHeartbeatTargetBuilder<>()
                    .setReceiveHeartbeatFunction(
                            (ignoredA, ignoredB) -> heartbeatResponses.poll())
                    .createTestingHeartbeatTarget();

    final CompletableFuture<ResourceID> unreachableTargetFuture = new CompletableFuture<>();
    final HeartbeatListener<Object, Object> testingHeartbeatListener =
            new TestingHeartbeatListenerBuilder<>()
                    .setNotifyTargetUnreachableConsumer(unreachableTargetFuture::complete)
                    .createNewTestingHeartbeatListener();
    final HeartbeatManager<Object, Object> heartbeatManager =
            new HeartbeatManagerImpl<>(
                    heartbeatTimeout,
                    3,
                    ResourceID.generate(),
                    testingHeartbeatListener,
                    TestingUtils.defaultScheduledExecutor(),
                    LOG);

    try {
        heartbeatManager.monitorTarget(someTargetId, someHeartbeatTarget);

        // The receive-heartbeat function polls the queue, so its size shrinks while the loop
        // runs. Fix the number of requests up front; re-reading size() in the loop condition
        // would end the loop before the success response (and the failure after it) is used.
        final int numberOfHeartbeats = heartbeatResponses.size();
        for (int i = 0; i < numberOfHeartbeats; i++) {
            heartbeatManager.requestHeartbeat(someTargetId, null);
        }

        assertThat(unreachableTargetFuture, FlinkMatchers.willNotComplete(willNotCompleteWithin));
    } finally {
        heartbeatManager.stop();
    }
}
Use of org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException in the Apache Flink project.
Class ResourceManagerTest, method testJobMasterBecomesUnreachableTriggersDisconnect.
/**
 * Checks that a JobMaster whose heartbeat RPC fails with a
 * {@link RecipientUnreachableException} gets disconnected: the gateway's
 * disconnect-resource-manager consumer must be called with the {@code resourceManagerId}.
 */
@Test
public void testJobMasterBecomesUnreachableTriggersDisconnect() throws Exception {
    final JobID jobId = new JobID();
    final ResourceID jobMasterResourceId = ResourceID.generate();
    final CompletableFuture<ResourceManagerId> disconnectFuture = new CompletableFuture<>();

    // JobMaster gateway that fails every resource manager heartbeat and records the disconnect
    final TestingJobMasterGateway jobMasterGateway =
            new TestingJobMasterGatewayBuilder()
                    .setAddress(UUID.randomUUID().toString())
                    .setResourceManagerHeartbeatFunction(
                            resourceId ->
                                    FutureUtils.completedExceptionally(
                                            new RecipientUnreachableException(
                                                    "sender",
                                                    "recipient",
                                                    "job master is unreachable")))
                    .setDisconnectResourceManagerConsumer(disconnectFuture::complete)
                    .build();
    rpcService.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);

    final LeaderRetrievalService jobMasterLeaderRetrievalService =
            new SettableLeaderRetrievalService(
                    jobMasterGateway.getAddress(),
                    jobMasterGateway.getFencingToken().toUUID());
    highAvailabilityServices.setJobMasterLeaderRetrieverFunction(
            requestedJobId -> {
                assertThat(requestedJobId, is(equalTo(jobId)));
                return jobMasterLeaderRetrievalService;
            });

    runHeartbeatTargetBecomesUnreachableTest(
            // first argument: nothing to customize for this scenario
            (ignore) -> {},
            // second argument: register the JobMaster and require a successful registration
            resourceManagerGateway -> {
                final CompletableFuture<RegistrationResponse> registrationFuture =
                        resourceManagerGateway.registerJobMaster(
                                jobMasterGateway.getFencingToken(),
                                jobMasterResourceId,
                                jobMasterGateway.getAddress(),
                                jobId,
                                TIMEOUT);
                assertThat(
                        registrationFuture.get(),
                        instanceOf(RegistrationResponse.Success.class));
            },
            // third argument: the JobMaster must have been told to disconnect
            resourceManagerResourceId ->
                    assertThat(disconnectFuture.get(), is(equalTo(resourceManagerId))));
}
Use of org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException in the Apache Flink project.
Class AkkaInvocationHandler, method resolveTimeoutException.
/**
 * Replaces an {@link akka.pattern.AskTimeoutException} with a more descriptive exception.
 *
 * <p>Any other exception is returned unchanged. For an ask timeout, a
 * {@link RecipientUnreachableException} is created if
 * {@code AkkaRpcServiceUtils.isRecipientTerminatedException} reports true for it, otherwise a
 * {@link TimeoutException} with troubleshooting hints. The original exception is attached as
 * the cause.
 *
 * @param exception the exception to inspect
 * @param callStackCapture optional throwable whose stack trace (minus its leading proxy
 *     frames) is installed on the new exception; may be {@code null}
 * @param recipient the recipient named in the resulting exception
 * @param rpcInvocation the invocation described in the resulting exception
 * @return {@code exception} itself if it is not an ask timeout, otherwise the new exception
 */
static Throwable resolveTimeoutException(Throwable exception, @Nullable Throwable callStackCapture, String recipient, RpcInvocation rpcInvocation) {
    if (!(exception instanceof akka.pattern.AskTimeoutException)) {
        return exception;
    }

    final Exception newException;
    if (AkkaRpcServiceUtils.isRecipientTerminatedException(exception)) {
        newException = new RecipientUnreachableException("unknown", recipient, rpcInvocation.toString());
    } else {
        newException = new TimeoutException(String.format("Invocation of [%s] at recipient [%s] timed out. This is usually caused by: 1) Akka failed sending " + "the message silently, due to problems like oversized payload or serialization failures. " + "In that case, you should find detailed error information in the logs. 2) The recipient needs " + "more time for responding, due to problems like slow machines or network jitters. In that case, you can try to increase %s.", rpcInvocation, recipient, AkkaOptions.ASK_TIMEOUT_DURATION.key()));
    }
    newException.initCause(exception);

    if (callStackCapture != null) {
        // remove the stack frames coming from the proxy interface invocation
        final StackTraceElement[] stackTrace = callStackCapture.getStackTrace();
        // Clamp the skip count: copyOfRange throws IllegalArgumentException when from > to,
        // which would happen for a captured trace with fewer than three frames.
        final int framesToSkip = Math.min(3, stackTrace.length);
        newException.setStackTrace(Arrays.copyOfRange(stackTrace, framesToSkip, stackTrace.length));
    }
    return newException;
}
Use of org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException in the Apache Flink project.
Class ResourceManagerTest, method testTaskExecutorBecomesUnreachableTriggersDisconnect.
/**
 * Checks that a TaskExecutor whose heartbeat RPC fails with a
 * {@link RecipientUnreachableException} is disconnected with a
 * {@link ResourceManagerException} and that the stop-worker function is invoked for it.
 */
@Test
public void testTaskExecutorBecomesUnreachableTriggersDisconnect() throws Exception {
    final ResourceID executorResourceId = ResourceID.generate();

    // completed by the gateway with the cause it is disconnected with
    final CompletableFuture<Exception> disconnectCause = new CompletableFuture<>();
    // completed by the stop-worker function with the worker it is asked to stop
    final CompletableFuture<ResourceID> stoppedWorker = new CompletableFuture<>();

    // TaskExecutor gateway that fails every resource manager heartbeat
    final TaskExecutorGateway unreachableExecutorGateway =
            new TestingTaskExecutorGatewayBuilder()
                    .setAddress(UUID.randomUUID().toString())
                    .setDisconnectResourceManagerConsumer(disconnectCause::complete)
                    .setHeartbeatResourceManagerFunction(
                            ignoredResourceId ->
                                    FutureUtils.completedExceptionally(
                                            new RecipientUnreachableException(
                                                    "sender",
                                                    "recipient",
                                                    "task executor is unreachable")))
                    .createTestingTaskExecutorGateway();
    rpcService.registerGateway(
            unreachableExecutorGateway.getAddress(), unreachableExecutorGateway);

    runHeartbeatTargetBecomesUnreachableTest(
            builder ->
                    builder.withStopWorkerFunction(
                            (worker) -> {
                                stoppedWorker.complete(worker);
                                return true;
                            }),
            gateway ->
                    registerTaskExecutor(
                            gateway,
                            executorResourceId,
                            unreachableExecutorGateway.getAddress()),
            ignoredResourceManagerId -> {
                assertThat(disconnectCause.get(), instanceOf(ResourceManagerException.class));
                assertThat(stoppedWorker.get(), is(executorResourceId));
            });
}
Aggregations