Search in sources:

Example 1 with RecipientUnreachableException

Use of org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException in project flink by apache.

In class HeartbeatManagerTest, method testHeartbeatManagerMarksTargetUnreachableOnRecipientUnreachableException.

/**
 * Checks that the heartbeat listener is not notified about an unreachable target before the
 * configured number of heartbeat requests has been issued against a target whose heartbeat RPC
 * always fails with a {@link RecipientUnreachableException}, and that it is notified afterwards.
 */
@Test
public void testHeartbeatManagerMarksTargetUnreachableOnRecipientUnreachableException() {
    final int unreachableThreshold = 5;
    final long timeoutMillis = 10000L;
    final ResourceID targetId = ResourceID.generate();
    final CompletableFuture<ResourceID> targetUnreachable = new CompletableFuture<>();

    // every heartbeat delivered to this target completes exceptionally
    final HeartbeatTarget<Object> failingTarget =
            new TestingHeartbeatTargetBuilder<>()
                    .setReceiveHeartbeatFunction(
                            (ignoredA, ignoredB) ->
                                    FutureUtils.completedExceptionally(
                                            new RecipientUnreachableException(
                                                    "sender",
                                                    "recipient",
                                                    "could not receive heartbeat")))
                    .createTestingHeartbeatTarget();

    // the listener completes the future as soon as it is told the target is unreachable
    final HeartbeatListener<Object, Object> listener =
            new TestingHeartbeatListenerBuilder<>()
                    .setNotifyTargetUnreachableConsumer(targetUnreachable::complete)
                    .createNewTestingHeartbeatListener();

    final HeartbeatManager<Object, Object> manager =
            new HeartbeatManagerImpl<>(
                    timeoutMillis,
                    unreachableThreshold,
                    ResourceID.generate(),
                    listener,
                    TestingUtils.defaultScheduledExecutor(),
                    LOG);

    try {
        manager.monitorTarget(targetId, failingTarget);

        // all requests but the last one must not trigger the notification
        for (int remaining = unreachableThreshold - 1; remaining > 0; remaining--) {
            manager.requestHeartbeat(targetId, null);
            assertThat(targetUnreachable, FlinkMatchers.willNotComplete(willNotCompleteWithin));
        }

        // the final request reaches the threshold; wait for the notification
        manager.requestHeartbeat(targetId, null);
        targetUnreachable.join();
    } finally {
        manager.stop();
    }
}
Also used : OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) Arrays(java.util.Arrays) ScheduledFuture(java.util.concurrent.ScheduledFuture) FlinkMatchers(org.apache.flink.core.testutils.FlinkMatchers) LoggerFactory(org.slf4j.LoggerFactory) ScheduledExecutorServiceAdapter(org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter) TimeoutException(java.util.concurrent.TimeoutException) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Assert.assertThat(org.junit.Assert.assertThat) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Duration(java.time.Duration) Map(java.util.Map) TestLogger(org.apache.flink.util.TestLogger) Matchers.hasSize(org.hamcrest.Matchers.hasSize) Assert.fail(org.junit.Assert.fail) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) Logger(org.slf4j.Logger) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) ScheduledThreadPoolExecutor(java.util.concurrent.ScheduledThreadPoolExecutor) TimeUnit(java.util.concurrent.TimeUnit) TestingUtils(org.apache.flink.testutils.TestingUtils) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) List(java.util.List) Assert.assertFalse(org.junit.Assert.assertFalse) Matcher(org.hamcrest.Matcher) Matchers.is(org.hamcrest.Matchers.is) Queue(java.util.Queue) ArrayDeque(java.util.ArrayDeque) Assert.assertEquals(org.junit.Assert.assertEquals) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) CompletableFuture(java.util.concurrent.CompletableFuture) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) Test(org.junit.Test)

Example 2 with RecipientUnreachableException

Use of org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException in project flink by apache.

In class HeartbeatManagerTest, method testHeartbeatManagerResetsFailedRpcCountOnSuccessfulRpc.

/**
 * Checks that a successful heartbeat RPC in between failed ones keeps the target from being
 * reported as unreachable.
 *
 * <p>The heartbeat target answers with the queued responses in order: two failures, one success
 * and one more failure. The manager is configured with a threshold of 3 failed RPCs, so the
 * unreachable notification must not fire for this sequence.
 */
@Test
public void testHeartbeatManagerResetsFailedRpcCountOnSuccessfulRpc() throws Exception {
    final long heartbeatTimeout = 10000L;
    final ResourceID someTargetId = ResourceID.generate();
    final RecipientUnreachableException unreachableException =
            new RecipientUnreachableException(
                    "sender", "recipient", "could not receive heartbeat");
    final Queue<CompletableFuture<Void>> heartbeatResponses =
            new ArrayDeque<>(
                    Arrays.asList(
                            FutureUtils.completedExceptionally(unreachableException),
                            FutureUtils.completedExceptionally(unreachableException),
                            CompletableFuture.completedFuture(null),
                            FutureUtils.completedExceptionally(unreachableException)));
    final HeartbeatTarget<Object> someHeartbeatTarget =
            new TestingHeartbeatTargetBuilder<>()
                    .setReceiveHeartbeatFunction(
                            (ignoredA, ignoredB) -> heartbeatResponses.poll())
                    .createTestingHeartbeatTarget();
    final CompletableFuture<ResourceID> unreachableTargetFuture = new CompletableFuture<>();
    final HeartbeatListener<Object, Object> testingHeartbeatListener =
            new TestingHeartbeatListenerBuilder<>()
                    .setNotifyTargetUnreachableConsumer(unreachableTargetFuture::complete)
                    .createNewTestingHeartbeatListener();
    final HeartbeatManager<Object, Object> heartbeatManager =
            new HeartbeatManagerImpl<>(
                    heartbeatTimeout,
                    3,
                    ResourceID.generate(),
                    testingHeartbeatListener,
                    TestingUtils.defaultScheduledExecutor(),
                    LOG);
    try {
        heartbeatManager.monitorTarget(someTargetId, someHeartbeatTarget);

        // Snapshot the number of prepared responses before issuing any request. The heartbeat
        // target polls one response off the queue per received heartbeat, so re-reading size()
        // in the loop condition would shrink the bound while iterating and could end the loop
        // before the successful response and the trailing failure are ever consumed.
        final int numberOfHeartbeats = heartbeatResponses.size();
        for (int i = 0; i < numberOfHeartbeats; i++) {
            heartbeatManager.requestHeartbeat(someTargetId, null);
        }

        assertThat(unreachableTargetFuture, FlinkMatchers.willNotComplete(willNotCompleteWithin));
    } finally {
        heartbeatManager.stop();
    }
}
Also used : OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) Arrays(java.util.Arrays) ScheduledFuture(java.util.concurrent.ScheduledFuture) FlinkMatchers(org.apache.flink.core.testutils.FlinkMatchers) LoggerFactory(org.slf4j.LoggerFactory) ScheduledExecutorServiceAdapter(org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter) TimeoutException(java.util.concurrent.TimeoutException) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Assert.assertThat(org.junit.Assert.assertThat) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Duration(java.time.Duration) Map(java.util.Map) TestLogger(org.apache.flink.util.TestLogger) Matchers.hasSize(org.hamcrest.Matchers.hasSize) Assert.fail(org.junit.Assert.fail) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) Logger(org.slf4j.Logger) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) ScheduledThreadPoolExecutor(java.util.concurrent.ScheduledThreadPoolExecutor) TimeUnit(java.util.concurrent.TimeUnit) TestingUtils(org.apache.flink.testutils.TestingUtils) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) List(java.util.List) Assert.assertFalse(org.junit.Assert.assertFalse) Matcher(org.hamcrest.Matcher) Matchers.is(org.hamcrest.Matchers.is) Queue(java.util.Queue) ArrayDeque(java.util.ArrayDeque) Assert.assertEquals(org.junit.Assert.assertEquals) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) ArrayDeque(java.util.ArrayDeque) CompletableFuture(java.util.concurrent.CompletableFuture) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) 
RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) Test(org.junit.Test)

Example 3 with RecipientUnreachableException

Use of org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException in project flink by apache.

In class ResourceManagerTest, method testJobMasterBecomesUnreachableTriggersDisconnect.

/**
 * Registers a job master whose resource manager heartbeat always fails with a
 * {@link RecipientUnreachableException} and checks that the job master gateway is asked to
 * disconnect from the resource manager.
 */
@Test
public void testJobMasterBecomesUnreachableTriggersDisconnect() throws Exception {
    final ResourceID jobMasterResourceId = ResourceID.generate();
    final JobID jobId = new JobID();
    final CompletableFuture<ResourceManagerId> disconnectFuture = new CompletableFuture<>();

    // job master gateway that answers every heartbeat with an "unreachable" failure
    final String gatewayAddress = UUID.randomUUID().toString();
    final TestingJobMasterGateway unreachableJobMaster =
            new TestingJobMasterGatewayBuilder()
                    .setAddress(gatewayAddress)
                    .setResourceManagerHeartbeatFunction(
                            ignoredResourceId ->
                                    FutureUtils.completedExceptionally(
                                            new RecipientUnreachableException(
                                                    "sender",
                                                    "recipient",
                                                    "task executor is unreachable")))
                    .setDisconnectResourceManagerConsumer(disconnectFuture::complete)
                    .build();
    rpcService.registerGateway(unreachableJobMaster.getAddress(), unreachableJobMaster);

    // leader retrieval that only accepts lookups for the job created above
    final LeaderRetrievalService leaderRetrieval =
            new SettableLeaderRetrievalService(
                    unreachableJobMaster.getAddress(),
                    unreachableJobMaster.getFencingToken().toUUID());
    highAvailabilityServices.setJobMasterLeaderRetrieverFunction(
            requestedJobId -> {
                assertThat(requestedJobId, is(equalTo(jobId)));
                return leaderRetrieval;
            });

    runHeartbeatTargetBecomesUnreachableTest(
            ignored -> {},
            gateway -> {
                final CompletableFuture<RegistrationResponse> registration =
                        gateway.registerJobMaster(
                                unreachableJobMaster.getFencingToken(),
                                jobMasterResourceId,
                                unreachableJobMaster.getAddress(),
                                jobId,
                                TIMEOUT);
                assertThat(registration.get(), instanceOf(RegistrationResponse.Success.class));
            },
            ignoredResourceManagerResourceId -> {
                assertThat(disconnectFuture.get(), is(equalTo(resourceManagerId)));
            });
}
Also used : CompletableFuture(java.util.concurrent.CompletableFuture) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 4 with RecipientUnreachableException

Use of org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException in project flink by apache.

In class AkkaInvocationHandler, method resolveTimeoutException.

/**
 * Replaces an Akka ask timeout with a more descriptive exception.
 *
 * <p>Exceptions that are not an {@code akka.pattern.AskTimeoutException} are returned unchanged.
 * Otherwise a new exception is created with the original one as its cause: a {@link
 * RecipientUnreachableException} (constructed with {@code "unknown"}, the recipient and the
 * invocation's string form) if {@code AkkaRpcServiceUtils.isRecipientTerminatedException}
 * accepts the exception, or a {@link TimeoutException} with an explanatory message otherwise.
 *
 * @param exception the exception to inspect
 * @param callStackCapture optional throwable whose stack trace, minus its leading three frames,
 *     becomes the stack trace of the new exception; may be {@code null}
 * @param recipient the recipient used for the new exception
 * @param rpcInvocation the invocation used for the new exception
 * @return {@code exception} itself, or the newly created replacement exception
 */
static Throwable resolveTimeoutException(
        Throwable exception,
        @Nullable Throwable callStackCapture,
        String recipient,
        RpcInvocation rpcInvocation) {
    if (!(exception instanceof akka.pattern.AskTimeoutException)) {
        return exception;
    }
    final Exception newException;
    if (AkkaRpcServiceUtils.isRecipientTerminatedException(exception)) {
        newException = new RecipientUnreachableException("unknown", recipient, rpcInvocation.toString());
    } else {
        newException = new TimeoutException(String.format("Invocation of [%s] at recipient [%s] timed out. This is usually caused by: 1) Akka failed sending " + "the message silently, due to problems like oversized payload or serialization failures. " + "In that case, you should find detailed error information in the logs. 2) The recipient needs " + "more time for responding, due to problems like slow machines or network jitters. In that case, you can try to increase %s.", rpcInvocation, recipient, AkkaOptions.ASK_TIMEOUT_DURATION.key()));
    }
    newException.initCause(exception);
    if (callStackCapture != null) {
        // remove the stack frames coming from the proxy interface invocation
        final StackTraceElement[] stackTrace = callStackCapture.getStackTrace();
        // Clamp the start index: Arrays.copyOfRange throws IllegalArgumentException when
        // from > to, which would otherwise hide the actual timeout whenever the captured
        // stack trace holds fewer than three frames (e.g. an empty trace).
        final int firstKeptFrame = Math.min(3, stackTrace.length);
        newException.setStackTrace(
                Arrays.copyOfRange(stackTrace, firstKeptFrame, stackTrace.length));
    }
    return newException;
}
Also used : RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) TimeoutException(java.util.concurrent.TimeoutException) RpcException(org.apache.flink.runtime.rpc.exceptions.RpcException) IOException(java.io.IOException) CompletionException(java.util.concurrent.CompletionException) ExecutionException(java.util.concurrent.ExecutionException) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) TimeoutException(java.util.concurrent.TimeoutException)

Example 5 with RecipientUnreachableException

Use of org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException in project flink by apache.

In class ResourceManagerTest, method testTaskExecutorBecomesUnreachableTriggersDisconnect.

/**
 * Registers a task executor whose resource manager heartbeat always fails with a
 * {@link RecipientUnreachableException} and checks that the task executor gateway is
 * disconnected with a {@link ResourceManagerException} and that the stop-worker function is
 * invoked for that task executor.
 */
@Test
public void testTaskExecutorBecomesUnreachableTriggersDisconnect() throws Exception {
    final CompletableFuture<ResourceID> stopWorkerFuture = new CompletableFuture<>();
    final CompletableFuture<Exception> disconnectFuture = new CompletableFuture<>();
    final ResourceID taskExecutorId = ResourceID.generate();

    // task executor gateway that answers every heartbeat with an "unreachable" failure
    final String gatewayAddress = UUID.randomUUID().toString();
    final TaskExecutorGateway unreachableTaskExecutor =
            new TestingTaskExecutorGatewayBuilder()
                    .setAddress(gatewayAddress)
                    .setDisconnectResourceManagerConsumer(disconnectFuture::complete)
                    .setHeartbeatResourceManagerFunction(
                            ignoredResourceId ->
                                    FutureUtils.completedExceptionally(
                                            new RecipientUnreachableException(
                                                    "sender",
                                                    "recipient",
                                                    "task executor is unreachable")))
                    .createTestingTaskExecutorGateway();
    rpcService.registerGateway(unreachableTaskExecutor.getAddress(), unreachableTaskExecutor);

    runHeartbeatTargetBecomesUnreachableTest(
            builder ->
                    builder.withStopWorkerFunction(
                            stoppedWorker -> {
                                // record which worker was asked to stop
                                stopWorkerFuture.complete(stoppedWorker);
                                return true;
                            }),
            gateway ->
                    registerTaskExecutor(
                            gateway, taskExecutorId, unreachableTaskExecutor.getAddress()),
            ignoredResourceManagerResourceId -> {
                assertThat(disconnectFuture.get(), instanceOf(ResourceManagerException.class));
                assertThat(stopWorkerFuture.get(), is(taskExecutorId));
            });
}
Also used : RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) ResourceRequirement(org.apache.flink.runtime.slots.ResourceRequirement) TimeoutException(java.util.concurrent.TimeoutException) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) After(org.junit.After) Matchers.nullValue(org.hamcrest.Matchers.nullValue) TestLogger(org.apache.flink.util.TestLogger) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) Assert.fail(org.junit.Assert.fail) AfterClass(org.junit.AfterClass) UUID(java.util.UUID) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) TestingUtils(org.apache.flink.testutils.TestingUtils) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Matchers.is(org.hamcrest.Matchers.is) Matchers.anyOf(org.hamcrest.Matchers.anyOf) Time(org.apache.flink.api.common.time.Time) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) FlinkException(org.apache.flink.util.FlinkException) BeforeClass(org.junit.BeforeClass) TaskExecutorMemoryConfiguration(org.apache.flink.runtime.taskexecutor.TaskExecutorMemoryConfiguration) CompletableFuture(java.util.concurrent.CompletableFuture) JobStatus(org.apache.flink.api.common.JobStatus) Function(java.util.function.Function) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) DeclarativeSlotManagerBuilder(org.apache.flink.runtime.resourcemanager.slotmanager.DeclarativeSlotManagerBuilder) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) 
LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) ResourceManagerException(org.apache.flink.runtime.resourcemanager.exceptions.ResourceManagerException) NoOpResourceManagerPartitionTracker(org.apache.flink.runtime.io.network.partition.NoOpResourceManagerPartitionTracker) SlotManager(org.apache.flink.runtime.resourcemanager.slotmanager.SlotManager) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) ResourceRequirements(org.apache.flink.runtime.slots.ResourceRequirements) ThrowingConsumer(org.apache.flink.util.function.ThrowingConsumer) Before(org.junit.Before) Matchers.empty(org.hamcrest.Matchers.empty) TestingLeaderElectionService(org.apache.flink.runtime.leaderelection.TestingLeaderElectionService) HardwareDescription(org.apache.flink.runtime.instance.HardwareDescription) TaskManagerInfo(org.apache.flink.runtime.rest.messages.taskmanager.TaskManagerInfo) Test(org.junit.Test) TaskExecutorThreadInfoGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorThreadInfoGateway) RpcUtils(org.apache.flink.runtime.rpc.RpcUtils) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) JobID(org.apache.flink.api.common.JobID) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) TestingSlotManagerBuilder(org.apache.flink.runtime.resourcemanager.slotmanager.TestingSlotManagerBuilder) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) CompletableFuture(java.util.concurrent.CompletableFuture) 
ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) ResourceManagerException(org.apache.flink.runtime.resourcemanager.exceptions.ResourceManagerException) TimeoutException(java.util.concurrent.TimeoutException) FlinkException(org.apache.flink.util.FlinkException) ResourceManagerException(org.apache.flink.runtime.resourcemanager.exceptions.ResourceManagerException) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) Test(org.junit.Test)

Aggregations

RecipientUnreachableException (org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException)9 CompletableFuture (java.util.concurrent.CompletableFuture)8 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)8 Test (org.junit.Test)8 TimeoutException (java.util.concurrent.TimeoutException)7 ArrayDeque (java.util.ArrayDeque)6 TimeUnit (java.util.concurrent.TimeUnit)6 OneShotLatch (org.apache.flink.core.testutils.OneShotLatch)6 TestLogger (org.apache.flink.util.TestLogger)6 FutureUtils (org.apache.flink.util.concurrent.FutureUtils)6 Matchers.is (org.hamcrest.Matchers.is)6 Assert.assertEquals (org.junit.Assert.assertEquals)6 Assert.fail (org.junit.Assert.fail)6 Arrays (java.util.Arrays)5 List (java.util.List)5 Queue (java.util.Queue)5 ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue)5 TestingUtils (org.apache.flink.testutils.TestingUtils)5 Matchers.hasSize (org.hamcrest.Matchers.hasSize)5 Assert.assertThat (org.junit.Assert.assertThat)5