Search in sources:

Example 1 with SettableLeaderRetrievalService

use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in project flink by apache.

the class ResourceManagerTest method testJobMasterBecomesUnreachableTriggersDisconnect.

/**
 * Registers a job master whose resource manager heartbeat function always fails with a {@link
 * RecipientUnreachableException} and asserts that the job master's disconnect callback is
 * eventually invoked with this resource manager's id.
 *
 * <p>The scenario itself is driven by {@code runHeartbeatTargetBecomesUnreachableTest}, which
 * receives the registration step and the verification step as callbacks.
 */
@Test
public void testJobMasterBecomesUnreachableTriggersDisconnect() throws Exception {
    final JobID jobId = new JobID();
    final ResourceID jobMasterResourceId = ResourceID.generate();
    // completed by the testing gateway once the resource manager disconnects from it
    final CompletableFuture<ResourceManagerId> disconnectFuture = new CompletableFuture<>();
    final TestingJobMasterGateway jobMasterGateway =
            new TestingJobMasterGatewayBuilder()
                    .setAddress(UUID.randomUUID().toString())
                    // every heartbeat request fails as if the job master could not be reached
                    .setResourceManagerHeartbeatFunction(
                            resourceId ->
                                    FutureUtils.completedExceptionally(
                                            new RecipientUnreachableException(
                                                    "sender",
                                                    "recipient",
                                                    "job master is unreachable")))
                    .setDisconnectResourceManagerConsumer(disconnectFuture::complete)
                    .build();
    rpcService.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
    final LeaderRetrievalService jobMasterLeaderRetrievalService =
            new SettableLeaderRetrievalService(
                    jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());
    highAvailabilityServices.setJobMasterLeaderRetrieverFunction(
            requestedJobId -> {
                // the leader must only ever be looked up for the job registered below
                assertThat(requestedJobId, is(equalTo(jobId)));
                return jobMasterLeaderRetrievalService;
            });
    runHeartbeatTargetBecomesUnreachableTest(
            (ignore) -> {},
            resourceManagerGateway -> {
                final CompletableFuture<RegistrationResponse> registrationFuture =
                        resourceManagerGateway.registerJobMaster(
                                jobMasterGateway.getFencingToken(),
                                jobMasterResourceId,
                                jobMasterGateway.getAddress(),
                                jobId,
                                TIMEOUT);
                assertThat(
                        registrationFuture.get(),
                        instanceOf(RegistrationResponse.Success.class));
            },
            resourceManagerResourceId ->
                    assertThat(disconnectFuture.get(), is(equalTo(resourceManagerId))));
}
Also used : CompletableFuture(java.util.concurrent.CompletableFuture) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 2 with SettableLeaderRetrievalService

use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in project flink by apache.

the class ResourceManagerTest method testDisconnectJobManager.

/**
 * Registers a job master at a newly built and started resource manager, disconnects it with
 * the given job status, and checks whether the job leader id service's remove-job callback
 * fires.
 *
 * @param jobStatus status handed to {@code disconnectJobManager}; the job is expected to be
 *     removed only when this status is globally terminal
 */
private void testDisconnectJobManager(JobStatus jobStatus) throws Exception {
    final JobID jobId = JobID.generate();

    final TestingJobMasterGateway jobMasterGateway =
            new TestingJobMasterGatewayBuilder()
                    .setAddress(UUID.randomUUID().toString())
                    .build();
    rpcService.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);

    // latches observing the add/remove callbacks of the job leader id service
    final OneShotLatch addedLatch = new OneShotLatch();
    final OneShotLatch removedLatch = new OneShotLatch();
    final JobLeaderIdService jobLeaderIdService =
            TestingJobLeaderIdService.newBuilder()
                    .setAddJobConsumer(ignored -> addedLatch.trigger())
                    .setRemoveJobConsumer(ignored -> removedLatch.trigger())
                    .build();

    resourceManager =
            new ResourceManagerBuilder()
                    .withJobLeaderIdService(jobLeaderIdService)
                    .buildAndStart();
    highAvailabilityServices.setJobMasterLeaderRetrieverFunction(
            requestedJobId ->
                    new SettableLeaderRetrievalService(
                            jobMasterGateway.getAddress(),
                            jobMasterGateway.getFencingToken().toUUID()));

    final ResourceManagerGateway resourceManagerGateway =
            resourceManager.getSelfGateway(ResourceManagerGateway.class);
    resourceManagerGateway.registerJobMaster(
            jobMasterGateway.getFencingToken(),
            ResourceID.generate(),
            jobMasterGateway.getAddress(),
            jobId,
            TIMEOUT);
    // wait until the job has been added before disconnecting it again
    addedLatch.await();

    resourceManagerGateway.disconnectJobManager(
            jobId, jobStatus, new FlinkException("Test exception"));

    if (jobStatus.isGloballyTerminalState()) {
        removedLatch.await();
        return;
    }

    // job should not get removed
    try {
        removedLatch.await(10L, TimeUnit.MILLISECONDS);
        fail("We should not have removed the job.");
    } catch (TimeoutException expected) {
        // intentionally empty: timing out means the remove callback was not invoked
    }
}
Also used : RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) ResourceRequirement(org.apache.flink.runtime.slots.ResourceRequirement) TimeoutException(java.util.concurrent.TimeoutException) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) After(org.junit.After) Matchers.nullValue(org.hamcrest.Matchers.nullValue) TestLogger(org.apache.flink.util.TestLogger) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) Assert.fail(org.junit.Assert.fail) AfterClass(org.junit.AfterClass) UUID(java.util.UUID) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) TestingUtils(org.apache.flink.testutils.TestingUtils) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Matchers.is(org.hamcrest.Matchers.is) Matchers.anyOf(org.hamcrest.Matchers.anyOf) Time(org.apache.flink.api.common.time.Time) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) FlinkException(org.apache.flink.util.FlinkException) BeforeClass(org.junit.BeforeClass) TaskExecutorMemoryConfiguration(org.apache.flink.runtime.taskexecutor.TaskExecutorMemoryConfiguration) CompletableFuture(java.util.concurrent.CompletableFuture) JobStatus(org.apache.flink.api.common.JobStatus) Function(java.util.function.Function) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) DeclarativeSlotManagerBuilder(org.apache.flink.runtime.resourcemanager.slotmanager.DeclarativeSlotManagerBuilder) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) 
LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) ResourceManagerException(org.apache.flink.runtime.resourcemanager.exceptions.ResourceManagerException) NoOpResourceManagerPartitionTracker(org.apache.flink.runtime.io.network.partition.NoOpResourceManagerPartitionTracker) SlotManager(org.apache.flink.runtime.resourcemanager.slotmanager.SlotManager) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) ResourceRequirements(org.apache.flink.runtime.slots.ResourceRequirements) ThrowingConsumer(org.apache.flink.util.function.ThrowingConsumer) Before(org.junit.Before) Matchers.empty(org.hamcrest.Matchers.empty) TestingLeaderElectionService(org.apache.flink.runtime.leaderelection.TestingLeaderElectionService) HardwareDescription(org.apache.flink.runtime.instance.HardwareDescription) TaskManagerInfo(org.apache.flink.runtime.rest.messages.taskmanager.TaskManagerInfo) Test(org.junit.Test) TaskExecutorThreadInfoGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorThreadInfoGateway) RpcUtils(org.apache.flink.runtime.rpc.RpcUtils) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) JobID(org.apache.flink.api.common.JobID) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) TestingSlotManagerBuilder(org.apache.flink.runtime.resourcemanager.slotmanager.TestingSlotManagerBuilder) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) RecipientUnreachableException(org.apache.flink.runtime.rpc.exceptions.RecipientUnreachableException) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) 
SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) JobID(org.apache.flink.api.common.JobID) FlinkException(org.apache.flink.util.FlinkException) TimeoutException(java.util.concurrent.TimeoutException)

Example 3 with SettableLeaderRetrievalService

use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in project flink by apache.

the class ResourceManagerTest method testHeartbeatTimeoutWithJobMaster.

/**
 * Registers a job master with the resource manager inside {@code runHeartbeatTimeoutTest} and
 * asserts that the job master's disconnect callback receives this resource manager's id.
 */
@Test
public void testHeartbeatTimeoutWithJobMaster() throws Exception {
    final JobID jobId = new JobID();
    final ResourceID jobMasterResourceId = ResourceID.generate();

    // records which resource id (if any) sent a heartbeat request to the job master
    final CompletableFuture<ResourceID> heartbeatOriginFuture = new CompletableFuture<>();
    // completed by the testing gateway's disconnect-resource-manager callback
    final CompletableFuture<ResourceManagerId> disconnectedFromFuture =
            new CompletableFuture<>();

    final TestingJobMasterGateway jobMasterGateway =
            new TestingJobMasterGatewayBuilder()
                    .setResourceManagerHeartbeatFunction(
                            origin -> {
                                heartbeatOriginFuture.complete(origin);
                                return FutureUtils.completedVoidFuture();
                            })
                    .setDisconnectResourceManagerConsumer(disconnectedFromFuture::complete)
                    .build();
    rpcService.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);

    final LeaderRetrievalService jobMasterLeaderRetrievalService =
            new SettableLeaderRetrievalService(
                    jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());
    highAvailabilityServices.setJobMasterLeaderRetrieverFunction(
            requestedJobId -> {
                assertThat(requestedJobId, is(equalTo(jobId)));
                return jobMasterLeaderRetrievalService;
            });

    runHeartbeatTimeoutTest(
            ignored -> {},
            resourceManagerGateway -> {
                final CompletableFuture<RegistrationResponse> registration =
                        resourceManagerGateway.registerJobMaster(
                                jobMasterGateway.getFencingToken(),
                                jobMasterResourceId,
                                jobMasterGateway.getAddress(),
                                jobId,
                                TIMEOUT);
                assertThat(registration.get(), instanceOf(RegistrationResponse.Success.class));
            },
            resourceManagerResourceId -> {
                // the heartbeat request may or may not have arrived yet, depending on
                // whether the timeout fired first, so a missing origin is acceptable
                final ResourceID observedOrigin = heartbeatOriginFuture.getNow(null);
                assertThat(
                        observedOrigin,
                        anyOf(is(resourceManagerResourceId), is(nullValue())));
                assertThat(disconnectedFromFuture.get(), is(equalTo(resourceManagerId)));
            });
}
Also used : CompletableFuture(java.util.concurrent.CompletableFuture) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) RegistrationResponse(org.apache.flink.runtime.registration.RegistrationResponse) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 4 with SettableLeaderRetrievalService

use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in project flink by apache.

the class DefaultJobLeaderIdServiceTest method testLeaderFutureWaitsForValidLeader.

/**
 * Checks that the leader id future handed out by the service stays incomplete while no leader
 * is known, i.e. after a leader was announced and then revoked, and that it completes with the
 * id of the next leader that gets announced.
 */
@Test(timeout = 10000)
public void testLeaderFutureWaitsForValidLeader() throws Exception {
    final JobID jobId = new JobID();
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    final SettableLeaderRetrievalService retrievalService =
            new SettableLeaderRetrievalService(null, null);
    haServices.setJobMasterLeaderRetriever(jobId, retrievalService);

    final JobLeaderIdService service =
            new DefaultJobLeaderIdService(
                    haServices,
                    new ManuallyTriggeredScheduledExecutor(),
                    Time.milliseconds(5000L));
    service.start(new NoOpJobLeaderIdActions());
    service.addJob(jobId);

    // announce an initial leader ...
    retrievalService.notifyListener("foo", UUID.randomUUID());
    // ... and revoke it again without announcing a successor
    retrievalService.notifyListener(null, null);

    final CompletableFuture<JobMasterId> leaderIdFuture = service.getLeaderId(jobId);
    // no leader is known at this point, so the future has to be pending
    final boolean completedWithoutLeader = leaderIdFuture.isDone();
    assertThat(completedWithoutLeader, is(false));

    // announcing a new leader must complete the pending future with that leader's id
    final UUID newLeaderId = UUID.randomUUID();
    retrievalService.notifyListener("foo", newLeaderId);
    final JobMasterId retrievedLeaderId = leaderIdFuture.get();
    assertThat(retrievedLeaderId, is(JobMasterId.fromUuidOrNull(newLeaderId)));
}
Also used : TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) UUID(java.util.UUID) JobID(org.apache.flink.api.common.JobID) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) Test(org.junit.Test)

Example 5 with SettableLeaderRetrievalService

use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in project flink by apache.

the class DefaultJobLeaderIdServiceTest method testIsStarted.

/**
 * Tests that {@code isStarted()} reports {@code false} before the service is started, {@code
 * true} after {@code start(...)}, and {@code false} again after {@code stop()}.
 */
@Test
public void testIsStarted() throws Exception {
    final JobID jobId = new JobID();
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    haServices.setJobMasterLeaderRetriever(
            jobId, new SettableLeaderRetrievalService(null, null));

    final JobLeaderIdActions actions = mock(JobLeaderIdActions.class);
    final DefaultJobLeaderIdService service =
            new DefaultJobLeaderIdService(
                    haServices, mock(ScheduledExecutor.class), Time.milliseconds(5000L));

    // freshly constructed: not started yet
    assertFalse(service.isStarted());

    service.start(actions);
    assertTrue(service.isStarted());

    service.stop();
    assertFalse(service.isStarted());
}
Also used : TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) Time(org.apache.flink.api.common.time.Time) JobID(org.apache.flink.api.common.JobID) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) ScheduledExecutor(org.apache.flink.util.concurrent.ScheduledExecutor) Test(org.junit.Test)

Aggregations

SettableLeaderRetrievalService (org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService)23 TestingHighAvailabilityServices (org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices)17 JobID (org.apache.flink.api.common.JobID)16 Test (org.junit.Test)16 UUID (java.util.UUID)10 TestingJobMasterGatewayBuilder (org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder)10 CompletableFuture (java.util.concurrent.CompletableFuture)9 TestingJobMasterGateway (org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway)9 JobMasterId (org.apache.flink.runtime.jobmaster.JobMasterId)8 Time (org.apache.flink.api.common.time.Time)7 ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue)6 BlockingQueue (java.util.concurrent.BlockingQueue)6 TimeUnit (java.util.concurrent.TimeUnit)6 OneShotLatch (org.apache.flink.core.testutils.OneShotLatch)6 LocalUnresolvedTaskManagerLocation (org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation)6 TestLogger (org.apache.flink.util.TestLogger)6 ManuallyTriggeredScheduledExecutor (org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor)6 Before (org.junit.Before)6 Configuration (org.apache.flink.configuration.Configuration)5 ScheduledExecutor (org.apache.flink.util.concurrent.ScheduledExecutor)5