Search in sources:

Example 16 with SettableLeaderRetrievalService

use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in project flink by apache.

the class DefaultJobLeaderIdServiceTest method jobTimeoutAfterLostLeadership.

/**
 * Verifies the timeout bookkeeping of the job leader id service around leadership changes:
 * the timeout scheduled when the job is added must be cancelled once a job leader has been
 * found, and a new timeout must be scheduled after the job manager has lost leadership.
 */
@Test(timeout = 10000)
public void jobTimeoutAfterLostLeadership() throws Exception {
    final JobID jobId = new JobID();
    final String leaderAddress = "foobar";
    final JobMasterId jobMasterId = JobMasterId.generate();

    // Leader retrieval is driven manually through a settable service.
    final SettableLeaderRetrievalService retrievalService = new SettableLeaderRetrievalService(null, null);
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    haServices.setJobMasterLeaderRetriever(jobId, retrievalService);

    // The mocked executor hands out these futures in order, one per schedule() call.
    final ScheduledFuture<?> firstTimeout = mock(ScheduledFuture.class);
    final ScheduledFuture<?> secondTimeout = mock(ScheduledFuture.class);
    final Queue<ScheduledFuture<?>> pendingTimeouts = new ArrayDeque<>();
    pendingTimeouts.add(firstTimeout);
    pendingTimeouts.add(secondTimeout);

    // Capture the most recently scheduled runnable so the test can fire it by hand.
    final AtomicReference<Runnable> scheduledRunnable = new AtomicReference<>();
    final ScheduledExecutor executor = mock(ScheduledExecutor.class);
    doAnswer(invocation -> {
        scheduledRunnable.set((Runnable) invocation.getArguments()[0]);
        return pendingTimeouts.poll();
    }).when(executor).schedule(any(Runnable.class), anyLong(), any(TimeUnit.class));

    // Capture the timeout id reported with the most recent job timeout notification.
    final AtomicReference<UUID> reportedTimeoutId = new AtomicReference<>();
    final JobLeaderIdActions actions = mock(JobLeaderIdActions.class);
    doAnswer(invocation -> {
        reportedTimeoutId.set((UUID) invocation.getArguments()[1]);
        return null;
    }).when(actions).notifyJobTimeout(eq(jobId), any(UUID.class));

    final Time jobTimeout = Time.milliseconds(5000L);
    final JobLeaderIdService service = new DefaultJobLeaderIdService(haServices, executor, jobTimeout);
    service.start(actions);
    service.addJob(jobId);
    final CompletableFuture<JobMasterId> leaderIdFuture = service.getLeaderId(jobId);

    // Announce the new leader to the service.
    retrievalService.notifyListener(leaderAddress, jobMasterId.toUUID());
    assertEquals(jobMasterId, leaderIdFuture.get());
    assertTrue(service.containsJob(jobId));

    // Finding a leader must cancel the first timeout; only one schedule() call so far.
    verify(firstTimeout, times(1)).cancel(anyBoolean());
    verify(executor, times(1)).schedule(any(Runnable.class), anyLong(), any(TimeUnit.class));

    // Fire the initial timeout runnable: it still notifies, but with a stale timeout id.
    Runnable timeoutTask = scheduledRunnable.get();
    assertNotNull(timeoutTask);
    timeoutTask.run();
    verify(actions, times(1)).notifyJobTimeout(eq(jobId), any(UUID.class));
    assertFalse(service.isValidTimeout(jobId, reportedTimeoutId.get()));

    // Revoke leadership; this must schedule a second timeout.
    retrievalService.notifyListener("", null);
    verify(executor, times(2)).schedule(any(Runnable.class), anyLong(), any(TimeUnit.class));

    // Fire the newly captured runnable: its timeout id must be accepted as valid.
    timeoutTask = scheduledRunnable.get();
    assertNotNull(timeoutTask);
    timeoutTask.run();
    verify(actions, times(2)).notifyJobTimeout(eq(jobId), any(UUID.class));
    assertTrue(service.isValidTimeout(jobId, reportedTimeoutId.get()));
}
Also used : Time(org.apache.flink.api.common.time.Time) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) ScheduledExecutor(org.apache.flink.util.concurrent.ScheduledExecutor) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TimeUnit(java.util.concurrent.TimeUnit) UUID(java.util.UUID) AtomicReference(java.util.concurrent.atomic.AtomicReference) ScheduledFuture(java.util.concurrent.ScheduledFuture) ArrayDeque(java.util.ArrayDeque) Answer(org.mockito.stubbing.Answer) Mockito.doAnswer(org.mockito.Mockito.doAnswer) JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) InvocationOnMock(org.mockito.invocation.InvocationOnMock) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 17 with SettableLeaderRetrievalService

use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in project flink by apache.

the class LeaderGatewayRetrieverTest method testGatewayRetrievalFailures.

/**
 * Checks that the leader gateway retriever retries the gateway retrieval after a failed
 * attempt instead of giving up.
 */
@Test
public void testGatewayRetrievalFailures() throws Exception {
    final UUID leaderSessionId = UUID.randomUUID();
    final String leaderAddress = "localhost";
    final RpcGateway expectedGateway = mock(RpcGateway.class);

    final TestingLeaderGatewayRetriever retriever = new TestingLeaderGatewayRetriever(expectedGateway);
    final SettableLeaderRetrievalService retrievalService = new SettableLeaderRetrievalService();
    retrievalService.start(retriever);

    // Grab the future before the leader is announced; it belongs to the first attempt.
    final CompletableFuture<RpcGateway> firstAttempt = retriever.getFuture();

    // Announcing the leader kicks off the first gateway retrieval attempt.
    retrievalService.notifyListener(leaderAddress, leaderSessionId);

    // The first attempt is expected to complete exceptionally.
    boolean firstAttemptFailed = false;
    try {
        firstAttempt.get();
    } catch (ExecutionException ignored) {
        // that's what we expect
        firstAttemptFailed = true;
    }
    if (!firstAttemptFailed) {
        fail("The first future should have been failed.");
    }

    // Second attempt: still no gateway available.
    // NOTE(review): the retry-per-getNow() behaviour lives in TestingLeaderGatewayRetriever,
    // which is not visible here - confirm against that class.
    assertFalse((retriever.getNow().isPresent()));

    // Third attempt: the gateway must now be resolved.
    assertEquals(expectedGateway, retriever.getNow().get());
}
Also used : RpcGateway(org.apache.flink.runtime.rpc.RpcGateway) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) UUID(java.util.UUID) ExecutionException(java.util.concurrent.ExecutionException) Test(org.junit.Test)

Example 18 with SettableLeaderRetrievalService

use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in project flink by apache.

the class AbstractDispatcherTest method setUp.

/**
 * Creates fresh fixtures before every test: configuration, heartbeat services, testing
 * high-availability services wired with standalone/embedded stores, and a blob server
 * backed by a new temporary folder.
 */
@Before
public void setUp() throws Exception {
    configuration = new Configuration();
    heartbeatServices = new HeartbeatServices(1000L, 10000L);

    // High-availability services with a manually controllable resource manager leader.
    haServices = new TestingHighAvailabilityServices();
    haServices.setResourceManagerLeaderRetriever(new SettableLeaderRetrievalService());
    haServices.setCheckpointRecoveryFactory(new StandaloneCheckpointRecoveryFactory());
    haServices.setJobGraphStore(new StandaloneJobGraphStore());
    haServices.setJobResultStore(new EmbeddedJobResultStore());

    // Each test gets its own blob storage directory.
    blobServer = new BlobServer(configuration, temporaryFolder.newFolder(), new VoidBlobStore());
}
Also used : HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) StandaloneJobGraphStore(org.apache.flink.runtime.jobmanager.StandaloneJobGraphStore) VoidBlobStore(org.apache.flink.runtime.blob.VoidBlobStore) StandaloneCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.StandaloneCheckpointRecoveryFactory) Configuration(org.apache.flink.configuration.Configuration) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) BlobServer(org.apache.flink.runtime.blob.BlobServer) EmbeddedJobResultStore(org.apache.flink.runtime.highavailability.nonha.embedded.EmbeddedJobResultStore) Before(org.junit.Before)

Example 19 with SettableLeaderRetrievalService

use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in project flink by apache.

the class JobMasterTest method setup.

/**
 * Creates fresh fixtures before every test: ids, fatal error handler, testing
 * high-availability services with a settable resource manager leader retrieval service,
 * and a configuration whose blob storage directory points at a new temporary folder.
 *
 * @throws IOException if the temporary folder cannot be created
 */
@Before
public void setup() throws IOException {
    jobMasterId = JobMasterId.generate();
    jmResourceId = ResourceID.generate();
    testingFatalErrorHandler = new TestingFatalErrorHandler();

    // High-availability services; the resource manager leader is controlled by the test.
    rmLeaderRetrievalService = new SettableLeaderRetrievalService(null, null);
    haServices = new TestingHighAvailabilityServices();
    haServices.setCheckpointRecoveryFactory(new StandaloneCheckpointRecoveryFactory());
    haServices.setResourceManagerLeaderRetriever(rmLeaderRetrievalService);

    // Each test gets its own blob storage directory.
    configuration = new Configuration();
    configuration.setString(BlobServerOptions.STORAGE_DIRECTORY, temporaryFolder.newFolder().getAbsolutePath());
}
Also used : TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) StandaloneCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.StandaloneCheckpointRecoveryFactory) Configuration(org.apache.flink.configuration.Configuration) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) Before(org.junit.Before)

Example 20 with SettableLeaderRetrievalService

use of org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService in project flink by apache.

the class DefaultJobLeaderServiceTest method handlesConcurrentJobAdditionsAndLeaderChanges.

/**
 * Exercises the JobLeaderService under concurrent modification: one thread keeps adding and
 * removing jobs while the test thread completes the leader retrieval for every job that was
 * added. See FLINK-16373.
 */
@Test
public void handlesConcurrentJobAdditionsAndLeaderChanges() throws Exception {
    final int numberOperations = 20;

    // Every retrieval service handed out by the HA services is published to the test thread.
    final BlockingQueue<SettableLeaderRetrievalService> createdRetrievalServices = new ArrayBlockingQueue<>(numberOperations);
    final TestingHighAvailabilityServicesBuilder haServicesBuilder = new TestingHighAvailabilityServicesBuilder();
    haServicesBuilder.setJobMasterLeaderRetrieverFunction(requestedJobId -> {
        final SettableLeaderRetrievalService created = new SettableLeaderRetrievalService();
        createdRetrievalServices.offer(created);
        return created;
    });
    final HighAvailabilityServices haServices = haServicesBuilder.build();

    final TestingJobLeaderListener listener = new TestingJobLeaderListener();
    final JobLeaderService jobLeaderService = new DefaultJobLeaderService(new LocalUnresolvedTaskManagerLocation(), RetryingRegistrationConfiguration.defaultConfiguration());
    jobLeaderService.start("foobar", rpcServiceResource.getTestingRpcService(), haServices, listener);

    // Worker thread: add a job, give the other thread a chance to run, remove it again.
    final CheckedThread jobChurner = new CheckedThread() {

        @Override
        public void go() throws Exception {
            for (int remaining = numberOperations; remaining > 0; remaining--) {
                final JobID churnedJobId = JobID.generate();
                jobLeaderService.addJob(churnedJobId, "foobar");
                Thread.yield();
                jobLeaderService.removeJob(churnedJobId);
            }
        }
    };
    jobChurner.start();

    // Test thread: announce a leader on every retrieval service as soon as it shows up.
    int announced = 0;
    while (announced < numberOperations) {
        final SettableLeaderRetrievalService pending = createdRetrievalServices.take();
        pending.notifyListener("foobar", UUID.randomUUID());
        announced++;
    }

    // Wait for the worker thread to finish.
    jobChurner.sync();
}
Also used : CoreMatchers.is(org.hamcrest.CoreMatchers.is) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) FlinkException(org.apache.flink.util.FlinkException) TimeoutException(java.util.concurrent.TimeoutException) JMTMRegistrationSuccess(org.apache.flink.runtime.jobmaster.JMTMRegistrationSuccess) CompletableFuture(java.util.concurrent.CompletableFuture) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) Assert.assertThat(org.junit.Assert.assertThat) CheckedThread(org.apache.flink.core.testutils.CheckedThread) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TestLogger(org.apache.flink.util.TestLogger) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) Assert.fail(org.junit.Assert.fail) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) RetryingRegistrationConfiguration(org.apache.flink.runtime.registration.RetryingRegistrationConfiguration) JMTMRegistrationRejection(org.apache.flink.runtime.jobmaster.JMTMRegistrationRejection) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) Test(org.junit.Test) BlockingQueue(java.util.concurrent.BlockingQueue) UUID(java.util.UUID) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) CountDownLatch(java.util.concurrent.CountDownLatch) JobID(org.apache.flink.api.common.JobID) Rule(org.junit.Rule) TestingRpcServiceResource(org.apache.flink.runtime.rpc.TestingRpcServiceResource) 
TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) CheckedThread(org.apache.flink.core.testutils.CheckedThread) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

SettableLeaderRetrievalService (org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService)23 TestingHighAvailabilityServices (org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices)17 JobID (org.apache.flink.api.common.JobID)16 Test (org.junit.Test)16 UUID (java.util.UUID)10 TestingJobMasterGatewayBuilder (org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder)10 CompletableFuture (java.util.concurrent.CompletableFuture)9 TestingJobMasterGateway (org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway)9 JobMasterId (org.apache.flink.runtime.jobmaster.JobMasterId)8 Time (org.apache.flink.api.common.time.Time)7 ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue)6 BlockingQueue (java.util.concurrent.BlockingQueue)6 TimeUnit (java.util.concurrent.TimeUnit)6 OneShotLatch (org.apache.flink.core.testutils.OneShotLatch)6 LocalUnresolvedTaskManagerLocation (org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation)6 TestLogger (org.apache.flink.util.TestLogger)6 ManuallyTriggeredScheduledExecutor (org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor)6 Before (org.junit.Before)6 Configuration (org.apache.flink.configuration.Configuration)5 ScheduledExecutor (org.apache.flink.util.concurrent.ScheduledExecutor)5