Search in sources:

Example 6 with TestingHighAvailabilityServicesBuilder

use of org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder in project flink by apache.

the class ZooKeeperDefaultDispatcherRunnerTest method testResourceCleanupUnderLeadershipChange.

/**
 * Verifies that job resources are cleaned up when dispatcher leadership changes.
 *
 * <p>The test submits a job, revokes and re-grants leadership, cancels the job through the new
 * dispatcher gateway and revokes leadership again. Afterwards it waits until the ZooKeeper job
 * graph store reports no job ids and asserts that the HA storage directory is empty.
 *
 * <p>See FLINK-11665.
 */
@Test
public void testResourceCleanupUnderLeadershipChange() throws Exception {
    final TestingRpcService rpcService = testingRpcServiceResource.getTestingRpcService();
    final TestingLeaderElectionService dispatcherLeaderElectionService = new TestingLeaderElectionService();
    // The Curator client is declared as the first resource so that it is closed last, i.e. only
    // after the HA services (and the dispatcher runner nested below) have been closed.
    // Previously the client was started but never closed.
    try (final CuratorFramework client = ZooKeeperUtils.startCuratorFramework(configuration, fatalErrorHandler).asCuratorFramework();
            final TestingHighAvailabilityServices highAvailabilityServices = new TestingHighAvailabilityServicesBuilder()
                    .setDispatcherLeaderElectionService(dispatcherLeaderElectionService)
                    .setJobMasterLeaderRetrieverFunction(jobId -> ZooKeeperUtils.createLeaderRetrievalService(client))
                    .build()) {
        final PartialDispatcherServices partialDispatcherServices = new PartialDispatcherServices(configuration, highAvailabilityServices, CompletableFuture::new, blobServer, new TestingHeartbeatServices(), UnregisteredMetricGroups::createUnregisteredJobManagerMetricGroup, new MemoryExecutionGraphInfoStore(), fatalErrorHandler, VoidHistoryServerArchivist.INSTANCE, null, ForkJoinPool.commonPool(), new DispatcherOperationCaches());
        final DefaultDispatcherRunnerFactory defaultDispatcherRunnerFactory = DefaultDispatcherRunnerFactory.createSessionRunner(SessionDispatcherFactory.INSTANCE);
        try (final DispatcherRunner dispatcherRunner = createDispatcherRunner(rpcService, dispatcherLeaderElectionService, new JobPersistenceComponentFactory() {

            @Override
            public JobGraphStore createJobGraphStore() {
                // job graphs go to ZooKeeper through the shared client
                return createZooKeeperJobGraphStore(client);
            }

            @Override
            public JobResultStore createJobResultStore() {
                return new EmbeddedJobResultStore();
            }
        }, partialDispatcherServices, defaultDispatcherRunnerFactory)) {
            // initial run
            DispatcherGateway dispatcherGateway = grantLeadership(dispatcherLeaderElectionService);
            final JobGraph jobGraph = createJobGraphWithBlobs();
            LOG.info("Initial job submission {}.", jobGraph.getJobID());
            dispatcherGateway.submitJob(jobGraph, TESTING_TIMEOUT).get();
            dispatcherLeaderElectionService.notLeader();
            // recovering submitted jobs
            LOG.info("Re-grant leadership first time.");
            dispatcherGateway = grantLeadership(dispatcherLeaderElectionService);
            LOG.info("Cancel recovered job {}.", jobGraph.getJobID());
            // cancellation of the job should remove everything; request the result before
            // cancelling so that the future is already registered
            final CompletableFuture<JobResult> jobResultFuture = dispatcherGateway.requestJobResult(jobGraph.getJobID(), TESTING_TIMEOUT);
            dispatcherGateway.cancelJob(jobGraph.getJobID(), TESTING_TIMEOUT).get();
            // a successful cancellation should eventually remove all job information
            final JobResult jobResult = jobResultFuture.get();
            assertThat(jobResult.getApplicationStatus(), is(ApplicationStatus.CANCELED));
            dispatcherLeaderElectionService.notLeader();
            // check that the job has been removed from ZooKeeper
            final JobGraphStore submittedJobGraphStore = createZooKeeperJobGraphStore(client);
            CommonTestUtils.waitUntilCondition(() -> submittedJobGraphStore.getJobIds().isEmpty(), Deadline.fromNow(VERIFICATION_TIMEOUT), 20L);
        }
    }
    // check resource clean up
    assertThat(clusterHaStorageDir.listFiles(), is(emptyArray()));
}
Also used : ZooKeeperUtils(org.apache.flink.runtime.util.ZooKeeperUtils) Deadline(org.apache.flink.api.common.time.Deadline) PartialDispatcherServices(org.apache.flink.runtime.dispatcher.PartialDispatcherServices) Matchers.emptyArray(org.hamcrest.Matchers.emptyArray) JobPersistenceComponentFactory(org.apache.flink.runtime.jobmanager.JobPersistenceComponentFactory) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) LoggerFactory(org.slf4j.LoggerFactory) ExceptionUtils(org.apache.flink.util.ExceptionUtils) Assert.assertThat(org.junit.Assert.assertThat) DispatcherOperationCaches(org.apache.flink.runtime.dispatcher.DispatcherOperationCaches) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) After(org.junit.After) Duration(java.time.Duration) TestLogger(org.apache.flink.util.TestLogger) ClassRule(org.junit.ClassRule) HighAvailabilityServicesUtils(org.apache.flink.runtime.highavailability.HighAvailabilityServicesUtils) UUID(java.util.UUID) TestingUtils(org.apache.flink.testutils.TestingUtils) Matchers.is(org.hamcrest.Matchers.is) JobResultStore(org.apache.flink.runtime.highavailability.JobResultStore) JobGraphStore(org.apache.flink.runtime.jobmanager.JobGraphStore) Time(org.apache.flink.api.common.time.Time) DispatcherId(org.apache.flink.runtime.dispatcher.DispatcherId) BlobServer(org.apache.flink.runtime.blob.BlobServer) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) LeaderConnectionInfo(org.apache.flink.runtime.util.LeaderConnectionInfo) CuratorFramework(org.apache.flink.shaded.curator5.org.apache.curator.framework.CuratorFramework) VoidHistoryServerArchivist(org.apache.flink.runtime.dispatcher.VoidHistoryServerArchivist) EmbeddedJobResultStore(org.apache.flink.runtime.highavailability.nonha.embedded.EmbeddedJobResultStore) CompletableFuture(java.util.concurrent.CompletableFuture) DispatcherGateway(org.apache.flink.runtime.dispatcher.DispatcherGateway) 
JobResult(org.apache.flink.runtime.jobmaster.JobResult) PermanentBlobKey(org.apache.flink.runtime.blob.PermanentBlobKey) JobGraphTestUtils(org.apache.flink.runtime.jobgraph.JobGraphTestUtils) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) Before(org.junit.Before) TestingLeaderElectionService(org.apache.flink.runtime.leaderelection.TestingLeaderElectionService) Logger(org.slf4j.Logger) ApplicationStatus(org.apache.flink.runtime.clusterframework.ApplicationStatus) Configuration(org.apache.flink.configuration.Configuration) TestingHeartbeatServices(org.apache.flink.runtime.heartbeat.TestingHeartbeatServices) Test(org.junit.Test) IOException(java.io.IOException) File(java.io.File) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) ForkJoinPool(java.util.concurrent.ForkJoinPool) MemoryExecutionGraphInfoStore(org.apache.flink.runtime.dispatcher.MemoryExecutionGraphInfoStore) TestingRpcServiceResource(org.apache.flink.runtime.rpc.TestingRpcServiceResource) BlobUtils(org.apache.flink.runtime.blob.BlobUtils) SessionDispatcherFactory(org.apache.flink.runtime.dispatcher.SessionDispatcherFactory) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) ZooKeeperResource(org.apache.flink.runtime.zookeeper.ZooKeeperResource) HighAvailabilityOptions(org.apache.flink.configuration.HighAvailabilityOptions) TemporaryFolder(org.junit.rules.TemporaryFolder) NoOpInvokable(org.apache.flink.runtime.testtasks.NoOpInvokable) TestingLeaderElectionService(org.apache.flink.runtime.leaderelection.TestingLeaderElectionService) PartialDispatcherServices(org.apache.flink.runtime.dispatcher.PartialDispatcherServices) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) JobResult(org.apache.flink.runtime.jobmaster.JobResult) 
JobGraphStore(org.apache.flink.runtime.jobmanager.JobGraphStore) JobResultStore(org.apache.flink.runtime.highavailability.JobResultStore) EmbeddedJobResultStore(org.apache.flink.runtime.highavailability.nonha.embedded.EmbeddedJobResultStore) EmbeddedJobResultStore(org.apache.flink.runtime.highavailability.nonha.embedded.EmbeddedJobResultStore) DispatcherGateway(org.apache.flink.runtime.dispatcher.DispatcherGateway) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) JobPersistenceComponentFactory(org.apache.flink.runtime.jobmanager.JobPersistenceComponentFactory) CuratorFramework(org.apache.flink.shaded.curator5.org.apache.curator.framework.CuratorFramework) CompletableFuture(java.util.concurrent.CompletableFuture) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TestingHeartbeatServices(org.apache.flink.runtime.heartbeat.TestingHeartbeatServices) DispatcherOperationCaches(org.apache.flink.runtime.dispatcher.DispatcherOperationCaches) MemoryExecutionGraphInfoStore(org.apache.flink.runtime.dispatcher.MemoryExecutionGraphInfoStore) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) Test(org.junit.Test)

Example 7 with TestingHighAvailabilityServicesBuilder

use of org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder in project flink by apache.

the class ClusterEntrypointTest method testClusterFinishedNormallyShouldDeregisterAppAndCleanupHAData.

/**
 * Starts a testing cluster entrypoint whose resource manager, on initialization, completes the
 * dispatcher shut-down future with {@code SUCCEEDED}. Expects the entrypoint to report
 * {@code SUCCEEDED}, and expects both the application deregistration future and the HA
 * close-and-cleanup future to be done afterwards.
 */
@Test
public void testClusterFinishedNormallyShouldDeregisterAppAndCleanupHAData() throws Exception {
    // futures used to observe what the entrypoint triggers
    final CompletableFuture<Void> applicationDeregistered = new CompletableFuture<>();
    final CompletableFuture<Void> haDataCleanedUp = new CompletableFuture<>();
    final CompletableFuture<ApplicationStatus> dispatcherTerminated = new CompletableFuture<>();

    final HighAvailabilityServices haServices =
            new TestingHighAvailabilityServicesBuilder()
                    .setCloseAndCleanupAllDataFuture(haDataCleanedUp)
                    .build();

    final TestingResourceManagerFactory resourceManagerFactory =
            new TestingResourceManagerFactory.Builder()
                    .setInternalDeregisterApplicationConsumer(
                            (unused1, unused2, unused3) -> applicationDeregistered.complete(null))
                    // initializing the resource manager ends the dispatcher successfully
                    .setInitializeConsumer(
                            unused -> dispatcherTerminated.complete(ApplicationStatus.SUCCEEDED))
                    .build();

    final TestingDispatcherRunnerFactory dispatcherRunnerFactory =
            new TestingDispatcherRunnerFactory.Builder()
                    .setShutDownFuture(dispatcherTerminated)
                    .build();

    final TestingEntryPoint entryPoint =
            new TestingEntryPoint.Builder()
                    .setConfiguration(flinkConfig)
                    .setResourceManagerFactory(resourceManagerFactory)
                    .setDispatcherRunnerFactory(dispatcherRunnerFactory)
                    .setHighAvailabilityServices(haServices)
                    .build();

    final CompletableFuture<ApplicationStatus> clusterResult = startClusterEntrypoint(entryPoint);

    assertThat(clusterResult.get(TIMEOUT_MS, TimeUnit.MILLISECONDS), is(ApplicationStatus.SUCCEEDED));
    assertThat(applicationDeregistered.isDone(), is(true));
    assertThat(haDataCleanedUp.isDone(), is(true));
}
Also used : CoreMatchers.is(org.hamcrest.CoreMatchers.is) PartialDispatcherServices(org.apache.flink.runtime.dispatcher.PartialDispatcherServices) JobPersistenceComponentFactory(org.apache.flink.runtime.jobmanager.JobPersistenceComponentFactory) ClusterOptions(org.apache.flink.configuration.ClusterOptions) RpcSystemUtils(org.apache.flink.runtime.rpc.RpcSystemUtils) TestingResourceManagerFactory(org.apache.flink.runtime.resourcemanager.TestingResourceManagerFactory) TestingClusterEntrypointProcess(org.apache.flink.runtime.testutils.TestingClusterEntrypointProcess) TestLogger(org.apache.flink.util.TestLogger) Assert.fail(org.junit.Assert.fail) ClassRule(org.junit.ClassRule) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) ScheduledExecutor(org.apache.flink.util.concurrent.ScheduledExecutor) DispatcherRunner(org.apache.flink.runtime.dispatcher.runner.DispatcherRunner) UUID(java.util.UUID) JobManagerOptions(org.apache.flink.configuration.JobManagerOptions) Executors(java.util.concurrent.Executors) ResourceManagerFactory(org.apache.flink.runtime.resourcemanager.ResourceManagerFactory) Assert.assertFalse(org.junit.Assert.assertFalse) Assume.assumeTrue(org.junit.Assume.assumeTrue) TestJvmProcess(org.apache.flink.runtime.testutils.TestJvmProcess) SchedulerExecutionMode(org.apache.flink.configuration.SchedulerExecutionMode) IllegalConfigurationException(org.apache.flink.configuration.IllegalConfigurationException) LeaderElectionService(org.apache.flink.runtime.leaderelection.LeaderElectionService) CompletableFuture(java.util.concurrent.CompletableFuture) DispatcherRunnerFactory(org.apache.flink.runtime.dispatcher.runner.DispatcherRunnerFactory) RpcService(org.apache.flink.runtime.rpc.RpcService) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) ExecutionGraphInfoStore(org.apache.flink.runtime.dispatcher.ExecutionGraphInfoStore) 
DefaultDispatcherResourceManagerComponentFactory(org.apache.flink.runtime.entrypoint.component.DefaultDispatcherResourceManagerComponentFactory) FatalErrorHandler(org.apache.flink.runtime.rpc.FatalErrorHandler) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) Before(org.junit.Before) DispatcherResourceManagerComponentFactory(org.apache.flink.runtime.entrypoint.component.DispatcherResourceManagerComponentFactory) TestExecutorResource(org.apache.flink.testutils.executor.TestExecutorResource) Executor(java.util.concurrent.Executor) ApplicationStatus(org.apache.flink.runtime.clusterframework.ApplicationStatus) TestingDispatcherRunner(org.apache.flink.runtime.dispatcher.runner.TestingDispatcherRunner) Configuration(org.apache.flink.configuration.Configuration) SessionRestEndpointFactory(org.apache.flink.runtime.rest.SessionRestEndpointFactory) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) IOException(java.io.IOException) SignalHandler(org.apache.flink.runtime.util.SignalHandler) OperatingSystem(org.apache.flink.util.OperatingSystem) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) MemoryExecutionGraphInfoStore(org.apache.flink.runtime.dispatcher.MemoryExecutionGraphInfoStore) StandaloneResourceManagerFactory(org.apache.flink.runtime.resourcemanager.StandaloneResourceManagerFactory) TemporaryFolder(org.junit.rules.TemporaryFolder) TestingResourceManagerFactory(org.apache.flink.runtime.resourcemanager.TestingResourceManagerFactory) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) CompletableFuture(java.util.concurrent.CompletableFuture) 
HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) ApplicationStatus(org.apache.flink.runtime.clusterframework.ApplicationStatus) Test(org.junit.Test)

Example 8 with TestingHighAvailabilityServicesBuilder

use of org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder in project flink by apache.

the class DefaultJobLeaderServiceTest method handlesConcurrentJobAdditionsAndLeaderChanges.

/**
 * Tests that we can concurrently modify the JobLeaderService and complete the leader retrieval
 * operation. See FLINK-16373.
 *
 * <p>A background thread repeatedly adds and removes freshly generated jobs, while the test
 * thread notifies every leader retrieval service handed out by the HA services about a new
 * leader. Any exception raised in the background thread is surfaced by
 * {@code addJobAction.sync()}.
 */
@Test
public void handlesConcurrentJobAdditionsAndLeaderChanges() throws Exception {
    final JobLeaderService jobLeaderService = new DefaultJobLeaderService(new LocalUnresolvedTaskManagerLocation(), RetryingRegistrationConfiguration.defaultConfiguration());
    final TestingJobLeaderListener jobLeaderListener = new TestingJobLeaderListener();
    final int numberOperations = 20;
    // sized to hold one retrieval service per add operation, so offer() below has room for each
    final BlockingQueue<SettableLeaderRetrievalService> instantiatedLeaderRetrievalServices = new ArrayBlockingQueue<>(numberOperations);
    final HighAvailabilityServices haServices = new TestingHighAvailabilityServicesBuilder().setJobMasterLeaderRetrieverFunction(leaderForJobId -> {
        final SettableLeaderRetrievalService leaderRetrievalService = new SettableLeaderRetrievalService();
        instantiatedLeaderRetrievalServices.offer(leaderRetrievalService);
        return leaderRetrievalService;
    }).build();
    jobLeaderService.start("foobar", rpcServiceResource.getTestingRpcService(), haServices, jobLeaderListener);
    // Stop the service once the test is over, matching the other tests of this class;
    // previously the started service was never stopped.
    try {
        final CheckedThread addJobAction = new CheckedThread() {

            @Override
            public void go() throws Exception {
                for (int i = 0; i < numberOperations; i++) {
                    final JobID jobId = JobID.generate();
                    jobLeaderService.addJob(jobId, "foobar");
                    // give the notifying thread a chance to interleave with the removal
                    Thread.yield();
                    jobLeaderService.removeJob(jobId);
                }
            }
        };
        addJobAction.start();
        for (int i = 0; i < numberOperations; i++) {
            final SettableLeaderRetrievalService leaderRetrievalService = instantiatedLeaderRetrievalServices.take();
            leaderRetrievalService.notifyListener("foobar", UUID.randomUUID());
        }
        addJobAction.sync();
    } finally {
        jobLeaderService.stop();
    }
}
Also used : CoreMatchers.is(org.hamcrest.CoreMatchers.is) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) FlinkException(org.apache.flink.util.FlinkException) TimeoutException(java.util.concurrent.TimeoutException) JMTMRegistrationSuccess(org.apache.flink.runtime.jobmaster.JMTMRegistrationSuccess) CompletableFuture(java.util.concurrent.CompletableFuture) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) Assert.assertThat(org.junit.Assert.assertThat) CheckedThread(org.apache.flink.core.testutils.CheckedThread) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TestLogger(org.apache.flink.util.TestLogger) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) Assert.fail(org.junit.Assert.fail) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) RetryingRegistrationConfiguration(org.apache.flink.runtime.registration.RetryingRegistrationConfiguration) JMTMRegistrationRejection(org.apache.flink.runtime.jobmaster.JMTMRegistrationRejection) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) Test(org.junit.Test) BlockingQueue(java.util.concurrent.BlockingQueue) UUID(java.util.UUID) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) CountDownLatch(java.util.concurrent.CountDownLatch) JobID(org.apache.flink.api.common.JobID) Rule(org.junit.Rule) TestingRpcServiceResource(org.apache.flink.runtime.rpc.TestingRpcServiceResource) 
TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) CheckedThread(org.apache.flink.core.testutils.CheckedThread) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 9 with TestingHighAvailabilityServicesBuilder

use of org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder in project flink by apache.

the class DefaultJobLeaderServiceTest method doesNotReconnectAfterTargetLostLeadership.

/**
 * Ensures that the JobLeaderService does not attempt to reconnect to a JobMaster once that
 * JobMaster has lost its leadership. See FLINK-16836.
 *
 * <p>The test announces a leader, waits until the listener observes the gained leadership,
 * revokes the leadership, waits for the listener to observe the loss, and only then requests a
 * reconnect for the job.
 */
@Test
public void doesNotReconnectAfterTargetLostLeadership() throws Exception {
    final JobID jobId = new JobID();
    final SettableLeaderRetrievalService retrievalService = new SettableLeaderRetrievalService();
    final TestingHighAvailabilityServices haServices =
            new TestingHighAvailabilityServicesBuilder()
                    .setJobMasterLeaderRetrieverFunction(unusedJobId -> retrievalService)
                    .build();
    final TestingJobMasterGateway gateway = registerJobMaster();
    final OneShotLatch leadershipGained = new OneShotLatch();
    final TestingJobLeaderListener leaderListener =
            new TestingJobLeaderListener(unusedJobId -> leadershipGained.trigger());
    final JobLeaderService service = createAndStartJobLeaderService(haServices, leaderListener);
    try {
        final String jobMasterAddress = gateway.getAddress();
        service.addJob(jobId, jobMasterAddress);

        // announce the job master as leader and wait until the listener has seen it
        retrievalService.notifyListener(jobMasterAddress, UUID.randomUUID());
        leadershipGained.await();

        // revoke the leadership
        retrievalService.notifyListener(null, null);
        leaderListener.waitUntilJobManagerLostLeadership();

        service.reconnect(jobId);
    } finally {
        service.stop();
    }
}
Also used : CoreMatchers.is(org.hamcrest.CoreMatchers.is) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) FlinkException(org.apache.flink.util.FlinkException) TimeoutException(java.util.concurrent.TimeoutException) JMTMRegistrationSuccess(org.apache.flink.runtime.jobmaster.JMTMRegistrationSuccess) CompletableFuture(java.util.concurrent.CompletableFuture) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) Assert.assertThat(org.junit.Assert.assertThat) CheckedThread(org.apache.flink.core.testutils.CheckedThread) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TestLogger(org.apache.flink.util.TestLogger) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) Assert.fail(org.junit.Assert.fail) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) RetryingRegistrationConfiguration(org.apache.flink.runtime.registration.RetryingRegistrationConfiguration) JMTMRegistrationRejection(org.apache.flink.runtime.jobmaster.JMTMRegistrationRejection) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) Test(org.junit.Test) BlockingQueue(java.util.concurrent.BlockingQueue) UUID(java.util.UUID) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) CountDownLatch(java.util.concurrent.CountDownLatch) JobID(org.apache.flink.api.common.JobID) Rule(org.junit.Rule) TestingRpcServiceResource(org.apache.flink.runtime.rpc.TestingRpcServiceResource) 
TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) JobID(org.apache.flink.api.common.JobID) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) Test(org.junit.Test)

Example 10 with TestingHighAvailabilityServicesBuilder

use of org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder in project flink by apache.

the class DefaultJobLeaderServiceTest method canReconnectToOldLeaderWithSameLeaderAddress.

/**
 * Ensures that the JobLeaderService can connect again to a previous leader that appeared to
 * lose its leadership in the meantime. See FLINK-14316.
 *
 * <p>The same address and leader session id are announced twice, with a revocation in between;
 * the listener must report the job as having a leader both times.
 */
@Test
public void canReconnectToOldLeaderWithSameLeaderAddress() throws Exception {
    final JobID jobId = new JobID();
    final SettableLeaderRetrievalService retrievalService = new SettableLeaderRetrievalService();
    final TestingHighAvailabilityServices haServices =
            new TestingHighAvailabilityServicesBuilder()
                    .setJobMasterLeaderRetrieverFunction(unusedJobId -> retrievalService)
                    .build();
    final TestingJobMasterGateway gateway = registerJobMaster();
    final BlockingQueue<JobID> gainedLeaderships = new ArrayBlockingQueue<>(1);
    final TestingJobLeaderListener leaderListener = new TestingJobLeaderListener(gainedLeaderships::offer);
    final JobLeaderService service = createAndStartJobLeaderService(haServices, leaderListener);
    try {
        final String jobMasterAddress = gateway.getAddress();
        final UUID sessionId = UUID.randomUUID();
        service.addJob(jobId, jobMasterAddress);

        // wait for the first leadership
        retrievalService.notifyListener(jobMasterAddress, sessionId);
        assertThat(gainedLeaderships.take(), is(jobId));

        // revoke the leadership
        retrievalService.notifyListener(null, null);
        leaderListener.waitUntilJobManagerLostLeadership();

        // check that we obtain the leadership a second time with identical leader information
        retrievalService.notifyListener(jobMasterAddress, sessionId);
        assertThat(gainedLeaderships.take(), is(jobId));
    } finally {
        service.stop();
    }
}
Also used : CoreMatchers.is(org.hamcrest.CoreMatchers.is) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) FlinkException(org.apache.flink.util.FlinkException) TimeoutException(java.util.concurrent.TimeoutException) JMTMRegistrationSuccess(org.apache.flink.runtime.jobmaster.JMTMRegistrationSuccess) CompletableFuture(java.util.concurrent.CompletableFuture) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) Assert.assertThat(org.junit.Assert.assertThat) CheckedThread(org.apache.flink.core.testutils.CheckedThread) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) TestLogger(org.apache.flink.util.TestLogger) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) Assert.fail(org.junit.Assert.fail) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) RetryingRegistrationConfiguration(org.apache.flink.runtime.registration.RetryingRegistrationConfiguration) JMTMRegistrationRejection(org.apache.flink.runtime.jobmaster.JMTMRegistrationRejection) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) LocalUnresolvedTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation) JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) Test(org.junit.Test) BlockingQueue(java.util.concurrent.BlockingQueue) UUID(java.util.UUID) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) CountDownLatch(java.util.concurrent.CountDownLatch) JobID(org.apache.flink.api.common.JobID) Rule(org.junit.Rule) TestingRpcServiceResource(org.apache.flink.runtime.rpc.TestingRpcServiceResource) 
TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) SettableLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService) UUID(java.util.UUID) JobID(org.apache.flink.api.common.JobID) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) Test(org.junit.Test)

Aggregations

- TestingHighAvailabilityServicesBuilder (org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder): 11
- CompletableFuture (java.util.concurrent.CompletableFuture): 9
- Test (org.junit.Test): 9
- TestLogger (org.apache.flink.util.TestLogger): 8
- UUID (java.util.UUID): 7
- HighAvailabilityServices (org.apache.flink.runtime.highavailability.HighAvailabilityServices): 7
- TestingHighAvailabilityServices (org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices): 7
- TestingRpcServiceResource (org.apache.flink.runtime.rpc.TestingRpcServiceResource): 7
- Assert.assertThat (org.junit.Assert.assertThat): 7
- ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue): 6
- BlockingQueue (java.util.concurrent.BlockingQueue): 6
- TimeUnit (java.util.concurrent.TimeUnit): 6
- JobID (org.apache.flink.api.common.JobID): 6
- OneShotLatch (org.apache.flink.core.testutils.OneShotLatch): 6
- TestingJobMasterGateway (org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway): 6
- TestingJobMasterGatewayBuilder (org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder): 6
- SettableLeaderRetrievalService (org.apache.flink.runtime.leaderretrieval.SettableLeaderRetrievalService): 6
- LocalUnresolvedTaskManagerLocation (org.apache.flink.runtime.taskmanager.LocalUnresolvedTaskManagerLocation): 6
- Rule (org.junit.Rule): 6
- CountDownLatch (java.util.concurrent.CountDownLatch): 5