Search in sources :

Example 6 with JobGraphStore

Use of org.apache.flink.runtime.jobmanager.JobGraphStore in the Apache Flink project.

From the class SessionDispatcherLeaderProcessTest, method onRemovedJobGraph_terminatesRunningJob.

/**
 * Checks that notifying the leader process about a removed job graph is forwarded to the
 * dispatcher service's on-removed-job-graph callback with the id of the removed job.
 */
@Test
public void onRemovedJobGraph_terminatesRunningJob() throws Exception {
    // the store starts out holding exactly one job graph
    jobGraphStore =
            TestingJobGraphStore.newBuilder()
                    .setInitialJobGraphs(Collections.singleton(JOB_GRAPH))
                    .build();

    // completed by the callback with the job id the dispatcher service was told about
    final CompletableFuture<JobID> removedJobIdFuture = new CompletableFuture<>();
    final TestingDispatcherGatewayService dispatcherGatewayService =
            TestingDispatcherGatewayService.newBuilder()
                    .setOnRemovedJobGraphFunction(
                            removedJobId -> {
                                removedJobIdFuture.complete(removedJobId);
                                return FutureUtils.completedVoidFuture();
                            })
                    .build();
    dispatcherServiceFactory =
            createFactoryBasedOnGenericSupplier(() -> dispatcherGatewayService);

    final ExecutorService cleanupExecutor = Executors.newSingleThreadExecutor();
    try (final SessionDispatcherLeaderProcess leaderProcess = createDispatcherLeaderProcess()) {
        leaderProcess.start();

        // block until the dispatcher gateway future completes
        leaderProcess.getDispatcherGateway().get();

        // remove the job from the JobGraphStore first, then notify the leader process
        jobGraphStore.globalCleanupAsync(JOB_GRAPH.getJobID(), cleanupExecutor).join();
        leaderProcess.onRemovedJobGraph(JOB_GRAPH.getJobID());

        assertThat(removedJobIdFuture.get()).isEqualTo(JOB_GRAPH.getJobID());
    } finally {
        // shutdownNow() returns the tasks that never ran; none are expected
        assertThat(cleanupExecutor.shutdownNow()).isEmpty();
    }
}
Also used : OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) FlinkException(org.apache.flink.util.FlinkException) BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) JobSubmissionException(org.apache.flink.runtime.client.JobSubmissionException) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TestingJobGraphStore(org.apache.flink.runtime.testutils.TestingJobGraphStore) TimeoutException(java.util.concurrent.TimeoutException) CompletableFuture(java.util.concurrent.CompletableFuture) Function(java.util.function.Function) Supplier(java.util.function.Supplier) AfterAll(org.junit.jupiter.api.AfterAll) JobResult(org.apache.flink.runtime.jobmaster.JobResult) TestLoggerExtension(org.apache.flink.util.TestLoggerExtension) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) ExtendWith(org.junit.jupiter.api.extension.ExtendWith) Assertions.assertThatThrownBy(org.assertj.core.api.Assertions.assertThatThrownBy) BeforeAll(org.junit.jupiter.api.BeforeAll) FlinkAssertions(org.apache.flink.core.testutils.FlinkAssertions) JobGraphTestUtils(org.apache.flink.runtime.jobgraph.JobGraphTestUtils) ThrowingConsumer(org.apache.flink.util.function.ThrowingConsumer) FlinkAssertions.anyCauseMatches(org.apache.flink.core.testutils.FlinkAssertions.anyCauseMatches) ExecutorService(java.util.concurrent.ExecutorService) Collection(java.util.Collection) Set(java.util.Set) UUID(java.util.UUID) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) Executors(java.util.concurrent.Executors) ExecutorUtils(org.apache.flink.util.ExecutorUtils) Test(org.junit.jupiter.api.Test) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) AfterEach(org.junit.jupiter.api.AfterEach) JobID(org.apache.flink.api.common.JobID) TestingJobResultStore(org.apache.flink.runtime.testutils.TestingJobResultStore) 
TestingDispatcherGateway(org.apache.flink.runtime.webmonitor.TestingDispatcherGateway) JobResultStore(org.apache.flink.runtime.highavailability.JobResultStore) STREAM_THROWABLE(org.apache.flink.core.testutils.FlinkAssertions.STREAM_THROWABLE) JobGraphStore(org.apache.flink.runtime.jobmanager.JobGraphStore) Collections(java.util.Collections) DuplicateJobSubmissionException(org.apache.flink.runtime.client.DuplicateJobSubmissionException) CompletableFuture(java.util.concurrent.CompletableFuture) ExecutorService(java.util.concurrent.ExecutorService) JobID(org.apache.flink.api.common.JobID) Test(org.junit.jupiter.api.Test)

Example 7 with JobGraphStore

Use of org.apache.flink.runtime.jobmanager.JobGraphStore in the Apache Flink project.

From the class ZooKeeperDefaultDispatcherRunnerTest, method testResourceCleanupUnderLeadershipChange.

/**
 * Verifies that job resources are cleaned up when dispatcher leadership changes. See
 * FLINK-11665.
 *
 * <p>The test submits a job, revokes and re-grants leadership, cancels the job through the new
 * gateway, revokes leadership again, and then checks that a ZooKeeper-backed job graph store
 * reports no job ids and that {@code clusterHaStorageDir} contains no files.
 *
 * @throws Exception if any of the blocking calls or resource closings fail
 */
@Test
public void testResourceCleanupUnderLeadershipChange() throws Exception {
    final TestingRpcService rpcService = testingRpcServiceResource.getTestingRpcService();
    final TestingLeaderElectionService dispatcherLeaderElectionService = new TestingLeaderElectionService();
    // The Curator client is Closeable; manage it with try-with-resources so that it is released
    // on every path (success, assertion failure, exception) instead of leaking past the test.
    try (final CuratorFramework client = ZooKeeperUtils.startCuratorFramework(configuration, fatalErrorHandler).asCuratorFramework()) {
        try (final TestingHighAvailabilityServices highAvailabilityServices = new TestingHighAvailabilityServicesBuilder().setDispatcherLeaderElectionService(dispatcherLeaderElectionService).setJobMasterLeaderRetrieverFunction(jobId -> ZooKeeperUtils.createLeaderRetrievalService(client)).build()) {
            final PartialDispatcherServices partialDispatcherServices = new PartialDispatcherServices(configuration, highAvailabilityServices, CompletableFuture::new, blobServer, new TestingHeartbeatServices(), UnregisteredMetricGroups::createUnregisteredJobManagerMetricGroup, new MemoryExecutionGraphInfoStore(), fatalErrorHandler, VoidHistoryServerArchivist.INSTANCE, null, ForkJoinPool.commonPool(), new DispatcherOperationCaches());
            final DefaultDispatcherRunnerFactory defaultDispatcherRunnerFactory = DefaultDispatcherRunnerFactory.createSessionRunner(SessionDispatcherFactory.INSTANCE);
            try (final DispatcherRunner dispatcherRunner = createDispatcherRunner(rpcService, dispatcherLeaderElectionService, new JobPersistenceComponentFactory() {

                @Override
                public JobGraphStore createJobGraphStore() {
                    // job graphs are persisted through the shared Curator client
                    return createZooKeeperJobGraphStore(client);
                }

                @Override
                public JobResultStore createJobResultStore() {
                    return new EmbeddedJobResultStore();
                }
            }, partialDispatcherServices, defaultDispatcherRunnerFactory)) {
                // initial run
                DispatcherGateway dispatcherGateway = grantLeadership(dispatcherLeaderElectionService);
                final JobGraph jobGraph = createJobGraphWithBlobs();
                LOG.info("Initial job submission {}.", jobGraph.getJobID());
                dispatcherGateway.submitJob(jobGraph, TESTING_TIMEOUT).get();
                dispatcherLeaderElectionService.notLeader();
                // recovering submitted jobs
                LOG.info("Re-grant leadership first time.");
                dispatcherGateway = grantLeadership(dispatcherLeaderElectionService);
                LOG.info("Cancel recovered job {}.", jobGraph.getJobID());
                // cancellation of the job should remove everything
                // (the result is requested before cancelling and awaited afterwards)
                final CompletableFuture<JobResult> jobResultFuture = dispatcherGateway.requestJobResult(jobGraph.getJobID(), TESTING_TIMEOUT);
                dispatcherGateway.cancelJob(jobGraph.getJobID(), TESTING_TIMEOUT).get();
                // a successful cancellation should eventually remove all job information
                final JobResult jobResult = jobResultFuture.get();
                assertThat(jobResult.getApplicationStatus(), is(ApplicationStatus.CANCELED));
                dispatcherLeaderElectionService.notLeader();
                // check that the job has been removed from ZooKeeper
                final JobGraphStore submittedJobGraphStore = createZooKeeperJobGraphStore(client);
                CommonTestUtils.waitUntilCondition(() -> submittedJobGraphStore.getJobIds().isEmpty(), Deadline.fromNow(VERIFICATION_TIMEOUT), 20L);
            }
        }
        // check resource clean up (runs after the HA services have been closed, as before)
        assertThat(clusterHaStorageDir.listFiles(), is(emptyArray()));
    }
}
Also used : ZooKeeperUtils(org.apache.flink.runtime.util.ZooKeeperUtils) Deadline(org.apache.flink.api.common.time.Deadline) PartialDispatcherServices(org.apache.flink.runtime.dispatcher.PartialDispatcherServices) Matchers.emptyArray(org.hamcrest.Matchers.emptyArray) JobPersistenceComponentFactory(org.apache.flink.runtime.jobmanager.JobPersistenceComponentFactory) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) LoggerFactory(org.slf4j.LoggerFactory) ExceptionUtils(org.apache.flink.util.ExceptionUtils) Assert.assertThat(org.junit.Assert.assertThat) DispatcherOperationCaches(org.apache.flink.runtime.dispatcher.DispatcherOperationCaches) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) After(org.junit.After) Duration(java.time.Duration) TestLogger(org.apache.flink.util.TestLogger) ClassRule(org.junit.ClassRule) HighAvailabilityServicesUtils(org.apache.flink.runtime.highavailability.HighAvailabilityServicesUtils) UUID(java.util.UUID) TestingUtils(org.apache.flink.testutils.TestingUtils) Matchers.is(org.hamcrest.Matchers.is) JobResultStore(org.apache.flink.runtime.highavailability.JobResultStore) JobGraphStore(org.apache.flink.runtime.jobmanager.JobGraphStore) Time(org.apache.flink.api.common.time.Time) DispatcherId(org.apache.flink.runtime.dispatcher.DispatcherId) BlobServer(org.apache.flink.runtime.blob.BlobServer) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) LeaderConnectionInfo(org.apache.flink.runtime.util.LeaderConnectionInfo) CuratorFramework(org.apache.flink.shaded.curator5.org.apache.curator.framework.CuratorFramework) VoidHistoryServerArchivist(org.apache.flink.runtime.dispatcher.VoidHistoryServerArchivist) EmbeddedJobResultStore(org.apache.flink.runtime.highavailability.nonha.embedded.EmbeddedJobResultStore) CompletableFuture(java.util.concurrent.CompletableFuture) DispatcherGateway(org.apache.flink.runtime.dispatcher.DispatcherGateway) 
JobResult(org.apache.flink.runtime.jobmaster.JobResult) PermanentBlobKey(org.apache.flink.runtime.blob.PermanentBlobKey) JobGraphTestUtils(org.apache.flink.runtime.jobgraph.JobGraphTestUtils) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) Before(org.junit.Before) TestingLeaderElectionService(org.apache.flink.runtime.leaderelection.TestingLeaderElectionService) Logger(org.slf4j.Logger) ApplicationStatus(org.apache.flink.runtime.clusterframework.ApplicationStatus) Configuration(org.apache.flink.configuration.Configuration) TestingHeartbeatServices(org.apache.flink.runtime.heartbeat.TestingHeartbeatServices) Test(org.junit.Test) IOException(java.io.IOException) File(java.io.File) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) ForkJoinPool(java.util.concurrent.ForkJoinPool) MemoryExecutionGraphInfoStore(org.apache.flink.runtime.dispatcher.MemoryExecutionGraphInfoStore) TestingRpcServiceResource(org.apache.flink.runtime.rpc.TestingRpcServiceResource) BlobUtils(org.apache.flink.runtime.blob.BlobUtils) SessionDispatcherFactory(org.apache.flink.runtime.dispatcher.SessionDispatcherFactory) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) ZooKeeperResource(org.apache.flink.runtime.zookeeper.ZooKeeperResource) HighAvailabilityOptions(org.apache.flink.configuration.HighAvailabilityOptions) TemporaryFolder(org.junit.rules.TemporaryFolder) NoOpInvokable(org.apache.flink.runtime.testtasks.NoOpInvokable) TestingLeaderElectionService(org.apache.flink.runtime.leaderelection.TestingLeaderElectionService) PartialDispatcherServices(org.apache.flink.runtime.dispatcher.PartialDispatcherServices) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) JobResult(org.apache.flink.runtime.jobmaster.JobResult) 
JobGraphStore(org.apache.flink.runtime.jobmanager.JobGraphStore) JobResultStore(org.apache.flink.runtime.highavailability.JobResultStore) EmbeddedJobResultStore(org.apache.flink.runtime.highavailability.nonha.embedded.EmbeddedJobResultStore) EmbeddedJobResultStore(org.apache.flink.runtime.highavailability.nonha.embedded.EmbeddedJobResultStore) DispatcherGateway(org.apache.flink.runtime.dispatcher.DispatcherGateway) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) JobPersistenceComponentFactory(org.apache.flink.runtime.jobmanager.JobPersistenceComponentFactory) CuratorFramework(org.apache.flink.shaded.curator5.org.apache.curator.framework.CuratorFramework) CompletableFuture(java.util.concurrent.CompletableFuture) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TestingHeartbeatServices(org.apache.flink.runtime.heartbeat.TestingHeartbeatServices) DispatcherOperationCaches(org.apache.flink.runtime.dispatcher.DispatcherOperationCaches) MemoryExecutionGraphInfoStore(org.apache.flink.runtime.dispatcher.MemoryExecutionGraphInfoStore) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) Test(org.junit.Test)

Aggregations

JobGraphStore (org.apache.flink.runtime.jobmanager.JobGraphStore)7 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)6 UUID (java.util.UUID)5 CompletableFuture (java.util.concurrent.CompletableFuture)5 JobID (org.apache.flink.api.common.JobID)5 OneShotLatch (org.apache.flink.core.testutils.OneShotLatch)5 JobResultStore (org.apache.flink.runtime.highavailability.JobResultStore)5 JobResult (org.apache.flink.runtime.jobmaster.JobResult)5 TestingJobGraphStore (org.apache.flink.runtime.testutils.TestingJobGraphStore)5 TestingJobResultStore (org.apache.flink.runtime.testutils.TestingJobResultStore)5 Collections (java.util.Collections)4 TestingLeaderElectionService (org.apache.flink.runtime.leaderelection.TestingLeaderElectionService)4 FutureUtils (org.apache.flink.util.concurrent.FutureUtils)4 Test (org.junit.Test)4 Duration (java.time.Duration)3 ForkJoinPool (java.util.concurrent.ForkJoinPool)3 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)3 Deadline (org.apache.flink.api.common.time.Deadline)3 DispatcherResourceCleanerFactory (org.apache.flink.runtime.dispatcher.cleanup.DispatcherResourceCleanerFactory)3 EmbeddedJobResultStore (org.apache.flink.runtime.highavailability.nonha.embedded.EmbeddedJobResultStore)3