Search in sources:

Example 1 with JobManagerRunner

Use of org.apache.flink.runtime.jobmaster.JobManagerRunner in the Apache Flink project.

In class MiniClusterJobDispatcher, method shutdown.

// ------------------------------------------------------------------------
//  life cycle
// ------------------------------------------------------------------------
/**
 * Shuts down the mini cluster dispatcher and every job manager runner it still holds. If a
 * job is currently running, that job will be terminally failed.
 *
 * <p>Only the first invocation performs any work; later calls return immediately.
 */
public void shutdown() {
    synchronized (lock) {
        if (shutdown) {
            // an earlier call already went through the shutdown sequence
            return;
        }
        shutdown = true;
        LOG.info("Shutting down the job dispatcher");

        // work on a local copy of the reference and clear the field first,
        // to avoid concurrent modification while the runners are shut down
        final JobManagerRunner[] activeRunners = this.runners;
        if (activeRunners == null) {
            return;
        }
        this.runners = null;
        for (int i = 0; i < activeRunners.length; i++) {
            activeRunners[i].shutdown();
        }
    }
}
Also used : JobManagerRunner(org.apache.flink.runtime.jobmaster.JobManagerRunner)

Example 2 with JobManagerRunner

Use of org.apache.flink.runtime.jobmaster.JobManagerRunner in the Apache Flink project.

In class DispatcherCleanupITCase, method testCleanupNotCancellable.

/**
 * Verifies that a job whose cleanup is still in progress cannot be cancelled, and that the job
 * result is marked clean once the pending cleanup completes.
 */
@Test
public void testCleanupNotCancellable() throws Exception {
    final JobGraph jobGraph = createJobGraph();
    final JobID jobId = jobGraph.getJobID();

    // Register a dirty result for the job before the dispatcher is started.
    final JobResultStore dirtyResultStore = new EmbeddedJobResultStore();
    final JobResultEntry dirtyEntry =
            new JobResultEntry(TestingJobResultStore.createSuccessfulJobResult(jobId));
    dirtyResultStore.createDirtyResult(dirtyEntry);
    haServices.setJobResultStore(dirtyResultStore);

    // The registry's local cleanup hands out a future that this test completes manually.
    final CompletableFuture<Void> pendingCleanup = new CompletableFuture<>();
    final AtomicReference<JobManagerRunner> runnerHolder = new AtomicReference<>();
    final JobManagerRunnerRegistry runnerRegistry =
            TestingJobManagerRunnerRegistry.newSingleJobBuilder(runnerHolder)
                    .withLocalCleanupAsyncFunction((actualJobId, executor) -> pendingCleanup)
                    .build();

    final Dispatcher dispatcher =
            createTestingDispatcherBuilder()
                    .setJobManagerRunnerRegistry(runnerRegistry)
                    .build();
    dispatcher.start();
    toTerminate.add(dispatcher);

    // Wait until a runner has been stored in the holder.
    CommonTestUtils.waitUntilCondition(
            () -> runnerHolder.get() != null,
            Deadline.fromNow(Duration.ofSeconds(10)),
            "JobManagerRunner wasn't loaded in time.");
    assertThat(
            "The JobResultStore should have this job still marked as dirty.",
            haServices.getJobResultStore().hasDirtyJobResultEntry(jobId),
            CoreMatchers.is(true));

    // Cancelling while the cleanup future is still pending is expected to fail.
    final DispatcherGateway gateway = dispatcher.getSelfGateway(DispatcherGateway.class);
    try {
        gateway.cancelJob(jobId, TIMEOUT).get();
        Assert.fail("Should fail because cancelling the cleanup is not allowed.");
    } catch (ExecutionException expected) {
        assertThat(expected, FlinkMatchers.containsCause(JobCancellationFailedException.class));
    }

    // Release the cleanup and wait for the result entry to become clean.
    pendingCleanup.complete(null);
    CommonTestUtils.waitUntilCondition(
            () -> haServices.getJobResultStore().hasCleanJobResultEntry(jobId),
            Deadline.fromNow(Duration.ofSeconds(60)),
            "The JobResultStore should have this job marked as clean now.");
}
Also used : CoreMatchers(org.hamcrest.CoreMatchers) Deadline(org.apache.flink.api.common.time.Deadline) RpcEndpoint(org.apache.flink.runtime.rpc.RpcEndpoint) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) IsEqual.equalTo(org.hamcrest.core.IsEqual.equalTo) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) ExceptionUtils(org.apache.flink.util.ExceptionUtils) IsEmptyCollection(org.hamcrest.collection.IsEmptyCollection) PerJobCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.PerJobCheckpointRecoveryFactory) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) After(org.junit.After) Duration(java.time.Duration) JobCheckpointingSettings(org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings) DispatcherResourceCleanerFactory(org.apache.flink.runtime.dispatcher.cleanup.DispatcherResourceCleanerFactory) BlockingQueue(java.util.concurrent.BlockingQueue) UUID(java.util.UUID) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Collectors(java.util.stream.Collectors) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) TimeUtils(org.apache.flink.util.TimeUtils) TestingJobResultStore(org.apache.flink.runtime.testutils.TestingJobResultStore) Optional(java.util.Optional) JobResultStore(org.apache.flink.runtime.highavailability.JobResultStore) JobGraphStore(org.apache.flink.runtime.jobmanager.JobGraphStore) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) FlinkMatchers(org.apache.flink.core.testutils.FlinkMatchers) TestingJobGraphStore(org.apache.flink.runtime.testutils.TestingJobGraphStore) EmbeddedJobResultStore(org.apache.flink.runtime.highavailability.nonha.embedded.EmbeddedJobResultStore) CompletableFuture(java.util.concurrent.CompletableFuture) JobStatus(org.apache.flink.api.common.JobStatus) AtomicReference(java.util.concurrent.atomic.AtomicReference) 
TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) JobResult(org.apache.flink.runtime.jobmaster.JobResult) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Before(org.junit.Before) JobGraphBuilder(org.apache.flink.runtime.jobgraph.JobGraphBuilder) TestingLeaderElectionService(org.apache.flink.runtime.leaderelection.TestingLeaderElectionService) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) RpcUtils(org.apache.flink.runtime.rpc.RpcUtils) ExecutionException(java.util.concurrent.ExecutionException) JobResultEntry(org.apache.flink.runtime.highavailability.JobResultEntry) JobID(org.apache.flink.api.common.JobID) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) TestingRetryStrategies(org.apache.flink.runtime.dispatcher.cleanup.TestingRetryStrategies) ForkJoinPool(java.util.concurrent.ForkJoinPool) EmbeddedCompletedCheckpointStore(org.apache.flink.runtime.checkpoint.EmbeddedCompletedCheckpointStore) JobManagerRunner(org.apache.flink.runtime.jobmaster.JobManagerRunner) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) Assert(org.junit.Assert) Collections(java.util.Collections) NoOpInvokable(org.apache.flink.runtime.testtasks.NoOpInvokable) AtomicReference(java.util.concurrent.atomic.AtomicReference) TestingJobResultStore(org.apache.flink.runtime.testutils.TestingJobResultStore) JobResultStore(org.apache.flink.runtime.highavailability.JobResultStore) EmbeddedJobResultStore(org.apache.flink.runtime.highavailability.nonha.embedded.EmbeddedJobResultStore) EmbeddedJobResultStore(org.apache.flink.runtime.highavailability.nonha.embedded.EmbeddedJobResultStore) 
JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) CompletableFuture(java.util.concurrent.CompletableFuture) JobResultEntry(org.apache.flink.runtime.highavailability.JobResultEntry) ExecutionException(java.util.concurrent.ExecutionException) JobManagerRunner(org.apache.flink.runtime.jobmaster.JobManagerRunner) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 3 with JobManagerRunner

Use of org.apache.flink.runtime.jobmaster.JobManagerRunner in the Apache Flink project.

In class DispatcherCleanupITCase, method setUp.

/**
 * Runs the parent set-up and installs a per-job checkpoint recovery factory that creates
 * {@link EmbeddedCompletedCheckpointStore} instances. When a previous store is handed in, it is
 * asserted to have a shutdown status and no remaining checkpoints.
 */
@Before
public void setUp() throws Exception {
    super.setUp();
    haServices.setCheckpointRecoveryFactory(
            new PerJobCheckpointRecoveryFactory<EmbeddedCompletedCheckpointStore>(
                    (maxCheckpoints, previous, sharedStateRegistryFactory, ioExecutor) -> {
                        if (previous == null) {
                            // no earlier store: start from an empty checkpoint list
                            return new EmbeddedCompletedCheckpointStore(
                                    maxCheckpoints,
                                    Collections.emptyList(),
                                    sharedStateRegistryFactory.create(
                                            ioExecutor, Collections.emptyList()));
                        }

                        // The first job's cleanup still succeeded for the
                        // CompletedCheckpointStore because the JobGraph cleanup
                        // happens after the JobManagerRunner is closed.
                        assertTrue(previous.getShutdownStatus().isPresent());
                        assertTrue(previous.getAllCheckpoints().isEmpty());
                        return new EmbeddedCompletedCheckpointStore(
                                maxCheckpoints,
                                previous.getAllCheckpoints(),
                                sharedStateRegistryFactory.create(
                                        ioExecutor, previous.getAllCheckpoints()));
                    }));
}
Also used : CoreMatchers(org.hamcrest.CoreMatchers) Deadline(org.apache.flink.api.common.time.Deadline) RpcEndpoint(org.apache.flink.runtime.rpc.RpcEndpoint) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) IsEqual.equalTo(org.hamcrest.core.IsEqual.equalTo) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) ExceptionUtils(org.apache.flink.util.ExceptionUtils) IsEmptyCollection(org.hamcrest.collection.IsEmptyCollection) PerJobCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.PerJobCheckpointRecoveryFactory) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) After(org.junit.After) Duration(java.time.Duration) JobCheckpointingSettings(org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings) DispatcherResourceCleanerFactory(org.apache.flink.runtime.dispatcher.cleanup.DispatcherResourceCleanerFactory) BlockingQueue(java.util.concurrent.BlockingQueue) UUID(java.util.UUID) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Collectors(java.util.stream.Collectors) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) TimeUtils(org.apache.flink.util.TimeUtils) TestingJobResultStore(org.apache.flink.runtime.testutils.TestingJobResultStore) Optional(java.util.Optional) JobResultStore(org.apache.flink.runtime.highavailability.JobResultStore) JobGraphStore(org.apache.flink.runtime.jobmanager.JobGraphStore) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) FlinkMatchers(org.apache.flink.core.testutils.FlinkMatchers) TestingJobGraphStore(org.apache.flink.runtime.testutils.TestingJobGraphStore) EmbeddedJobResultStore(org.apache.flink.runtime.highavailability.nonha.embedded.EmbeddedJobResultStore) CompletableFuture(java.util.concurrent.CompletableFuture) JobStatus(org.apache.flink.api.common.JobStatus) AtomicReference(java.util.concurrent.atomic.AtomicReference) 
TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) JobResult(org.apache.flink.runtime.jobmaster.JobResult) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Before(org.junit.Before) JobGraphBuilder(org.apache.flink.runtime.jobgraph.JobGraphBuilder) TestingLeaderElectionService(org.apache.flink.runtime.leaderelection.TestingLeaderElectionService) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) RpcUtils(org.apache.flink.runtime.rpc.RpcUtils) ExecutionException(java.util.concurrent.ExecutionException) JobResultEntry(org.apache.flink.runtime.highavailability.JobResultEntry) JobID(org.apache.flink.api.common.JobID) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) TestingRetryStrategies(org.apache.flink.runtime.dispatcher.cleanup.TestingRetryStrategies) ForkJoinPool(java.util.concurrent.ForkJoinPool) EmbeddedCompletedCheckpointStore(org.apache.flink.runtime.checkpoint.EmbeddedCompletedCheckpointStore) JobManagerRunner(org.apache.flink.runtime.jobmaster.JobManagerRunner) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) Assert(org.junit.Assert) Collections(java.util.Collections) NoOpInvokable(org.apache.flink.runtime.testtasks.NoOpInvokable) EmbeddedCompletedCheckpointStore(org.apache.flink.runtime.checkpoint.EmbeddedCompletedCheckpointStore) Before(org.junit.Before)

Example 4 with JobManagerRunner

Use of org.apache.flink.runtime.jobmaster.JobManagerRunner in the Apache Flink project.

In class DispatcherResourceCleanupTest, method testGlobalCleanupWhenJobFinishedWhileClosingDispatcher.

/**
 * Verifies that global cleanup is triggered when the job's result completes as FINISHED after
 * the dispatcher has already started closing.
 */
@Test
public void testGlobalCleanupWhenJobFinishedWhileClosingDispatcher() throws Exception {
    final TestingJobManagerRunner blockingRunner =
            TestingJobManagerRunner.newBuilder()
                    .setBlockingTermination(true)
                    .setJobId(jobId)
                    .build();

    final Queue<JobManagerRunner> runnerQueue = new ArrayDeque<>();
    runnerQueue.add(blockingRunner);
    startDispatcher(new QueueJobManagerRunnerFactory(runnerQueue));
    submitJobAndWait();

    // Start closing the dispatcher and wait until the runner's closeAsync latch is released.
    final CompletableFuture<Void> dispatcherClosed = dispatcher.closeAsync();
    blockingRunner.getCloseAsyncCalledLatch().await();

    // Complete the job result as FINISHED, then let the runner terminate.
    final ExecutionGraphInfo finishedJobInfo =
            new ExecutionGraphInfo(
                    new ArchivedExecutionGraphBuilder()
                            .setJobID(jobId)
                            .setState(JobStatus.FINISHED)
                            .build());
    blockingRunner.completeResultFuture(finishedJobInfo);
    blockingRunner.completeTerminationFuture();

    // get() rethrows any failure from the dispatcher's termination
    dispatcherClosed.get();
    assertGlobalCleanupTriggered(jobId);
}
Also used : ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) ArchivedExecutionGraphBuilder(org.apache.flink.runtime.rest.handler.legacy.utils.ArchivedExecutionGraphBuilder) TestingJobManagerRunner(org.apache.flink.runtime.jobmaster.TestingJobManagerRunner) TestingJobManagerRunner(org.apache.flink.runtime.jobmaster.TestingJobManagerRunner) JobManagerRunner(org.apache.flink.runtime.jobmaster.JobManagerRunner) ArrayDeque(java.util.ArrayDeque) Test(org.junit.Test)

Example 5 with JobManagerRunner

Use of org.apache.flink.runtime.jobmaster.JobManagerRunner in the Apache Flink project.

In class Dispatcher, method terminateJob.

/**
 * Calls {@code closeAsync()} on the job manager runner registered for the given job. Does
 * nothing when no runner is registered for that job. The value returned by
 * {@code closeAsync()} is discarded.
 *
 * @param jobId id of the job whose runner should be closed
 */
private void terminateJob(JobID jobId) {
    if (!jobManagerRunnerRegistry.isRegistered(jobId)) {
        return;
    }
    jobManagerRunnerRegistry.get(jobId).closeAsync();
}
Also used : JobManagerRunner(org.apache.flink.runtime.jobmaster.JobManagerRunner)

Aggregations

JobManagerRunner (org.apache.flink.runtime.jobmaster.JobManagerRunner)11 Optional (java.util.Optional)4 CompletableFuture (java.util.concurrent.CompletableFuture)4 JobID (org.apache.flink.api.common.JobID)4 JobStatus (org.apache.flink.api.common.JobStatus)4 Collections (java.util.Collections)3 List (java.util.List)3 Collectors (java.util.stream.Collectors)3 Test (org.junit.Test)3 Duration (java.time.Duration)2 ArrayList (java.util.ArrayList)2 UUID (java.util.UUID)2 BlockingQueue (java.util.concurrent.BlockingQueue)2 CountDownLatch (java.util.concurrent.CountDownLatch)2 ExecutionException (java.util.concurrent.ExecutionException)2 ForkJoinPool (java.util.concurrent.ForkJoinPool)2 LinkedBlockingQueue (java.util.concurrent.LinkedBlockingQueue)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)2 AtomicReference (java.util.concurrent.atomic.AtomicReference)2 Nonnull (javax.annotation.Nonnull)2