Search in sources :

Example 6 with JobResultStore

use of org.apache.flink.runtime.highavailability.JobResultStore in project flink by apache.

the class DispatcherCleanupITCase method testCleanupAfterLeadershipChange.

@Test
public void testCleanupAfterLeadershipChange() throws Exception {
    final JobGraph jobGraph = createJobGraph();
    final JobID jobId = jobGraph.getJobID();
    // Construct job graph store.
    final AtomicInteger actualGlobalCleanupCallCount = new AtomicInteger();
    final OneShotLatch successfulCleanupLatch = new OneShotLatch();
    final RuntimeException temporaryError = new RuntimeException("Unable to remove job graph.");
    final JobGraphStore jobGraphStore = createAndStartJobGraphStoreWithCleanupFailures(1, temporaryError, actualGlobalCleanupCallCount, successfulCleanupLatch);
    haServices.setJobGraphStore(jobGraphStore);
    // Construct leader election service.
    final TestingLeaderElectionService leaderElectionService = new TestingLeaderElectionService();
    haServices.setJobMasterLeaderElectionService(jobId, leaderElectionService);
    // start the dispatcher with no retries on cleanup
    final CountDownLatch jobGraphRemovalErrorReceived = new CountDownLatch(1);
    final Dispatcher dispatcher = createTestingDispatcherBuilder().setFatalErrorHandler(throwable -> {
        final Optional<Throwable> maybeError = ExceptionUtils.findThrowable(throwable, temporaryError::equals);
        if (maybeError.isPresent()) {
            jobGraphRemovalErrorReceived.countDown();
        } else {
            testingFatalErrorHandlerResource.getFatalErrorHandler().onFatalError(throwable);
        }
    }).build();
    dispatcher.start();
    toTerminate.add(dispatcher);
    leaderElectionService.isLeader(UUID.randomUUID());
    final DispatcherGateway dispatcherGateway = dispatcher.getSelfGateway(DispatcherGateway.class);
    dispatcherGateway.submitJob(jobGraph, TIMEOUT).get();
    waitForJobToFinish(leaderElectionService, dispatcherGateway, jobId);
    jobGraphRemovalErrorReceived.await();
    // Remove job master leadership.
    leaderElectionService.notLeader();
    // This will clear internal state of election service, so a new contender can register.
    leaderElectionService.stop();
    assertThat(successfulCleanupLatch.isTriggered(), CoreMatchers.is(false));
    assertThat("The JobGraph is still stored in the JobGraphStore.", haServices.getJobGraphStore().getJobIds(), CoreMatchers.is(Collections.singleton(jobId)));
    assertThat("The JobResultStore has this job marked as dirty.", haServices.getJobResultStore().getDirtyResults().stream().map(JobResult::getJobId).collect(Collectors.toSet()), CoreMatchers.is(Collections.singleton(jobId)));
    // Run a second dispatcher, that restores our finished job.
    final Dispatcher secondDispatcher = createTestingDispatcherBuilder().setRecoveredDirtyJobs(haServices.getJobResultStore().getDirtyResults()).build();
    secondDispatcher.start();
    toTerminate.add(secondDispatcher);
    leaderElectionService.isLeader(UUID.randomUUID());
    CommonTestUtils.waitUntilCondition(() -> haServices.getJobResultStore().getDirtyResults().isEmpty(), Deadline.fromNow(TimeUtils.toDuration(TIMEOUT)));
    assertThat("The JobGraph is not stored in the JobGraphStore.", haServices.getJobGraphStore().getJobIds(), IsEmptyCollection.empty());
    assertTrue("The JobResultStore has the job listed as clean.", haServices.getJobResultStore().hasJobResultEntry(jobId));
    // wait for the successful cleanup to be triggered
    successfulCleanupLatch.await();
    assertThat(actualGlobalCleanupCallCount.get(), equalTo(2));
}
Also used : CoreMatchers(org.hamcrest.CoreMatchers) Deadline(org.apache.flink.api.common.time.Deadline) RpcEndpoint(org.apache.flink.runtime.rpc.RpcEndpoint) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) IsEqual.equalTo(org.hamcrest.core.IsEqual.equalTo) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) ExceptionUtils(org.apache.flink.util.ExceptionUtils) IsEmptyCollection(org.hamcrest.collection.IsEmptyCollection) PerJobCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.PerJobCheckpointRecoveryFactory) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) After(org.junit.After) Duration(java.time.Duration) JobCheckpointingSettings(org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings) DispatcherResourceCleanerFactory(org.apache.flink.runtime.dispatcher.cleanup.DispatcherResourceCleanerFactory) BlockingQueue(java.util.concurrent.BlockingQueue) UUID(java.util.UUID) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Collectors(java.util.stream.Collectors) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) TimeUtils(org.apache.flink.util.TimeUtils) TestingJobResultStore(org.apache.flink.runtime.testutils.TestingJobResultStore) Optional(java.util.Optional) JobResultStore(org.apache.flink.runtime.highavailability.JobResultStore) JobGraphStore(org.apache.flink.runtime.jobmanager.JobGraphStore) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) FlinkMatchers(org.apache.flink.core.testutils.FlinkMatchers) TestingJobGraphStore(org.apache.flink.runtime.testutils.TestingJobGraphStore) EmbeddedJobResultStore(org.apache.flink.runtime.highavailability.nonha.embedded.EmbeddedJobResultStore) CompletableFuture(java.util.concurrent.CompletableFuture) JobStatus(org.apache.flink.api.common.JobStatus) AtomicReference(java.util.concurrent.atomic.AtomicReference) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) JobResult(org.apache.flink.runtime.jobmaster.JobResult) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Before(org.junit.Before) JobGraphBuilder(org.apache.flink.runtime.jobgraph.JobGraphBuilder) TestingLeaderElectionService(org.apache.flink.runtime.leaderelection.TestingLeaderElectionService) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) RpcUtils(org.apache.flink.runtime.rpc.RpcUtils) ExecutionException(java.util.concurrent.ExecutionException) JobResultEntry(org.apache.flink.runtime.highavailability.JobResultEntry) JobID(org.apache.flink.api.common.JobID) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) TestingRetryStrategies(org.apache.flink.runtime.dispatcher.cleanup.TestingRetryStrategies) ForkJoinPool(java.util.concurrent.ForkJoinPool) EmbeddedCompletedCheckpointStore(org.apache.flink.runtime.checkpoint.EmbeddedCompletedCheckpointStore) JobManagerRunner(org.apache.flink.runtime.jobmaster.JobManagerRunner) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) Assert(org.junit.Assert) Collections(java.util.Collections) NoOpInvokable(org.apache.flink.runtime.testtasks.NoOpInvokable) TestingLeaderElectionService(org.apache.flink.runtime.leaderelection.TestingLeaderElectionService) Optional(java.util.Optional) JobResult(org.apache.flink.runtime.jobmaster.JobResult) JobGraphStore(org.apache.flink.runtime.jobmanager.JobGraphStore) TestingJobGraphStore(org.apache.flink.runtime.testutils.TestingJobGraphStore) CountDownLatch(java.util.concurrent.CountDownLatch) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 7 with JobResultStore

use of org.apache.flink.runtime.highavailability.JobResultStore in project flink by apache.

the class DispatcherResourceCleanupTest method testErrorHandlingIfJobCannotBeMarkedAsCleanInJobResultStore.

@Test
public void testErrorHandlingIfJobCannotBeMarkedAsCleanInJobResultStore() throws Exception {
    final CompletableFuture<JobResultEntry> dirtyJobFuture = new CompletableFuture<>();
    final JobResultStore jobResultStore = TestingJobResultStore.builder().withCreateDirtyResultConsumer(dirtyJobFuture::complete).withMarkResultAsCleanConsumer(jobId -> {
        throw new IOException("Expected IOException.");
    }).build();
    final TestingJobManagerRunnerFactory jobManagerRunnerFactory = startDispatcherAndSubmitJob(createTestingDispatcherBuilder().setJobResultStore(jobResultStore), 0);
    ArchivedExecutionGraph executionGraph = new ArchivedExecutionGraphBuilder().setJobID(jobId).setState(JobStatus.FINISHED).build();
    final TestingJobManagerRunner testingJobManagerRunner = jobManagerRunnerFactory.takeCreatedJobManagerRunner();
    testingJobManagerRunner.completeResultFuture(new ExecutionGraphInfo(executionGraph));
    final CompletableFuture<? extends Throwable> errorFuture = this.testingFatalErrorHandlerResource.getFatalErrorHandler().getErrorFuture();
    try {
        final Throwable unexpectedError = errorFuture.get(100, TimeUnit.MILLISECONDS);
        fail("No error should have been reported but an " + unexpectedError.getClass() + " was handled.");
    } catch (TimeoutException e) {
    // expected
    }
    assertThat(dirtyJobFuture.get().getJobId(), is(jobId));
}
Also used : Arrays(java.util.Arrays) JobSubmissionException(org.apache.flink.runtime.client.JobSubmissionException) ThrowingRunnable(org.apache.flink.util.function.ThrowingRunnable) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TimeoutException(java.util.concurrent.TimeoutException) ExceptionUtils(org.apache.flink.util.ExceptionUtils) JobManagerJobMetricGroupFactory(org.apache.flink.runtime.jobmaster.factories.JobManagerJobMetricGroupFactory) Assert.assertThat(org.junit.Assert.assertThat) After(org.junit.After) TestLogger(org.apache.flink.util.TestLogger) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) TestingFatalErrorHandlerResource(org.apache.flink.runtime.util.TestingFatalErrorHandlerResource) Assert.fail(org.junit.Assert.fail) TestingJobManagerRunner(org.apache.flink.runtime.jobmaster.TestingJobManagerRunner) ClassRule(org.junit.ClassRule) AfterClass(org.junit.AfterClass) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) FlinkMatchers.containsCause(org.apache.flink.core.testutils.FlinkMatchers.containsCause) Preconditions(org.apache.flink.util.Preconditions) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) TestingJobResultStore(org.apache.flink.runtime.testutils.TestingJobResultStore) Matchers.equalTo(org.hamcrest.Matchers.equalTo) JobManagerSharedServices(org.apache.flink.runtime.jobmaster.JobManagerSharedServices) Optional(java.util.Optional) AbstractDispatcherTest.awaitStatus(org.apache.flink.runtime.dispatcher.AbstractDispatcherTest.awaitStatus) Matchers.is(org.hamcrest.Matchers.is) JobResultStore(org.apache.flink.runtime.highavailability.JobResultStore) Queue(java.util.Queue) Time(org.apache.flink.api.common.time.Time) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) FlinkException(org.apache.flink.util.FlinkException) BlobServer(org.apache.flink.runtime.blob.BlobServer) BeforeClass(org.junit.BeforeClass) CompletableFuture(java.util.concurrent.CompletableFuture) JobStatus(org.apache.flink.api.common.JobStatus) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) TestingResourceCleanerFactory(org.apache.flink.runtime.dispatcher.cleanup.TestingResourceCleanerFactory) RpcService(org.apache.flink.runtime.rpc.RpcService) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) JobGraphTestUtils(org.apache.flink.runtime.jobgraph.JobGraphTestUtils) FatalErrorHandler(org.apache.flink.runtime.rpc.FatalErrorHandler) ExpectedException(org.junit.rules.ExpectedException) IsInstanceOf(org.hamcrest.core.IsInstanceOf) Before(org.junit.Before) ArchivedExecutionGraph(org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph) ArchivedExecutionGraphBuilder(org.apache.flink.runtime.rest.handler.legacy.utils.ArchivedExecutionGraphBuilder) FlinkMatchers.containsMessage(org.apache.flink.core.testutils.FlinkMatchers.containsMessage) Configuration(org.apache.flink.configuration.Configuration) Test(org.junit.Test) IOException(java.io.IOException) TestingBlobStoreBuilder(org.apache.flink.runtime.blob.TestingBlobStoreBuilder) Reference(org.apache.flink.util.Reference) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) JobResultEntry(org.apache.flink.runtime.highavailability.JobResultEntry) JobID(org.apache.flink.api.common.JobID) Rule(org.junit.Rule) JobManagerRunner(org.apache.flink.runtime.jobmaster.JobManagerRunner) BlobUtils(org.apache.flink.runtime.blob.BlobUtils) ArrayDeque(java.util.ArrayDeque) DuplicateJobSubmissionException(org.apache.flink.runtime.client.DuplicateJobSubmissionException) TemporaryFolder(org.junit.rules.TemporaryFolder) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) ArchivedExecutionGraph(org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph) IOException(java.io.IOException) TestingJobManagerRunner(org.apache.flink.runtime.jobmaster.TestingJobManagerRunner) TestingJobResultStore(org.apache.flink.runtime.testutils.TestingJobResultStore) JobResultStore(org.apache.flink.runtime.highavailability.JobResultStore) CompletableFuture(java.util.concurrent.CompletableFuture) JobResultEntry(org.apache.flink.runtime.highavailability.JobResultEntry) ArchivedExecutionGraphBuilder(org.apache.flink.runtime.rest.handler.legacy.utils.ArchivedExecutionGraphBuilder) TimeoutException(java.util.concurrent.TimeoutException) Test(org.junit.Test)

Example 8 with JobResultStore

use of org.apache.flink.runtime.highavailability.JobResultStore in project flink by apache.

the class DispatcherResourceCleanupTest method testFatalErrorIfJobCannotBeMarkedDirtyInJobResultStore.

@Test
public void testFatalErrorIfJobCannotBeMarkedDirtyInJobResultStore() throws Exception {
    final JobResultStore jobResultStore = TestingJobResultStore.builder().withCreateDirtyResultConsumer(jobResult -> {
        throw new IOException("Expected IOException.");
    }).build();
    final TestingJobManagerRunnerFactory jobManagerRunnerFactory = startDispatcherAndSubmitJob(createTestingDispatcherBuilder().setJobResultStore(jobResultStore), 0);
    ArchivedExecutionGraph executionGraph = new ArchivedExecutionGraphBuilder().setJobID(jobId).setState(JobStatus.FINISHED).build();
    final TestingJobManagerRunner testingJobManagerRunner = jobManagerRunnerFactory.takeCreatedJobManagerRunner();
    testingJobManagerRunner.completeResultFuture(new ExecutionGraphInfo(executionGraph));
    final CompletableFuture<? extends Throwable> errorFuture = this.testingFatalErrorHandlerResource.getFatalErrorHandler().getErrorFuture();
    assertThat(errorFuture.get(100, TimeUnit.MILLISECONDS), IsInstanceOf.instanceOf(FlinkException.class));
    testingFatalErrorHandlerResource.getFatalErrorHandler().clearError();
}
Also used : Arrays(java.util.Arrays) JobSubmissionException(org.apache.flink.runtime.client.JobSubmissionException) ThrowingRunnable(org.apache.flink.util.function.ThrowingRunnable) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TimeoutException(java.util.concurrent.TimeoutException) ExceptionUtils(org.apache.flink.util.ExceptionUtils) JobManagerJobMetricGroupFactory(org.apache.flink.runtime.jobmaster.factories.JobManagerJobMetricGroupFactory) Assert.assertThat(org.junit.Assert.assertThat) After(org.junit.After) TestLogger(org.apache.flink.util.TestLogger) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) TestingFatalErrorHandlerResource(org.apache.flink.runtime.util.TestingFatalErrorHandlerResource) Assert.fail(org.junit.Assert.fail) TestingJobManagerRunner(org.apache.flink.runtime.jobmaster.TestingJobManagerRunner) ClassRule(org.junit.ClassRule) AfterClass(org.junit.AfterClass) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) FlinkMatchers.containsCause(org.apache.flink.core.testutils.FlinkMatchers.containsCause) Preconditions(org.apache.flink.util.Preconditions) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) TestingJobResultStore(org.apache.flink.runtime.testutils.TestingJobResultStore) Matchers.equalTo(org.hamcrest.Matchers.equalTo) JobManagerSharedServices(org.apache.flink.runtime.jobmaster.JobManagerSharedServices) Optional(java.util.Optional) AbstractDispatcherTest.awaitStatus(org.apache.flink.runtime.dispatcher.AbstractDispatcherTest.awaitStatus) Matchers.is(org.hamcrest.Matchers.is) JobResultStore(org.apache.flink.runtime.highavailability.JobResultStore) Queue(java.util.Queue) Time(org.apache.flink.api.common.time.Time) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) FlinkException(org.apache.flink.util.FlinkException) BlobServer(org.apache.flink.runtime.blob.BlobServer) BeforeClass(org.junit.BeforeClass) CompletableFuture(java.util.concurrent.CompletableFuture) JobStatus(org.apache.flink.api.common.JobStatus) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) TestingResourceCleanerFactory(org.apache.flink.runtime.dispatcher.cleanup.TestingResourceCleanerFactory) RpcService(org.apache.flink.runtime.rpc.RpcService) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) JobGraphTestUtils(org.apache.flink.runtime.jobgraph.JobGraphTestUtils) FatalErrorHandler(org.apache.flink.runtime.rpc.FatalErrorHandler) ExpectedException(org.junit.rules.ExpectedException) IsInstanceOf(org.hamcrest.core.IsInstanceOf) Before(org.junit.Before) ArchivedExecutionGraph(org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph) ArchivedExecutionGraphBuilder(org.apache.flink.runtime.rest.handler.legacy.utils.ArchivedExecutionGraphBuilder) FlinkMatchers.containsMessage(org.apache.flink.core.testutils.FlinkMatchers.containsMessage) Configuration(org.apache.flink.configuration.Configuration) Test(org.junit.Test) IOException(java.io.IOException) TestingBlobStoreBuilder(org.apache.flink.runtime.blob.TestingBlobStoreBuilder) Reference(org.apache.flink.util.Reference) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) JobResultEntry(org.apache.flink.runtime.highavailability.JobResultEntry) JobID(org.apache.flink.api.common.JobID) Rule(org.junit.Rule) JobManagerRunner(org.apache.flink.runtime.jobmaster.JobManagerRunner) BlobUtils(org.apache.flink.runtime.blob.BlobUtils) ArrayDeque(java.util.ArrayDeque) DuplicateJobSubmissionException(org.apache.flink.runtime.client.DuplicateJobSubmissionException) TemporaryFolder(org.junit.rules.TemporaryFolder) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) ArchivedExecutionGraph(org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph) ArchivedExecutionGraphBuilder(org.apache.flink.runtime.rest.handler.legacy.utils.ArchivedExecutionGraphBuilder) IOException(java.io.IOException) TestingJobManagerRunner(org.apache.flink.runtime.jobmaster.TestingJobManagerRunner) TestingJobResultStore(org.apache.flink.runtime.testutils.TestingJobResultStore) JobResultStore(org.apache.flink.runtime.highavailability.JobResultStore) FlinkException(org.apache.flink.util.FlinkException) Test(org.junit.Test)

Example 9 with JobResultStore

use of org.apache.flink.runtime.highavailability.JobResultStore in project flink by apache.

the class DispatcherResourceCleanupTest method testJobBeingMarkedAsCleanAfterCleanup.

@Test
public void testJobBeingMarkedAsCleanAfterCleanup() throws Exception {
    final CompletableFuture<JobID> markAsCleanFuture = new CompletableFuture<>();
    final JobResultStore jobResultStore = TestingJobResultStore.builder().withMarkResultAsCleanConsumer(markAsCleanFuture::complete).build();
    final OneShotLatch localCleanupLatch = new OneShotLatch();
    final OneShotLatch globalCleanupLatch = new OneShotLatch();
    final TestingResourceCleanerFactory resourceCleanerFactory = TestingResourceCleanerFactory.builder().withLocallyCleanableResource((ignoredJobId, ignoredExecutor) -> {
        try {
            localCleanupLatch.await();
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
        return FutureUtils.completedVoidFuture();
    }).withGloballyCleanableResource((ignoredJobId, ignoredExecutor) -> {
        try {
            globalCleanupLatch.await();
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
        return FutureUtils.completedVoidFuture();
    }).build();
    final TestingDispatcher.Builder dispatcherBuilder = createTestingDispatcherBuilder().setJobResultStore(jobResultStore).setResourceCleanerFactory(resourceCleanerFactory);
    final TestingJobManagerRunnerFactory jobManagerRunnerFactory = startDispatcherAndSubmitJob(dispatcherBuilder, 0);
    finishJob(jobManagerRunnerFactory.takeCreatedJobManagerRunner());
    assertThat(markAsCleanFuture.isDone(), is(false));
    localCleanupLatch.trigger();
    assertThat(markAsCleanFuture.isDone(), is(false));
    globalCleanupLatch.trigger();
    assertThat(markAsCleanFuture.get(), is(jobId));
}
Also used : Arrays(java.util.Arrays) JobSubmissionException(org.apache.flink.runtime.client.JobSubmissionException) ThrowingRunnable(org.apache.flink.util.function.ThrowingRunnable) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TimeoutException(java.util.concurrent.TimeoutException) ExceptionUtils(org.apache.flink.util.ExceptionUtils) JobManagerJobMetricGroupFactory(org.apache.flink.runtime.jobmaster.factories.JobManagerJobMetricGroupFactory) Assert.assertThat(org.junit.Assert.assertThat) After(org.junit.After) TestLogger(org.apache.flink.util.TestLogger) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) TestingFatalErrorHandlerResource(org.apache.flink.runtime.util.TestingFatalErrorHandlerResource) Assert.fail(org.junit.Assert.fail) TestingJobManagerRunner(org.apache.flink.runtime.jobmaster.TestingJobManagerRunner) ClassRule(org.junit.ClassRule) AfterClass(org.junit.AfterClass) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) FlinkMatchers.containsCause(org.apache.flink.core.testutils.FlinkMatchers.containsCause) Preconditions(org.apache.flink.util.Preconditions) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) TestingJobResultStore(org.apache.flink.runtime.testutils.TestingJobResultStore) Matchers.equalTo(org.hamcrest.Matchers.equalTo) JobManagerSharedServices(org.apache.flink.runtime.jobmaster.JobManagerSharedServices) Optional(java.util.Optional) AbstractDispatcherTest.awaitStatus(org.apache.flink.runtime.dispatcher.AbstractDispatcherTest.awaitStatus) Matchers.is(org.hamcrest.Matchers.is) JobResultStore(org.apache.flink.runtime.highavailability.JobResultStore) Queue(java.util.Queue) Time(org.apache.flink.api.common.time.Time) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) FlinkException(org.apache.flink.util.FlinkException) BlobServer(org.apache.flink.runtime.blob.BlobServer) BeforeClass(org.junit.BeforeClass) CompletableFuture(java.util.concurrent.CompletableFuture) JobStatus(org.apache.flink.api.common.JobStatus) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) TestingResourceCleanerFactory(org.apache.flink.runtime.dispatcher.cleanup.TestingResourceCleanerFactory) RpcService(org.apache.flink.runtime.rpc.RpcService) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) JobGraphTestUtils(org.apache.flink.runtime.jobgraph.JobGraphTestUtils) FatalErrorHandler(org.apache.flink.runtime.rpc.FatalErrorHandler) ExpectedException(org.junit.rules.ExpectedException) IsInstanceOf(org.hamcrest.core.IsInstanceOf) Before(org.junit.Before) ArchivedExecutionGraph(org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph) ArchivedExecutionGraphBuilder(org.apache.flink.runtime.rest.handler.legacy.utils.ArchivedExecutionGraphBuilder) FlinkMatchers.containsMessage(org.apache.flink.core.testutils.FlinkMatchers.containsMessage) Configuration(org.apache.flink.configuration.Configuration) Test(org.junit.Test) IOException(java.io.IOException) TestingBlobStoreBuilder(org.apache.flink.runtime.blob.TestingBlobStoreBuilder) Reference(org.apache.flink.util.Reference) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) JobResultEntry(org.apache.flink.runtime.highavailability.JobResultEntry) JobID(org.apache.flink.api.common.JobID) Rule(org.junit.Rule) JobManagerRunner(org.apache.flink.runtime.jobmaster.JobManagerRunner) BlobUtils(org.apache.flink.runtime.blob.BlobUtils) ArrayDeque(java.util.ArrayDeque) DuplicateJobSubmissionException(org.apache.flink.runtime.client.DuplicateJobSubmissionException) TemporaryFolder(org.junit.rules.TemporaryFolder) CompletableFuture(java.util.concurrent.CompletableFuture) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) TestingResourceCleanerFactory(org.apache.flink.runtime.dispatcher.cleanup.TestingResourceCleanerFactory) TestingJobResultStore(org.apache.flink.runtime.testutils.TestingJobResultStore) JobResultStore(org.apache.flink.runtime.highavailability.JobResultStore) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 10 with JobResultStore

use of org.apache.flink.runtime.highavailability.JobResultStore in project flink by apache.

the class ZooKeeperDefaultDispatcherRunnerTest method testResourceCleanupUnderLeadershipChange.

/**
 * See FLINK-11665.
 */
@Test
public void testResourceCleanupUnderLeadershipChange() throws Exception {
    final TestingRpcService rpcService = testingRpcServiceResource.getTestingRpcService();
    final TestingLeaderElectionService dispatcherLeaderElectionService = new TestingLeaderElectionService();
    final CuratorFramework client = ZooKeeperUtils.startCuratorFramework(configuration, fatalErrorHandler).asCuratorFramework();
    try (final TestingHighAvailabilityServices highAvailabilityServices = new TestingHighAvailabilityServicesBuilder().setDispatcherLeaderElectionService(dispatcherLeaderElectionService).setJobMasterLeaderRetrieverFunction(jobId -> ZooKeeperUtils.createLeaderRetrievalService(client)).build()) {
        final PartialDispatcherServices partialDispatcherServices = new PartialDispatcherServices(configuration, highAvailabilityServices, CompletableFuture::new, blobServer, new TestingHeartbeatServices(), UnregisteredMetricGroups::createUnregisteredJobManagerMetricGroup, new MemoryExecutionGraphInfoStore(), fatalErrorHandler, VoidHistoryServerArchivist.INSTANCE, null, ForkJoinPool.commonPool(), new DispatcherOperationCaches());
        final DefaultDispatcherRunnerFactory defaultDispatcherRunnerFactory = DefaultDispatcherRunnerFactory.createSessionRunner(SessionDispatcherFactory.INSTANCE);
        try (final DispatcherRunner dispatcherRunner = createDispatcherRunner(rpcService, dispatcherLeaderElectionService, new JobPersistenceComponentFactory() {

            @Override
            public JobGraphStore createJobGraphStore() {
                return createZooKeeperJobGraphStore(client);
            }

            @Override
            public JobResultStore createJobResultStore() {
                return new EmbeddedJobResultStore();
            }
        }, partialDispatcherServices, defaultDispatcherRunnerFactory)) {
            // initial run
            DispatcherGateway dispatcherGateway = grantLeadership(dispatcherLeaderElectionService);
            final JobGraph jobGraph = createJobGraphWithBlobs();
            LOG.info("Initial job submission {}.", jobGraph.getJobID());
            dispatcherGateway.submitJob(jobGraph, TESTING_TIMEOUT).get();
            dispatcherLeaderElectionService.notLeader();
            // recovering submitted jobs
            LOG.info("Re-grant leadership first time.");
            dispatcherGateway = grantLeadership(dispatcherLeaderElectionService);
            LOG.info("Cancel recovered job {}.", jobGraph.getJobID());
            // cancellation of the job should remove everything
            final CompletableFuture<JobResult> jobResultFuture = dispatcherGateway.requestJobResult(jobGraph.getJobID(), TESTING_TIMEOUT);
            dispatcherGateway.cancelJob(jobGraph.getJobID(), TESTING_TIMEOUT).get();
            // a successful cancellation should eventually remove all job information
            final JobResult jobResult = jobResultFuture.get();
            assertThat(jobResult.getApplicationStatus(), is(ApplicationStatus.CANCELED));
            dispatcherLeaderElectionService.notLeader();
            // check that the job has been removed from ZooKeeper
            final JobGraphStore submittedJobGraphStore = createZooKeeperJobGraphStore(client);
            CommonTestUtils.waitUntilCondition(() -> submittedJobGraphStore.getJobIds().isEmpty(), Deadline.fromNow(VERIFICATION_TIMEOUT), 20L);
        }
    }
    // check resource clean up
    assertThat(clusterHaStorageDir.listFiles(), is(emptyArray()));
}
Also used : ZooKeeperUtils(org.apache.flink.runtime.util.ZooKeeperUtils) Deadline(org.apache.flink.api.common.time.Deadline) PartialDispatcherServices(org.apache.flink.runtime.dispatcher.PartialDispatcherServices) Matchers.emptyArray(org.hamcrest.Matchers.emptyArray) JobPersistenceComponentFactory(org.apache.flink.runtime.jobmanager.JobPersistenceComponentFactory) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) LoggerFactory(org.slf4j.LoggerFactory) ExceptionUtils(org.apache.flink.util.ExceptionUtils) Assert.assertThat(org.junit.Assert.assertThat) DispatcherOperationCaches(org.apache.flink.runtime.dispatcher.DispatcherOperationCaches) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) After(org.junit.After) Duration(java.time.Duration) TestLogger(org.apache.flink.util.TestLogger) ClassRule(org.junit.ClassRule) HighAvailabilityServicesUtils(org.apache.flink.runtime.highavailability.HighAvailabilityServicesUtils) UUID(java.util.UUID) TestingUtils(org.apache.flink.testutils.TestingUtils) Matchers.is(org.hamcrest.Matchers.is) JobResultStore(org.apache.flink.runtime.highavailability.JobResultStore) JobGraphStore(org.apache.flink.runtime.jobmanager.JobGraphStore) Time(org.apache.flink.api.common.time.Time) DispatcherId(org.apache.flink.runtime.dispatcher.DispatcherId) BlobServer(org.apache.flink.runtime.blob.BlobServer) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) LeaderConnectionInfo(org.apache.flink.runtime.util.LeaderConnectionInfo) CuratorFramework(org.apache.flink.shaded.curator5.org.apache.curator.framework.CuratorFramework) VoidHistoryServerArchivist(org.apache.flink.runtime.dispatcher.VoidHistoryServerArchivist) EmbeddedJobResultStore(org.apache.flink.runtime.highavailability.nonha.embedded.EmbeddedJobResultStore) CompletableFuture(java.util.concurrent.CompletableFuture) DispatcherGateway(org.apache.flink.runtime.dispatcher.DispatcherGateway) JobResult(org.apache.flink.runtime.jobmaster.JobResult) PermanentBlobKey(org.apache.flink.runtime.blob.PermanentBlobKey) JobGraphTestUtils(org.apache.flink.runtime.jobgraph.JobGraphTestUtils) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) Before(org.junit.Before) TestingLeaderElectionService(org.apache.flink.runtime.leaderelection.TestingLeaderElectionService) Logger(org.slf4j.Logger) ApplicationStatus(org.apache.flink.runtime.clusterframework.ApplicationStatus) Configuration(org.apache.flink.configuration.Configuration) TestingHeartbeatServices(org.apache.flink.runtime.heartbeat.TestingHeartbeatServices) Test(org.junit.Test) IOException(java.io.IOException) File(java.io.File) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) ForkJoinPool(java.util.concurrent.ForkJoinPool) MemoryExecutionGraphInfoStore(org.apache.flink.runtime.dispatcher.MemoryExecutionGraphInfoStore) TestingRpcServiceResource(org.apache.flink.runtime.rpc.TestingRpcServiceResource) BlobUtils(org.apache.flink.runtime.blob.BlobUtils) SessionDispatcherFactory(org.apache.flink.runtime.dispatcher.SessionDispatcherFactory) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) ZooKeeperResource(org.apache.flink.runtime.zookeeper.ZooKeeperResource) HighAvailabilityOptions(org.apache.flink.configuration.HighAvailabilityOptions) TemporaryFolder(org.junit.rules.TemporaryFolder) NoOpInvokable(org.apache.flink.runtime.testtasks.NoOpInvokable) TestingLeaderElectionService(org.apache.flink.runtime.leaderelection.TestingLeaderElectionService) PartialDispatcherServices(org.apache.flink.runtime.dispatcher.PartialDispatcherServices) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) JobResult(org.apache.flink.runtime.jobmaster.JobResult) JobGraphStore(org.apache.flink.runtime.jobmanager.JobGraphStore) JobResultStore(org.apache.flink.runtime.highavailability.JobResultStore) EmbeddedJobResultStore(org.apache.flink.runtime.highavailability.nonha.embedded.EmbeddedJobResultStore) EmbeddedJobResultStore(org.apache.flink.runtime.highavailability.nonha.embedded.EmbeddedJobResultStore) DispatcherGateway(org.apache.flink.runtime.dispatcher.DispatcherGateway) TestingHighAvailabilityServicesBuilder(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) JobPersistenceComponentFactory(org.apache.flink.runtime.jobmanager.JobPersistenceComponentFactory) CuratorFramework(org.apache.flink.shaded.curator5.org.apache.curator.framework.CuratorFramework) CompletableFuture(java.util.concurrent.CompletableFuture) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TestingHeartbeatServices(org.apache.flink.runtime.heartbeat.TestingHeartbeatServices) DispatcherOperationCaches(org.apache.flink.runtime.dispatcher.DispatcherOperationCaches) MemoryExecutionGraphInfoStore(org.apache.flink.runtime.dispatcher.MemoryExecutionGraphInfoStore) TestingRpcService(org.apache.flink.runtime.rpc.TestingRpcService) Test(org.junit.Test)

Aggregations

JobResultStore (org.apache.flink.runtime.highavailability.JobResultStore)10 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)9 CompletableFuture (java.util.concurrent.CompletableFuture)6 JobID (org.apache.flink.api.common.JobID)6 Configuration (org.apache.flink.configuration.Configuration)6 Optional (java.util.Optional)5 ExecutionException (java.util.concurrent.ExecutionException)5 JobStatus (org.apache.flink.api.common.JobStatus)5 OneShotLatch (org.apache.flink.core.testutils.OneShotLatch)5 JobResultEntry (org.apache.flink.runtime.highavailability.JobResultEntry)5 TestingJobResultStore (org.apache.flink.runtime.testutils.TestingJobResultStore)5 ExceptionUtils (org.apache.flink.util.ExceptionUtils)5 After (org.junit.After)5 Before (org.junit.Before)5 Test (org.junit.Test)5 IOException (java.io.IOException)4 Deadline (org.apache.flink.api.common.time.Deadline)4 Time (org.apache.flink.api.common.time.Time)4 BlobServer (org.apache.flink.runtime.blob.BlobServer)4 BlobUtils (org.apache.flink.runtime.blob.BlobUtils)4