Search in sources:

Example 31 with JobResult

use of org.apache.flink.runtime.jobmaster.JobResult in project flink by apache.

the class DispatcherTest method testJobSuspensionWhenDispatcherIsTerminated.

/**
 * Tests that a submitted job is suspended if the Dispatcher is terminated.
 *
 * <p>The job result is requested before the dispatcher is closed and must still be pending at
 * that point; once the dispatcher has been closed, the result future is expected to complete
 * with {@link ApplicationStatus#UNKNOWN}.
 *
 * @throws Exception if starting the dispatcher, submitting the job, or waiting for the job
 *     result fails
 */
@Test
public void testJobSuspensionWhenDispatcherIsTerminated() throws Exception {
    dispatcher = createAndStartDispatcher(heartbeatServices, haServices, new ExpectedJobIdJobManagerRunnerFactory(jobId, createdJobManagerRunnerLatch));
    DispatcherGateway dispatcherGateway = dispatcher.getSelfGateway(DispatcherGateway.class);
    dispatcherGateway.submitJob(jobGraph, TIMEOUT).get();
    final CompletableFuture<JobResult> jobResultFuture = dispatcherGateway.requestJobResult(jobGraph.getJobID(), TIMEOUT);
    // The result must not be available while the dispatcher is still running the job.
    assertThat(jobResultFuture.isDone(), is(false));
    dispatcher.close();
    final JobResult jobResult = jobResultFuture.get();
    // assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
    assertEquals(ApplicationStatus.UNKNOWN, jobResult.getApplicationStatus());
}
Also used : JobResult(org.apache.flink.runtime.jobmaster.JobResult) Test(org.junit.Test)

Example 32 with JobResult

use of org.apache.flink.runtime.jobmaster.JobResult in project flink by apache.

the class DispatcherTest method testDuplicateJobSubmissionWithGloballyTerminatedJobId.

/**
 * Verifies that submitting a job whose ID already has an entry in the JobResultStore fails with
 * a {@link DuplicateJobSubmissionException} that reports the job as globally terminated.
 */
@Test
public void testDuplicateJobSubmissionWithGloballyTerminatedJobId() throws Exception {
    // Register a result for the job's ID before the dispatcher is started.
    final JobResult succeededResult = TestingJobResultStore.createJobResult(jobGraph.getJobID(), ApplicationStatus.SUCCEEDED);
    haServices.getJobResultStore().createDirtyResult(new JobResultEntry(succeededResult));

    dispatcher = createAndStartDispatcher(heartbeatServices, haServices, new ExpectedJobIdJobManagerRunnerFactory(jobId, createdJobManagerRunnerLatch));
    final DispatcherGateway gateway = dispatcher.getSelfGateway(DispatcherGateway.class);

    // The submission future must complete exceptionally.
    final CompletableFuture<Acknowledge> submission = gateway.submitJob(jobGraph, TIMEOUT);
    final Throwable cause = assertThrows(ExecutionException.class, submission::get).getCause();

    assertTrue(cause instanceof DuplicateJobSubmissionException);
    assertTrue(((DuplicateJobSubmissionException) cause).isGloballyTerminated());
}
Also used : JobResult(org.apache.flink.runtime.jobmaster.JobResult) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) JobResultEntry(org.apache.flink.runtime.highavailability.JobResultEntry) ExecutionException(java.util.concurrent.ExecutionException) DuplicateJobSubmissionException(org.apache.flink.runtime.client.DuplicateJobSubmissionException) Test(org.junit.Test)

Example 33 with JobResult

use of org.apache.flink.runtime.jobmaster.JobResult in project flink by apache.

the class DispatcherCleanupITCase method testCleanupAfterLeadershipChange.

/**
 * Tests that job cleanup which did not succeed under a first dispatcher is completed by a second
 * dispatcher that is started with the dirty job results taken from the JobResultStore.
 *
 * <p>Flow: a job graph store configured with cleanup failures is installed, the job is run to
 * completion on the first dispatcher, the injected cleanup error is observed through the fatal
 * error handler, leadership is revoked, and a second dispatcher is started. The test then waits
 * until the JobResultStore has no dirty results left and checks the final state of both stores.
 */
@Test
public void testCleanupAfterLeadershipChange() throws Exception {
    final JobGraph jobGraph = createJobGraph();
    final JobID jobId = jobGraph.getJobID();
    // Construct job graph store.
    final AtomicInteger actualGlobalCleanupCallCount = new AtomicInteger();
    final OneShotLatch successfulCleanupLatch = new OneShotLatch();
    final RuntimeException temporaryError = new RuntimeException("Unable to remove job graph.");
    // NOTE(review): presumably the first argument is the number of cleanup attempts that fail with
    // temporaryError before cleanup succeeds - the helper is not visible here, confirm.
    final JobGraphStore jobGraphStore = createAndStartJobGraphStoreWithCleanupFailures(1, temporaryError, actualGlobalCleanupCallCount, successfulCleanupLatch);
    haServices.setJobGraphStore(jobGraphStore);
    // Construct leader election service.
    final TestingLeaderElectionService leaderElectionService = new TestingLeaderElectionService();
    haServices.setJobMasterLeaderElectionService(jobId, leaderElectionService);
    // start the dispatcher with no retries on cleanup
    final CountDownLatch jobGraphRemovalErrorReceived = new CountDownLatch(1);
    // Fatal errors that contain temporaryError count down the latch; any other fatal error is
    // forwarded to the test's regular fatal error handler.
    final Dispatcher dispatcher = createTestingDispatcherBuilder().setFatalErrorHandler(throwable -> {
        final Optional<Throwable> maybeError = ExceptionUtils.findThrowable(throwable, temporaryError::equals);
        if (maybeError.isPresent()) {
            jobGraphRemovalErrorReceived.countDown();
        } else {
            testingFatalErrorHandlerResource.getFatalErrorHandler().onFatalError(throwable);
        }
    }).build();
    dispatcher.start();
    toTerminate.add(dispatcher);
    leaderElectionService.isLeader(UUID.randomUUID());
    final DispatcherGateway dispatcherGateway = dispatcher.getSelfGateway(DispatcherGateway.class);
    dispatcherGateway.submitJob(jobGraph, TIMEOUT).get();
    waitForJobToFinish(leaderElectionService, dispatcherGateway, jobId);
    // Block until the fatal error handler has seen the injected cleanup error.
    jobGraphRemovalErrorReceived.await();
    // Remove job master leadership.
    leaderElectionService.notLeader();
    // This will clear internal state of election service, so a new contender can register.
    leaderElectionService.stop();
    // At this point no cleanup has succeeded: the job graph is still stored and the job result is
    // still dirty.
    assertThat(successfulCleanupLatch.isTriggered(), CoreMatchers.is(false));
    assertThat("The JobGraph is still stored in the JobGraphStore.", haServices.getJobGraphStore().getJobIds(), CoreMatchers.is(Collections.singleton(jobId)));
    assertThat("The JobResultStore has this job marked as dirty.", haServices.getJobResultStore().getDirtyResults().stream().map(JobResult::getJobId).collect(Collectors.toSet()), CoreMatchers.is(Collections.singleton(jobId)));
    // Run a second dispatcher, that restores our finished job.
    final Dispatcher secondDispatcher = createTestingDispatcherBuilder().setRecoveredDirtyJobs(haServices.getJobResultStore().getDirtyResults()).build();
    secondDispatcher.start();
    toTerminate.add(secondDispatcher);
    leaderElectionService.isLeader(UUID.randomUUID());
    // Wait (bounded by TIMEOUT) until the JobResultStore reports no dirty results anymore.
    CommonTestUtils.waitUntilCondition(() -> haServices.getJobResultStore().getDirtyResults().isEmpty(), Deadline.fromNow(TimeUtils.toDuration(TIMEOUT)));
    assertThat("The JobGraph is not stored in the JobGraphStore.", haServices.getJobGraphStore().getJobIds(), IsEmptyCollection.empty());
    assertTrue("The JobResultStore has the job listed as clean.", haServices.getJobResultStore().hasJobResultEntry(jobId));
    // wait for the successful cleanup to be triggered
    successfulCleanupLatch.await();
    // Two global cleanup calls are expected in total across both dispatchers.
    assertThat(actualGlobalCleanupCallCount.get(), equalTo(2));
}
Also used : CoreMatchers(org.hamcrest.CoreMatchers) Deadline(org.apache.flink.api.common.time.Deadline) RpcEndpoint(org.apache.flink.runtime.rpc.RpcEndpoint) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) IsEqual.equalTo(org.hamcrest.core.IsEqual.equalTo) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) ExceptionUtils(org.apache.flink.util.ExceptionUtils) IsEmptyCollection(org.hamcrest.collection.IsEmptyCollection) PerJobCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.PerJobCheckpointRecoveryFactory) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) After(org.junit.After) Duration(java.time.Duration) JobCheckpointingSettings(org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings) DispatcherResourceCleanerFactory(org.apache.flink.runtime.dispatcher.cleanup.DispatcherResourceCleanerFactory) BlockingQueue(java.util.concurrent.BlockingQueue) UUID(java.util.UUID) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Collectors(java.util.stream.Collectors) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) TimeUtils(org.apache.flink.util.TimeUtils) TestingJobResultStore(org.apache.flink.runtime.testutils.TestingJobResultStore) Optional(java.util.Optional) JobResultStore(org.apache.flink.runtime.highavailability.JobResultStore) JobGraphStore(org.apache.flink.runtime.jobmanager.JobGraphStore) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) FlinkMatchers(org.apache.flink.core.testutils.FlinkMatchers) TestingJobGraphStore(org.apache.flink.runtime.testutils.TestingJobGraphStore) EmbeddedJobResultStore(org.apache.flink.runtime.highavailability.nonha.embedded.EmbeddedJobResultStore) CompletableFuture(java.util.concurrent.CompletableFuture) JobStatus(org.apache.flink.api.common.JobStatus) AtomicReference(java.util.concurrent.atomic.AtomicReference) 
TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) JobMasterGateway(org.apache.flink.runtime.jobmaster.JobMasterGateway) JobResult(org.apache.flink.runtime.jobmaster.JobResult) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Before(org.junit.Before) JobGraphBuilder(org.apache.flink.runtime.jobgraph.JobGraphBuilder) TestingLeaderElectionService(org.apache.flink.runtime.leaderelection.TestingLeaderElectionService) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) RpcUtils(org.apache.flink.runtime.rpc.RpcUtils) ExecutionException(java.util.concurrent.ExecutionException) JobResultEntry(org.apache.flink.runtime.highavailability.JobResultEntry) JobID(org.apache.flink.api.common.JobID) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) TestingRetryStrategies(org.apache.flink.runtime.dispatcher.cleanup.TestingRetryStrategies) ForkJoinPool(java.util.concurrent.ForkJoinPool) EmbeddedCompletedCheckpointStore(org.apache.flink.runtime.checkpoint.EmbeddedCompletedCheckpointStore) JobManagerRunner(org.apache.flink.runtime.jobmaster.JobManagerRunner) CommonTestUtils(org.apache.flink.runtime.testutils.CommonTestUtils) Assert(org.junit.Assert) Collections(java.util.Collections) NoOpInvokable(org.apache.flink.runtime.testtasks.NoOpInvokable) TestingLeaderElectionService(org.apache.flink.runtime.leaderelection.TestingLeaderElectionService) Optional(java.util.Optional) JobResult(org.apache.flink.runtime.jobmaster.JobResult) JobGraphStore(org.apache.flink.runtime.jobmanager.JobGraphStore) TestingJobGraphStore(org.apache.flink.runtime.testutils.TestingJobGraphStore) CountDownLatch(java.util.concurrent.CountDownLatch) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) 
AtomicInteger(java.util.concurrent.atomic.AtomicInteger) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 34 with JobResult

use of org.apache.flink.runtime.jobmaster.JobResult in project flink by apache.

the class FileSystemJobResultStoreTestInternal method testJobResultSerializationDeserialization.

/**
 * Checks that a dirty entry written by the file-system job result store can be read back from
 * its file and that the deserialized job result matches the entry's job ID, application status,
 * net runtime, serialized throwable and accumulator results.
 */
@Test
public void testJobResultSerializationDeserialization() throws IOException {
    fileSystemJobResultStore.createDirtyResult(DUMMY_JOB_RESULT_ENTRY);

    // Read the entry back from the file the store is expected to have written.
    final File writtenFile = expectedDirtyFile(DUMMY_JOB_RESULT_ENTRY);
    final JobResult actual = MAPPER.readValue(writtenFile, FileSystemJobResultStore.JsonJobResultEntry.class).getJobResult();
    final JobResult expected = DUMMY_JOB_RESULT_ENTRY.getJobResult();

    assertThat(actual)
            .extracting(JobResult::getJobId)
            .isEqualTo(DUMMY_JOB_RESULT_ENTRY.getJobId());
    assertThat(actual)
            .extracting(JobResult::getApplicationStatus)
            .isEqualTo(expected.getApplicationStatus());
    assertThat(actual)
            .extracting(JobResult::getNetRuntime)
            .isEqualTo(expected.getNetRuntime());
    assertThat(actual)
            .extracting(JobResult::getSerializedThrowable)
            .isEqualTo(expected.getSerializedThrowable());
    assertThat(actual)
            .extracting(JobResult::getAccumulatorResults)
            .isEqualTo(expected.getAccumulatorResults());
}
Also used : JobResult(org.apache.flink.runtime.jobmaster.JobResult) File(java.io.File) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 35 with JobResult

use of org.apache.flink.runtime.jobmaster.JobResult in project flink by apache.

the class JobResultStoreContractTest method testGetDirtyResultsWithDirtyEntry.

/**
 * Checks that after a single dirty entry has been created, the dirty results of the store
 * contain exactly one job result, carrying that entry's job ID.
 */
@Test
default void testGetDirtyResultsWithDirtyEntry() throws IOException {
    final JobResultStore store = createJobResultStore();
    store.createDirtyResult(DUMMY_JOB_RESULT_ENTRY);

    assertThat(
                    store.getDirtyResults().stream()
                            .map(JobResult::getJobId)
                            .collect(Collectors.toList()))
            .singleElement()
            .isEqualTo(DUMMY_JOB_RESULT_ENTRY.getJobId());
}
Also used : JobResult(org.apache.flink.runtime.jobmaster.JobResult) TestingJobResultStore(org.apache.flink.runtime.testutils.TestingJobResultStore) Test(org.junit.jupiter.api.Test)

Aggregations

JobResult (org.apache.flink.runtime.jobmaster.JobResult)58 Test (org.junit.Test)28 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)25 JobID (org.apache.flink.api.common.JobID)15 Test (org.junit.jupiter.api.Test)13 MiniCluster (org.apache.flink.runtime.minicluster.MiniCluster)11 ExecutionException (java.util.concurrent.ExecutionException)8 JobSubmissionResult (org.apache.flink.api.common.JobSubmissionResult)7 Deadline (org.apache.flink.api.common.time.Deadline)7 Configuration (org.apache.flink.configuration.Configuration)7 File (java.io.File)5 JobResultStore (org.apache.flink.runtime.highavailability.JobResultStore)5 IOException (java.io.IOException)4 CompletableFuture (java.util.concurrent.CompletableFuture)4 ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService)4 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)4 Duration (java.time.Duration)3 List (java.util.List)3 Time (org.apache.flink.api.common.time.Time)3 MiniClusterClient (org.apache.flink.client.program.MiniClusterClient)3