use of org.apache.flink.runtime.highavailability.JobResultStore in project flink by apache.
the class DispatcherCleanupITCase method testCleanupNotCancellable.
/**
 * Checks that cancelling a job is rejected with a {@link JobCancellationFailedException} while
 * its cleanup is still pending, and that the job's entry in the {@link JobResultStore} becomes
 * clean once the pending cleanup future is completed.
 */
@Test
public void testCleanupNotCancellable() throws Exception {
    final JobGraph graph = createJobGraph();
    final JobID jobId = graph.getJobID();

    // Seed the HA services with a store that already holds a dirty result for the job.
    final JobResultStore dirtyStore = new EmbeddedJobResultStore();
    final JobResultEntry dirtyEntry =
            new JobResultEntry(TestingJobResultStore.createSuccessfulJobResult(jobId));
    dirtyStore.createDirtyResult(dirtyEntry);
    haServices.setJobResultStore(dirtyStore);

    // The registry's local cleanup hands back this future, which the test completes manually
    // further down; until then the cleanup stays pending.
    final CompletableFuture<Void> pendingCleanup = new CompletableFuture<>();
    final AtomicReference<JobManagerRunner> runnerHolder = new AtomicReference<>();
    final JobManagerRunnerRegistry runnerRegistry =
            TestingJobManagerRunnerRegistry.newSingleJobBuilder(runnerHolder)
                    .withLocalCleanupAsyncFunction((ignoredJobId, ignoredExecutor) -> pendingCleanup)
                    .build();

    final Dispatcher dispatcher =
            createTestingDispatcherBuilder()
                    .setJobManagerRunnerRegistry(runnerRegistry)
                    .build();
    dispatcher.start();
    toTerminate.add(dispatcher);

    CommonTestUtils.waitUntilCondition(
            () -> runnerHolder.get() != null,
            Deadline.fromNow(Duration.ofSeconds(10)),
            "JobManagerRunner wasn't loaded in time.");

    final boolean stillDirty = haServices.getJobResultStore().hasDirtyJobResultEntry(jobId);
    assertThat(
            "The JobResultStore should have this job still marked as dirty.",
            stillDirty,
            CoreMatchers.is(true));

    final DispatcherGateway gateway = dispatcher.getSelfGateway(DispatcherGateway.class);
    try {
        gateway.cancelJob(jobId, TIMEOUT).get();
        Assert.fail("Should fail because cancelling the cleanup is not allowed.");
    } catch (ExecutionException expected) {
        assertThat(expected, FlinkMatchers.containsCause(JobCancellationFailedException.class));
    }

    // Release the pending cleanup and wait for the entry to show up as clean.
    pendingCleanup.complete(null);
    CommonTestUtils.waitUntilCondition(
            () -> haServices.getJobResultStore().hasCleanJobResultEntry(jobId),
            Deadline.fromNow(Duration.ofSeconds(60)),
            "The JobResultStore should have this job marked as clean now.");
}
use of org.apache.flink.runtime.highavailability.JobResultStore in project flink by apache.
the class JobDispatcherLeaderProcessFactoryFactory method createFactory.
/**
 * Creates a {@link JobDispatcherLeaderProcessFactory} for the job graph obtained from the
 * {@code jobGraphRetriever}.
 *
 * <p>The dirty job results are read from a freshly created {@link JobResultStore}. The job graph
 * and the dirty job result handed to the returned factory are each {@code null} when the
 * corresponding helper yields an empty {@link Optional}.
 *
 * @throws FlinkRuntimeException if retrieving the job graph fails with a {@link FlinkException}
 */
@Override
public JobDispatcherLeaderProcessFactory createFactory(
        JobPersistenceComponentFactory jobPersistenceComponentFactory,
        Executor ioExecutor,
        RpcService rpcService,
        PartialDispatcherServices partialDispatcherServices,
        FatalErrorHandler fatalErrorHandler) {
    final JobGraph retrievedJobGraph;
    try {
        final JobGraph candidate =
                jobGraphRetriever.retrieveJobGraph(partialDispatcherServices.getConfiguration());
        retrievedJobGraph = Preconditions.checkNotNull(candidate);
    } catch (FlinkException retrievalFailure) {
        throw new FlinkRuntimeException("Could not retrieve the JobGraph.", retrievalFailure);
    }

    final JobResultStore resultStore = jobPersistenceComponentFactory.createJobResultStore();
    final Collection<JobResult> dirtyResults = getDirtyJobResults(resultStore);

    // Both lookups may come back empty; the factory receives null in that case.
    final JobResult dirtyResultOrNull =
            extractDirtyJobResult(dirtyResults, retrievedJobGraph).orElse(null);
    final JobGraph jobGraphOrNull =
            getJobGraphBasedOnDirtyJobResults(retrievedJobGraph, dirtyResults).orElse(null);

    final DefaultDispatcherGatewayServiceFactory gatewayServiceFactory =
            new DefaultDispatcherGatewayServiceFactory(
                    JobDispatcherFactory.INSTANCE, rpcService, partialDispatcherServices);

    return new JobDispatcherLeaderProcessFactory(
            gatewayServiceFactory,
            jobGraphOrNull,
            dirtyResultOrNull,
            resultStore,
            fatalErrorHandler);
}
use of org.apache.flink.runtime.highavailability.JobResultStore in project flink by apache.
the class ApplicationDispatcherGatewayServiceFactory method create.
/**
 * Creates and starts a {@link Dispatcher} whose bootstrap is an
 * {@link ApplicationDispatcherBootstrap}, and returns it wrapped in a
 * {@link DefaultDispatcherGatewayService}.
 *
 * @throws FlinkRuntimeException if creating the dispatcher fails with any {@link Exception}
 */
@Override
public AbstractDispatcherLeaderProcess.DispatcherGatewayService create(
        DispatcherId fencingToken,
        Collection<JobGraph> recoveredJobs,
        Collection<JobResult> recoveredDirtyJobResults,
        JobGraphWriter jobGraphWriter,
        JobResultStore jobResultStore) {
    final List<JobID> idsOfRecoveredJobs = getRecoveredJobIds(recoveredJobs);

    final Dispatcher applicationDispatcher;
    try {
        // Built inside the try block so a failure here is wrapped like any other creation error.
        final PartialDispatcherServicesWithJobPersistenceComponents servicesWithPersistence =
                PartialDispatcherServicesWithJobPersistenceComponents.from(
                        partialDispatcherServices, jobGraphWriter, jobResultStore);
        applicationDispatcher =
                dispatcherFactory.createDispatcher(
                        rpcService,
                        fencingToken,
                        recoveredJobs,
                        recoveredDirtyJobResults,
                        (gateway, executor, onError) ->
                                new ApplicationDispatcherBootstrap(
                                        application,
                                        idsOfRecoveredJobs,
                                        configuration,
                                        gateway,
                                        executor,
                                        onError),
                        servicesWithPersistence);
    } catch (Exception creationFailure) {
        throw new FlinkRuntimeException(
                "Could not create the Dispatcher rpc endpoint.", creationFailure);
    }

    // Starting happens outside the try block, so its failures are not wrapped.
    applicationDispatcher.start();
    return DefaultDispatcherGatewayService.from(applicationDispatcher);
}
use of org.apache.flink.runtime.highavailability.JobResultStore in project flink by apache.
the class ApplicationDispatcherBootstrapITCase method testDirtyJobResultRecoveryInApplicationMode.
/**
 * Starts an application-mode mini cluster whose {@link JobResultStore} already contains a dirty
 * result for {@code ApplicationDispatcherBootstrap.ZERO_JOB_ID}, then checks that the submission
 * failed with a {@link DuplicateJobSubmissionException} in its cause chain and that the store
 * entry ended up clean instead of dirty.
 */
@Test
public void testDirtyJobResultRecoveryInApplicationMode() throws Exception {
    final Deadline deadline = Deadline.fromNow(TIMEOUT);

    final Configuration configuration = new Configuration();
    configuration.set(HighAvailabilityOptions.HA_MODE, HighAvailabilityMode.ZOOKEEPER.name());
    configuration.set(DeploymentOptions.TARGET, EmbeddedExecutor.NAME);
    configuration.set(ClientOptions.CLIENT_RETRY_PERIOD, Duration.ofMillis(100));

    final TestingMiniClusterConfiguration miniClusterConfig =
            TestingMiniClusterConfiguration.newBuilder()
                    .setConfiguration(configuration)
                    .build();

    // A dirty entry in the JobResultStore is expected to make the
    // ApplicationDispatcherBootstrap implementation fail to submit the job.
    final JobResultStore dirtyResultStore = new EmbeddedJobResultStore();
    dirtyResultStore.createDirtyResult(
            new JobResultEntry(
                    TestingJobResultStore.createSuccessfulJobResult(
                            ApplicationDispatcherBootstrap.ZERO_JOB_ID)));

    // HA services that always hand out the pre-populated store.
    final EmbeddedHaServicesWithLeadershipControl haServicesWithDirtyStore =
            new EmbeddedHaServicesWithLeadershipControl(TestingUtils.defaultExecutor()) {
                @Override
                public JobResultStore getJobResultStore() {
                    return dirtyResultStore;
                }
            };

    final TestingMiniCluster.Builder miniClusterBuilder =
            TestingMiniCluster.newBuilder(miniClusterConfig)
                    .setHighAvailabilityServicesSupplier(() -> haServicesWithDirtyStore)
                    .setDispatcherResourceManagerComponentFactorySupplier(
                            createApplicationModeDispatcherResourceManagerComponentFactorySupplier(
                                    miniClusterConfig.getConfiguration(),
                                    ErrorHandlingSubmissionJob.createPackagedProgram()));

    try (final MiniCluster miniCluster = miniClusterBuilder.build()) {
        // Start the mini cluster, which also submits the job.
        miniCluster.start();
        // The cluster should shut down automatically once the application completes.
        awaitClusterStopped(miniCluster, deadline);
    }

    FlinkAssertions.assertThatChainOfCauses(ErrorHandlingSubmissionJob.getSubmissionException())
            .as("The job's main method shouldn't have been succeeded due to a DuplicateJobSubmissionException.")
            .hasAtLeastOneElementOfType(DuplicateJobSubmissionException.class);

    assertThat(dirtyResultStore.hasDirtyJobResultEntry(ApplicationDispatcherBootstrap.ZERO_JOB_ID))
            .isFalse();
    assertThat(dirtyResultStore.hasCleanJobResultEntry(ApplicationDispatcherBootstrap.ZERO_JOB_ID))
            .isTrue();
}
use of org.apache.flink.runtime.highavailability.JobResultStore in project flink by apache.
the class DefaultDispatcherGatewayServiceFactory method create.
/**
 * Creates and starts a {@link Dispatcher} that uses a {@link NoOpDispatcherBootstrap}, and
 * returns it wrapped in a {@link DefaultDispatcherGatewayService}.
 *
 * @throws FlinkRuntimeException if creating the dispatcher fails with any {@link Exception}
 */
@Override
public AbstractDispatcherLeaderProcess.DispatcherGatewayService create(
        DispatcherId fencingToken,
        Collection<JobGraph> recoveredJobs,
        Collection<JobResult> recoveredDirtyJobResults,
        JobGraphWriter jobGraphWriter,
        JobResultStore jobResultStore) {
    final Dispatcher createdDispatcher;
    try {
        // Built inside the try block so a failure here is wrapped like any other creation error.
        final PartialDispatcherServicesWithJobPersistenceComponents servicesWithPersistence =
                PartialDispatcherServicesWithJobPersistenceComponents.from(
                        partialDispatcherServices, jobGraphWriter, jobResultStore);
        createdDispatcher =
                dispatcherFactory.createDispatcher(
                        rpcService,
                        fencingToken,
                        recoveredJobs,
                        recoveredDirtyJobResults,
                        (gateway, executor, onError) -> new NoOpDispatcherBootstrap(),
                        servicesWithPersistence);
    } catch (Exception creationFailure) {
        throw new FlinkRuntimeException(
                "Could not create the Dispatcher rpc endpoint.", creationFailure);
    }

    // Starting happens outside the try block, so its failures are not wrapped.
    createdDispatcher.start();
    return DefaultDispatcherGatewayService.from(createdDispatcher);
}
Aggregations