Use of org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter in the Apache Flink project.
Class DefaultExecutionGraph, method enableCheckpointing.
/**
 * Enables checkpointing on this execution graph by creating the checkpoint failure manager,
 * the checkpoint timer and the {@link CheckpointCoordinator}, and by registering the given
 * master hooks on that coordinator.
 *
 * <p>Must be called while the job is in {@code CREATED} state and before any checkpoint
 * coordinator or checkpoint timer has been set up on this graph.
 *
 * @param chkConfig configuration of the checkpoint coordinator; must not be null
 * @param masterHooks hooks to register on the newly created coordinator
 * @param checkpointIDCounter checkpoint ID counter passed to the coordinator
 * @param checkpointStore completed checkpoint store passed to the coordinator
 * @param checkpointStateBackend state backend; only its simple class name is recorded here
 * @param checkpointStorage checkpoint storage passed to the coordinator; its simple class name
 *     is recorded as well
 * @param statsTracker checkpoint statistics tracker; must not be null
 * @param checkpointsCleaner checkpoints cleaner passed to the coordinator
 */
@Override
public void enableCheckpointing(
        CheckpointCoordinatorConfiguration chkConfig,
        List<MasterTriggerRestoreHook<?>> masterHooks,
        CheckpointIDCounter checkpointIDCounter,
        CompletedCheckpointStore checkpointStore,
        StateBackend checkpointStateBackend,
        CheckpointStorage checkpointStorage,
        CheckpointStatsTracker statsTracker,
        CheckpointsCleaner checkpointsCleaner) {

    checkState(state == JobStatus.CREATED, "Job must be in CREATED state");
    checkState(checkpointCoordinator == null, "checkpointing already enabled");

    final Collection<OperatorCoordinatorCheckpointContext> coordinatorContexts =
            buildOpCoordinatorCheckpointContexts();

    checkpointStatsTracker = checkNotNull(statsTracker, "CheckpointStatsTracker");
    checkpointCoordinatorConfiguration =
            checkNotNull(chkConfig, "CheckpointCoordinatorConfiguration");

    // Both callbacks hand the actual failure handling over to the job master main thread
    // executor instead of running it on the caller's thread.
    final CheckpointFailureManager.FailJobCallback failJobCallback =
            new CheckpointFailureManager.FailJobCallback() {
                @Override
                public void failJob(Throwable cause) {
                    getJobMasterMainThreadExecutor().execute(() -> failGlobal(cause));
                }

                @Override
                public void failJobDueToTaskFailure(
                        Throwable cause, ExecutionAttemptID failingTask) {
                    getJobMasterMainThreadExecutor()
                            .execute(
                                    () ->
                                            failGlobalIfExecutionIsStillRunning(
                                                    cause, failingTask));
                }
            };
    final CheckpointFailureManager checkpointFailureManager =
            new CheckpointFailureManager(
                    chkConfig.getTolerableCheckpointFailureNumber(), failJobCallback);

    // The timer is a dedicated single-threaded scheduled executor; it must not exist yet.
    checkState(checkpointCoordinatorTimer == null);
    final ThreadGroup timerThreadGroup = Thread.currentThread().getThreadGroup();
    checkpointCoordinatorTimer =
            Executors.newSingleThreadScheduledExecutor(
                    new DispatcherThreadFactory(timerThreadGroup, "Checkpoint Timer"));

    // The coordinator triggers and commits checkpoints and holds the state.
    checkpointCoordinator =
            new CheckpointCoordinator(
                    jobInformation.getJobId(),
                    chkConfig,
                    coordinatorContexts,
                    checkpointIDCounter,
                    checkpointStore,
                    checkpointStorage,
                    ioExecutor,
                    checkpointsCleaner,
                    new ScheduledExecutorServiceAdapter(checkpointCoordinatorTimer),
                    checkpointFailureManager,
                    createCheckpointPlanCalculator(
                            chkConfig.isEnableCheckpointsAfterTasksFinish()),
                    new ExecutionAttemptMappingProvider(getAllExecutionVertices()),
                    checkpointStatsTracker);

    // Register the master hooks; a hook the coordinator refuses to add is only logged.
    for (MasterTriggerRestoreHook<?> masterHook : masterHooks) {
        final boolean registered = checkpointCoordinator.addMasterHook(masterHook);
        if (!registered) {
            LOG.warn(
                    "Trying to register multiple checkpoint hooks with the name: {}",
                    masterHook.getIdentifier());
        }
    }

    if (checkpointCoordinator.isPeriodicCheckpointingConfigured()) {
        // The periodic checkpoint scheduler is switched on and off by job status changes
        // (running -> on, all other states -> off).
        registerJobStatusListener(checkpointCoordinator.createActivatorDeactivator());
    }

    this.stateBackendName = checkpointStateBackend.getClass().getSimpleName();
    this.checkpointStorageName = checkpointStorage.getClass().getSimpleName();
}
Use of org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter in the Apache Flink project.
Class CheckpointCoordinatorTest, method testMinCheckpointPause.
/**
 * Triggers two checkpoints on a coordinator configured with a minimum pause between
 * checkpoints, acknowledges the first one, and checks that the second request is not pending
 * half a pause later but does become pending eventually.
 */
@Test
public void testMinCheckpointPause() throws Exception {
    // The timer runs on a separate thread so that checkpoints can be triggered before
    // receiveAcknowledgeMessage returns.
    ScheduledExecutorService timerService = Executors.newSingleThreadScheduledExecutor();
    CheckpointCoordinator coordinator = null;
    try {
        int pause = 1000;

        JobVertexID jobVertexId = new JobVertexID();
        ExecutionGraph graph =
                new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                        .addJobVertex(jobVertexId)
                        .setMainThreadExecutor(
                                ComponentMainThreadExecutorServiceAdapter
                                        .forSingleThreadExecutor(
                                                new DirectScheduledExecutorService()))
                        .build();
        ExecutionVertex vertex = graph.getJobVertex(jobVertexId).getTaskVertices()[0];
        ExecutionAttemptID attemptId = vertex.getCurrentExecutionAttempt().getAttemptId();

        CheckpointCoordinatorConfiguration coordinatorConfig =
                CheckpointCoordinatorConfiguration.builder()
                        .setCheckpointInterval(pause)
                        .setCheckpointTimeout(Long.MAX_VALUE)
                        .setMaxConcurrentCheckpoints(1)
                        .setMinPauseBetweenCheckpoints(pause)
                        .build();
        coordinator =
                new CheckpointCoordinatorBuilder()
                        .setTimer(new ScheduledExecutorServiceAdapter(timerService))
                        .setCheckpointCoordinatorConfiguration(coordinatorConfig)
                        .setExecutionGraph(graph)
                        .build();
        coordinator.startCheckpointScheduler();

        // First request: triggered, executed, and later completed by
        // receiveAcknowledgeMessage.
        coordinator.triggerCheckpoint(true);
        // Second request: enqueued; we later check whether it gets executed in the middle
        // of receiveAcknowledgeMessage.
        coordinator.triggerCheckpoint(true);

        // Wait until at least one request has been fully processed.
        while (coordinator.getNumberOfPendingCheckpoints() == 0) {
            Thread.sleep(10);
        }

        AcknowledgeCheckpoint acknowledgement =
                new AcknowledgeCheckpoint(graph.getJobID(), attemptId, 1L);
        coordinator.receiveAcknowledgeMessage(acknowledgement, TASK_MANAGER_LOCATION_INFO);

        // Half a pause after the acknowledgement nothing may be pending yet.
        Thread.sleep(pause / 2);
        assertEquals(0, coordinator.getNumberOfPendingCheckpoints());

        // Make sure that the second request is eventually processed.
        while (coordinator.getNumberOfPendingCheckpoints() == 0) {
            Thread.sleep(1);
        }
    } finally {
        if (coordinator != null) {
            coordinator.shutdown();
        }
        timerService.shutdownNow();
    }
}
Use of org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter in the Apache Flink project.
Class CheckpointCoordinatorTriggeringTest, method discardingTriggeringCheckpointWillExecuteNextCheckpointRequest.
/**
 * Triggers two checkpoints, aborts the pending checkpoints once the master hook of the first
 * one has been invoked and completed, and expects the future of the second checkpoint
 * request to complete exceptionally with a {@link CheckpointException}.
 *
 * <p>This test only fails eventually.
 */
@Test
public void discardingTriggeringCheckpointWillExecuteNextCheckpointRequest() throws Exception {
    final ScheduledExecutorService timerService = Executors.newSingleThreadScheduledExecutor();
    final CheckpointCoordinatorConfiguration coordinatorConfig =
            CheckpointCoordinatorConfiguration.builder().build();
    final CheckpointCoordinator checkpointCoordinator =
            new CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder()
                    .setTimer(new ScheduledExecutorServiceAdapter(timerService))
                    .setCheckpointCoordinatorConfiguration(coordinatorConfig)
                    .build();

    final CompletableFuture<String> masterHookCheckpointFuture = new CompletableFuture<>();
    final OneShotLatch triggerCheckpointLatch = new OneShotLatch();
    final TestingMasterHook masterHook =
            new TestingMasterHook(masterHookCheckpointFuture, triggerCheckpointLatch);
    checkpointCoordinator.addMasterHook(masterHook);

    try {
        checkpointCoordinator.triggerCheckpoint(false);
        final CompletableFuture<CompletedCheckpoint> secondCheckpoint =
                checkpointCoordinator.triggerCheckpoint(false);

        // Block until the latch handed to the master hook is triggered, then complete the
        // hook's future.
        triggerCheckpointLatch.await();
        masterHookCheckpointFuture.complete("Completed");

        // Discard the checkpoint that is currently being triggered.
        final CheckpointException declined =
                new CheckpointException(CheckpointFailureReason.CHECKPOINT_DECLINED);
        checkpointCoordinator.abortPendingCheckpoints(declined);

        try {
            // Verify that the second checkpoint request will be executed and eventually
            // times out.
            secondCheckpoint.get();
            fail("Expected the second checkpoint to fail.");
        } catch (ExecutionException ee) {
            final Throwable cause = ExceptionUtils.stripExecutionException(ee);
            assertThat(cause, instanceOf(CheckpointException.class));
        }
    } finally {
        checkpointCoordinator.shutdown();
        ExecutorUtils.gracefulShutdown(10L, TimeUnit.SECONDS, timerService);
    }
}
Use of org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter in the Apache Flink project.
Class JobStatusPollingUtilsTest, method testPolling.
/**
 * Polls for a successful job result through a call-counting status supplier and checks that
 * the supplier's attempt counter equals the configured maximum once the result future has
 * completed.
 */
@Test
public void testPolling() {
    final int expectedAttempts = 3;
    final ScheduledExecutorService pollingService =
            Executors.newSingleThreadScheduledExecutor();
    try {
        final ScheduledExecutor pollingExecutor =
                new ScheduledExecutorServiceAdapter(pollingService);
        final CallCountingJobStatusSupplier statusSupplier =
                new CallCountingJobStatusSupplier(expectedAttempts);

        final CompletableFuture<JobResult> jobResultFuture =
                JobStatusPollingUtils.pollJobResultAsync(
                        statusSupplier,
                        () ->
                                CompletableFuture.completedFuture(
                                        createSuccessfulJobResult(new JobID(0, 0))),
                        pollingExecutor,
                        10);

        // Block until polling has produced the job result.
        jobResultFuture.join();

        assertThat(statusSupplier.getAttemptCounter(), is(equalTo(expectedAttempts)));
    } finally {
        ExecutorUtils.gracefulShutdown(5, TimeUnit.SECONDS, pollingService);
    }
}
Use of org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter in the Apache Flink project.
Class JobStatusPollingUtilsTest, method testFailedJobResult.
/**
 * Polls for a failed job result through a call-counting status supplier and checks that the
 * supplier's attempt counter equals the configured maximum and that the completed result
 * carries a serialized throwable.
 */
@Test
public void testFailedJobResult() throws ExecutionException, InterruptedException {
    final int expectedAttempts = 1;
    final ScheduledExecutorService pollingService =
            Executors.newSingleThreadScheduledExecutor();
    try {
        final ScheduledExecutor pollingExecutor =
                new ScheduledExecutorServiceAdapter(pollingService);
        final CallCountingJobStatusSupplier statusSupplier =
                new CallCountingJobStatusSupplier(expectedAttempts);

        final CompletableFuture<JobResult> jobResultFuture =
                JobStatusPollingUtils.pollJobResultAsync(
                        statusSupplier,
                        () ->
                                CompletableFuture.completedFuture(
                                        createFailedJobResult(new JobID(0, 0))),
                        pollingExecutor,
                        10);

        // Block until polling has produced the job result.
        jobResultFuture.join();

        assertThat(statusSupplier.getAttemptCounter(), is(equalTo(expectedAttempts)));

        // The future must be done and its result must carry the failure cause.
        final boolean completedWithThrowable =
                jobResultFuture.isDone()
                        && jobResultFuture.get().getSerializedThrowable().isPresent();
        assertTrue(completedWithThrowable);
    } finally {
        ExecutorUtils.gracefulShutdown(5, TimeUnit.SECONDS, pollingService);
    }
}
Aggregations