Search in sources:

Example 1 with ScheduledExecutorServiceAdapter

use of org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter in project flink by apache.

In class DefaultExecutionGraph, method enableCheckpointing:

/**
 * Turns on checkpointing for this execution graph.
 *
 * <p>Stores the stats tracker and the coordinator configuration, builds the failure manager,
 * creates the single-threaded "Checkpoint Timer" executor and the {@link CheckpointCoordinator},
 * registers the given master hooks on it, and finally records the simple class names of the
 * state backend and the checkpoint storage.
 *
 * <p>Guarded by {@code checkState}: the job must still be in {@code CREATED} state, and neither
 * a checkpoint coordinator nor a coordinator timer may have been set up before.
 *
 * @param chkConfig coordinator configuration; must not be null
 * @param masterHooks hooks to register on the newly created coordinator
 * @param checkpointIDCounter passed through to the coordinator
 * @param checkpointStore passed through to the coordinator
 * @param checkpointStateBackend only its simple class name is recorded here
 * @param checkpointStorage passed through to the coordinator; its simple class name is recorded
 * @param statsTracker checkpoint statistics tracker; must not be null
 * @param checkpointsCleaner passed through to the coordinator
 */
@Override
public void enableCheckpointing(
        CheckpointCoordinatorConfiguration chkConfig,
        List<MasterTriggerRestoreHook<?>> masterHooks,
        CheckpointIDCounter checkpointIDCounter,
        CompletedCheckpointStore checkpointStore,
        StateBackend checkpointStateBackend,
        CheckpointStorage checkpointStorage,
        CheckpointStatsTracker statsTracker,
        CheckpointsCleaner checkpointsCleaner) {
    // Preconditions: only a freshly created job without a coordinator may enable checkpointing.
    checkState(state == JobStatus.CREATED, "Job must be in CREATED state");
    checkState(checkpointCoordinator == null, "checkpointing already enabled");

    final Collection<OperatorCoordinatorCheckpointContext> operatorCoordinators =
            buildOpCoordinatorCheckpointContexts();

    checkpointStatsTracker = checkNotNull(statsTracker, "CheckpointStatsTracker");
    checkpointCoordinatorConfiguration =
            checkNotNull(chkConfig, "CheckpointCoordinatorConfiguration");

    // Both callbacks hand the actual failure handling over to the job master main thread
    // executor instead of running it on the calling thread.
    final int tolerableFailures = chkConfig.getTolerableCheckpointFailureNumber();
    final CheckpointFailureManager.FailJobCallback failJobCallback =
            new CheckpointFailureManager.FailJobCallback() {

                @Override
                public void failJob(Throwable failureCause) {
                    getJobMasterMainThreadExecutor().execute(() -> failGlobal(failureCause));
                }

                @Override
                public void failJobDueToTaskFailure(
                        Throwable failureCause, ExecutionAttemptID failedAttempt) {
                    getJobMasterMainThreadExecutor()
                            .execute(
                                    () ->
                                            failGlobalIfExecutionIsStillRunning(
                                                    failureCause, failedAttempt));
                }
            };
    final CheckpointFailureManager failureManager =
            new CheckpointFailureManager(tolerableFailures, failJobCallback);

    checkState(checkpointCoordinatorTimer == null);
    final DispatcherThreadFactory timerThreadFactory =
            new DispatcherThreadFactory(
                    Thread.currentThread().getThreadGroup(), "Checkpoint Timer");
    checkpointCoordinatorTimer = Executors.newSingleThreadScheduledExecutor(timerThreadFactory);

    // The coordinator triggers and commits checkpoints and holds the state.
    checkpointCoordinator =
            new CheckpointCoordinator(
                    jobInformation.getJobId(),
                    chkConfig,
                    operatorCoordinators,
                    checkpointIDCounter,
                    checkpointStore,
                    checkpointStorage,
                    ioExecutor,
                    checkpointsCleaner,
                    new ScheduledExecutorServiceAdapter(checkpointCoordinatorTimer),
                    failureManager,
                    createCheckpointPlanCalculator(
                            chkConfig.isEnableCheckpointsAfterTasksFinish()),
                    new ExecutionAttemptMappingProvider(getAllExecutionVertices()),
                    checkpointStatsTracker);

    // Register the master hooks; a rejected registration is only logged.
    for (MasterTriggerRestoreHook<?> masterHook : masterHooks) {
        final boolean registered = checkpointCoordinator.addMasterHook(masterHook);
        if (registered) {
            continue;
        }
        LOG.warn("Trying to register multiple checkpoint hooks with the name: {}", masterHook.getIdentifier());
    }

    // With periodic checkpointing configured, the scheduler is switched on and off by job
    // status changes (running -> on, all other states -> off).
    if (checkpointCoordinator.isPeriodicCheckpointingConfigured()) {
        registerJobStatusListener(checkpointCoordinator.createActivatorDeactivator());
    }

    this.stateBackendName = checkpointStateBackend.getClass().getSimpleName();
    this.checkpointStorageName = checkpointStorage.getClass().getSimpleName();
}
Also used : OperatorCoordinatorCheckpointContext(org.apache.flink.runtime.checkpoint.OperatorCoordinatorCheckpointContext) ExecutionAttemptMappingProvider(org.apache.flink.runtime.checkpoint.ExecutionAttemptMappingProvider) DispatcherThreadFactory(org.apache.flink.runtime.taskmanager.DispatcherThreadFactory) ScheduledExecutorServiceAdapter(org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter) CheckpointCoordinator(org.apache.flink.runtime.checkpoint.CheckpointCoordinator) CheckpointFailureManager(org.apache.flink.runtime.checkpoint.CheckpointFailureManager)

Example 2 with ScheduledExecutorServiceAdapter

use of org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter in project flink by apache.

In class CheckpointCoordinatorTest, method testMinCheckpointPause:

/**
 * Checks that a second, queued checkpoint request is not started within half of the configured
 * minimum pause after the first checkpoint was acknowledged, but is processed eventually.
 */
@Test
public void testMinCheckpointPause() throws Exception {
    // The timer runs on a separate thread so that checkpoint triggering can happen before
    // receiveAcknowledgeMessage returns.
    final ScheduledExecutorService timerService = Executors.newSingleThreadScheduledExecutor();
    CheckpointCoordinator coordinator = null;
    try {
        final int pause = 1000;
        final JobVertexID vertexId = new JobVertexID();

        final ExecutionGraph graph =
                new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                        .addJobVertex(vertexId)
                        .setMainThreadExecutor(
                                ComponentMainThreadExecutorServiceAdapter.forSingleThreadExecutor(
                                        new DirectScheduledExecutorService()))
                        .build();
        final ExecutionVertex firstVertex = graph.getJobVertex(vertexId).getTaskVertices()[0];
        final ExecutionAttemptID attemptId =
                firstVertex.getCurrentExecutionAttempt().getAttemptId();

        // Interval and minimum pause are both set to `pause`; at most one checkpoint may be
        // in flight at a time.
        final CheckpointCoordinatorConfiguration checkpointConfig =
                CheckpointCoordinatorConfiguration.builder()
                        .setCheckpointInterval(pause)
                        .setCheckpointTimeout(Long.MAX_VALUE)
                        .setMaxConcurrentCheckpoints(1)
                        .setMinPauseBetweenCheckpoints(pause)
                        .build();
        coordinator =
                new CheckpointCoordinatorBuilder()
                        .setTimer(new ScheduledExecutorServiceAdapter(timerService))
                        .setCheckpointCoordinatorConfiguration(checkpointConfig)
                        .setExecutionGraph(graph)
                        .build();
        coordinator.startCheckpointScheduler();

        // First request: trigger, execute, and later complete by receiveAcknowledgeMessage.
        coordinator.triggerCheckpoint(true);
        // Second request: enqueue and later see if it gets executed in the middle of
        // receiveAcknowledgeMessage.
        coordinator.triggerCheckpoint(true);

        // Wait for at least 1 request to be fully processed.
        while (coordinator.getNumberOfPendingCheckpoints() == 0) {
            Thread.sleep(10);
        }

        final AcknowledgeCheckpoint acknowledgement =
                new AcknowledgeCheckpoint(graph.getJobID(), attemptId, 1L);
        coordinator.receiveAcknowledgeMessage(acknowledgement, TASK_MANAGER_LOCATION_INFO);

        // Half the pause has elapsed: the queued request must not have started yet.
        Thread.sleep(pause / 2);
        assertEquals(0, coordinator.getNumberOfPendingCheckpoints());

        // Make sure that the 2nd request is eventually processed.
        while (coordinator.getNumberOfPendingCheckpoints() == 0) {
            Thread.sleep(1);
        }
    } finally {
        if (coordinator != null) {
            coordinator.shutdown();
        }
        timerService.shutdownNow();
    }
}
Also used : DirectScheduledExecutorService(org.apache.flink.runtime.testutils.DirectScheduledExecutorService) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) DirectScheduledExecutorService(org.apache.flink.runtime.testutils.DirectScheduledExecutorService) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ScheduledExecutorServiceAdapter(org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Test(org.junit.Test)

Example 3 with ScheduledExecutorServiceAdapter

use of org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter in project flink by apache.

In class CheckpointCoordinatorTriggeringTest, method discardingTriggeringCheckpointWillExecuteNextCheckpointRequest:

/**
 * Verifies that aborting a checkpoint which is still being triggered lets the next queued
 * checkpoint request run, and that this second request then fails with a
 * {@code CheckpointException}.
 *
 * <p>This test only fails eventually.
 */
@Test
public void discardingTriggeringCheckpointWillExecuteNextCheckpointRequest() throws Exception {
    final ScheduledExecutorService timerService = Executors.newSingleThreadScheduledExecutor();
    final CheckpointCoordinatorConfiguration defaultConfig =
            CheckpointCoordinatorConfiguration.builder().build();
    final CheckpointCoordinator checkpointCoordinator =
            new CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder()
                    .setTimer(new ScheduledExecutorServiceAdapter(timerService))
                    .setCheckpointCoordinatorConfiguration(defaultConfig)
                    .build();

    // The hook is handed a future that the test completes itself, plus a latch the test
    // awaits before doing so.
    final CompletableFuture<String> hookResultFuture = new CompletableFuture<>();
    final OneShotLatch hookTriggeredLatch = new OneShotLatch();
    final TestingMasterHook masterHook =
            new TestingMasterHook(hookResultFuture, hookTriggeredLatch);
    checkpointCoordinator.addMasterHook(masterHook);

    try {
        checkpointCoordinator.triggerCheckpoint(false);
        final CompletableFuture<CompletedCheckpoint> secondCheckpoint =
                checkpointCoordinator.triggerCheckpoint(false);

        hookTriggeredLatch.await();
        hookResultFuture.complete("Completed");

        // Discard the triggering checkpoint.
        final CheckpointException declined =
                new CheckpointException(CheckpointFailureReason.CHECKPOINT_DECLINED);
        checkpointCoordinator.abortPendingCheckpoints(declined);

        try {
            // Verify that the second checkpoint request will be executed and eventually
            // times out.
            secondCheckpoint.get();
            fail("Expected the second checkpoint to fail.");
        } catch (ExecutionException expected) {
            final Throwable strippedCause = ExceptionUtils.stripExecutionException(expected);
            assertThat(strippedCause, instanceOf(CheckpointException.class));
        }
    } finally {
        checkpointCoordinator.shutdown();
        ExecutorUtils.gracefulShutdown(10L, TimeUnit.SECONDS, timerService);
    }
}
Also used : ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) CheckpointCoordinatorBuilder(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder) CompletableFuture(java.util.concurrent.CompletableFuture) ScheduledExecutorServiceAdapter(org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) ExecutionException(java.util.concurrent.ExecutionException) Test(org.junit.Test)

Example 4 with ScheduledExecutorServiceAdapter

use of org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter in project flink by apache.

In class JobStatusPollingUtilsTest, method testPolling:

/**
 * Polls for a job result through a {@link ScheduledExecutorServiceAdapter} and checks that the
 * counting job status supplier reports exactly the configured number of attempts once the
 * result future has completed.
 */
@Test
public void testPolling() {
    final int maxAttemptCounter = 3;
    final ScheduledExecutorService pollingThread = Executors.newSingleThreadScheduledExecutor();
    try {
        final CallCountingJobStatusSupplier statusSupplier =
                new CallCountingJobStatusSupplier(maxAttemptCounter);

        final CompletableFuture<JobResult> jobResultFuture =
                JobStatusPollingUtils.pollJobResultAsync(
                        statusSupplier,
                        () ->
                                CompletableFuture.completedFuture(
                                        createSuccessfulJobResult(new JobID(0, 0))),
                        new ScheduledExecutorServiceAdapter(pollingThread),
                        10);

        // Block until polling has produced the job result.
        jobResultFuture.join();

        assertThat(statusSupplier.getAttemptCounter(), is(equalTo(maxAttemptCounter)));
    } finally {
        ExecutorUtils.gracefulShutdown(5, TimeUnit.SECONDS, pollingThread);
    }
}
Also used : ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ScheduledExecutorServiceAdapter(org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter) JobResult(org.apache.flink.runtime.jobmaster.JobResult) JobID(org.apache.flink.api.common.JobID) ScheduledExecutor(org.apache.flink.util.concurrent.ScheduledExecutor) Test(org.junit.Test)

Example 5 with ScheduledExecutorServiceAdapter

use of org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter in project flink by apache.

In class JobStatusPollingUtilsTest, method testFailedJobResult:

/**
 * Polls for a job result that was created via {@code createFailedJobResult} and checks that
 * the status supplier was asked exactly once and that the completed result carries a
 * serialized throwable.
 */
@Test
public void testFailedJobResult() throws ExecutionException, InterruptedException {
    final int maxAttemptCounter = 1;
    final ScheduledExecutorService pollingThread = Executors.newSingleThreadScheduledExecutor();
    try {
        final CallCountingJobStatusSupplier statusSupplier =
                new CallCountingJobStatusSupplier(maxAttemptCounter);

        final CompletableFuture<JobResult> jobResultFuture =
                JobStatusPollingUtils.pollJobResultAsync(
                        statusSupplier,
                        () ->
                                CompletableFuture.completedFuture(
                                        createFailedJobResult(new JobID(0, 0))),
                        new ScheduledExecutorServiceAdapter(pollingThread),
                        10);

        // Block until polling has produced the job result.
        jobResultFuture.join();

        assertThat(statusSupplier.getAttemptCounter(), is(equalTo(maxAttemptCounter)));

        // Short-circuit order matters: only read the result once the future reports done.
        final boolean completedWithThrowable =
                jobResultFuture.isDone()
                        && jobResultFuture.get().getSerializedThrowable().isPresent();
        assertTrue(completedWithThrowable);
    } finally {
        ExecutorUtils.gracefulShutdown(5, TimeUnit.SECONDS, pollingThread);
    }
}
Also used : ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ScheduledExecutorServiceAdapter(org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter) JobResult(org.apache.flink.runtime.jobmaster.JobResult) JobID(org.apache.flink.api.common.JobID) ScheduledExecutor(org.apache.flink.util.concurrent.ScheduledExecutor) Test(org.junit.Test)

Aggregations

ScheduledExecutorServiceAdapter (org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter): 13; ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService): 8; Test (org.junit.Test): 8; JobID (org.apache.flink.api.common.JobID): 3; JobResult (org.apache.flink.runtime.jobmaster.JobResult): 3; ScheduledExecutor (org.apache.flink.util.concurrent.ScheduledExecutor): 3; ExecutionException (java.util.concurrent.ExecutionException): 2; OneShotLatch (org.apache.flink.core.testutils.OneShotLatch): 2; CheckpointCoordinatorBuilder (org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder): 2; CheckpointsCleaner (org.apache.flink.runtime.checkpoint.CheckpointsCleaner): 2; RestartBackoffTimeStrategy (org.apache.flink.runtime.executiongraph.failover.flip1.RestartBackoffTimeStrategy): 2; SlotPool (org.apache.flink.runtime.jobmaster.slotpool.SlotPool): 2; IOException (java.io.IOException): 1; UndeclaredThrowableException (java.lang.reflect.UndeclaredThrowableException): 1; UUID (java.util.UUID): 1; CompletableFuture (java.util.concurrent.CompletableFuture): 1; ScheduledThreadPoolExecutor (java.util.concurrent.ScheduledThreadPoolExecutor): 1; TimeUnit (java.util.concurrent.TimeUnit): 1; TimeoutException (java.util.concurrent.TimeoutException): 1; AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 1