Search in sources :

Example 1 with CheckpointsCleaner

use of org.apache.flink.runtime.checkpoint.CheckpointsCleaner in project flink by apache.

the class DefaultExecutionGraphFactory method createAndRestoreExecutionGraph.

@Override
public ExecutionGraph createAndRestoreExecutionGraph(JobGraph jobGraph, CompletedCheckpointStore completedCheckpointStore, CheckpointsCleaner checkpointsCleaner, CheckpointIDCounter checkpointIdCounter, TaskDeploymentDescriptorFactory.PartitionLocationConstraint partitionLocationConstraint, long initializationTimestamp, VertexAttemptNumberStore vertexAttemptNumberStore, VertexParallelismStore vertexParallelismStore, ExecutionStateUpdateListener executionStateUpdateListener, Logger log) throws Exception {
    ExecutionDeploymentListener executionDeploymentListener = new ExecutionDeploymentTrackerDeploymentListenerAdapter(executionDeploymentTracker);
    ExecutionStateUpdateListener combinedExecutionStateUpdateListener = (execution, previousState, newState) -> {
        executionStateUpdateListener.onStateUpdate(execution, previousState, newState);
        if (newState.isTerminal()) {
            executionDeploymentTracker.stopTrackingDeploymentOf(execution);
        }
    };
    final ExecutionGraph newExecutionGraph = DefaultExecutionGraphBuilder.buildGraph(jobGraph, configuration, futureExecutor, ioExecutor, userCodeClassLoader, completedCheckpointStore, checkpointsCleaner, checkpointIdCounter, rpcTimeout, blobWriter, log, shuffleMaster, jobMasterPartitionTracker, partitionLocationConstraint, executionDeploymentListener, combinedExecutionStateUpdateListener, initializationTimestamp, vertexAttemptNumberStore, vertexParallelismStore, checkpointStatsTrackerFactory, isDynamicGraph);
    final CheckpointCoordinator checkpointCoordinator = newExecutionGraph.getCheckpointCoordinator();
    if (checkpointCoordinator != null) {
        // check whether we find a valid checkpoint
        if (!checkpointCoordinator.restoreInitialCheckpointIfPresent(new HashSet<>(newExecutionGraph.getAllVertices().values()))) {
            // check whether we can restore from a savepoint
            tryRestoreExecutionGraphFromSavepoint(newExecutionGraph, jobGraph.getSavepointRestoreSettings());
        }
    }
    return newExecutionGraph;
}
Also used : ExecutionStateUpdateListener(org.apache.flink.runtime.executiongraph.ExecutionStateUpdateListener) ShuffleMaster(org.apache.flink.runtime.shuffle.ShuffleMaster) CheckpointCoordinator(org.apache.flink.runtime.checkpoint.CheckpointCoordinator) TaskDeploymentDescriptorFactory(org.apache.flink.runtime.deployment.TaskDeploymentDescriptorFactory) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Supplier(java.util.function.Supplier) CachingSupplier(org.apache.flink.util.function.CachingSupplier) HashSet(java.util.HashSet) JobMasterPartitionTracker(org.apache.flink.runtime.io.network.partition.JobMasterPartitionTracker) CheckpointStatsTracker(org.apache.flink.runtime.checkpoint.CheckpointStatsTracker) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) BlobWriter(org.apache.flink.runtime.blob.BlobWriter) Logger(org.slf4j.Logger) Executor(java.util.concurrent.Executor) JobManagerJobMetricGroup(org.apache.flink.runtime.metrics.groups.JobManagerJobMetricGroup) CheckpointIDCounter(org.apache.flink.runtime.checkpoint.CheckpointIDCounter) Configuration(org.apache.flink.configuration.Configuration) CompletedCheckpointStore(org.apache.flink.runtime.checkpoint.CompletedCheckpointStore) CheckpointsCleaner(org.apache.flink.runtime.checkpoint.CheckpointsCleaner) ExecutionDeploymentTracker(org.apache.flink.runtime.jobmaster.ExecutionDeploymentTracker) DefaultExecutionGraphBuilder(org.apache.flink.runtime.executiongraph.DefaultExecutionGraphBuilder) ExecutionDeploymentTrackerDeploymentListenerAdapter(org.apache.flink.runtime.jobmaster.ExecutionDeploymentTrackerDeploymentListenerAdapter) VertexAttemptNumberStore(org.apache.flink.runtime.executiongraph.VertexAttemptNumberStore) ExecutionDeploymentListener(org.apache.flink.runtime.executiongraph.ExecutionDeploymentListener) WebOptions(org.apache.flink.configuration.WebOptions) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) Time(org.apache.flink.api.common.time.Time) CheckpointCoordinator(org.apache.flink.runtime.checkpoint.CheckpointCoordinator) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) ExecutionDeploymentListener(org.apache.flink.runtime.executiongraph.ExecutionDeploymentListener) ExecutionDeploymentTrackerDeploymentListenerAdapter(org.apache.flink.runtime.jobmaster.ExecutionDeploymentTrackerDeploymentListenerAdapter) ExecutionStateUpdateListener(org.apache.flink.runtime.executiongraph.ExecutionStateUpdateListener) HashSet(java.util.HashSet)

Example 2 with CheckpointsCleaner

use of org.apache.flink.runtime.checkpoint.CheckpointsCleaner in project flink by apache.

the class AdaptiveBatchSchedulerFactory method createInstance.

@Override
public SchedulerNG createInstance(Logger log, JobGraph jobGraph, Executor ioExecutor, Configuration jobMasterConfiguration, SlotPoolService slotPoolService, ScheduledExecutorService futureExecutor, ClassLoader userCodeLoader, CheckpointRecoveryFactory checkpointRecoveryFactory, Time rpcTimeout, BlobWriter blobWriter, JobManagerJobMetricGroup jobManagerJobMetricGroup, Time slotRequestTimeout, ShuffleMaster<?> shuffleMaster, JobMasterPartitionTracker partitionTracker, ExecutionDeploymentTracker executionDeploymentTracker, long initializationTimestamp, ComponentMainThreadExecutor mainThreadExecutor, FatalErrorHandler fatalErrorHandler, JobStatusListener jobStatusListener) throws Exception {
    checkState(jobGraph.getJobType() == JobType.BATCH, "Adaptive batch scheduler only supports batch jobs");
    checkAllExchangesBlocking(jobGraph);
    final SlotPool slotPool = slotPoolService.castInto(SlotPool.class).orElseThrow(() -> new IllegalStateException("The DefaultScheduler requires a SlotPool."));
    final SlotSelectionStrategy slotSelectionStrategy = SlotSelectionStrategyUtils.selectSlotSelectionStrategy(JobType.BATCH, jobMasterConfiguration);
    final PhysicalSlotRequestBulkChecker bulkChecker = PhysicalSlotRequestBulkCheckerImpl.createFromSlotPool(slotPool, SystemClock.getInstance());
    final PhysicalSlotProvider physicalSlotProvider = new PhysicalSlotProviderImpl(slotSelectionStrategy, slotPool);
    final ExecutionSlotAllocatorFactory allocatorFactory = new SlotSharingExecutionSlotAllocatorFactory(physicalSlotProvider, false, bulkChecker, slotRequestTimeout);
    final RestartBackoffTimeStrategy restartBackoffTimeStrategy = RestartBackoffTimeStrategyFactoryLoader.createRestartBackoffTimeStrategyFactory(jobGraph.getSerializedExecutionConfig().deserializeValue(userCodeLoader).getRestartStrategy(), jobMasterConfiguration, jobGraph.isCheckpointingEnabled()).create();
    log.info("Using restart back off time strategy {} for {} ({}).", restartBackoffTimeStrategy, jobGraph.getName(), jobGraph.getJobID());
    final ExecutionGraphFactory executionGraphFactory = new DefaultExecutionGraphFactory(jobMasterConfiguration, userCodeLoader, executionDeploymentTracker, futureExecutor, ioExecutor, rpcTimeout, jobManagerJobMetricGroup, blobWriter, shuffleMaster, partitionTracker, true);
    return new AdaptiveBatchScheduler(log, jobGraph, ioExecutor, jobMasterConfiguration, bulkChecker::start, new ScheduledExecutorServiceAdapter(futureExecutor), userCodeLoader, new CheckpointsCleaner(), checkpointRecoveryFactory, jobManagerJobMetricGroup, new VertexwiseSchedulingStrategy.Factory(), FailoverStrategyFactoryLoader.loadFailoverStrategyFactory(jobMasterConfiguration), restartBackoffTimeStrategy, new DefaultExecutionVertexOperations(), new ExecutionVertexVersioner(), allocatorFactory, initializationTimestamp, mainThreadExecutor, jobStatusListener, executionGraphFactory, shuffleMaster, rpcTimeout, DefaultVertexParallelismDecider.from(jobMasterConfiguration), jobMasterConfiguration.getInteger(JobManagerOptions.ADAPTIVE_BATCH_SCHEDULER_MAX_PARALLELISM));
}
Also used : DefaultExecutionVertexOperations(org.apache.flink.runtime.scheduler.DefaultExecutionVertexOperations) SlotSharingExecutionSlotAllocatorFactory(org.apache.flink.runtime.scheduler.SlotSharingExecutionSlotAllocatorFactory) SlotSelectionStrategy(org.apache.flink.runtime.jobmaster.slotpool.SlotSelectionStrategy) VertexwiseSchedulingStrategy(org.apache.flink.runtime.scheduler.strategy.VertexwiseSchedulingStrategy) SlotPool(org.apache.flink.runtime.jobmaster.slotpool.SlotPool) PhysicalSlotRequestBulkChecker(org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlotRequestBulkChecker) PhysicalSlotProviderImpl(org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlotProviderImpl) SlotSharingExecutionSlotAllocatorFactory(org.apache.flink.runtime.scheduler.SlotSharingExecutionSlotAllocatorFactory) ExecutionSlotAllocatorFactory(org.apache.flink.runtime.scheduler.ExecutionSlotAllocatorFactory) ScheduledExecutorServiceAdapter(org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter) RestartBackoffTimeStrategy(org.apache.flink.runtime.executiongraph.failover.flip1.RestartBackoffTimeStrategy) CheckpointsCleaner(org.apache.flink.runtime.checkpoint.CheckpointsCleaner) DefaultExecutionGraphFactory(org.apache.flink.runtime.scheduler.DefaultExecutionGraphFactory) ExecutionVertexVersioner(org.apache.flink.runtime.scheduler.ExecutionVertexVersioner) ExecutionGraphFactory(org.apache.flink.runtime.scheduler.ExecutionGraphFactory) DefaultExecutionGraphFactory(org.apache.flink.runtime.scheduler.DefaultExecutionGraphFactory) PhysicalSlotProvider(org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlotProvider)

Example 3 with CheckpointsCleaner

use of org.apache.flink.runtime.checkpoint.CheckpointsCleaner in project flink by apache.

the class AdaptiveSchedulerFactory method createInstance.

@Override
public SchedulerNG createInstance(Logger log, JobGraph jobGraph, Executor ioExecutor, Configuration jobMasterConfiguration, SlotPoolService slotPoolService, ScheduledExecutorService futureExecutor, ClassLoader userCodeLoader, CheckpointRecoveryFactory checkpointRecoveryFactory, Time rpcTimeout, BlobWriter blobWriter, JobManagerJobMetricGroup jobManagerJobMetricGroup, Time slotRequestTimeout, ShuffleMaster<?> shuffleMaster, JobMasterPartitionTracker partitionTracker, ExecutionDeploymentTracker executionDeploymentTracker, long initializationTimestamp, ComponentMainThreadExecutor mainThreadExecutor, FatalErrorHandler fatalErrorHandler, JobStatusListener jobStatusListener) throws Exception {
    final DeclarativeSlotPool declarativeSlotPool = slotPoolService.castInto(DeclarativeSlotPool.class).orElseThrow(() -> new IllegalStateException("The AdaptiveScheduler requires a DeclarativeSlotPool."));
    final RestartBackoffTimeStrategy restartBackoffTimeStrategy = RestartBackoffTimeStrategyFactoryLoader.createRestartBackoffTimeStrategyFactory(jobGraph.getSerializedExecutionConfig().deserializeValue(userCodeLoader).getRestartStrategy(), jobMasterConfiguration, jobGraph.isCheckpointingEnabled()).create();
    log.info("Using restart back off time strategy {} for {} ({}).", restartBackoffTimeStrategy, jobGraph.getName(), jobGraph.getJobID());
    final SlotSharingSlotAllocator slotAllocator = createSlotSharingSlotAllocator(declarativeSlotPool);
    final ExecutionGraphFactory executionGraphFactory = new DefaultExecutionGraphFactory(jobMasterConfiguration, userCodeLoader, executionDeploymentTracker, futureExecutor, ioExecutor, rpcTimeout, jobManagerJobMetricGroup, blobWriter, shuffleMaster, partitionTracker);
    return new AdaptiveScheduler(jobGraph, jobMasterConfiguration, declarativeSlotPool, slotAllocator, ioExecutor, userCodeLoader, new CheckpointsCleaner(), checkpointRecoveryFactory, initialResourceAllocationTimeout, resourceStabilizationTimeout, jobManagerJobMetricGroup, restartBackoffTimeStrategy, initializationTimestamp, mainThreadExecutor, fatalErrorHandler, jobStatusListener, executionGraphFactory);
}
Also used : DeclarativeSlotPool(org.apache.flink.runtime.jobmaster.slotpool.DeclarativeSlotPool) RestartBackoffTimeStrategy(org.apache.flink.runtime.executiongraph.failover.flip1.RestartBackoffTimeStrategy) CheckpointsCleaner(org.apache.flink.runtime.checkpoint.CheckpointsCleaner) DefaultExecutionGraphFactory(org.apache.flink.runtime.scheduler.DefaultExecutionGraphFactory) SlotSharingSlotAllocator(org.apache.flink.runtime.scheduler.adaptive.allocator.SlotSharingSlotAllocator) ExecutionGraphFactory(org.apache.flink.runtime.scheduler.ExecutionGraphFactory) DefaultExecutionGraphFactory(org.apache.flink.runtime.scheduler.DefaultExecutionGraphFactory)

Example 4 with CheckpointsCleaner

use of org.apache.flink.runtime.checkpoint.CheckpointsCleaner in project flink by apache.

the class DefaultSchedulerTest method doTestCheckpointCleanerIsClosedAfterCheckpointServices.

/**
 * Visible for re-use in {@link
 * org.apache.flink.runtime.scheduler.adaptive.AdaptiveSchedulerTest}.
 */
public static void doTestCheckpointCleanerIsClosedAfterCheckpointServices(BiFunction<CheckpointRecoveryFactory, CheckpointsCleaner, SchedulerNG> schedulerFactory, ScheduledExecutorService executorService) throws Exception {
    final CountDownLatch checkpointServicesShutdownBlocked = new CountDownLatch(1);
    final CountDownLatch cleanerClosed = new CountDownLatch(1);
    final CompletedCheckpointStore completedCheckpointStore = new StandaloneCompletedCheckpointStore(1) {

        @Override
        public void shutdown(JobStatus jobStatus, CheckpointsCleaner checkpointsCleaner) throws Exception {
            checkpointServicesShutdownBlocked.await();
            super.shutdown(jobStatus, checkpointsCleaner);
        }
    };
    final CheckpointIDCounter checkpointIDCounter = new StandaloneCheckpointIDCounter() {

        @Override
        public void shutdown(JobStatus jobStatus) throws Exception {
            checkpointServicesShutdownBlocked.await();
            super.shutdown(jobStatus);
        }
    };
    final CheckpointsCleaner checkpointsCleaner = new CheckpointsCleaner() {

        @Override
        public synchronized CompletableFuture<Void> closeAsync() {
            cleanerClosed.countDown();
            return super.closeAsync();
        }
    };
    final SchedulerNG scheduler = schedulerFactory.apply(new TestingCheckpointRecoveryFactory(completedCheckpointStore, checkpointIDCounter), checkpointsCleaner);
    final CompletableFuture<Void> schedulerClosed = new CompletableFuture<>();
    final CountDownLatch schedulerClosing = new CountDownLatch(1);
    executorService.submit(() -> {
        scheduler.closeAsync().thenRun(() -> schedulerClosed.complete(null));
        schedulerClosing.countDown();
    });
    // Wait for scheduler to start closing.
    schedulerClosing.await();
    assertFalse("CheckpointCleaner should not close before checkpoint services.", cleanerClosed.await(10, TimeUnit.MILLISECONDS));
    checkpointServicesShutdownBlocked.countDown();
    cleanerClosed.await();
    schedulerClosed.get();
}
Also used : JobStatus(org.apache.flink.api.common.JobStatus) CompletableFuture(java.util.concurrent.CompletableFuture) StandaloneCompletedCheckpointStore(org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore) CheckpointsCleaner(org.apache.flink.runtime.checkpoint.CheckpointsCleaner) CheckpointIDCounter(org.apache.flink.runtime.checkpoint.CheckpointIDCounter) StandaloneCheckpointIDCounter(org.apache.flink.runtime.checkpoint.StandaloneCheckpointIDCounter) CountDownLatch(java.util.concurrent.CountDownLatch) TestingCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.TestingCheckpointRecoveryFactory) StandaloneCompletedCheckpointStore(org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore) CompletedCheckpointStore(org.apache.flink.runtime.checkpoint.CompletedCheckpointStore) StandaloneCheckpointIDCounter(org.apache.flink.runtime.checkpoint.StandaloneCheckpointIDCounter)

Example 5 with CheckpointsCleaner

use of org.apache.flink.runtime.checkpoint.CheckpointsCleaner in project flink by apache.

the class AdaptiveSchedulerTest method testExceptionHistoryWithTaskFailureFromStopWithSavepoint.

@Test
public void testExceptionHistoryWithTaskFailureFromStopWithSavepoint() throws Exception {
    final Exception expectedException = new Exception("Expected Local Exception");
    Consumer<JobGraph> setupJobGraph = jobGraph -> jobGraph.setSnapshotSettings(new JobCheckpointingSettings(CheckpointCoordinatorConfiguration.builder().build(), null));
    final CompletedCheckpointStore completedCheckpointStore = new StandaloneCompletedCheckpointStore(1);
    final CheckpointIDCounter checkpointIDCounter = new StandaloneCheckpointIDCounter();
    final CheckpointsCleaner checkpointCleaner = new CheckpointsCleaner();
    TestingCheckpointRecoveryFactory checkpointRecoveryFactory = new TestingCheckpointRecoveryFactory(completedCheckpointStore, checkpointIDCounter);
    Consumer<AdaptiveSchedulerBuilder> setupScheduler = builder -> builder.setCheckpointRecoveryFactory(checkpointRecoveryFactory).setCheckpointCleaner(checkpointCleaner);
    BiConsumer<AdaptiveScheduler, List<ExecutionAttemptID>> testLogic = (scheduler, attemptIds) -> {
        final ExecutionAttemptID attemptId = attemptIds.get(1);
        scheduler.stopWithSavepoint("file:///tmp/target", true, SavepointFormatType.CANONICAL);
        scheduler.updateTaskExecutionState(new TaskExecutionStateTransition(new TaskExecutionState(attemptId, ExecutionState.FAILED, expectedException)));
    };
    final Iterable<RootExceptionHistoryEntry> actualExceptionHistory = runExceptionHistoryTests(testLogic, setupScheduler, setupJobGraph);
    assertThat(actualExceptionHistory).hasSize(1);
    final RootExceptionHistoryEntry failure = actualExceptionHistory.iterator().next();
    assertThat(failure.getException().deserializeError(classLoader)).isEqualTo(expectedException);
}
Also used : Arrays(java.util.Arrays) ResourceRequirement(org.apache.flink.runtime.slots.ResourceRequirement) TaskNotRunningException(org.apache.flink.runtime.operators.coordination.TaskNotRunningException) ArchivedExecution(org.apache.flink.runtime.executiongraph.ArchivedExecution) TestingSlotAllocator(org.apache.flink.runtime.scheduler.adaptive.allocator.TestingSlotAllocator) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) Duration(java.time.Duration) ClassRule(org.junit.ClassRule) TestingCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.TestingCheckpointRecoveryFactory) ManuallyTriggeredComponentMainThreadExecutor(org.apache.flink.runtime.concurrent.ManuallyTriggeredComponentMainThreadExecutor) BlockingQueue(java.util.concurrent.BlockingQueue) MetricOptions(org.apache.flink.configuration.MetricOptions) JobManagerOptions(org.apache.flink.configuration.JobManagerOptions) Executors(java.util.concurrent.Executors) MetricNames(org.apache.flink.runtime.metrics.MetricNames) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) VertexParallelismStore(org.apache.flink.runtime.scheduler.VertexParallelismStore) Time(org.apache.flink.api.common.time.Time) RootExceptionHistoryEntry(org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry) FlinkException(org.apache.flink.util.FlinkException) ComponentMainThreadExecutor(org.apache.flink.runtime.concurrent.ComponentMainThreadExecutor) UpTimeGauge(org.apache.flink.runtime.executiongraph.metrics.UpTimeGauge) ExceptionHistoryEntry(org.apache.flink.runtime.scheduler.exceptionhistory.ExceptionHistoryEntry) LocalTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation) ResourceCounter(org.apache.flink.runtime.util.ResourceCounter) JobStatus(org.apache.flink.api.common.JobStatus) DefaultAllocatedSlotPool(org.apache.flink.runtime.jobmaster.slotpool.DefaultAllocatedSlotPool) ArrayList(java.util.ArrayList) SchedulerNG(org.apache.flink.runtime.scheduler.SchedulerNG) DownTimeGauge(org.apache.flink.runtime.executiongraph.metrics.DownTimeGauge) PartitionProducerDisposedException(org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException) Gauge(org.apache.flink.metrics.Gauge) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) BiConsumer(java.util.function.BiConsumer) FixedDelayRestartBackoffTimeStrategy(org.apache.flink.runtime.executiongraph.failover.flip1.FixedDelayRestartBackoffTimeStrategy) Nullable(javax.annotation.Nullable) ArchivedExecutionVertex(org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex) TestExecutorResource(org.apache.flink.testutils.executor.TestExecutorResource) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) CheckpointsCleaner(org.apache.flink.runtime.checkpoint.CheckpointsCleaner) TestOperatorEvent(org.apache.flink.runtime.operators.coordination.TestOperatorEvent) Test(org.junit.Test) IOException(java.io.IOException) IterableUtils(org.apache.flink.util.IterableUtils) SimpleAckingTaskManagerGateway(org.apache.flink.runtime.executiongraph.utils.SimpleAckingTaskManagerGateway) ExecutionException(java.util.concurrent.ExecutionException) TaskExecutionStateTransition(org.apache.flink.runtime.executiongraph.TaskExecutionStateTransition) JobID(org.apache.flink.api.common.JobID) NoRestartBackoffTimeStrategy(org.apache.flink.runtime.executiongraph.failover.flip1.NoRestartBackoffTimeStrategy) SlotPoolTestUtils.offerSlots(org.apache.flink.runtime.jobmaster.slotpool.SlotPoolTestUtils.offerSlots) JobManagerMetricGroup(org.apache.flink.runtime.metrics.groups.JobManagerMetricGroup) ComponentMainThreadExecutorServiceAdapter(org.apache.flink.runtime.concurrent.ComponentMainThreadExecutorServiceAdapter) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) SuppressRestartsException(org.apache.flink.runtime.execution.SuppressRestartsException) ArchivedExecutionGraphTest(org.apache.flink.runtime.executiongraph.ArchivedExecutionGraphTest) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TestLogger(org.apache.flink.util.TestLogger) JobCheckpointingSettings(org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings) CheckpointIDCounter(org.apache.flink.runtime.checkpoint.CheckpointIDCounter) ArchivedExecutionJobVertex(org.apache.flink.runtime.executiongraph.ArchivedExecutionJobVertex) TestingCompletedCheckpointStore(org.apache.flink.runtime.checkpoint.TestingCompletedCheckpointStore) TestRestartBackoffTimeStrategy(org.apache.flink.runtime.executiongraph.failover.flip1.TestRestartBackoffTimeStrategy) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) Collectors(java.util.stream.Collectors) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) DefaultDeclarativeSlotPool(org.apache.flink.runtime.jobmaster.slotpool.DefaultDeclarativeSlotPool) List(java.util.List) StandaloneCheckpointIDCounter(org.apache.flink.runtime.checkpoint.StandaloneCheckpointIDCounter) CoordinationRequest(org.apache.flink.runtime.operators.coordination.CoordinationRequest) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) Optional(java.util.Optional) SchedulerExecutionMode(org.apache.flink.configuration.SchedulerExecutionMode) KeyGroupRangeAssignment(org.apache.flink.runtime.state.KeyGroupRangeAssignment) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) SchedulerBase(org.apache.flink.runtime.scheduler.SchedulerBase) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) CompletableFuture(java.util.concurrent.CompletableFuture) VertexParallelismInformation(org.apache.flink.runtime.scheduler.VertexParallelismInformation) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) TestingCheckpointIDCounter(org.apache.flink.runtime.checkpoint.TestingCheckpointIDCounter) TestingMetricRegistry(org.apache.flink.runtime.metrics.util.TestingMetricRegistry) Nonnull(javax.annotation.Nonnull) StandaloneCompletedCheckpointStore(org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore) DefaultSchedulerTest(org.apache.flink.runtime.scheduler.DefaultSchedulerTest) JobGraphTestUtils.streamingJobGraph(org.apache.flink.runtime.jobgraph.JobGraphTestUtils.streamingJobGraph) ArchivedExecutionGraph(org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph) ArchivedExecutionGraphBuilder(org.apache.flink.runtime.rest.handler.legacy.utils.ArchivedExecutionGraphBuilder) Logger(org.slf4j.Logger) Configuration(org.apache.flink.configuration.Configuration) CompletedCheckpointStore(org.apache.flink.runtime.checkpoint.CompletedCheckpointStore) JobStatusListener(org.apache.flink.runtime.executiongraph.JobStatusListener) DefaultDeclarativeSlotPoolTest.createSlotOffersForResourceRequirements(org.apache.flink.runtime.jobmaster.slotpool.DefaultDeclarativeSlotPoolTest.createSlotOffersForResourceRequirements) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ExecutionGraphTestUtils.createNoOpVertex(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.createNoOpVertex) SchedulerTestingUtils.enableCheckpointing(org.apache.flink.runtime.scheduler.SchedulerTestingUtils.enableCheckpointing) TaskExecutionState(org.apache.flink.runtime.taskmanager.TaskExecutionState) TemporaryFolder(org.junit.rules.TemporaryFolder) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TaskExecutionStateTransition(org.apache.flink.runtime.executiongraph.TaskExecutionStateTransition) RootExceptionHistoryEntry(org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry) JobCheckpointingSettings(org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings) TaskNotRunningException(org.apache.flink.runtime.operators.coordination.TaskNotRunningException) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) FlinkException(org.apache.flink.util.FlinkException) PartitionProducerDisposedException(org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) SuppressRestartsException(org.apache.flink.runtime.execution.SuppressRestartsException) TestingCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.TestingCheckpointRecoveryFactory) TaskExecutionState(org.apache.flink.runtime.taskmanager.TaskExecutionState) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobGraphTestUtils.streamingJobGraph(org.apache.flink.runtime.jobgraph.JobGraphTestUtils.streamingJobGraph) StandaloneCompletedCheckpointStore(org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore) CheckpointsCleaner(org.apache.flink.runtime.checkpoint.CheckpointsCleaner) ArrayList(java.util.ArrayList) List(java.util.List) CheckpointIDCounter(org.apache.flink.runtime.checkpoint.CheckpointIDCounter) StandaloneCheckpointIDCounter(org.apache.flink.runtime.checkpoint.StandaloneCheckpointIDCounter) TestingCheckpointIDCounter(org.apache.flink.runtime.checkpoint.TestingCheckpointIDCounter) TestingCompletedCheckpointStore(org.apache.flink.runtime.checkpoint.TestingCompletedCheckpointStore) StandaloneCompletedCheckpointStore(org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore) CompletedCheckpointStore(org.apache.flink.runtime.checkpoint.CompletedCheckpointStore) StandaloneCheckpointIDCounter(org.apache.flink.runtime.checkpoint.StandaloneCheckpointIDCounter) Test(org.junit.Test) ArchivedExecutionGraphTest(org.apache.flink.runtime.executiongraph.ArchivedExecutionGraphTest) DefaultSchedulerTest(org.apache.flink.runtime.scheduler.DefaultSchedulerTest)

Aggregations

CheckpointsCleaner (org.apache.flink.runtime.checkpoint.CheckpointsCleaner)10 StandaloneCompletedCheckpointStore (org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore)6 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)6 StandaloneCheckpointIDCounter (org.apache.flink.runtime.checkpoint.StandaloneCheckpointIDCounter)5 CheckpointIDCounter (org.apache.flink.runtime.checkpoint.CheckpointIDCounter)4 Test (org.junit.Test)4 IOException (java.io.IOException)3 CompletableFuture (java.util.concurrent.CompletableFuture)3 ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService)3 JobStatus (org.apache.flink.api.common.JobStatus)3 Time (org.apache.flink.api.common.time.Time)3 Configuration (org.apache.flink.configuration.Configuration)3 CompletedCheckpointStore (org.apache.flink.runtime.checkpoint.CompletedCheckpointStore)3 Duration (java.time.Duration)2 ArrayList (java.util.ArrayList)2 Arrays (java.util.Arrays)2 List (java.util.List)2 Optional (java.util.Optional)2 ArrayBlockingQueue (java.util.concurrent.ArrayBlockingQueue)2 BlockingQueue (java.util.concurrent.BlockingQueue)2