Search in sources :

Example 1 with CheckpointResponder

use of org.apache.flink.runtime.taskmanager.CheckpointResponder in project flink by apache.

the class TaskExecutor method submitTask.

// ----------------------------------------------------------------------
// Task lifecycle RPCs
// ----------------------------------------------------------------------
@Override
public CompletableFuture<Acknowledge> submitTask(TaskDeploymentDescriptor tdd, JobMasterId jobMasterId, Time timeout) {
    try {
        final JobID jobId = tdd.getJobId();
        final ExecutionAttemptID executionAttemptID = tdd.getExecutionAttemptId();
        final JobTable.Connection jobManagerConnection = jobTable.getConnection(jobId).orElseThrow(() -> {
            final String message = "Could not submit task because there is no JobManager " + "associated for the job " + jobId + '.';
            log.debug(message);
            return new TaskSubmissionException(message);
        });
        if (!Objects.equals(jobManagerConnection.getJobMasterId(), jobMasterId)) {
            final String message = "Rejecting the task submission because the job manager leader id " + jobMasterId + " does not match the expected job manager leader id " + jobManagerConnection.getJobMasterId() + '.';
            log.debug(message);
            throw new TaskSubmissionException(message);
        }
        if (!taskSlotTable.tryMarkSlotActive(jobId, tdd.getAllocationId())) {
            final String message = "No task slot allocated for job ID " + jobId + " and allocation ID " + tdd.getAllocationId() + '.';
            log.debug(message);
            throw new TaskSubmissionException(message);
        }
        // re-integrate offloaded data:
        try {
            tdd.loadBigData(taskExecutorBlobService.getPermanentBlobService());
        } catch (IOException | ClassNotFoundException e) {
            throw new TaskSubmissionException("Could not re-integrate offloaded TaskDeploymentDescriptor data.", e);
        }
        // deserialize the pre-serialized information
        final JobInformation jobInformation;
        final TaskInformation taskInformation;
        try {
            jobInformation = tdd.getSerializedJobInformation().deserializeValue(getClass().getClassLoader());
            taskInformation = tdd.getSerializedTaskInformation().deserializeValue(getClass().getClassLoader());
        } catch (IOException | ClassNotFoundException e) {
            throw new TaskSubmissionException("Could not deserialize the job or task information.", e);
        }
        if (!jobId.equals(jobInformation.getJobId())) {
            throw new TaskSubmissionException("Inconsistent job ID information inside TaskDeploymentDescriptor (" + tdd.getJobId() + " vs. " + jobInformation.getJobId() + ")");
        }
        TaskManagerJobMetricGroup jobGroup = taskManagerMetricGroup.addJob(jobInformation.getJobId(), jobInformation.getJobName());
        // note that a pre-existing job group can NOT be closed concurrently - this is done by
        // the same TM thread in removeJobMetricsGroup
        TaskMetricGroup taskMetricGroup = jobGroup.addTask(taskInformation.getJobVertexId(), tdd.getExecutionAttemptId(), taskInformation.getTaskName(), tdd.getSubtaskIndex(), tdd.getAttemptNumber());
        InputSplitProvider inputSplitProvider = new RpcInputSplitProvider(jobManagerConnection.getJobManagerGateway(), taskInformation.getJobVertexId(), tdd.getExecutionAttemptId(), taskManagerConfiguration.getRpcTimeout());
        final TaskOperatorEventGateway taskOperatorEventGateway = new RpcTaskOperatorEventGateway(jobManagerConnection.getJobManagerGateway(), executionAttemptID, (t) -> runAsync(() -> failTask(executionAttemptID, t)));
        TaskManagerActions taskManagerActions = jobManagerConnection.getTaskManagerActions();
        CheckpointResponder checkpointResponder = jobManagerConnection.getCheckpointResponder();
        GlobalAggregateManager aggregateManager = jobManagerConnection.getGlobalAggregateManager();
        LibraryCacheManager.ClassLoaderHandle classLoaderHandle = jobManagerConnection.getClassLoaderHandle();
        ResultPartitionConsumableNotifier resultPartitionConsumableNotifier = jobManagerConnection.getResultPartitionConsumableNotifier();
        PartitionProducerStateChecker partitionStateChecker = jobManagerConnection.getPartitionStateChecker();
        final TaskLocalStateStore localStateStore = localStateStoresManager.localStateStoreForSubtask(jobId, tdd.getAllocationId(), taskInformation.getJobVertexId(), tdd.getSubtaskIndex());
        // TODO: Pass config value from user program and do overriding here.
        final StateChangelogStorage<?> changelogStorage;
        try {
            changelogStorage = changelogStoragesManager.stateChangelogStorageForJob(jobId, taskManagerConfiguration.getConfiguration(), jobGroup);
        } catch (IOException e) {
            throw new TaskSubmissionException(e);
        }
        final JobManagerTaskRestore taskRestore = tdd.getTaskRestore();
        final TaskStateManager taskStateManager = new TaskStateManagerImpl(jobId, tdd.getExecutionAttemptId(), localStateStore, changelogStorage, taskRestore, checkpointResponder);
        MemoryManager memoryManager;
        try {
            memoryManager = taskSlotTable.getTaskMemoryManager(tdd.getAllocationId());
        } catch (SlotNotFoundException e) {
            throw new TaskSubmissionException("Could not submit task.", e);
        }
        Task task = new Task(jobInformation, taskInformation, tdd.getExecutionAttemptId(), tdd.getAllocationId(), tdd.getSubtaskIndex(), tdd.getAttemptNumber(), tdd.getProducedPartitions(), tdd.getInputGates(), memoryManager, taskExecutorServices.getIOManager(), taskExecutorServices.getShuffleEnvironment(), taskExecutorServices.getKvStateService(), taskExecutorServices.getBroadcastVariableManager(), taskExecutorServices.getTaskEventDispatcher(), externalResourceInfoProvider, taskStateManager, taskManagerActions, inputSplitProvider, checkpointResponder, taskOperatorEventGateway, aggregateManager, classLoaderHandle, fileCache, taskManagerConfiguration, taskMetricGroup, resultPartitionConsumableNotifier, partitionStateChecker, getRpcService().getScheduledExecutor());
        taskMetricGroup.gauge(MetricNames.IS_BACK_PRESSURED, task::isBackPressured);
        log.info("Received task {} ({}), deploy into slot with allocation id {}.", task.getTaskInfo().getTaskNameWithSubtasks(), tdd.getExecutionAttemptId(), tdd.getAllocationId());
        boolean taskAdded;
        try {
            taskAdded = taskSlotTable.addTask(task);
        } catch (SlotNotFoundException | SlotNotActiveException e) {
            throw new TaskSubmissionException("Could not submit task.", e);
        }
        if (taskAdded) {
            task.startTaskThread();
            setupResultPartitionBookkeeping(tdd.getJobId(), tdd.getProducedPartitions(), task.getTerminationFuture());
            return CompletableFuture.completedFuture(Acknowledge.get());
        } else {
            final String message = "TaskManager already contains a task for id " + task.getExecutionId() + '.';
            log.debug(message);
            throw new TaskSubmissionException(message);
        }
    } catch (TaskSubmissionException e) {
        return FutureUtils.completedExceptionally(e);
    }
}
Also used : SlotNotFoundException(org.apache.flink.runtime.taskexecutor.slot.SlotNotFoundException) TaskStateManagerImpl(org.apache.flink.runtime.state.TaskStateManagerImpl) Task(org.apache.flink.runtime.taskmanager.Task) SlotNotActiveException(org.apache.flink.runtime.taskexecutor.slot.SlotNotActiveException) RpcInputSplitProvider(org.apache.flink.runtime.taskexecutor.rpc.RpcInputSplitProvider) RpcTaskOperatorEventGateway(org.apache.flink.runtime.taskexecutor.rpc.RpcTaskOperatorEventGateway) TaskOperatorEventGateway(org.apache.flink.runtime.jobgraph.tasks.TaskOperatorEventGateway) JobManagerTaskRestore(org.apache.flink.runtime.checkpoint.JobManagerTaskRestore) RpcTaskOperatorEventGateway(org.apache.flink.runtime.taskexecutor.rpc.RpcTaskOperatorEventGateway) TaskManagerActions(org.apache.flink.runtime.taskmanager.TaskManagerActions) TaskSubmissionException(org.apache.flink.runtime.taskexecutor.exceptions.TaskSubmissionException) InputSplitProvider(org.apache.flink.runtime.jobgraph.tasks.InputSplitProvider) RpcInputSplitProvider(org.apache.flink.runtime.taskexecutor.rpc.RpcInputSplitProvider) ResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier) RpcResultPartitionConsumableNotifier(org.apache.flink.runtime.taskexecutor.rpc.RpcResultPartitionConsumableNotifier) JobInformation(org.apache.flink.runtime.executiongraph.JobInformation) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TaskInformation(org.apache.flink.runtime.executiongraph.TaskInformation) TaskLocalStateStore(org.apache.flink.runtime.state.TaskLocalStateStore) TaskMetricGroup(org.apache.flink.runtime.metrics.groups.TaskMetricGroup) RpcCheckpointResponder(org.apache.flink.runtime.taskexecutor.rpc.RpcCheckpointResponder) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) TaskManagerJobMetricGroup(org.apache.flink.runtime.metrics.groups.TaskManagerJobMetricGroup) IOException(java.io.IOException) LibraryCacheManager(org.apache.flink.runtime.execution.librarycache.LibraryCacheManager) TaskStateManager(org.apache.flink.runtime.state.TaskStateManager) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) RpcGlobalAggregateManager(org.apache.flink.runtime.taskexecutor.rpc.RpcGlobalAggregateManager) JobID(org.apache.flink.api.common.JobID)

Example 2 with CheckpointResponder

use of org.apache.flink.runtime.taskmanager.CheckpointResponder in project flink by apache.

the class TaskExecutor method associateWithJobManager.

private JobTable.Connection associateWithJobManager(JobTable.Job job, ResourceID resourceID, JobMasterGateway jobMasterGateway) {
    checkNotNull(resourceID);
    checkNotNull(jobMasterGateway);
    TaskManagerActions taskManagerActions = new TaskManagerActionsImpl(jobMasterGateway);
    CheckpointResponder checkpointResponder = new RpcCheckpointResponder(jobMasterGateway);
    GlobalAggregateManager aggregateManager = new RpcGlobalAggregateManager(jobMasterGateway);
    ResultPartitionConsumableNotifier resultPartitionConsumableNotifier = new RpcResultPartitionConsumableNotifier(jobMasterGateway, getRpcService().getScheduledExecutor(), taskManagerConfiguration.getRpcTimeout());
    PartitionProducerStateChecker partitionStateChecker = new RpcPartitionStateChecker(jobMasterGateway);
    registerQueryableState(job.getJobId(), jobMasterGateway);
    return job.connect(resourceID, jobMasterGateway, taskManagerActions, checkpointResponder, aggregateManager, resultPartitionConsumableNotifier, partitionStateChecker);
}
Also used : RpcPartitionStateChecker(org.apache.flink.runtime.taskexecutor.rpc.RpcPartitionStateChecker) RpcGlobalAggregateManager(org.apache.flink.runtime.taskexecutor.rpc.RpcGlobalAggregateManager) RpcCheckpointResponder(org.apache.flink.runtime.taskexecutor.rpc.RpcCheckpointResponder) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) RpcCheckpointResponder(org.apache.flink.runtime.taskexecutor.rpc.RpcCheckpointResponder) RpcResultPartitionConsumableNotifier(org.apache.flink.runtime.taskexecutor.rpc.RpcResultPartitionConsumableNotifier) ResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier) RpcResultPartitionConsumableNotifier(org.apache.flink.runtime.taskexecutor.rpc.RpcResultPartitionConsumableNotifier) RpcGlobalAggregateManager(org.apache.flink.runtime.taskexecutor.rpc.RpcGlobalAggregateManager) TaskManagerActions(org.apache.flink.runtime.taskmanager.TaskManagerActions)

Example 3 with CheckpointResponder

use of org.apache.flink.runtime.taskmanager.CheckpointResponder in project flink by apache.

the class MultipleInputStreamTaskTest method testTriggeringStopWithSavepointWithDrain.

@Test
public void testTriggeringStopWithSavepointWithDrain() throws Exception {
    SourceOperatorFactory<Integer> sourceOperatorFactory = new SourceOperatorFactory<>(new MockSource(Boundedness.CONTINUOUS_UNBOUNDED, 2), WatermarkStrategy.noWatermarks());
    CompletableFuture<Boolean> checkpointCompleted = new CompletableFuture<>();
    CheckpointResponder checkpointResponder = new TestCheckpointResponder() {

        @Override
        public void acknowledgeCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics, TaskStateSnapshot subtaskState) {
            super.acknowledgeCheckpoint(jobID, executionAttemptID, checkpointId, checkpointMetrics, subtaskState);
            checkpointCompleted.complete(null);
        }
    };
    try (StreamTaskMailboxTestHarness<String> testHarness = new StreamTaskMailboxTestHarnessBuilder<>(MultipleInputStreamTask::new, BasicTypeInfo.STRING_TYPE_INFO).setCollectNetworkEvents().modifyStreamConfig(config -> config.setCheckpointingEnabled(true)).modifyExecutionConfig(applyObjectReuse(objectReuse)).addInput(BasicTypeInfo.INT_TYPE_INFO).addInput(BasicTypeInfo.INT_TYPE_INFO).addInput(BasicTypeInfo.INT_TYPE_INFO).setTaskStateSnapshot(1, TaskStateSnapshot.FINISHED_ON_RESTORE).setupOperatorChain(new LifeCycleMonitorMultipleInputOperatorFactory()).finishForSingletonOperatorChain(StringSerializer.INSTANCE).setCheckpointResponder(checkpointResponder).build()) {
        CompletableFuture<Boolean> triggerResult = testHarness.streamTask.triggerCheckpointAsync(new CheckpointMetaData(2, 2), CheckpointOptions.alignedNoTimeout(SavepointType.terminate(SavepointFormatType.CANONICAL), CheckpointStorageLocationReference.getDefault()));
        checkpointCompleted.whenComplete((ignored, exception) -> testHarness.streamTask.notifyCheckpointCompleteAsync(2));
        testHarness.waitForTaskCompletion();
        testHarness.finishProcessing();
        assertTrue(triggerResult.isDone());
        assertTrue(triggerResult.get());
        assertTrue(checkpointCompleted.isDone());
    }
}
Also used : EndOfData(org.apache.flink.runtime.io.network.api.EndOfData) TaskIOMetricGroup(org.apache.flink.runtime.metrics.groups.TaskIOMetricGroup) Arrays(java.util.Arrays) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) SharedObjects(org.apache.flink.testutils.junit.SharedObjects) NoMoreSplitsEvent(org.apache.flink.runtime.source.event.NoMoreSplitsEvent) AbstractStreamOperatorFactory(org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory) Duration(java.time.Duration) Map(java.util.Map) WatermarkStatus(org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) BoundedOneInput(org.apache.flink.streaming.api.operators.BoundedOneInput) Serializable(java.io.Serializable) StopMode(org.apache.flink.runtime.io.network.api.StopMode) MetricNames(org.apache.flink.runtime.metrics.MetricNames) CheckpointBarrier(org.apache.flink.runtime.io.network.api.CheckpointBarrier) Matchers.contains(org.hamcrest.Matchers.contains) WatermarkMetricOperator(org.apache.flink.streaming.runtime.tasks.OneInputStreamTaskTest.WatermarkMetricOperator) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) Boundedness(org.apache.flink.api.connector.source.Boundedness) Counter(org.apache.flink.metrics.Counter) RunWith(org.junit.runner.RunWith) ResultPartitionWriter(org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter) TimestampAssigner(org.apache.flink.api.common.eventtime.TimestampAssigner) DataOutputView(org.apache.flink.core.memory.DataOutputView) AbstractInput(org.apache.flink.streaming.api.operators.AbstractInput) ArrayList(java.util.ArrayList) CompletingCheckpointResponder(org.apache.flink.streaming.util.CompletingCheckpointResponder) InternalOperatorMetricGroup(org.apache.flink.runtime.metrics.groups.InternalOperatorMetricGroup) Gauge(org.apache.flink.metrics.Gauge) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) TestHarnessUtil(org.apache.flink.streaming.util.TestHarnessUtil) Before(org.junit.Before) CheckpointStorageLocationReference(org.apache.flink.runtime.state.CheckpointStorageLocationReference) SourceReader(org.apache.flink.api.connector.source.SourceReader) Parameter(org.junit.runners.Parameterized.Parameter) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) IOException(java.io.IOException) InterceptingTaskMetricGroup(org.apache.flink.runtime.metrics.util.InterceptingTaskMetricGroup) AddSplitEvent(org.apache.flink.runtime.source.event.AddSplitEvent) StreamMultipleInputProcessor(org.apache.flink.streaming.runtime.io.StreamMultipleInputProcessor) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) JobID(org.apache.flink.api.common.JobID) UnregisteredMetricGroups(org.apache.flink.runtime.metrics.groups.UnregisteredMetricGroups) StreamTaskFinalCheckpointsTest.processMailTillCheckpointSucceeds(org.apache.flink.streaming.runtime.tasks.StreamTaskFinalCheckpointsTest.processMailTillCheckpointSucceeds) Assert(org.junit.Assert) ArrayDeque(java.util.ArrayDeque) Assert.assertEquals(org.junit.Assert.assertEquals) Input(org.apache.flink.streaming.api.operators.Input) LifeCycleMonitorMultipleInputOperatorFactory(org.apache.flink.streaming.runtime.tasks.MultipleInputStreamTaskChainedSourcesCheckpointingTest.LifeCycleMonitorMultipleInputOperatorFactory) WatermarkGenerator(org.apache.flink.api.common.eventtime.WatermarkGenerator) SavepointType(org.apache.flink.runtime.checkpoint.SavepointType) StringSerializer(org.apache.flink.api.common.typeutils.base.StringSerializer) StreamTaskFinalCheckpointsTest.triggerCheckpoint(org.apache.flink.streaming.runtime.tasks.StreamTaskFinalCheckpointsTest.triggerCheckpoint) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) ResultPartition(org.apache.flink.runtime.io.network.partition.ResultPartition) IntSerializer(org.apache.flink.api.common.typeutils.base.IntSerializer) TypeSerializerSnapshot(org.apache.flink.api.common.typeutils.TypeSerializerSnapshot) Parameterized(org.junit.runners.Parameterized) SourceReaderContext(org.apache.flink.api.connector.source.SourceReaderContext) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) CheckpointType(org.apache.flink.runtime.checkpoint.CheckpointType) InterceptingOperatorMetricGroup(org.apache.flink.runtime.metrics.util.InterceptingOperatorMetricGroup) BoundedMultiInput(org.apache.flink.streaming.api.operators.BoundedMultiInput) MockSourceReader(org.apache.flink.api.connector.source.mocks.MockSourceReader) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) WatermarkStrategy(org.apache.flink.api.common.eventtime.WatermarkStrategy) MockSourceSplitSerializer(org.apache.flink.api.connector.source.mocks.MockSourceSplitSerializer) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) MultipleInputStreamOperator(org.apache.flink.streaming.api.operators.MultipleInputStreamOperator) List(java.util.List) SerializedValue(org.apache.flink.util.SerializedValue) Preconditions.checkArgument(org.apache.flink.util.Preconditions.checkArgument) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) CancelCheckpointMarker(org.apache.flink.runtime.io.network.api.CancelCheckpointMarker) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) Parameters(org.junit.runners.Parameterized.Parameters) CoreMatchers.not(org.hamcrest.CoreMatchers.not) EndOfPartitionEvent(org.apache.flink.runtime.io.network.api.EndOfPartitionEvent) AbstractStreamOperatorV2(org.apache.flink.streaming.api.operators.AbstractStreamOperatorV2) Watermark(org.apache.flink.streaming.api.watermark.Watermark) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Metric(org.apache.flink.metrics.Metric) SourceOperatorFactory(org.apache.flink.streaming.api.operators.SourceOperatorFactory) MockSourceSplit(org.apache.flink.api.connector.source.mocks.MockSourceSplit) TaskMetricGroup(org.apache.flink.runtime.metrics.groups.TaskMetricGroup) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) MockSource(org.apache.flink.api.connector.source.mocks.MockSource) OperatorMetricGroup(org.apache.flink.metrics.groups.OperatorMetricGroup) DataInputView(org.apache.flink.core.memory.DataInputView) SharedReference(org.apache.flink.testutils.junit.SharedReference) StreamOperatorParameters(org.apache.flink.streaming.api.operators.StreamOperatorParameters) WatermarkOutput(org.apache.flink.api.common.eventtime.WatermarkOutput) IsMapContaining(org.hamcrest.collection.IsMapContaining) Consumer(java.util.function.Consumer) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) Rule(org.junit.Rule) PartitionTestUtils(org.apache.flink.runtime.io.network.partition.PartitionTestUtils) LatencyMarker(org.apache.flink.streaming.runtime.streamrecord.LatencyMarker) Collections(java.util.Collections) MockSource(org.apache.flink.api.connector.source.mocks.MockSource) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) LifeCycleMonitorMultipleInputOperatorFactory(org.apache.flink.streaming.runtime.tasks.MultipleInputStreamTaskChainedSourcesCheckpointingTest.LifeCycleMonitorMultipleInputOperatorFactory) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) CompletingCheckpointResponder(org.apache.flink.streaming.util.CompletingCheckpointResponder) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) SourceOperatorFactory(org.apache.flink.streaming.api.operators.SourceOperatorFactory) CompletableFuture(java.util.concurrent.CompletableFuture) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 4 with CheckpointResponder

use of org.apache.flink.runtime.taskmanager.CheckpointResponder in project flink by apache.

the class SourceStreamTaskTest method testTriggeringStopWithSavepointWithDrain.

@Test
public void testTriggeringStopWithSavepointWithDrain() throws Exception {
    SourceFunction<String> testSource = new EmptySource();
    CompletableFuture<Boolean> checkpointCompleted = new CompletableFuture<>();
    CheckpointResponder checkpointResponder = new TestCheckpointResponder() {

        @Override
        public void acknowledgeCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics, TaskStateSnapshot subtaskState) {
            super.acknowledgeCheckpoint(jobID, executionAttemptID, checkpointId, checkpointMetrics, subtaskState);
            checkpointCompleted.complete(null);
        }
    };
    try (StreamTaskMailboxTestHarness<String> harness = new StreamTaskMailboxTestHarnessBuilder<>(SourceStreamTask::new, STRING_TYPE_INFO).setTaskStateSnapshot(1, TaskStateSnapshot.FINISHED_ON_RESTORE).setCheckpointResponder(checkpointResponder).setupOutputForSingletonOperatorChain(new StreamSource<>(testSource)).build()) {
        CompletableFuture<Boolean> triggerResult = harness.streamTask.triggerCheckpointAsync(new CheckpointMetaData(2, 2), CheckpointOptions.alignedNoTimeout(SavepointType.terminate(SavepointFormatType.CANONICAL), CheckpointStorageLocationReference.getDefault()));
        checkpointCompleted.whenComplete((ignored, exception) -> harness.streamTask.notifyCheckpointCompleteAsync(2));
        // Run mailbox till the source thread finished and suspend the mailbox
        harness.streamTask.runMailboxLoop();
        harness.finishProcessing();
        assertTrue(triggerResult.isDone());
        assertTrue(triggerResult.get());
        assertTrue(checkpointCompleted.isDone());
    }
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) CompletableFuture(java.util.concurrent.CompletableFuture) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 5 with CheckpointResponder

use of org.apache.flink.runtime.taskmanager.CheckpointResponder in project flink by apache.

the class RocksDBAsyncSnapshotTest method testFullyAsyncSnapshot.

/**
 * This ensures that asynchronous state handles are actually materialized asynchronously.
 *
 * <p>We use latches to block at various stages and see if the code still continues through the
 * parts that are not asynchronous. If the checkpoint is not done asynchronously the test will
 * simply lock forever.
 */
@Test
public void testFullyAsyncSnapshot() throws Exception {
    final OneInputStreamTaskTestHarness<String, String> testHarness = new OneInputStreamTaskTestHarness<>(OneInputStreamTask::new, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
    testHarness.setupOutputForSingletonOperatorChain();
    testHarness.configureForKeyedStream(new KeySelector<String, String>() {

        @Override
        public String getKey(String value) throws Exception {
            return value;
        }
    }, BasicTypeInfo.STRING_TYPE_INFO);
    StreamConfig streamConfig = testHarness.getStreamConfig();
    File dbDir = temporaryFolder.newFolder();
    RocksDBStateBackend backend = new RocksDBStateBackend(new MemoryStateBackend());
    backend.setDbStoragePath(dbDir.getAbsolutePath());
    streamConfig.setStateBackend(backend);
    streamConfig.setStreamOperator(new AsyncCheckpointOperator());
    streamConfig.setOperatorID(new OperatorID());
    final OneShotLatch delayCheckpointLatch = new OneShotLatch();
    final OneShotLatch ensureCheckpointLatch = new OneShotLatch();
    CheckpointResponder checkpointResponderMock = new CheckpointResponder() {

        @Override
        public void acknowledgeCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics, TaskStateSnapshot subtaskState) {
            // even though the async checkpoint would not finish
            try {
                delayCheckpointLatch.await();
            } catch (InterruptedException e) {
                throw new RuntimeException(e);
            }
            boolean hasManagedKeyedState = false;
            for (Map.Entry<OperatorID, OperatorSubtaskState> entry : subtaskState.getSubtaskStateMappings()) {
                OperatorSubtaskState state = entry.getValue();
                if (state != null) {
                    hasManagedKeyedState |= state.getManagedKeyedState() != null;
                }
            }
            // should be one k/v state
            assertTrue(hasManagedKeyedState);
            // we now know that the checkpoint went through
            ensureCheckpointLatch.trigger();
        }

        @Override
        public void reportCheckpointMetrics(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics) {
        }

        @Override
        public void declineCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointException checkpointException) {
        }
    };
    JobID jobID = new JobID();
    ExecutionAttemptID executionAttemptID = new ExecutionAttemptID();
    TestTaskStateManager taskStateManagerTestMock = new TestTaskStateManager(jobID, executionAttemptID, checkpointResponderMock, TestLocalRecoveryConfig.disabled(), new InMemoryStateChangelogStorage(), new HashMap<>(), -1L, new OneShotLatch());
    StreamMockEnvironment mockEnv = new StreamMockEnvironment(testHarness.jobConfig, testHarness.taskConfig, testHarness.memorySize, new MockInputSplitProvider(), testHarness.bufferSize, taskStateManagerTestMock);
    AtomicReference<Throwable> errorRef = new AtomicReference<>();
    mockEnv.setExternalExceptionHandler(errorRef::set);
    testHarness.invoke(mockEnv);
    testHarness.waitForTaskRunning();
    final OneInputStreamTask<String, String> task = testHarness.getTask();
    task.triggerCheckpointAsync(new CheckpointMetaData(42, 17), CheckpointOptions.forCheckpointWithDefaultLocation()).get();
    testHarness.processElement(new StreamRecord<>("Wohoo", 0));
    // now we allow the checkpoint
    delayCheckpointLatch.trigger();
    // wait for the checkpoint to go through
    ensureCheckpointLatch.await();
    testHarness.endInput();
    ExecutorService threadPool = task.getAsyncOperationsThreadPool();
    threadPool.shutdown();
    Assert.assertTrue(threadPool.awaitTermination(60_000, TimeUnit.MILLISECONDS));
    testHarness.waitForTaskCompletion();
    if (errorRef.get() != null) {
        fail("Unexpected exception during execution.");
    }
}
Also used : OneInputStreamTask(org.apache.flink.streaming.runtime.tasks.OneInputStreamTask) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) MemoryStateBackend(org.apache.flink.runtime.state.memory.MemoryStateBackend) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) InMemoryStateChangelogStorage(org.apache.flink.runtime.state.changelog.inmemory.InMemoryStateChangelogStorage) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) StreamMockEnvironment(org.apache.flink.streaming.runtime.tasks.StreamMockEnvironment) MockInputSplitProvider(org.apache.flink.runtime.operators.testutils.MockInputSplitProvider) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) AtomicReference(java.util.concurrent.atomic.AtomicReference) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) TestTaskStateManager(org.apache.flink.runtime.state.TestTaskStateManager) OneInputStreamTaskTestHarness(org.apache.flink.streaming.runtime.tasks.OneInputStreamTaskTestHarness) ExecutorService(java.util.concurrent.ExecutorService) File(java.io.File) Map(java.util.Map) HashMap(java.util.HashMap) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

CheckpointResponder (org.apache.flink.runtime.taskmanager.CheckpointResponder)11 JobID (org.apache.flink.api.common.JobID)9 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)8 CheckpointMetaData (org.apache.flink.runtime.checkpoint.CheckpointMetaData)7 CheckpointMetrics (org.apache.flink.runtime.checkpoint.CheckpointMetrics)7 TaskStateSnapshot (org.apache.flink.runtime.checkpoint.TaskStateSnapshot)7 Test (org.junit.Test)7 IOException (java.io.IOException)6 CompletableFuture (java.util.concurrent.CompletableFuture)4 ResultPartitionConsumableNotifier (org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier)4 TaskMetricGroup (org.apache.flink.runtime.metrics.groups.TaskMetricGroup)4 RpcCheckpointResponder (org.apache.flink.runtime.taskexecutor.rpc.RpcCheckpointResponder)4 RpcResultPartitionConsumableNotifier (org.apache.flink.runtime.taskexecutor.rpc.RpcResultPartitionConsumableNotifier)4 TaskManagerActions (org.apache.flink.runtime.taskmanager.TaskManagerActions)4 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)3 Map (java.util.Map)3 TestCheckpointResponder (org.apache.flink.runtime.taskmanager.TestCheckpointResponder)3 Serializable (java.io.Serializable)2 Duration (java.time.Duration)2