Search in sources :

Example 1 with TaskStateManagerImpl

use of org.apache.flink.runtime.state.TaskStateManagerImpl in project flink by apache.

the class TaskExecutor method submitTask.

// ----------------------------------------------------------------------
// Task lifecycle RPCs
// ----------------------------------------------------------------------
@Override
public CompletableFuture<Acknowledge> submitTask(TaskDeploymentDescriptor tdd, JobMasterId jobMasterId, Time timeout) {
    try {
        final JobID jobId = tdd.getJobId();
        final ExecutionAttemptID executionAttemptID = tdd.getExecutionAttemptId();
        final JobTable.Connection jobManagerConnection = jobTable.getConnection(jobId).orElseThrow(() -> {
            final String message = "Could not submit task because there is no JobManager " + "associated for the job " + jobId + '.';
            log.debug(message);
            return new TaskSubmissionException(message);
        });
        if (!Objects.equals(jobManagerConnection.getJobMasterId(), jobMasterId)) {
            final String message = "Rejecting the task submission because the job manager leader id " + jobMasterId + " does not match the expected job manager leader id " + jobManagerConnection.getJobMasterId() + '.';
            log.debug(message);
            throw new TaskSubmissionException(message);
        }
        if (!taskSlotTable.tryMarkSlotActive(jobId, tdd.getAllocationId())) {
            final String message = "No task slot allocated for job ID " + jobId + " and allocation ID " + tdd.getAllocationId() + '.';
            log.debug(message);
            throw new TaskSubmissionException(message);
        }
        // re-integrate offloaded data:
        try {
            tdd.loadBigData(taskExecutorBlobService.getPermanentBlobService());
        } catch (IOException | ClassNotFoundException e) {
            throw new TaskSubmissionException("Could not re-integrate offloaded TaskDeploymentDescriptor data.", e);
        }
        // deserialize the pre-serialized information
        final JobInformation jobInformation;
        final TaskInformation taskInformation;
        try {
            jobInformation = tdd.getSerializedJobInformation().deserializeValue(getClass().getClassLoader());
            taskInformation = tdd.getSerializedTaskInformation().deserializeValue(getClass().getClassLoader());
        } catch (IOException | ClassNotFoundException e) {
            throw new TaskSubmissionException("Could not deserialize the job or task information.", e);
        }
        if (!jobId.equals(jobInformation.getJobId())) {
            throw new TaskSubmissionException("Inconsistent job ID information inside TaskDeploymentDescriptor (" + tdd.getJobId() + " vs. " + jobInformation.getJobId() + ")");
        }
        TaskManagerJobMetricGroup jobGroup = taskManagerMetricGroup.addJob(jobInformation.getJobId(), jobInformation.getJobName());
        // note that a pre-existing job group can NOT be closed concurrently - this is done by
        // the same TM thread in removeJobMetricsGroup
        TaskMetricGroup taskMetricGroup = jobGroup.addTask(taskInformation.getJobVertexId(), tdd.getExecutionAttemptId(), taskInformation.getTaskName(), tdd.getSubtaskIndex(), tdd.getAttemptNumber());
        InputSplitProvider inputSplitProvider = new RpcInputSplitProvider(jobManagerConnection.getJobManagerGateway(), taskInformation.getJobVertexId(), tdd.getExecutionAttemptId(), taskManagerConfiguration.getRpcTimeout());
        final TaskOperatorEventGateway taskOperatorEventGateway = new RpcTaskOperatorEventGateway(jobManagerConnection.getJobManagerGateway(), executionAttemptID, (t) -> runAsync(() -> failTask(executionAttemptID, t)));
        TaskManagerActions taskManagerActions = jobManagerConnection.getTaskManagerActions();
        CheckpointResponder checkpointResponder = jobManagerConnection.getCheckpointResponder();
        GlobalAggregateManager aggregateManager = jobManagerConnection.getGlobalAggregateManager();
        LibraryCacheManager.ClassLoaderHandle classLoaderHandle = jobManagerConnection.getClassLoaderHandle();
        ResultPartitionConsumableNotifier resultPartitionConsumableNotifier = jobManagerConnection.getResultPartitionConsumableNotifier();
        PartitionProducerStateChecker partitionStateChecker = jobManagerConnection.getPartitionStateChecker();
        final TaskLocalStateStore localStateStore = localStateStoresManager.localStateStoreForSubtask(jobId, tdd.getAllocationId(), taskInformation.getJobVertexId(), tdd.getSubtaskIndex());
        // TODO: Pass config value from user program and do overriding here.
        final StateChangelogStorage<?> changelogStorage;
        try {
            changelogStorage = changelogStoragesManager.stateChangelogStorageForJob(jobId, taskManagerConfiguration.getConfiguration(), jobGroup);
        } catch (IOException e) {
            throw new TaskSubmissionException(e);
        }
        final JobManagerTaskRestore taskRestore = tdd.getTaskRestore();
        final TaskStateManager taskStateManager = new TaskStateManagerImpl(jobId, tdd.getExecutionAttemptId(), localStateStore, changelogStorage, taskRestore, checkpointResponder);
        MemoryManager memoryManager;
        try {
            memoryManager = taskSlotTable.getTaskMemoryManager(tdd.getAllocationId());
        } catch (SlotNotFoundException e) {
            throw new TaskSubmissionException("Could not submit task.", e);
        }
        Task task = new Task(jobInformation, taskInformation, tdd.getExecutionAttemptId(), tdd.getAllocationId(), tdd.getSubtaskIndex(), tdd.getAttemptNumber(), tdd.getProducedPartitions(), tdd.getInputGates(), memoryManager, taskExecutorServices.getIOManager(), taskExecutorServices.getShuffleEnvironment(), taskExecutorServices.getKvStateService(), taskExecutorServices.getBroadcastVariableManager(), taskExecutorServices.getTaskEventDispatcher(), externalResourceInfoProvider, taskStateManager, taskManagerActions, inputSplitProvider, checkpointResponder, taskOperatorEventGateway, aggregateManager, classLoaderHandle, fileCache, taskManagerConfiguration, taskMetricGroup, resultPartitionConsumableNotifier, partitionStateChecker, getRpcService().getScheduledExecutor());
        taskMetricGroup.gauge(MetricNames.IS_BACK_PRESSURED, task::isBackPressured);
        log.info("Received task {} ({}), deploy into slot with allocation id {}.", task.getTaskInfo().getTaskNameWithSubtasks(), tdd.getExecutionAttemptId(), tdd.getAllocationId());
        boolean taskAdded;
        try {
            taskAdded = taskSlotTable.addTask(task);
        } catch (SlotNotFoundException | SlotNotActiveException e) {
            throw new TaskSubmissionException("Could not submit task.", e);
        }
        if (taskAdded) {
            task.startTaskThread();
            setupResultPartitionBookkeeping(tdd.getJobId(), tdd.getProducedPartitions(), task.getTerminationFuture());
            return CompletableFuture.completedFuture(Acknowledge.get());
        } else {
            final String message = "TaskManager already contains a task for id " + task.getExecutionId() + '.';
            log.debug(message);
            throw new TaskSubmissionException(message);
        }
    } catch (TaskSubmissionException e) {
        return FutureUtils.completedExceptionally(e);
    }
}
Also used : SlotNotFoundException(org.apache.flink.runtime.taskexecutor.slot.SlotNotFoundException) TaskStateManagerImpl(org.apache.flink.runtime.state.TaskStateManagerImpl) Task(org.apache.flink.runtime.taskmanager.Task) SlotNotActiveException(org.apache.flink.runtime.taskexecutor.slot.SlotNotActiveException) RpcInputSplitProvider(org.apache.flink.runtime.taskexecutor.rpc.RpcInputSplitProvider) RpcTaskOperatorEventGateway(org.apache.flink.runtime.taskexecutor.rpc.RpcTaskOperatorEventGateway) TaskOperatorEventGateway(org.apache.flink.runtime.jobgraph.tasks.TaskOperatorEventGateway) JobManagerTaskRestore(org.apache.flink.runtime.checkpoint.JobManagerTaskRestore) RpcTaskOperatorEventGateway(org.apache.flink.runtime.taskexecutor.rpc.RpcTaskOperatorEventGateway) TaskManagerActions(org.apache.flink.runtime.taskmanager.TaskManagerActions) TaskSubmissionException(org.apache.flink.runtime.taskexecutor.exceptions.TaskSubmissionException) InputSplitProvider(org.apache.flink.runtime.jobgraph.tasks.InputSplitProvider) RpcInputSplitProvider(org.apache.flink.runtime.taskexecutor.rpc.RpcInputSplitProvider) ResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier) RpcResultPartitionConsumableNotifier(org.apache.flink.runtime.taskexecutor.rpc.RpcResultPartitionConsumableNotifier) JobInformation(org.apache.flink.runtime.executiongraph.JobInformation) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TaskInformation(org.apache.flink.runtime.executiongraph.TaskInformation) TaskLocalStateStore(org.apache.flink.runtime.state.TaskLocalStateStore) TaskMetricGroup(org.apache.flink.runtime.metrics.groups.TaskMetricGroup) RpcCheckpointResponder(org.apache.flink.runtime.taskexecutor.rpc.RpcCheckpointResponder) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) TaskManagerJobMetricGroup(org.apache.flink.runtime.metrics.groups.TaskManagerJobMetricGroup) IOException(java.io.IOException) LibraryCacheManager(org.apache.flink.runtime.execution.librarycache.LibraryCacheManager) TaskStateManager(org.apache.flink.runtime.state.TaskStateManager) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) RpcGlobalAggregateManager(org.apache.flink.runtime.taskexecutor.rpc.RpcGlobalAggregateManager) JobID(org.apache.flink.api.common.JobID)

Example 2 with TaskStateManagerImpl

use of org.apache.flink.runtime.state.TaskStateManagerImpl in project flink by apache.

the class StateInitializationContextImplTest method setUp.

@Before
public void setUp() throws Exception {
    this.writtenKeyGroups = 0;
    this.writtenOperatorStates = new HashSet<>();
    this.closableRegistry = new CloseableRegistry();
    ByteArrayOutputStreamWithPos out = new ByteArrayOutputStreamWithPos(64);
    List<KeyedStateHandle> keyedStateHandles = new ArrayList<>(NUM_HANDLES);
    int prev = 0;
    for (int i = 0; i < NUM_HANDLES; ++i) {
        out.reset();
        int size = i % 4;
        int end = prev + size;
        DataOutputView dov = new DataOutputViewStreamWrapper(out);
        KeyGroupRangeOffsets offsets = new KeyGroupRangeOffsets(i == 9 ? KeyGroupRange.EMPTY_KEY_GROUP_RANGE : new KeyGroupRange(prev, end));
        prev = end + 1;
        for (int kg : offsets.getKeyGroupRange()) {
            offsets.setKeyGroupOffset(kg, out.getPosition());
            dov.writeInt(kg);
            ++writtenKeyGroups;
        }
        KeyedStateHandle handle = new KeyGroupsStateHandle(offsets, new ByteStateHandleCloseChecking("kg-" + i, out.toByteArray()));
        keyedStateHandles.add(handle);
    }
    List<OperatorStateHandle> operatorStateHandles = new ArrayList<>(NUM_HANDLES);
    for (int i = 0; i < NUM_HANDLES; ++i) {
        int size = i % 4;
        out.reset();
        DataOutputView dov = new DataOutputViewStreamWrapper(out);
        LongArrayList offsets = new LongArrayList(size);
        for (int s = 0; s < size; ++s) {
            offsets.add(out.getPosition());
            int val = i * NUM_HANDLES + s;
            dov.writeInt(val);
            writtenOperatorStates.add(val);
        }
        Map<String, OperatorStateHandle.StateMetaInfo> offsetsMap = new HashMap<>();
        offsetsMap.put(DefaultOperatorStateBackend.DEFAULT_OPERATOR_STATE_NAME, new OperatorStateHandle.StateMetaInfo(offsets.toArray(), OperatorStateHandle.Mode.SPLIT_DISTRIBUTE));
        OperatorStateHandle operatorStateHandle = new OperatorStreamStateHandle(offsetsMap, new ByteStateHandleCloseChecking("os-" + i, out.toByteArray()));
        operatorStateHandles.add(operatorStateHandle);
    }
    OperatorSubtaskState operatorSubtaskState = OperatorSubtaskState.builder().setRawOperatorState(new StateObjectCollection<>(operatorStateHandles)).setRawKeyedState(new StateObjectCollection<>(keyedStateHandles)).build();
    OperatorID operatorID = new OperatorID();
    TaskStateSnapshot taskStateSnapshot = new TaskStateSnapshot();
    taskStateSnapshot.putSubtaskStateByOperatorID(operatorID, operatorSubtaskState);
    JobManagerTaskRestore jobManagerTaskRestore = new JobManagerTaskRestore(0L, taskStateSnapshot);
    TaskStateManager manager = new TaskStateManagerImpl(new JobID(), new ExecutionAttemptID(), new TestTaskLocalStateStore(), new InMemoryStateChangelogStorage(), jobManagerTaskRestore, mock(CheckpointResponder.class));
    DummyEnvironment environment = new DummyEnvironment("test", 1, 0, prev);
    environment.setTaskStateManager(manager);
    StateBackend stateBackend = new MemoryStateBackend(1024);
    StreamTaskStateInitializer streamTaskStateManager = new StreamTaskStateInitializerImpl(environment, stateBackend, TtlTimeProvider.DEFAULT, new InternalTimeServiceManager.Provider() {

        @Override
        public <K> InternalTimeServiceManager<K> create(CheckpointableKeyedStateBackend<K> keyedStatedBackend, ClassLoader userClassloader, KeyContext keyContext, ProcessingTimeService processingTimeService, Iterable<KeyGroupStatePartitionStreamProvider> rawKeyedStates) throws Exception {
            // stream.
            return null;
        }
    });
    AbstractStreamOperator<?> mockOperator = mock(AbstractStreamOperator.class);
    when(mockOperator.getOperatorID()).thenReturn(operatorID);
    StreamOperatorStateContext stateContext = streamTaskStateManager.streamOperatorStateContext(operatorID, "TestOperatorClass", mock(ProcessingTimeService.class), mockOperator, // consumed by the timer service.
    IntSerializer.INSTANCE, closableRegistry, new UnregisteredMetricsGroup(), 1.0, false);
    OptionalLong restoredCheckpointId = stateContext.getRestoredCheckpointId();
    this.initializationContext = new StateInitializationContextImpl(restoredCheckpointId.isPresent() ? restoredCheckpointId.getAsLong() : null, stateContext.operatorStateBackend(), mock(KeyedStateStore.class), stateContext.rawKeyedStateInputs(), stateContext.rawOperatorStateInputs());
}
Also used : HashMap(java.util.HashMap) KeyGroupRangeOffsets(org.apache.flink.runtime.state.KeyGroupRangeOffsets) LongArrayList(org.apache.flink.runtime.util.LongArrayList) ArrayList(java.util.ArrayList) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) CloseableRegistry(org.apache.flink.core.fs.CloseableRegistry) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) StateBackend(org.apache.flink.runtime.state.StateBackend) DefaultOperatorStateBackend(org.apache.flink.runtime.state.DefaultOperatorStateBackend) MemoryStateBackend(org.apache.flink.runtime.state.memory.MemoryStateBackend) CheckpointableKeyedStateBackend(org.apache.flink.runtime.state.CheckpointableKeyedStateBackend) KeyGroupStatePartitionStreamProvider(org.apache.flink.runtime.state.KeyGroupStatePartitionStreamProvider) InMemoryStateChangelogStorage(org.apache.flink.runtime.state.changelog.inmemory.InMemoryStateChangelogStorage) ByteArrayOutputStreamWithPos(org.apache.flink.core.memory.ByteArrayOutputStreamWithPos) TestTaskLocalStateStore(org.apache.flink.runtime.state.TestTaskLocalStateStore) DataOutputView(org.apache.flink.core.memory.DataOutputView) StateObjectCollection(org.apache.flink.runtime.checkpoint.StateObjectCollection) OperatorStreamStateHandle(org.apache.flink.runtime.state.OperatorStreamStateHandle) StateInitializationContextImpl(org.apache.flink.runtime.state.StateInitializationContextImpl) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) JobID(org.apache.flink.api.common.JobID) TaskStateManagerImpl(org.apache.flink.runtime.state.TaskStateManagerImpl) UnregisteredMetricsGroup(org.apache.flink.metrics.groups.UnregisteredMetricsGroup) MemoryStateBackend(org.apache.flink.runtime.state.memory.MemoryStateBackend) JobManagerTaskRestore(org.apache.flink.runtime.checkpoint.JobManagerTaskRestore) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) ProcessingTimeService(org.apache.flink.streaming.runtime.tasks.ProcessingTimeService) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) LongArrayList(org.apache.flink.runtime.util.LongArrayList) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) DummyEnvironment(org.apache.flink.runtime.operators.testutils.DummyEnvironment) TaskStateManager(org.apache.flink.runtime.state.TaskStateManager) IOException(java.io.IOException) DataOutputViewStreamWrapper(org.apache.flink.core.memory.DataOutputViewStreamWrapper) OptionalLong(java.util.OptionalLong) Before(org.junit.Before)

Example 3 with TaskStateManagerImpl

use of org.apache.flink.runtime.state.TaskStateManagerImpl in project flink by apache.

the class LocalStateForwardingTest method testReportingFromTaskStateManagerToResponderAndTaskLocalStateStore.

/**
 * This tests that state that was reported to the {@link
 * org.apache.flink.runtime.state.TaskStateManager} is also reported to {@link
 * org.apache.flink.runtime.taskmanager.CheckpointResponder} and {@link
 * TaskLocalStateStoreImpl}.
 */
@Test
public void testReportingFromTaskStateManagerToResponderAndTaskLocalStateStore() throws Exception {
    final JobID jobID = new JobID();
    final AllocationID allocationID = new AllocationID();
    final ExecutionAttemptID executionAttemptID = new ExecutionAttemptID();
    final CheckpointMetaData checkpointMetaData = new CheckpointMetaData(42L, 4711L);
    final CheckpointMetrics checkpointMetrics = new CheckpointMetrics();
    final int subtaskIdx = 42;
    JobVertexID jobVertexID = new JobVertexID();
    TaskStateSnapshot jmSnapshot = new TaskStateSnapshot();
    TaskStateSnapshot tmSnapshot = new TaskStateSnapshot();
    final AtomicBoolean jmReported = new AtomicBoolean(false);
    final AtomicBoolean tmReported = new AtomicBoolean(false);
    TestCheckpointResponder checkpointResponder = new TestCheckpointResponder() {

        @Override
        public void acknowledgeCheckpoint(JobID lJobID, ExecutionAttemptID lExecutionAttemptID, long lCheckpointId, CheckpointMetrics lCheckpointMetrics, TaskStateSnapshot lSubtaskState) {
            Assert.assertEquals(jobID, lJobID);
            Assert.assertEquals(executionAttemptID, lExecutionAttemptID);
            Assert.assertEquals(checkpointMetaData.getCheckpointId(), lCheckpointId);
            Assert.assertEquals(checkpointMetrics, lCheckpointMetrics);
            jmReported.set(true);
        }
    };
    Executor executor = Executors.directExecutor();
    LocalRecoveryDirectoryProviderImpl directoryProvider = new LocalRecoveryDirectoryProviderImpl(temporaryFolder.newFolder(), jobID, jobVertexID, subtaskIdx);
    LocalRecoveryConfig localRecoveryConfig = new LocalRecoveryConfig(directoryProvider);
    TaskLocalStateStore taskLocalStateStore = new TaskLocalStateStoreImpl(jobID, allocationID, jobVertexID, subtaskIdx, localRecoveryConfig, executor) {

        @Override
        public void storeLocalState(@Nonnegative long checkpointId, @Nullable TaskStateSnapshot localState) {
            Assert.assertEquals(tmSnapshot, localState);
            tmReported.set(true);
        }
    };
    StateChangelogStorage<?> stateChangelogStorage = new InMemoryStateChangelogStorage();
    TaskStateManagerImpl taskStateManager = new TaskStateManagerImpl(jobID, executionAttemptID, taskLocalStateStore, stateChangelogStorage, null, checkpointResponder);
    taskStateManager.reportTaskStateSnapshots(checkpointMetaData, checkpointMetrics, jmSnapshot, tmSnapshot);
    Assert.assertTrue("Reporting for JM state was not called.", jmReported.get());
    Assert.assertTrue("Reporting for TM state was not called.", tmReported.get());
}
Also used : TaskStateManagerImpl(org.apache.flink.runtime.state.TaskStateManagerImpl) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TaskLocalStateStore(org.apache.flink.runtime.state.TaskLocalStateStore) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) LocalRecoveryDirectoryProviderImpl(org.apache.flink.runtime.state.LocalRecoveryDirectoryProviderImpl) LocalRecoveryConfig(org.apache.flink.runtime.state.LocalRecoveryConfig) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) Executor(java.util.concurrent.Executor) InMemoryStateChangelogStorage(org.apache.flink.runtime.state.changelog.inmemory.InMemoryStateChangelogStorage) TaskLocalStateStoreImpl(org.apache.flink.runtime.state.TaskLocalStateStoreImpl) Nonnegative(javax.annotation.Nonnegative) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) JobID(org.apache.flink.api.common.JobID) Nullable(javax.annotation.Nullable) Test(org.junit.Test)

Example 4 with TaskStateManagerImpl

use of org.apache.flink.runtime.state.TaskStateManagerImpl in project flink by apache.

the class StreamTaskTest method testAsyncCheckpointingConcurrentCloseAfterAcknowledge.

/**
 * FLINK-5667
 *
 * <p>Tests that a concurrent cancel operation does not discard the state handles of an
 * acknowledged checkpoint. The situation can only happen if the cancel call is executed after
 * Environment.acknowledgeCheckpoint() and before the CloseableRegistry.unregisterClosable()
 * call.
 */
@Test
public void testAsyncCheckpointingConcurrentCloseAfterAcknowledge() throws Exception {
    final OneShotLatch acknowledgeCheckpointLatch = new OneShotLatch();
    final OneShotLatch completeAcknowledge = new OneShotLatch();
    CheckpointResponder checkpointResponder = mock(CheckpointResponder.class);
    doAnswer(new Answer() {

        @Override
        public Object answer(InvocationOnMock invocation) {
            acknowledgeCheckpointLatch.trigger();
            // block here so that we can issue the concurrent cancel call
            while (true) {
                try {
                    // wait until we successfully await (no pun intended)
                    completeAcknowledge.await();
                    // when await() returns normally, we break out of the loop
                    break;
                } catch (InterruptedException e) {
                // survive interruptions that arise from thread pool
                // shutdown
                // production code cannot actually throw
                // InterruptedException from
                // checkpoint acknowledgement
                }
            }
            return null;
        }
    }).when(checkpointResponder).acknowledgeCheckpoint(any(JobID.class), any(ExecutionAttemptID.class), anyLong(), any(CheckpointMetrics.class), any(TaskStateSnapshot.class));
    TaskStateManager taskStateManager = new TaskStateManagerImpl(new JobID(1L, 2L), new ExecutionAttemptID(), mock(TaskLocalStateStoreImpl.class), new InMemoryStateChangelogStorage(), null, checkpointResponder);
    KeyedStateHandle managedKeyedStateHandle = mock(KeyedStateHandle.class);
    KeyedStateHandle rawKeyedStateHandle = mock(KeyedStateHandle.class);
    OperatorStateHandle managedOperatorStateHandle = mock(OperatorStreamStateHandle.class);
    OperatorStateHandle rawOperatorStateHandle = mock(OperatorStreamStateHandle.class);
    OperatorSnapshotFutures operatorSnapshotResult = new OperatorSnapshotFutures(DoneFuture.of(SnapshotResult.of(managedKeyedStateHandle)), DoneFuture.of(SnapshotResult.of(rawKeyedStateHandle)), DoneFuture.of(SnapshotResult.of(managedOperatorStateHandle)), DoneFuture.of(SnapshotResult.of(rawOperatorStateHandle)), DoneFuture.of(SnapshotResult.empty()), DoneFuture.of(SnapshotResult.empty()));
    try (MockEnvironment mockEnvironment = new MockEnvironmentBuilder().setTaskName("mock-task").setTaskStateManager(taskStateManager).build()) {
        RunningTask<MockStreamTask> task = runTask(() -> createMockStreamTask(mockEnvironment, operatorChain(streamOperatorWithSnapshot(operatorSnapshotResult))));
        MockStreamTask streamTask = task.streamTask;
        waitTaskIsRunning(streamTask, task.invocationFuture);
        final long checkpointId = 42L;
        streamTask.triggerCheckpointAsync(new CheckpointMetaData(checkpointId, 1L), CheckpointOptions.forCheckpointWithDefaultLocation());
        acknowledgeCheckpointLatch.await();
        ArgumentCaptor<TaskStateSnapshot> subtaskStateCaptor = ArgumentCaptor.forClass(TaskStateSnapshot.class);
        // check that the checkpoint has been completed
        verify(checkpointResponder).acknowledgeCheckpoint(any(JobID.class), any(ExecutionAttemptID.class), eq(checkpointId), any(CheckpointMetrics.class), subtaskStateCaptor.capture());
        TaskStateSnapshot subtaskStates = subtaskStateCaptor.getValue();
        OperatorSubtaskState subtaskState = subtaskStates.getSubtaskStateMappings().iterator().next().getValue();
        // check that the subtask state contains the expected state handles
        assertEquals(singleton(managedKeyedStateHandle), subtaskState.getManagedKeyedState());
        assertEquals(singleton(rawKeyedStateHandle), subtaskState.getRawKeyedState());
        assertEquals(singleton(managedOperatorStateHandle), subtaskState.getManagedOperatorState());
        assertEquals(singleton(rawOperatorStateHandle), subtaskState.getRawOperatorState());
        // check that the state handles have not been discarded
        verify(managedKeyedStateHandle, never()).discardState();
        verify(rawKeyedStateHandle, never()).discardState();
        verify(managedOperatorStateHandle, never()).discardState();
        verify(rawOperatorStateHandle, never()).discardState();
        streamTask.cancel();
        completeAcknowledge.trigger();
        // canceling the stream task after it has acknowledged the checkpoint should not discard
        // the state handles
        verify(managedKeyedStateHandle, never()).discardState();
        verify(rawKeyedStateHandle, never()).discardState();
        verify(managedOperatorStateHandle, never()).discardState();
        verify(rawOperatorStateHandle, never()).discardState();
        task.waitForTaskCompletion(true);
    }
}
Also used : OperatorSnapshotFutures(org.apache.flink.streaming.api.operators.OperatorSnapshotFutures) TaskStateManagerImpl(org.apache.flink.runtime.state.TaskStateManagerImpl) MockEnvironmentBuilder(org.apache.flink.runtime.operators.testutils.MockEnvironmentBuilder) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) InMemoryStateChangelogStorage(org.apache.flink.runtime.state.changelog.inmemory.InMemoryStateChangelogStorage) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) TaskLocalStateStoreImpl(org.apache.flink.runtime.state.TaskLocalStateStoreImpl) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) TaskStateManager(org.apache.flink.runtime.state.TaskStateManager) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Answer(org.mockito.stubbing.Answer) InvocationOnMock(org.mockito.invocation.InvocationOnMock) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 5 with TaskStateManagerImpl

use of org.apache.flink.runtime.state.TaskStateManagerImpl in project flink by apache.

the class StreamTaskTest method testEmptySubtaskStateLeadsToStatelessAcknowledgment.

/**
 * FLINK-5985
 *
 * <p>This test ensures that empty snapshots (no op/keyed stated whatsoever) will be reported as
 * stateless tasks. This happens by translating an empty {@link SubtaskState} into reporting
 * 'null' to #acknowledgeCheckpoint.
 */
@Test
public void testEmptySubtaskStateLeadsToStatelessAcknowledgment() throws Exception {
    // latch blocks until the async checkpoint thread acknowledges
    final OneShotLatch checkpointCompletedLatch = new OneShotLatch();
    final List<SubtaskState> checkpointResult = new ArrayList<>(1);
    CheckpointResponder checkpointResponder = mock(CheckpointResponder.class);
    doAnswer(new Answer() {

        @Override
        public Object answer(InvocationOnMock invocation) throws Throwable {
            SubtaskState subtaskState = invocation.getArgument(4);
            checkpointResult.add(subtaskState);
            checkpointCompletedLatch.trigger();
            return null;
        }
    }).when(checkpointResponder).acknowledgeCheckpoint(any(JobID.class), any(ExecutionAttemptID.class), anyLong(), any(CheckpointMetrics.class), nullable(TaskStateSnapshot.class));
    TaskStateManager taskStateManager = new TaskStateManagerImpl(new JobID(1L, 2L), new ExecutionAttemptID(), mock(TaskLocalStateStoreImpl.class), new InMemoryStateChangelogStorage(), null, checkpointResponder);
    // mock the operator with empty snapshot result (all state handles are null)
    OneInputStreamOperator<String, String> statelessOperator = streamOperatorWithSnapshot(new OperatorSnapshotFutures());
    try (MockEnvironment mockEnvironment = new MockEnvironmentBuilder().setTaskStateManager(taskStateManager).build()) {
        RunningTask<MockStreamTask> task = runTask(() -> createMockStreamTask(mockEnvironment, operatorChain(statelessOperator)));
        waitTaskIsRunning(task.streamTask, task.invocationFuture);
        task.streamTask.triggerCheckpointAsync(new CheckpointMetaData(42L, 1L), CheckpointOptions.forCheckpointWithDefaultLocation());
        checkpointCompletedLatch.await(30, TimeUnit.SECONDS);
        // ensure that 'null' was acknowledged as subtask state
        Assert.assertNull(checkpointResult.get(0));
        task.streamTask.cancel();
        task.waitForTaskCompletion(true);
    }
}
Also used : OperatorSnapshotFutures(org.apache.flink.streaming.api.operators.OperatorSnapshotFutures) TaskStateManagerImpl(org.apache.flink.runtime.state.TaskStateManagerImpl) MockEnvironmentBuilder(org.apache.flink.runtime.operators.testutils.MockEnvironmentBuilder) ArrayList(java.util.ArrayList) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) InMemoryStateChangelogStorage(org.apache.flink.runtime.state.changelog.inmemory.InMemoryStateChangelogStorage) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) TaskLocalStateStoreImpl(org.apache.flink.runtime.state.TaskLocalStateStoreImpl) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) TaskStateManager(org.apache.flink.runtime.state.TaskStateManager) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Answer(org.mockito.stubbing.Answer) SubtaskState(org.apache.flink.runtime.checkpoint.SubtaskState) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) InvocationOnMock(org.mockito.invocation.InvocationOnMock) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

JobID (org.apache.flink.api.common.JobID)5 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)5 TaskStateManagerImpl (org.apache.flink.runtime.state.TaskStateManagerImpl)5 TaskStateSnapshot (org.apache.flink.runtime.checkpoint.TaskStateSnapshot)4 TaskStateManager (org.apache.flink.runtime.state.TaskStateManager)4 InMemoryStateChangelogStorage (org.apache.flink.runtime.state.changelog.inmemory.InMemoryStateChangelogStorage)4 CheckpointResponder (org.apache.flink.runtime.taskmanager.CheckpointResponder)4 CheckpointMetaData (org.apache.flink.runtime.checkpoint.CheckpointMetaData)3 CheckpointMetrics (org.apache.flink.runtime.checkpoint.CheckpointMetrics)3 OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState)3 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 OneShotLatch (org.apache.flink.core.testutils.OneShotLatch)2 JobManagerTaskRestore (org.apache.flink.runtime.checkpoint.JobManagerTaskRestore)2 MockEnvironment (org.apache.flink.runtime.operators.testutils.MockEnvironment)2 MockEnvironmentBuilder (org.apache.flink.runtime.operators.testutils.MockEnvironmentBuilder)2 TaskLocalStateStore (org.apache.flink.runtime.state.TaskLocalStateStore)2 TaskLocalStateStoreImpl (org.apache.flink.runtime.state.TaskLocalStateStoreImpl)2 OperatorSnapshotFutures (org.apache.flink.streaming.api.operators.OperatorSnapshotFutures)2 Test (org.junit.Test)2