Search in sources :

Example 11 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

The method createTask of the class InterruptSensitiveRestoreTest.

// ------------------------------------------------------------------------
// Utilities
// ------------------------------------------------------------------------
private static Task createTask(StreamConfig streamConfig, Configuration taskConfig, StreamStateHandle state, int mode) throws IOException {
    ShuffleEnvironment<?, ?> shuffleEnvironment = new NettyShuffleEnvironmentBuilder().build();
    Collection<KeyedStateHandle> keyedStateFromBackend = Collections.emptyList();
    Collection<KeyedStateHandle> keyedStateFromStream = Collections.emptyList();
    Collection<OperatorStateHandle> operatorStateBackend = Collections.emptyList();
    Collection<OperatorStateHandle> operatorStateStream = Collections.emptyList();
    Map<String, OperatorStateHandle.StateMetaInfo> operatorStateMetadata = new HashMap<>(1);
    OperatorStateHandle.StateMetaInfo metaInfo = new OperatorStateHandle.StateMetaInfo(new long[] { 0 }, OperatorStateHandle.Mode.SPLIT_DISTRIBUTE);
    operatorStateMetadata.put(DefaultOperatorStateBackend.DEFAULT_OPERATOR_STATE_NAME, metaInfo);
    KeyGroupRangeOffsets keyGroupRangeOffsets = new KeyGroupRangeOffsets(new KeyGroupRange(0, 0));
    Collection<OperatorStateHandle> operatorStateHandles = Collections.singletonList(new OperatorStreamStateHandle(operatorStateMetadata, state));
    List<KeyedStateHandle> keyedStateHandles = Collections.singletonList(new KeyGroupsStateHandle(keyGroupRangeOffsets, state));
    switch(mode) {
        case OPERATOR_MANAGED:
            operatorStateBackend = operatorStateHandles;
            break;
        case OPERATOR_RAW:
            operatorStateStream = operatorStateHandles;
            break;
        case KEYED_MANAGED:
            keyedStateFromBackend = keyedStateHandles;
            break;
        case KEYED_RAW:
            keyedStateFromStream = keyedStateHandles;
            break;
        default:
            throw new IllegalArgumentException();
    }
    OperatorSubtaskState operatorSubtaskState = OperatorSubtaskState.builder().setManagedOperatorState(new StateObjectCollection<>(operatorStateBackend)).setRawOperatorState(new StateObjectCollection<>(operatorStateStream)).setManagedKeyedState(new StateObjectCollection<>(keyedStateFromBackend)).setRawKeyedState(new StateObjectCollection<>(keyedStateFromStream)).build();
    JobVertexID jobVertexID = new JobVertexID();
    OperatorID operatorID = OperatorID.fromJobVertexID(jobVertexID);
    streamConfig.setOperatorID(operatorID);
    TaskStateSnapshot stateSnapshot = new TaskStateSnapshot();
    stateSnapshot.putSubtaskStateByOperatorID(operatorID, operatorSubtaskState);
    JobManagerTaskRestore taskRestore = new JobManagerTaskRestore(1L, stateSnapshot);
    JobInformation jobInformation = new JobInformation(new JobID(), "test job name", new SerializedValue<>(new ExecutionConfig()), new Configuration(), Collections.emptyList(), Collections.emptyList());
    TaskInformation taskInformation = new TaskInformation(jobVertexID, "test task name", 1, 1, SourceStreamTask.class.getName(), taskConfig);
    TestTaskStateManager taskStateManager = TestTaskStateManager.builder().setReportedCheckpointId(taskRestore.getRestoreCheckpointId()).setJobManagerTaskStateSnapshotsByCheckpointId(Collections.singletonMap(taskRestore.getRestoreCheckpointId(), taskRestore.getTaskStateSnapshot())).build();
    return new Task(jobInformation, taskInformation, new ExecutionAttemptID(), new AllocationID(), 0, 0, Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList(), mock(MemoryManager.class), mock(IOManager.class), shuffleEnvironment, new KvStateService(new KvStateRegistry(), null, null), mock(BroadcastVariableManager.class), new TaskEventDispatcher(), ExternalResourceInfoProvider.NO_EXTERNAL_RESOURCES, taskStateManager, mock(TaskManagerActions.class), mock(InputSplitProvider.class), mock(CheckpointResponder.class), new NoOpTaskOperatorEventGateway(), new TestGlobalAggregateManager(), TestingClassLoaderLease.newBuilder().build(), new FileCache(new String[] { EnvironmentInformation.getTemporaryFileDirectory() }, VoidPermanentBlobService.INSTANCE), new TestingTaskManagerRuntimeInfo(), UnregisteredMetricGroups.createUnregisteredTaskMetricGroup(), new NoOpResultPartitionConsumableNotifier(), mock(PartitionProducerStateChecker.class), mock(Executor.class));
}
Also used : KvStateRegistry(org.apache.flink.runtime.query.KvStateRegistry) Task(org.apache.flink.runtime.taskmanager.Task) Configuration(org.apache.flink.configuration.Configuration) HashMap(java.util.HashMap) KeyGroupRangeOffsets(org.apache.flink.runtime.state.KeyGroupRangeOffsets) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) NettyShuffleEnvironmentBuilder(org.apache.flink.runtime.io.network.NettyShuffleEnvironmentBuilder) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) TaskManagerActions(org.apache.flink.runtime.taskmanager.TaskManagerActions) NoOpTaskOperatorEventGateway(org.apache.flink.runtime.taskmanager.NoOpTaskOperatorEventGateway) TestingTaskManagerRuntimeInfo(org.apache.flink.runtime.util.TestingTaskManagerRuntimeInfo) BroadcastVariableManager(org.apache.flink.runtime.broadcast.BroadcastVariableManager) PartitionProducerStateChecker(org.apache.flink.runtime.taskexecutor.PartitionProducerStateChecker) TaskInformation(org.apache.flink.runtime.executiongraph.TaskInformation) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) TestGlobalAggregateManager(org.apache.flink.runtime.taskexecutor.TestGlobalAggregateManager) FileCache(org.apache.flink.runtime.filecache.FileCache) StateObjectCollection(org.apache.flink.runtime.checkpoint.StateObjectCollection) OperatorStreamStateHandle(org.apache.flink.runtime.state.OperatorStreamStateHandle) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) JobID(org.apache.flink.api.common.JobID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) JobManagerTaskRestore(org.apache.flink.runtime.checkpoint.JobManagerTaskRestore) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) 
KvStateService(org.apache.flink.runtime.taskexecutor.KvStateService) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) Executor(java.util.concurrent.Executor) InputSplitProvider(org.apache.flink.runtime.jobgraph.tasks.InputSplitProvider) JobInformation(org.apache.flink.runtime.executiongraph.JobInformation) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) NoOpResultPartitionConsumableNotifier(org.apache.flink.runtime.io.network.partition.NoOpResultPartitionConsumableNotifier) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) TestTaskStateManager(org.apache.flink.runtime.state.TestTaskStateManager) TaskEventDispatcher(org.apache.flink.runtime.io.network.TaskEventDispatcher)

Example 12 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

The method testTriggeringStopWithSavepointWithDrain of the class SourceStreamTaskTest.

@Test
public void testTriggeringStopWithSavepointWithDrain() throws Exception {
    SourceFunction<String> testSource = new EmptySource();
    CompletableFuture<Boolean> checkpointCompleted = new CompletableFuture<>();
    CheckpointResponder checkpointResponder = new TestCheckpointResponder() {

        @Override
        public void acknowledgeCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics, TaskStateSnapshot subtaskState) {
            super.acknowledgeCheckpoint(jobID, executionAttemptID, checkpointId, checkpointMetrics, subtaskState);
            checkpointCompleted.complete(null);
        }
    };
    try (StreamTaskMailboxTestHarness<String> harness = new StreamTaskMailboxTestHarnessBuilder<>(SourceStreamTask::new, STRING_TYPE_INFO).setTaskStateSnapshot(1, TaskStateSnapshot.FINISHED_ON_RESTORE).setCheckpointResponder(checkpointResponder).setupOutputForSingletonOperatorChain(new StreamSource<>(testSource)).build()) {
        CompletableFuture<Boolean> triggerResult = harness.streamTask.triggerCheckpointAsync(new CheckpointMetaData(2, 2), CheckpointOptions.alignedNoTimeout(SavepointType.terminate(SavepointFormatType.CANONICAL), CheckpointStorageLocationReference.getDefault()));
        checkpointCompleted.whenComplete((ignored, exception) -> harness.streamTask.notifyCheckpointCompleteAsync(2));
        // Run mailbox till the source thread finished and suspend the mailbox
        harness.streamTask.runMailboxLoop();
        harness.finishProcessing();
        assertTrue(triggerResult.isDone());
        assertTrue(triggerResult.get());
        assertTrue(checkpointCompleted.isDone());
    }
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) CompletableFuture(java.util.concurrent.CompletableFuture) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 13 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

The method createSnapshot of the class CoordinatorEventsExactlyOnceITCase.

static TaskStateSnapshot createSnapshot(StreamStateHandle handle, OperatorID operatorId) {
    final OperatorStateHandle.StateMetaInfo metaInfo = new OperatorStateHandle.StateMetaInfo(new long[] { 0 }, OperatorStateHandle.Mode.SPLIT_DISTRIBUTE);
    final OperatorStateHandle state = new OperatorStreamStateHandle(Collections.singletonMap("état_et_moi_:_ça_fait_deux", metaInfo), handle);
    final OperatorSubtaskState oss = OperatorSubtaskState.builder().setManagedOperatorState(state).build();
    return new TaskStateSnapshot(Collections.singletonMap(operatorId, oss));
}
Also used : TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) OperatorStreamStateHandle(org.apache.flink.runtime.state.OperatorStreamStateHandle) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) PrioritizedOperatorSubtaskState(org.apache.flink.runtime.checkpoint.PrioritizedOperatorSubtaskState) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState)

Example 14 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

The method testConfirmTaskCheckpointed of the class CheckpointMessagesTest.

@Test
public void testConfirmTaskCheckpointed() {
    final Random rnd = new Random();
    try {
        AcknowledgeCheckpoint noState = new AcknowledgeCheckpoint(new JobID(), new ExecutionAttemptID(), 569345L);
        KeyGroupRange keyGroupRange = KeyGroupRange.of(42, 42);
        TaskStateSnapshot checkpointStateHandles = new TaskStateSnapshot();
        OperatorSubtaskState subtaskState = OperatorSubtaskState.builder().setManagedOperatorState(generatePartitionableStateHandle(new JobVertexID(), 0, 2, 8, false)).setManagedKeyedState(generateKeyGroupState(keyGroupRange, Collections.singletonList(new MyHandle()))).setInputChannelState(singleton(createNewInputChannelStateHandle(10, rnd))).setResultSubpartitionState(singleton(createNewResultSubpartitionStateHandle(10, rnd))).build();
        checkpointStateHandles.putSubtaskStateByOperatorID(new OperatorID(), subtaskState);
        AcknowledgeCheckpoint withState = new AcknowledgeCheckpoint(new JobID(), new ExecutionAttemptID(), 87658976143L, new CheckpointMetrics(), checkpointStateHandles);
        testSerializabilityEqualsHashCode(noState);
        testSerializabilityEqualsHashCode(withState);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) Random(java.util.Random) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) JobID(org.apache.flink.api.common.JobID) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) IOException(java.io.IOException) Test(org.junit.Test)

Example 15 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

The method upstreamBlocksAfterRecoveringState of the class ChannelPersistenceITCase.

private void upstreamBlocksAfterRecoveringState(ResultPartitionType type) throws Exception {
    NetworkBufferPool networkBufferPool = new NetworkBufferPool(4, 1024);
    byte[] dataAfterRecovery = randomBytes(1024);
    try {
        BufferWritingResultPartition resultPartition = buildResultPartition(networkBufferPool, type, 0, 1);
        new SequentialChannelStateReaderImpl(new TaskStateSnapshot()).readOutputData(new BufferWritingResultPartition[] { resultPartition }, true);
        resultPartition.emitRecord(ByteBuffer.wrap(dataAfterRecovery), 0);
        ResultSubpartitionView view = resultPartition.createSubpartitionView(0, new NoOpBufferAvailablityListener());
        if (type != ResultPartitionType.PIPELINED_APPROXIMATE) {
            assertEquals(RECOVERY_COMPLETION, view.getNextBuffer().buffer().getDataType());
            assertNull(view.getNextBuffer());
            view.resumeConsumption();
        }
        assertArrayEquals(dataAfterRecovery, collectBytes(view.getNextBuffer().buffer()));
    } finally {
        networkBufferPool.destroy();
    }
}
Also used : TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) BufferWritingResultPartition(org.apache.flink.runtime.io.network.partition.BufferWritingResultPartition) ResultSubpartitionView(org.apache.flink.runtime.io.network.partition.ResultSubpartitionView) SequentialChannelStateReaderImpl(org.apache.flink.runtime.checkpoint.channel.SequentialChannelStateReaderImpl) NoOpBufferAvailablityListener(org.apache.flink.runtime.io.network.partition.NoOpBufferAvailablityListener) NetworkBufferPool(org.apache.flink.runtime.io.network.buffer.NetworkBufferPool)

Aggregations

TaskStateSnapshot (org.apache.flink.runtime.checkpoint.TaskStateSnapshot)42 Test (org.junit.Test)28 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)19 JobID (org.apache.flink.api.common.JobID)17 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)16 CheckpointMetaData (org.apache.flink.runtime.checkpoint.CheckpointMetaData)13 JobManagerTaskRestore (org.apache.flink.runtime.checkpoint.JobManagerTaskRestore)13 OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState)13 CheckpointMetrics (org.apache.flink.runtime.checkpoint.CheckpointMetrics)12 TestCheckpointResponder (org.apache.flink.runtime.taskmanager.TestCheckpointResponder)9 CheckpointResponder (org.apache.flink.runtime.taskmanager.CheckpointResponder)8 IOException (java.io.IOException)7 HashMap (java.util.HashMap)6 OperatorStateHandle (org.apache.flink.runtime.state.OperatorStateHandle)6 CompletableFuture (java.util.concurrent.CompletableFuture)5 KeyedStateHandle (org.apache.flink.runtime.state.KeyedStateHandle)5 TestTaskStateManager (org.apache.flink.runtime.state.TestTaskStateManager)5 InMemoryStateChangelogStorage (org.apache.flink.runtime.state.changelog.inmemory.InMemoryStateChangelogStorage)5 ArrayList (java.util.ArrayList)4 Map (java.util.Map)4