Search in sources :

Example 1 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

the class JobMasterTester method createNonEmptyStateSnapshot.

private static TaskStateSnapshot createNonEmptyStateSnapshot(TaskInformation taskInformation) {
    final TaskStateSnapshot checkpointStateHandles = new TaskStateSnapshot();
    checkpointStateHandles.putSubtaskStateByOperatorID(OperatorID.fromJobVertexID(taskInformation.getJobVertexId()), OperatorSubtaskState.builder().setManagedOperatorState(new OperatorStreamStateHandle(Collections.emptyMap(), new ByteStreamStateHandle("foobar", new byte[0]))).build());
    return checkpointStateHandles;
}
Also used : TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) TaskStateSnapshot.serializeTaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot.serializeTaskStateSnapshot) OperatorStreamStateHandle(org.apache.flink.runtime.state.OperatorStreamStateHandle) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle)

Example 2 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

the class StateInitializationContextImplTest method setUp.

@Before
public void setUp() throws Exception {
    this.writtenKeyGroups = 0;
    this.writtenOperatorStates = new HashSet<>();
    this.closableRegistry = new CloseableRegistry();
    ByteArrayOutputStreamWithPos out = new ByteArrayOutputStreamWithPos(64);
    List<KeyedStateHandle> keyedStateHandles = new ArrayList<>(NUM_HANDLES);
    int prev = 0;
    for (int i = 0; i < NUM_HANDLES; ++i) {
        out.reset();
        int size = i % 4;
        int end = prev + size;
        DataOutputView dov = new DataOutputViewStreamWrapper(out);
        KeyGroupRangeOffsets offsets = new KeyGroupRangeOffsets(i == 9 ? KeyGroupRange.EMPTY_KEY_GROUP_RANGE : new KeyGroupRange(prev, end));
        prev = end + 1;
        for (int kg : offsets.getKeyGroupRange()) {
            offsets.setKeyGroupOffset(kg, out.getPosition());
            dov.writeInt(kg);
            ++writtenKeyGroups;
        }
        KeyedStateHandle handle = new KeyGroupsStateHandle(offsets, new ByteStateHandleCloseChecking("kg-" + i, out.toByteArray()));
        keyedStateHandles.add(handle);
    }
    List<OperatorStateHandle> operatorStateHandles = new ArrayList<>(NUM_HANDLES);
    for (int i = 0; i < NUM_HANDLES; ++i) {
        int size = i % 4;
        out.reset();
        DataOutputView dov = new DataOutputViewStreamWrapper(out);
        LongArrayList offsets = new LongArrayList(size);
        for (int s = 0; s < size; ++s) {
            offsets.add(out.getPosition());
            int val = i * NUM_HANDLES + s;
            dov.writeInt(val);
            writtenOperatorStates.add(val);
        }
        Map<String, OperatorStateHandle.StateMetaInfo> offsetsMap = new HashMap<>();
        offsetsMap.put(DefaultOperatorStateBackend.DEFAULT_OPERATOR_STATE_NAME, new OperatorStateHandle.StateMetaInfo(offsets.toArray(), OperatorStateHandle.Mode.SPLIT_DISTRIBUTE));
        OperatorStateHandle operatorStateHandle = new OperatorStreamStateHandle(offsetsMap, new ByteStateHandleCloseChecking("os-" + i, out.toByteArray()));
        operatorStateHandles.add(operatorStateHandle);
    }
    OperatorSubtaskState operatorSubtaskState = OperatorSubtaskState.builder().setRawOperatorState(new StateObjectCollection<>(operatorStateHandles)).setRawKeyedState(new StateObjectCollection<>(keyedStateHandles)).build();
    OperatorID operatorID = new OperatorID();
    TaskStateSnapshot taskStateSnapshot = new TaskStateSnapshot();
    taskStateSnapshot.putSubtaskStateByOperatorID(operatorID, operatorSubtaskState);
    JobManagerTaskRestore jobManagerTaskRestore = new JobManagerTaskRestore(0L, taskStateSnapshot);
    TaskStateManager manager = new TaskStateManagerImpl(new JobID(), new ExecutionAttemptID(), new TestTaskLocalStateStore(), new InMemoryStateChangelogStorage(), jobManagerTaskRestore, mock(CheckpointResponder.class));
    DummyEnvironment environment = new DummyEnvironment("test", 1, 0, prev);
    environment.setTaskStateManager(manager);
    StateBackend stateBackend = new MemoryStateBackend(1024);
    StreamTaskStateInitializer streamTaskStateManager = new StreamTaskStateInitializerImpl(environment, stateBackend, TtlTimeProvider.DEFAULT, new InternalTimeServiceManager.Provider() {

        @Override
        public <K> InternalTimeServiceManager<K> create(CheckpointableKeyedStateBackend<K> keyedStatedBackend, ClassLoader userClassloader, KeyContext keyContext, ProcessingTimeService processingTimeService, Iterable<KeyGroupStatePartitionStreamProvider> rawKeyedStates) throws Exception {
            // stream.
            return null;
        }
    });
    AbstractStreamOperator<?> mockOperator = mock(AbstractStreamOperator.class);
    when(mockOperator.getOperatorID()).thenReturn(operatorID);
    StreamOperatorStateContext stateContext = streamTaskStateManager.streamOperatorStateContext(operatorID, "TestOperatorClass", mock(ProcessingTimeService.class), mockOperator, // consumed by the timer service.
    IntSerializer.INSTANCE, closableRegistry, new UnregisteredMetricsGroup(), 1.0, false);
    OptionalLong restoredCheckpointId = stateContext.getRestoredCheckpointId();
    this.initializationContext = new StateInitializationContextImpl(restoredCheckpointId.isPresent() ? restoredCheckpointId.getAsLong() : null, stateContext.operatorStateBackend(), mock(KeyedStateStore.class), stateContext.rawKeyedStateInputs(), stateContext.rawOperatorStateInputs());
}
Also used : HashMap(java.util.HashMap) KeyGroupRangeOffsets(org.apache.flink.runtime.state.KeyGroupRangeOffsets) LongArrayList(org.apache.flink.runtime.util.LongArrayList) ArrayList(java.util.ArrayList) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) CloseableRegistry(org.apache.flink.core.fs.CloseableRegistry) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) StateBackend(org.apache.flink.runtime.state.StateBackend) DefaultOperatorStateBackend(org.apache.flink.runtime.state.DefaultOperatorStateBackend) MemoryStateBackend(org.apache.flink.runtime.state.memory.MemoryStateBackend) CheckpointableKeyedStateBackend(org.apache.flink.runtime.state.CheckpointableKeyedStateBackend) KeyGroupStatePartitionStreamProvider(org.apache.flink.runtime.state.KeyGroupStatePartitionStreamProvider) InMemoryStateChangelogStorage(org.apache.flink.runtime.state.changelog.inmemory.InMemoryStateChangelogStorage) ByteArrayOutputStreamWithPos(org.apache.flink.core.memory.ByteArrayOutputStreamWithPos) TestTaskLocalStateStore(org.apache.flink.runtime.state.TestTaskLocalStateStore) DataOutputView(org.apache.flink.core.memory.DataOutputView) StateObjectCollection(org.apache.flink.runtime.checkpoint.StateObjectCollection) OperatorStreamStateHandle(org.apache.flink.runtime.state.OperatorStreamStateHandle) StateInitializationContextImpl(org.apache.flink.runtime.state.StateInitializationContextImpl) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) JobID(org.apache.flink.api.common.JobID) TaskStateManagerImpl(org.apache.flink.runtime.state.TaskStateManagerImpl) UnregisteredMetricsGroup(org.apache.flink.metrics.groups.UnregisteredMetricsGroup) MemoryStateBackend(org.apache.flink.runtime.state.memory.MemoryStateBackend) JobManagerTaskRestore(org.apache.flink.runtime.checkpoint.JobManagerTaskRestore) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) ProcessingTimeService(org.apache.flink.streaming.runtime.tasks.ProcessingTimeService) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) LongArrayList(org.apache.flink.runtime.util.LongArrayList) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) DummyEnvironment(org.apache.flink.runtime.operators.testutils.DummyEnvironment) TaskStateManager(org.apache.flink.runtime.state.TaskStateManager) IOException(java.io.IOException) DataOutputViewStreamWrapper(org.apache.flink.core.memory.DataOutputViewStreamWrapper) OptionalLong(java.util.OptionalLong) Before(org.junit.Before)

Example 3 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

the class AsyncWaitOperatorTest method testStateSnapshotAndRestore.

@Test
public void testStateSnapshotAndRestore() throws Exception {
    final OneInputStreamTaskTestHarness<Integer, Integer> testHarness = new OneInputStreamTaskTestHarness<>(OneInputStreamTask::new, 1, 1, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO);
    testHarness.setupOutputForSingletonOperatorChain();
    AsyncWaitOperatorFactory<Integer, Integer> factory = new AsyncWaitOperatorFactory<>(new LazyAsyncFunction(), TIMEOUT, 4, AsyncDataStream.OutputMode.ORDERED);
    final StreamConfig streamConfig = testHarness.getStreamConfig();
    OperatorID operatorID = new OperatorID(42L, 4711L);
    streamConfig.setStreamOperatorFactory(factory);
    streamConfig.setOperatorID(operatorID);
    final TestTaskStateManager taskStateManagerMock = testHarness.getTaskStateManager();
    testHarness.invoke();
    testHarness.waitForTaskRunning();
    final OneInputStreamTask<Integer, Integer> task = testHarness.getTask();
    final long initialTime = 0L;
    testHarness.processElement(new StreamRecord<>(1, initialTime + 1));
    testHarness.processElement(new StreamRecord<>(2, initialTime + 2));
    testHarness.processElement(new StreamRecord<>(3, initialTime + 3));
    testHarness.processElement(new StreamRecord<>(4, initialTime + 4));
    testHarness.waitForInputProcessing();
    final long checkpointId = 1L;
    final long checkpointTimestamp = 1L;
    final CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointId, checkpointTimestamp);
    task.triggerCheckpointAsync(checkpointMetaData, CheckpointOptions.forCheckpointWithDefaultLocation());
    taskStateManagerMock.getWaitForReportLatch().await();
    assertEquals(checkpointId, taskStateManagerMock.getReportedCheckpointId());
    LazyAsyncFunction.countDown();
    testHarness.endInput();
    testHarness.waitForTaskCompletion();
    // set the operator state from previous attempt into the restored one
    TaskStateSnapshot subtaskStates = taskStateManagerMock.getLastJobManagerTaskStateSnapshot();
    final OneInputStreamTaskTestHarness<Integer, Integer> restoredTaskHarness = new OneInputStreamTaskTestHarness<>(OneInputStreamTask::new, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO);
    restoredTaskHarness.setTaskStateSnapshot(checkpointId, subtaskStates);
    restoredTaskHarness.setupOutputForSingletonOperatorChain();
    AsyncWaitOperatorFactory<Integer, Integer> restoredOperator = new AsyncWaitOperatorFactory<>(new MyAsyncFunction(), TIMEOUT, 6, AsyncDataStream.OutputMode.ORDERED);
    restoredTaskHarness.getStreamConfig().setStreamOperatorFactory(restoredOperator);
    restoredTaskHarness.getStreamConfig().setOperatorID(operatorID);
    restoredTaskHarness.invoke();
    restoredTaskHarness.waitForTaskRunning();
    final OneInputStreamTask<Integer, Integer> restoredTask = restoredTaskHarness.getTask();
    restoredTaskHarness.processElement(new StreamRecord<>(5, initialTime + 5));
    restoredTaskHarness.processElement(new StreamRecord<>(6, initialTime + 6));
    restoredTaskHarness.processElement(new StreamRecord<>(7, initialTime + 7));
    // trigger the checkpoint while processing stream elements
    restoredTask.triggerCheckpointAsync(new CheckpointMetaData(checkpointId, checkpointTimestamp), CheckpointOptions.forCheckpointWithDefaultLocation()).get();
    restoredTaskHarness.processElement(new StreamRecord<>(8, initialTime + 8));
    restoredTaskHarness.endInput();
    restoredTaskHarness.waitForTaskCompletion();
    ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<>();
    expectedOutput.add(new StreamRecord<>(2, initialTime + 1));
    expectedOutput.add(new StreamRecord<>(4, initialTime + 2));
    expectedOutput.add(new StreamRecord<>(6, initialTime + 3));
    expectedOutput.add(new StreamRecord<>(8, initialTime + 4));
    expectedOutput.add(new StreamRecord<>(10, initialTime + 5));
    expectedOutput.add(new StreamRecord<>(12, initialTime + 6));
    expectedOutput.add(new StreamRecord<>(14, initialTime + 7));
    expectedOutput.add(new StreamRecord<>(16, initialTime + 8));
    // remove CheckpointBarrier which is not expected
    restoredTaskHarness.getOutput().removeIf(record -> record instanceof CheckpointBarrier);
    TestHarnessUtil.assertOutputEquals("StateAndRestored Test Output was not correct.", expectedOutput, restoredTaskHarness.getOutput());
}
Also used : OneInputStreamTask(org.apache.flink.streaming.runtime.tasks.OneInputStreamTask) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) CheckpointBarrier(org.apache.flink.runtime.io.network.api.CheckpointBarrier) TestTaskStateManager(org.apache.flink.runtime.state.TestTaskStateManager) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) OneInputStreamTaskTestHarness(org.apache.flink.streaming.runtime.tasks.OneInputStreamTaskTestHarness) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Test(org.junit.Test)

Example 4 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

the class TaskStateManagerImplTest method testAcquringRestoreCheckpointId.

public void testAcquringRestoreCheckpointId() {
    TaskStateManagerImpl emptyStateManager = new TaskStateManagerImpl(new JobID(), new ExecutionAttemptID(), new TestTaskLocalStateStore(), null, null, new TestCheckpointResponder());
    Assert.assertFalse(emptyStateManager.getRestoreCheckpointId().isPresent());
    TaskStateManagerImpl nonEmptyStateManager = new TaskStateManagerImpl(new JobID(), new ExecutionAttemptID(), new TestTaskLocalStateStore(), null, new JobManagerTaskRestore(2, new TaskStateSnapshot()), new TestCheckpointResponder());
    Assert.assertEquals(2L, (long) nonEmptyStateManager.getRestoreCheckpointId().get());
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) TestCheckpointResponder(org.apache.flink.runtime.taskmanager.TestCheckpointResponder) JobManagerTaskRestore(org.apache.flink.runtime.checkpoint.JobManagerTaskRestore) JobID(org.apache.flink.api.common.JobID)

Example 5 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

the class TaskLocalStateStoreImplTest method deletesLocalStateIfRetrievalFails.

@Test
public void deletesLocalStateIfRetrievalFails() throws IOException {
    final TaskStateSnapshot taskStateSnapshot = createTaskStateSnapshot();
    final long checkpointId = 0L;
    taskLocalStateStore.storeLocalState(checkpointId, taskStateSnapshot);
    final File taskStateSnapshotFile = taskLocalStateStore.getTaskStateSnapshotFile(checkpointId);
    Files.write(taskStateSnapshotFile.toPath(), new byte[] { 1, 2, 3, 4 }, StandardOpenOption.WRITE);
    final TaskLocalStateStoreImpl newTaskLocalStateStore = createTaskLocalStateStoreImpl(allocationBaseDirs, jobID, allocationID, jobVertexID, subtaskIdx);
    assertThat(newTaskLocalStateStore.retrieveLocalState(checkpointId)).isNull();
    assertThat(taskStateSnapshotFile.getParentFile()).doesNotExist();
}
Also used : TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) File(java.io.File) Test(org.junit.Test)

Aggregations

TaskStateSnapshot (org.apache.flink.runtime.checkpoint.TaskStateSnapshot)42 Test (org.junit.Test)28 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)19 JobID (org.apache.flink.api.common.JobID)17 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)16 CheckpointMetaData (org.apache.flink.runtime.checkpoint.CheckpointMetaData)13 JobManagerTaskRestore (org.apache.flink.runtime.checkpoint.JobManagerTaskRestore)13 OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState)13 CheckpointMetrics (org.apache.flink.runtime.checkpoint.CheckpointMetrics)12 TestCheckpointResponder (org.apache.flink.runtime.taskmanager.TestCheckpointResponder)9 CheckpointResponder (org.apache.flink.runtime.taskmanager.CheckpointResponder)8 IOException (java.io.IOException)7 HashMap (java.util.HashMap)6 OperatorStateHandle (org.apache.flink.runtime.state.OperatorStateHandle)6 CompletableFuture (java.util.concurrent.CompletableFuture)5 KeyedStateHandle (org.apache.flink.runtime.state.KeyedStateHandle)5 TestTaskStateManager (org.apache.flink.runtime.state.TestTaskStateManager)5 InMemoryStateChangelogStorage (org.apache.flink.runtime.state.changelog.inmemory.InMemoryStateChangelogStorage)5 ArrayList (java.util.ArrayList)4 Map (java.util.Map)4