Search in sources:

Example 26 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

the class AsyncCheckpointRunnable method finalizeNonFinishedSnapshots.

/**
 * Finalizes every operator snapshot that is still in progress and gathers the results.
 *
 * <p>For each entry of {@code operatorSnapshotsInProgress} an {@link OperatorSnapshotFinalizer}
 * is created. Its job-manager-owned state and its task-local state are registered under the
 * operator's ID in two separate {@link TaskStateSnapshot}s, and the sizes of the
 * result-subpartition state and the input-channel state of the job-manager-owned state are
 * added up across all operators.
 *
 * @return the job-manager-owned snapshot, the task-local snapshot and the accumulated size of the
 *     result-subpartition and input-channel state
 * @throws Exception propagated from the finalization calls
 */
private SnapshotsFinalizeResult finalizeNonFinishedSnapshots() throws Exception {
    final int operatorCount = operatorSnapshotsInProgress.size();
    final TaskStateSnapshot jobManagerOwned = new TaskStateSnapshot(operatorCount, isTaskFinished);
    final TaskStateSnapshot taskLocal = new TaskStateSnapshot(operatorCount, isTaskFinished);

    long channelStateBytes = 0;
    for (Map.Entry<OperatorID, OperatorSnapshotFutures> inProgress :
            operatorSnapshotsInProgress.entrySet()) {
        final OperatorID id = inProgress.getKey();

        // constructing the finalizer runs the asynchronous part of this operator's snapshot
        final OperatorSnapshotFinalizer finalizer =
                new OperatorSnapshotFinalizer(inProgress.getValue());

        jobManagerOwned.putSubtaskStateByOperatorID(id, finalizer.getJobManagerOwnedState());
        taskLocal.putSubtaskStateByOperatorID(id, finalizer.getTaskLocalState());

        final long resultSubpartitionBytes =
                finalizer.getJobManagerOwnedState().getResultSubpartitionState().getStateSize();
        final long inputChannelBytes =
                finalizer.getJobManagerOwnedState().getInputChannelState().getStateSize();
        channelStateBytes += resultSubpartitionBytes;
        channelStateBytes += inputChannelBytes;
    }

    return new SnapshotsFinalizeResult(jobManagerOwned, taskLocal, channelStateBytes);
}
Also used : OperatorSnapshotFutures(org.apache.flink.streaming.api.operators.OperatorSnapshotFutures) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) Map(java.util.Map) OperatorSnapshotFinalizer(org.apache.flink.streaming.api.operators.OperatorSnapshotFinalizer)

Example 27 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

the class StreamTaskTest method testAsyncCheckpointingConcurrentCloseAfterAcknowledge.

/**
 * Regression test for FLINK-5667.
 *
 * <p>Verifies that cancelling the task while a checkpoint acknowledgement is still in flight does
 * not discard the state handles of that acknowledged checkpoint. The race window lies between
 * Environment.acknowledgeCheckpoint() and the following CloseableRegistry.unregisterCloseable()
 * call, so the mocked responder blocks inside the acknowledgement until the test has issued the
 * cancel.
 */
@Test
public void testAsyncCheckpointingConcurrentCloseAfterAcknowledge() throws Exception {
    final OneShotLatch ackEntered = new OneShotLatch();
    final OneShotLatch releaseAck = new OneShotLatch();

    final CheckpointResponder responder = mock(CheckpointResponder.class);
    final Answer<Object> blockUntilReleased =
            new Answer<Object>() {

                @Override
                public Object answer(InvocationOnMock invocation) {
                    ackEntered.trigger();
                    // hold the acknowledging thread here so the test can cancel concurrently
                    boolean released = false;
                    while (!released) {
                        try {
                            releaseAck.await();
                            released = true;
                        } catch (InterruptedException ignored) {
                            // Keep waiting: interruptions caused by the thread pool shutting
                            // down must be survived, because the production acknowledgement
                            // cannot throw InterruptedException.
                        }
                    }
                    return null;
                }
            };
    doAnswer(blockUntilReleased)
            .when(responder)
            .acknowledgeCheckpoint(
                    any(JobID.class),
                    any(ExecutionAttemptID.class),
                    anyLong(),
                    any(CheckpointMetrics.class),
                    any(TaskStateSnapshot.class));

    final TaskStateManager stateManager =
            new TaskStateManagerImpl(
                    new JobID(1L, 2L),
                    new ExecutionAttemptID(),
                    mock(TaskLocalStateStoreImpl.class),
                    new InMemoryStateChangelogStorage(),
                    null,
                    responder);

    final KeyedStateHandle managedKeyed = mock(KeyedStateHandle.class);
    final KeyedStateHandle rawKeyed = mock(KeyedStateHandle.class);
    final OperatorStateHandle managedOperator = mock(OperatorStreamStateHandle.class);
    final OperatorStateHandle rawOperator = mock(OperatorStreamStateHandle.class);

    final OperatorSnapshotFutures snapshotFutures =
            new OperatorSnapshotFutures(
                    DoneFuture.of(SnapshotResult.of(managedKeyed)),
                    DoneFuture.of(SnapshotResult.of(rawKeyed)),
                    DoneFuture.of(SnapshotResult.of(managedOperator)),
                    DoneFuture.of(SnapshotResult.of(rawOperator)),
                    DoneFuture.of(SnapshotResult.empty()),
                    DoneFuture.of(SnapshotResult.empty()));

    try (MockEnvironment environment =
            new MockEnvironmentBuilder()
                    .setTaskName("mock-task")
                    .setTaskStateManager(stateManager)
                    .build()) {
        RunningTask<MockStreamTask> runningTask =
                runTask(
                        () ->
                                createMockStreamTask(
                                        environment,
                                        operatorChain(
                                                streamOperatorWithSnapshot(snapshotFutures))));
        MockStreamTask mockTask = runningTask.streamTask;
        waitTaskIsRunning(mockTask, runningTask.invocationFuture);

        final long checkpointId = 42L;
        mockTask.triggerCheckpointAsync(
                new CheckpointMetaData(checkpointId, 1L),
                CheckpointOptions.forCheckpointWithDefaultLocation());

        // the responder is now parked inside acknowledgeCheckpoint
        ackEntered.await();

        // the checkpoint must have been acknowledged with our checkpoint id
        ArgumentCaptor<TaskStateSnapshot> snapshotCaptor =
                ArgumentCaptor.forClass(TaskStateSnapshot.class);
        verify(responder)
                .acknowledgeCheckpoint(
                        any(JobID.class),
                        any(ExecutionAttemptID.class),
                        eq(checkpointId),
                        any(CheckpointMetrics.class),
                        snapshotCaptor.capture());

        TaskStateSnapshot acknowledged = snapshotCaptor.getValue();
        OperatorSubtaskState operatorState =
                acknowledged.getSubtaskStateMappings().iterator().next().getValue();

        // the acknowledged subtask state carries exactly the handles supplied above
        assertEquals(singleton(managedKeyed), operatorState.getManagedKeyedState());
        assertEquals(singleton(rawKeyed), operatorState.getRawKeyedState());
        assertEquals(singleton(managedOperator), operatorState.getManagedOperatorState());
        assertEquals(singleton(rawOperator), operatorState.getRawOperatorState());

        // nothing has been discarded before the cancel
        verify(managedKeyed, never()).discardState();
        verify(rawKeyed, never()).discardState();
        verify(managedOperator, never()).discardState();
        verify(rawOperator, never()).discardState();

        mockTask.cancel();
        releaseAck.trigger();

        // cancelling after the acknowledgement must still leave the handles alone
        verify(managedKeyed, never()).discardState();
        verify(rawKeyed, never()).discardState();
        verify(managedOperator, never()).discardState();
        verify(rawOperator, never()).discardState();

        runningTask.waitForTaskCompletion(true);
    }
}
Also used : OperatorSnapshotFutures(org.apache.flink.streaming.api.operators.OperatorSnapshotFutures) TaskStateManagerImpl(org.apache.flink.runtime.state.TaskStateManagerImpl) MockEnvironmentBuilder(org.apache.flink.runtime.operators.testutils.MockEnvironmentBuilder) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) InMemoryStateChangelogStorage(org.apache.flink.runtime.state.changelog.inmemory.InMemoryStateChangelogStorage) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) TaskLocalStateStoreImpl(org.apache.flink.runtime.state.TaskLocalStateStoreImpl) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) CheckpointResponder(org.apache.flink.runtime.taskmanager.CheckpointResponder) TaskStateManager(org.apache.flink.runtime.state.TaskStateManager) CheckpointMetaData(org.apache.flink.runtime.checkpoint.CheckpointMetaData) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Answer(org.mockito.stubbing.Answer) InvocationOnMock(org.mockito.invocation.InvocationOnMock) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 28 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

the class RestoreStreamTaskTest method testRestoreTailWithNewId.

/**
 * Runs and checkpoints a two-operator chain, then runs it again from that restore with the same
 * head operator ID but a different tail operator ID. Expects the first run to produce two
 * subtask-state mappings and, after the second run, RESTORED_OPERATORS to contain only the head
 * operator, mapped to the restore's checkpoint ID.
 */
@Test
public void testRestoreTailWithNewId() throws Exception {
    final OperatorID headId = new OperatorID(42L, 42L);
    final OperatorID originalTailId = new OperatorID(44L, 44L);
    final OperatorID replacedTailId = new OperatorID(4444L, 4444L);

    // first run: nothing to restore from
    final JobManagerTaskRestore firstRunRestore =
            createRunAndCheckpointOperatorChain(
                    headId,
                    new CounterOperator(),
                    originalTailId,
                    new CounterOperator(),
                    Optional.empty());

    final TaskStateSnapshot checkpointedState = firstRunRestore.getTaskStateSnapshot();
    assertEquals(2, checkpointedState.getSubtaskStateMappings().size());

    // second run: same head ID, tail operator registered under a new ID
    createRunAndCheckpointOperatorChain(
            headId,
            new CounterOperator(),
            replacedTailId,
            new CounterOperator(),
            Optional.of(firstRunRestore));

    assertEquals(Collections.singleton(headId), RESTORED_OPERATORS.keySet());
    assertThat(
            new HashSet<>(RESTORED_OPERATORS.values()),
            contains(firstRunRestore.getRestoreCheckpointId()));
}
Also used : TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) JobManagerTaskRestore(org.apache.flink.runtime.checkpoint.JobManagerTaskRestore) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) Test(org.junit.Test)

Example 29 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

the class SourceOperatorStreamTaskTest method testSkipExecutionIfFinishedOnRestore.

/**
 * Starts a source task from {@code TaskStateSnapshot.FINISHED_ON_RESTORE} and checks that the
 * additional output receives only {@code Watermark.MAX_WATERMARK} followed by an
 * {@code EndOfData(StopMode.DRAIN)} event, and that every life-cycle phase of the source reader
 * was called zero times.
 */
@Test
public void testSkipExecutionIfFinishedOnRestore() throws Exception {
    final LifeCycleMonitorSource monitoredSource =
            new LifeCycleMonitorSource(Boundedness.CONTINUOUS_UNBOUNDED, 10);
    final SourceOperatorFactory<Integer> operatorFactory =
            new SourceOperatorFactory<>(monitoredSource, WatermarkStrategy.noWatermarks());

    final List<Object> collected = new ArrayList<>();
    // partition writer that turns the end-of-data notification into a broadcast EndOfData event
    final RecordOrEventCollectingResultPartitionWriter<StreamElement> collectingWriter =
            new RecordOrEventCollectingResultPartitionWriter<StreamElement>(
                    collected, new StreamElementSerializer<>(IntSerializer.INSTANCE)) {

                @Override
                public void notifyEndOfData(StopMode mode) throws IOException {
                    broadcastEvent(new EndOfData(mode), false);
                }
            };

    try (StreamTaskMailboxTestHarness<Integer> harness =
            new StreamTaskMailboxTestHarnessBuilder<>(
                            SourceOperatorStreamTask::new, BasicTypeInfo.INT_TYPE_INFO)
                    .setTaskStateSnapshot(1, TaskStateSnapshot.FINISHED_ON_RESTORE)
                    .addAdditionalOutput(collectingWriter)
                    .setupOperatorChain(operatorFactory)
                    .chain(new TestFinishedOnRestoreStreamOperator(), StringSerializer.INSTANCE)
                    .finish()
                    .build()) {
        harness.getStreamTask().invoke();
        harness.processAll();

        assertThat(collected, contains(Watermark.MAX_WATERMARK, new EndOfData(StopMode.DRAIN)));

        final SourceOperator<?, ?> mainOperator =
                (SourceOperator<?, ?>) harness.getStreamTask().getMainOperator();
        final LifeCycleMonitorSourceReader reader =
                (LifeCycleMonitorSourceReader) mainOperator.getSourceReader();
        reader.getLifeCycleMonitor().assertCallTimes(0, LifeCyclePhase.values());
    }
}
Also used : ArrayList(java.util.ArrayList) StopMode(org.apache.flink.runtime.io.network.api.StopMode) StreamElement(org.apache.flink.streaming.runtime.streamrecord.StreamElement) IOException(java.io.IOException) SourceOperatorFactory(org.apache.flink.streaming.api.operators.SourceOperatorFactory) EndOfData(org.apache.flink.runtime.io.network.api.EndOfData) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) StreamElementSerializer(org.apache.flink.streaming.runtime.streamrecord.StreamElementSerializer) Test(org.junit.Test)

Example 30 with TaskStateSnapshot

use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.

the class SourceOperatorStreamTaskTest method testSnapshotAndRestore.

/**
 * Takes a checkpoint after a first batch of {@code NUM_RECORDS} records, then starts again from
 * that snapshot and takes a second checkpoint after the next {@code NUM_RECORDS} records.
 */
@Test
public void testSnapshotAndRestore() throws Exception {
    // first run: no initial state, records [0, NUM_RECORDS), checkpoint 1
    final IntStream firstBatch = IntStream.range(0, NUM_RECORDS);
    final TaskStateSnapshot firstCheckpoint = executeAndWaitForCheckpoint(1, null, firstBatch);

    // second run: resume from checkpoint 1, records [NUM_RECORDS, 2 * NUM_RECORDS), checkpoint 2
    final IntStream secondBatch = IntStream.range(NUM_RECORDS, NUM_RECORDS * 2);
    executeAndWaitForCheckpoint(2, firstCheckpoint, secondBatch);
}
Also used : TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) Test(org.junit.Test)

Aggregations

TaskStateSnapshot (org.apache.flink.runtime.checkpoint.TaskStateSnapshot): 42; Test (org.junit.Test): 28; OperatorID (org.apache.flink.runtime.jobgraph.OperatorID): 19; JobID (org.apache.flink.api.common.JobID): 17; ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID): 16; CheckpointMetaData (org.apache.flink.runtime.checkpoint.CheckpointMetaData): 13; JobManagerTaskRestore (org.apache.flink.runtime.checkpoint.JobManagerTaskRestore): 13; OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState): 13; CheckpointMetrics (org.apache.flink.runtime.checkpoint.CheckpointMetrics): 12; TestCheckpointResponder (org.apache.flink.runtime.taskmanager.TestCheckpointResponder): 9; CheckpointResponder (org.apache.flink.runtime.taskmanager.CheckpointResponder): 8; IOException (java.io.IOException): 7; HashMap (java.util.HashMap): 6; OperatorStateHandle (org.apache.flink.runtime.state.OperatorStateHandle): 6; CompletableFuture (java.util.concurrent.CompletableFuture): 5; KeyedStateHandle (org.apache.flink.runtime.state.KeyedStateHandle): 5; TestTaskStateManager (org.apache.flink.runtime.state.TestTaskStateManager): 5; InMemoryStateChangelogStorage (org.apache.flink.runtime.state.changelog.inmemory.InMemoryStateChangelogStorage): 5; ArrayList (java.util.ArrayList): 4; Map (java.util.Map): 4