use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.
the class AsyncCheckpointRunnable method finalizeNonFinishedSnapshots.
private SnapshotsFinalizeResult finalizeNonFinishedSnapshots() throws Exception {
TaskStateSnapshot jobManagerTaskOperatorSubtaskStates = new TaskStateSnapshot(operatorSnapshotsInProgress.size(), isTaskFinished);
TaskStateSnapshot localTaskOperatorSubtaskStates = new TaskStateSnapshot(operatorSnapshotsInProgress.size(), isTaskFinished);
long bytesPersistedDuringAlignment = 0;
for (Map.Entry<OperatorID, OperatorSnapshotFutures> entry : operatorSnapshotsInProgress.entrySet()) {
OperatorID operatorID = entry.getKey();
OperatorSnapshotFutures snapshotInProgress = entry.getValue();
// finalize the async part of all by executing all snapshot runnables
OperatorSnapshotFinalizer finalizedSnapshots = new OperatorSnapshotFinalizer(snapshotInProgress);
jobManagerTaskOperatorSubtaskStates.putSubtaskStateByOperatorID(operatorID, finalizedSnapshots.getJobManagerOwnedState());
localTaskOperatorSubtaskStates.putSubtaskStateByOperatorID(operatorID, finalizedSnapshots.getTaskLocalState());
bytesPersistedDuringAlignment += finalizedSnapshots.getJobManagerOwnedState().getResultSubpartitionState().getStateSize();
bytesPersistedDuringAlignment += finalizedSnapshots.getJobManagerOwnedState().getInputChannelState().getStateSize();
}
return new SnapshotsFinalizeResult(jobManagerTaskOperatorSubtaskStates, localTaskOperatorSubtaskStates, bytesPersistedDuringAlignment);
}
use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.
the class StreamTaskTest method testAsyncCheckpointingConcurrentCloseAfterAcknowledge.
/**
* FLINK-5667
*
* <p>Tests that a concurrent cancel operation does not discard the state handles of an
* acknowledged checkpoint. The situation can only happen if the cancel call is executed after
* Environment.acknowledgeCheckpoint() and before the CloseableRegistry.unregisterClosable()
* call.
*/
@Test
public void testAsyncCheckpointingConcurrentCloseAfterAcknowledge() throws Exception {
final OneShotLatch acknowledgeCheckpointLatch = new OneShotLatch();
final OneShotLatch completeAcknowledge = new OneShotLatch();
CheckpointResponder checkpointResponder = mock(CheckpointResponder.class);
doAnswer(new Answer() {
@Override
public Object answer(InvocationOnMock invocation) {
acknowledgeCheckpointLatch.trigger();
// block here so that we can issue the concurrent cancel call
while (true) {
try {
// wait until we successfully await (no pun intended)
completeAcknowledge.await();
// when await() returns normally, we break out of the loop
break;
} catch (InterruptedException e) {
// survive interruptions that arise from thread pool
// shutdown
// production code cannot actually throw
// InterruptedException from
// checkpoint acknowledgement
}
}
return null;
}
}).when(checkpointResponder).acknowledgeCheckpoint(any(JobID.class), any(ExecutionAttemptID.class), anyLong(), any(CheckpointMetrics.class), any(TaskStateSnapshot.class));
TaskStateManager taskStateManager = new TaskStateManagerImpl(new JobID(1L, 2L), new ExecutionAttemptID(), mock(TaskLocalStateStoreImpl.class), new InMemoryStateChangelogStorage(), null, checkpointResponder);
KeyedStateHandle managedKeyedStateHandle = mock(KeyedStateHandle.class);
KeyedStateHandle rawKeyedStateHandle = mock(KeyedStateHandle.class);
OperatorStateHandle managedOperatorStateHandle = mock(OperatorStreamStateHandle.class);
OperatorStateHandle rawOperatorStateHandle = mock(OperatorStreamStateHandle.class);
OperatorSnapshotFutures operatorSnapshotResult = new OperatorSnapshotFutures(DoneFuture.of(SnapshotResult.of(managedKeyedStateHandle)), DoneFuture.of(SnapshotResult.of(rawKeyedStateHandle)), DoneFuture.of(SnapshotResult.of(managedOperatorStateHandle)), DoneFuture.of(SnapshotResult.of(rawOperatorStateHandle)), DoneFuture.of(SnapshotResult.empty()), DoneFuture.of(SnapshotResult.empty()));
try (MockEnvironment mockEnvironment = new MockEnvironmentBuilder().setTaskName("mock-task").setTaskStateManager(taskStateManager).build()) {
RunningTask<MockStreamTask> task = runTask(() -> createMockStreamTask(mockEnvironment, operatorChain(streamOperatorWithSnapshot(operatorSnapshotResult))));
MockStreamTask streamTask = task.streamTask;
waitTaskIsRunning(streamTask, task.invocationFuture);
final long checkpointId = 42L;
streamTask.triggerCheckpointAsync(new CheckpointMetaData(checkpointId, 1L), CheckpointOptions.forCheckpointWithDefaultLocation());
acknowledgeCheckpointLatch.await();
ArgumentCaptor<TaskStateSnapshot> subtaskStateCaptor = ArgumentCaptor.forClass(TaskStateSnapshot.class);
// check that the checkpoint has been completed
verify(checkpointResponder).acknowledgeCheckpoint(any(JobID.class), any(ExecutionAttemptID.class), eq(checkpointId), any(CheckpointMetrics.class), subtaskStateCaptor.capture());
TaskStateSnapshot subtaskStates = subtaskStateCaptor.getValue();
OperatorSubtaskState subtaskState = subtaskStates.getSubtaskStateMappings().iterator().next().getValue();
// check that the subtask state contains the expected state handles
assertEquals(singleton(managedKeyedStateHandle), subtaskState.getManagedKeyedState());
assertEquals(singleton(rawKeyedStateHandle), subtaskState.getRawKeyedState());
assertEquals(singleton(managedOperatorStateHandle), subtaskState.getManagedOperatorState());
assertEquals(singleton(rawOperatorStateHandle), subtaskState.getRawOperatorState());
// check that the state handles have not been discarded
verify(managedKeyedStateHandle, never()).discardState();
verify(rawKeyedStateHandle, never()).discardState();
verify(managedOperatorStateHandle, never()).discardState();
verify(rawOperatorStateHandle, never()).discardState();
streamTask.cancel();
completeAcknowledge.trigger();
// canceling the stream task after it has acknowledged the checkpoint should not discard
// the state handles
verify(managedKeyedStateHandle, never()).discardState();
verify(rawKeyedStateHandle, never()).discardState();
verify(managedOperatorStateHandle, never()).discardState();
verify(rawOperatorStateHandle, never()).discardState();
task.waitForTaskCompletion(true);
}
}
use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.
the class RestoreStreamTaskTest method testRestoreTailWithNewId.
@Test
public void testRestoreTailWithNewId() throws Exception {
OperatorID headOperatorID = new OperatorID(42L, 42L);
JobManagerTaskRestore restore = createRunAndCheckpointOperatorChain(headOperatorID, new CounterOperator(), new OperatorID(44L, 44L), new CounterOperator(), Optional.empty());
TaskStateSnapshot stateHandles = restore.getTaskStateSnapshot();
assertEquals(2, stateHandles.getSubtaskStateMappings().size());
createRunAndCheckpointOperatorChain(headOperatorID, new CounterOperator(), new OperatorID(4444L, 4444L), new CounterOperator(), Optional.of(restore));
assertEquals(Collections.singleton(headOperatorID), RESTORED_OPERATORS.keySet());
assertThat(new HashSet<>(RESTORED_OPERATORS.values()), contains(restore.getRestoreCheckpointId()));
}
use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.
the class SourceOperatorStreamTaskTest method testSkipExecutionIfFinishedOnRestore.
@Test
public void testSkipExecutionIfFinishedOnRestore() throws Exception {
TaskStateSnapshot taskStateSnapshot = TaskStateSnapshot.FINISHED_ON_RESTORE;
LifeCycleMonitorSource testingSource = new LifeCycleMonitorSource(Boundedness.CONTINUOUS_UNBOUNDED, 10);
SourceOperatorFactory<Integer> sourceOperatorFactory = new SourceOperatorFactory<>(testingSource, WatermarkStrategy.noWatermarks());
List<Object> output = new ArrayList<>();
try (StreamTaskMailboxTestHarness<Integer> testHarness = new StreamTaskMailboxTestHarnessBuilder<>(SourceOperatorStreamTask::new, BasicTypeInfo.INT_TYPE_INFO).setTaskStateSnapshot(1, taskStateSnapshot).addAdditionalOutput(new RecordOrEventCollectingResultPartitionWriter<StreamElement>(output, new StreamElementSerializer<>(IntSerializer.INSTANCE)) {
@Override
public void notifyEndOfData(StopMode mode) throws IOException {
broadcastEvent(new EndOfData(mode), false);
}
}).setupOperatorChain(sourceOperatorFactory).chain(new TestFinishedOnRestoreStreamOperator(), StringSerializer.INSTANCE).finish().build()) {
testHarness.getStreamTask().invoke();
testHarness.processAll();
assertThat(output, contains(Watermark.MAX_WATERMARK, new EndOfData(StopMode.DRAIN)));
LifeCycleMonitorSourceReader sourceReader = (LifeCycleMonitorSourceReader) ((SourceOperator<?, ?>) testHarness.getStreamTask().getMainOperator()).getSourceReader();
sourceReader.getLifeCycleMonitor().assertCallTimes(0, LifeCyclePhase.values());
}
}
use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.
the class SourceOperatorStreamTaskTest method testSnapshotAndRestore.
/**
* Tests that the stream operator can snapshot and restore the operator state of chained
* operators.
*/
@Test
public void testSnapshotAndRestore() throws Exception {
// process NUM_RECORDS records and take a snapshot.
TaskStateSnapshot taskStateSnapshot = executeAndWaitForCheckpoint(1, null, IntStream.range(0, NUM_RECORDS));
// Resume from the snapshot and continue to process another NUM_RECORDS records.
executeAndWaitForCheckpoint(2, taskStateSnapshot, IntStream.range(NUM_RECORDS, NUM_RECORDS * 2));
}
Aggregations