use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.
the class RocksDBAsyncSnapshotTest method testFullyAsyncSnapshot.
/**
* This ensures that asynchronous state handles are actually materialized asynchronously.
*
* <p>We use latches to block at various stages and see if the code still continues through the
* parts that are not asynchronous. If the checkpoint is not done asynchronously the test will
* simply lock forever.
*/
@Test
public void testFullyAsyncSnapshot() throws Exception {
final OneInputStreamTaskTestHarness<String, String> testHarness = new OneInputStreamTaskTestHarness<>(OneInputStreamTask::new, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
testHarness.setupOutputForSingletonOperatorChain();
testHarness.configureForKeyedStream(new KeySelector<String, String>() {
@Override
public String getKey(String value) throws Exception {
return value;
}
}, BasicTypeInfo.STRING_TYPE_INFO);
StreamConfig streamConfig = testHarness.getStreamConfig();
File dbDir = temporaryFolder.newFolder();
RocksDBStateBackend backend = new RocksDBStateBackend(new MemoryStateBackend());
backend.setDbStoragePath(dbDir.getAbsolutePath());
streamConfig.setStateBackend(backend);
streamConfig.setStreamOperator(new AsyncCheckpointOperator());
streamConfig.setOperatorID(new OperatorID());
final OneShotLatch delayCheckpointLatch = new OneShotLatch();
final OneShotLatch ensureCheckpointLatch = new OneShotLatch();
CheckpointResponder checkpointResponderMock = new CheckpointResponder() {
@Override
public void acknowledgeCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics, TaskStateSnapshot subtaskState) {
// even though the async checkpoint would not finish
try {
delayCheckpointLatch.await();
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
boolean hasManagedKeyedState = false;
for (Map.Entry<OperatorID, OperatorSubtaskState> entry : subtaskState.getSubtaskStateMappings()) {
OperatorSubtaskState state = entry.getValue();
if (state != null) {
hasManagedKeyedState |= state.getManagedKeyedState() != null;
}
}
// should be one k/v state
assertTrue(hasManagedKeyedState);
// we now know that the checkpoint went through
ensureCheckpointLatch.trigger();
}
@Override
public void reportCheckpointMetrics(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointMetrics checkpointMetrics) {
}
@Override
public void declineCheckpoint(JobID jobID, ExecutionAttemptID executionAttemptID, long checkpointId, CheckpointException checkpointException) {
}
};
JobID jobID = new JobID();
ExecutionAttemptID executionAttemptID = new ExecutionAttemptID();
TestTaskStateManager taskStateManagerTestMock = new TestTaskStateManager(jobID, executionAttemptID, checkpointResponderMock, TestLocalRecoveryConfig.disabled(), new InMemoryStateChangelogStorage(), new HashMap<>(), -1L, new OneShotLatch());
StreamMockEnvironment mockEnv = new StreamMockEnvironment(testHarness.jobConfig, testHarness.taskConfig, testHarness.memorySize, new MockInputSplitProvider(), testHarness.bufferSize, taskStateManagerTestMock);
AtomicReference<Throwable> errorRef = new AtomicReference<>();
mockEnv.setExternalExceptionHandler(errorRef::set);
testHarness.invoke(mockEnv);
testHarness.waitForTaskRunning();
final OneInputStreamTask<String, String> task = testHarness.getTask();
task.triggerCheckpointAsync(new CheckpointMetaData(42, 17), CheckpointOptions.forCheckpointWithDefaultLocation()).get();
testHarness.processElement(new StreamRecord<>("Wohoo", 0));
// now we allow the checkpoint
delayCheckpointLatch.trigger();
// wait for the checkpoint to go through
ensureCheckpointLatch.await();
testHarness.endInput();
ExecutorService threadPool = task.getAsyncOperationsThreadPool();
threadPool.shutdown();
Assert.assertTrue(threadPool.awaitTermination(60_000, TimeUnit.MILLISECONDS));
testHarness.waitForTaskCompletion();
if (errorRef.get() != null) {
fail("Unexpected exception during execution.");
}
}
use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.
the class RestoreStreamTaskTest method testRestore.
@Test
public void testRestore() throws Exception {
OperatorID headOperatorID = new OperatorID(42L, 42L);
OperatorID tailOperatorID = new OperatorID(44L, 44L);
JobManagerTaskRestore restore = createRunAndCheckpointOperatorChain(headOperatorID, new CounterOperator(), tailOperatorID, new CounterOperator(), Optional.empty());
TaskStateSnapshot stateHandles = restore.getTaskStateSnapshot();
assertEquals(2, stateHandles.getSubtaskStateMappings().size());
createRunAndCheckpointOperatorChain(headOperatorID, new CounterOperator(), tailOperatorID, new CounterOperator(), Optional.of(restore));
assertEquals(new HashSet<>(Arrays.asList(headOperatorID, tailOperatorID)), RESTORED_OPERATORS.keySet());
assertThat(new HashSet<>(RESTORED_OPERATORS.values()), contains(restore.getRestoreCheckpointId()));
}
use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.
the class RestoreStreamTaskTest method testRestoreWithoutState.
@Test
public void testRestoreWithoutState() throws Exception {
OperatorID headOperatorID = new OperatorID(42L, 42L);
OperatorID tailOperatorID = new OperatorID(44L, 44L);
JobManagerTaskRestore restore = createRunAndCheckpointOperatorChain(headOperatorID, new StatelessOperator(), tailOperatorID, new CounterOperator(), Optional.empty());
TaskStateSnapshot stateHandles = restore.getTaskStateSnapshot();
assertEquals(2, stateHandles.getSubtaskStateMappings().size());
createRunAndCheckpointOperatorChain(headOperatorID, new StatelessOperator(), tailOperatorID, new CounterOperator(), Optional.of(restore));
assertEquals(new HashSet<>(Arrays.asList(headOperatorID, tailOperatorID)), RESTORED_OPERATORS.keySet());
assertThat(new HashSet<>(RESTORED_OPERATORS.values()), contains(restore.getRestoreCheckpointId()));
}
use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.
the class RestoreStreamTaskTest method testRestoreHeadWithNewId.
@Test
public void testRestoreHeadWithNewId() throws Exception {
OperatorID tailOperatorID = new OperatorID(44L, 44L);
JobManagerTaskRestore restore = createRunAndCheckpointOperatorChain(new OperatorID(42L, 42L), new CounterOperator(), tailOperatorID, new CounterOperator(), Optional.empty());
TaskStateSnapshot stateHandles = restore.getTaskStateSnapshot();
assertEquals(2, stateHandles.getSubtaskStateMappings().size());
createRunAndCheckpointOperatorChain(new OperatorID(4242L, 4242L), new CounterOperator(), tailOperatorID, new CounterOperator(), Optional.of(restore));
assertEquals(Collections.singleton(tailOperatorID), RESTORED_OPERATORS.keySet());
assertThat(new HashSet<>(RESTORED_OPERATORS.values()), contains(restore.getRestoreCheckpointId()));
}
use of org.apache.flink.runtime.checkpoint.TaskStateSnapshot in project flink by apache.
the class RestoreStreamTaskTest method testRestoreAfterScaleUp.
@Test
public void testRestoreAfterScaleUp() throws Exception {
OperatorID headOperatorID = new OperatorID(42L, 42L);
OperatorID tailOperatorID = new OperatorID(44L, 44L);
JobManagerTaskRestore restore = createRunAndCheckpointOperatorChain(headOperatorID, new CounterOperator(), tailOperatorID, new CounterOperator(), Optional.empty());
TaskStateSnapshot stateHandles = restore.getTaskStateSnapshot();
assertEquals(2, stateHandles.getSubtaskStateMappings().size());
// test empty state in case of scale up
OperatorSubtaskState emptyHeadOperatorState = OperatorSubtaskState.builder().build();
stateHandles.putSubtaskStateByOperatorID(headOperatorID, emptyHeadOperatorState);
createRunAndCheckpointOperatorChain(headOperatorID, new CounterOperator(), tailOperatorID, new CounterOperator(), Optional.of(restore));
assertEquals(new HashSet<>(Arrays.asList(headOperatorID, tailOperatorID)), RESTORED_OPERATORS.keySet());
assertThat(new HashSet<>(RESTORED_OPERATORS.values()), contains(restore.getRestoreCheckpointId()));
}
Aggregations