use of org.apache.flink.runtime.checkpoint.JobManagerTaskRestore in project flink by apache.
the class ExecutionTest method testTaskRestoreStateIsNulledAfterDeployment.
/**
 * Verifies that an {@link Execution} no longer holds its task restore state once scheduling has
 * been started and the execution has been deployed. See FLINK-9693.
 */
@Test
public void testTaskRestoreStateIsNulledAfterDeployment() throws Exception {
    final JobVertex noOpVertex = createNoOpJobVertex();
    final JobVertexID noOpVertexId = noOpVertex.getID();

    // Scheduler running on the main-thread executor adapter, backed by a slot provider that is
    // limited to a single physical slot.
    final SchedulerBase scheduler =
            SchedulerTestingUtils.newSchedulerBuilder(
                            JobGraphTestUtils.streamingJobGraph(noOpVertex),
                            ComponentMainThreadExecutorServiceAdapter.forMainThread())
                    .setExecutionSlotAllocatorFactory(
                            SchedulerTestingUtils.newSlotSharingExecutionSlotAllocatorFactory(
                                    TestingPhysicalSlotProvider
                                            .createWithLimitedAmountOfPhysicalSlots(1)))
                    .build();

    // Current attempt of the first subtask of the vertex.
    final Execution attempt =
            scheduler
                    .getExecutionJobVertex(noOpVertexId)
                    .getTaskVertices()[0]
                    .getCurrentExecutionAttempt();

    // Before deployment the restore state that was just set must be visible.
    final JobManagerTaskRestore initialRestore =
            new JobManagerTaskRestore(1L, new TaskStateSnapshot());
    attempt.setInitialState(initialRestore);
    assertThat(attempt.getTaskRestore(), is(notNullValue()));

    // Start scheduling; the restore state is expected to be gone as soon as this call returns.
    scheduler.startScheduling();
    assertThat(attempt.getTaskRestore(), is(nullValue()));
}
use of org.apache.flink.runtime.checkpoint.JobManagerTaskRestore in project flink by apache.
the class RestoreStreamTaskTest method testRestoreTailWithNewId.
/**
 * Runs and checkpoints a two-operator chain, then runs the chain again from that checkpoint
 * with the same head operator ID but a different tail operator ID, and checks that only the
 * head operator ID shows up in {@code RESTORED_OPERATORS}.
 */
@Test
public void testRestoreTailWithNewId() throws Exception {
    final OperatorID headId = new OperatorID(42L, 42L);
    final OperatorID firstRunTailId = new OperatorID(44L, 44L);

    // First run: nothing to restore from; yields the restore descriptor for the second run.
    final JobManagerTaskRestore checkpointed =
            createRunAndCheckpointOperatorChain(
                    headId,
                    new CounterOperator(),
                    firstRunTailId,
                    new CounterOperator(),
                    Optional.empty());

    // Both operators of the chain are expected to have an entry in the snapshot.
    assertEquals(2, checkpointed.getTaskStateSnapshot().getSubtaskStateMappings().size());

    // Second run: same head ID, new tail ID, restoring from the checkpoint taken above.
    final OperatorID secondRunTailId = new OperatorID(4444L, 4444L);
    createRunAndCheckpointOperatorChain(
            headId,
            new CounterOperator(),
            secondRunTailId,
            new CounterOperator(),
            Optional.of(checkpointed));

    // Only the head ID is expected as a key, and the only distinct value is the checkpoint ID
    // of the restore.
    assertEquals(Collections.singleton(headId), RESTORED_OPERATORS.keySet());
    assertThat(
            new HashSet<>(RESTORED_OPERATORS.values()),
            contains(checkpointed.getRestoreCheckpointId()));
}
use of org.apache.flink.runtime.checkpoint.JobManagerTaskRestore in project flink by apache.
the class TaskStateManagerImplTest method testStateReportingAndRetrieving.
/**
 * Test reporting and retrieving prioritized local and remote state.
 *
 * <p>The first half reports a job-manager (remote) snapshot and a task-manager (local) snapshot
 * through a state manager created without a task restore, and checks what the checkpoint
 * responder and the local state store received. The second half creates a new state manager
 * with a task restore built from the acknowledged snapshot and checks the order in which the
 * state alternatives are handed out per operator.
 */
@Test
public void testStateReportingAndRetrieving() {
    JobID jobID = new JobID();
    ExecutionAttemptID executionAttemptID = new ExecutionAttemptID();
    TestCheckpointResponder testCheckpointResponder = new TestCheckpointResponder();
    TestTaskLocalStateStore testTaskLocalStateStore = new TestTaskLocalStateStore();
    InMemoryStateChangelogStorage changelogStorage = new InMemoryStateChangelogStorage();

    // No task restore (null) for the reporting phase.
    TaskStateManager taskStateManager =
            taskStateManager(
                    jobID,
                    executionAttemptID,
                    testCheckpointResponder,
                    null,
                    testTaskLocalStateStore,
                    changelogStorage);

    // ---------------------------------------- test reporting
    // -----------------------------------------
    CheckpointMetaData checkpointMetaData = new CheckpointMetaData(74L, 11L);
    CheckpointMetrics checkpointMetrics = new CheckpointMetrics();
    TaskStateSnapshot jmTaskStateSnapshot = new TaskStateSnapshot();

    OperatorID operatorID_1 = new OperatorID(1L, 1L);
    OperatorID operatorID_2 = new OperatorID(2L, 2L);
    OperatorID operatorID_3 = new OperatorID(3L, 3L);

    // Without a task restore, no operator is reported as restored.
    Assert.assertFalse(taskStateManager.prioritizedOperatorState(operatorID_1).isRestored());
    Assert.assertFalse(taskStateManager.prioritizedOperatorState(operatorID_2).isRestored());
    Assert.assertFalse(taskStateManager.prioritizedOperatorState(operatorID_3).isRestored());

    KeyGroupRange keyGroupRange = new KeyGroupRange(0, 1);

    // Remote state of operator 1 has only managed keyed state.
    OperatorSubtaskState jmOperatorSubtaskState_1 =
            OperatorSubtaskState.builder()
                    .setManagedKeyedState(
                            StateHandleDummyUtil.createNewKeyedStateHandle(keyGroupRange))
                    .build();

    // Remote state of operator 2 has only raw keyed state.
    OperatorSubtaskState jmOperatorSubtaskState_2 =
            OperatorSubtaskState.builder()
                    .setRawKeyedState(
                            StateHandleDummyUtil.createNewKeyedStateHandle(keyGroupRange))
                    .build();

    jmTaskStateSnapshot.putSubtaskStateByOperatorID(operatorID_1, jmOperatorSubtaskState_1);
    jmTaskStateSnapshot.putSubtaskStateByOperatorID(operatorID_2, jmOperatorSubtaskState_2);

    TaskStateSnapshot tmTaskStateSnapshot = new TaskStateSnapshot();

    // Only operator 1 has a local alternative for the managed keyed state.
    OperatorSubtaskState tmOperatorSubtaskState_1 =
            OperatorSubtaskState.builder()
                    .setManagedKeyedState(
                            StateHandleDummyUtil.createNewKeyedStateHandle(keyGroupRange))
                    .build();
    tmTaskStateSnapshot.putSubtaskStateByOperatorID(operatorID_1, tmOperatorSubtaskState_1);

    taskStateManager.reportTaskStateSnapshots(
            checkpointMetaData, checkpointMetrics, jmTaskStateSnapshot, tmTaskStateSnapshot);

    TestCheckpointResponder.AcknowledgeReport acknowledgeReport =
            testCheckpointResponder.getAcknowledgeReports().get(0);

    // checks that the checkpoint responder and the local state store received state as
    // expected.
    Assert.assertEquals(
            checkpointMetaData.getCheckpointId(), acknowledgeReport.getCheckpointId());
    Assert.assertEquals(checkpointMetrics, acknowledgeReport.getCheckpointMetrics());
    Assert.assertEquals(executionAttemptID, acknowledgeReport.getExecutionAttemptID());
    Assert.assertEquals(jobID, acknowledgeReport.getJobID());
    Assert.assertEquals(jmTaskStateSnapshot, acknowledgeReport.getSubtaskState());
    Assert.assertEquals(
            tmTaskStateSnapshot,
            testTaskLocalStateStore.retrieveLocalState(checkpointMetaData.getCheckpointId()));

    // -------------------------------------- test prio retrieving
    // ---------------------------------------
    JobManagerTaskRestore taskRestore =
            new JobManagerTaskRestore(
                    checkpointMetaData.getCheckpointId(), acknowledgeReport.getSubtaskState());

    // Same responder and local store as above, but now with a task restore.
    taskStateManager =
            taskStateManager(
                    jobID,
                    executionAttemptID,
                    testCheckpointResponder,
                    taskRestore,
                    testTaskLocalStateStore,
                    changelogStorage);

    // this has remote AND local managed keyed state.
    PrioritizedOperatorSubtaskState prioritized_1 =
            taskStateManager.prioritizedOperatorState(operatorID_1);
    // this has only remote raw keyed state.
    PrioritizedOperatorSubtaskState prioritized_2 =
            taskStateManager.prioritizedOperatorState(operatorID_2);
    // no state was reported for operator 3.
    PrioritizedOperatorSubtaskState prioritized_3 =
            taskStateManager.prioritizedOperatorState(operatorID_3);

    // With a task restore present, isRestored() is expected to be true for every operator,
    // including operator 3 and a fresh OperatorID for which no state was ever reported.
    Assert.assertTrue(prioritized_1.isRestored());
    Assert.assertTrue(prioritized_2.isRestored());
    Assert.assertTrue(prioritized_3.isRestored());
    Assert.assertTrue(taskStateManager.prioritizedOperatorState(new OperatorID()).isRestored());

    // checks for operator 1: the local (TM) handle comes first, then the remote (JM) handle,
    // and nothing after that. Handles are compared by identity.
    Iterator<StateObjectCollection<KeyedStateHandle>> prioritizedManagedKeyedState_1 =
            prioritized_1.getPrioritizedManagedKeyedState().iterator();

    Assert.assertTrue(prioritizedManagedKeyedState_1.hasNext());
    StateObjectCollection<KeyedStateHandle> current = prioritizedManagedKeyedState_1.next();
    KeyedStateHandle keyedStateHandleExp =
            tmOperatorSubtaskState_1.getManagedKeyedState().iterator().next();
    KeyedStateHandle keyedStateHandleAct = current.iterator().next();
    Assert.assertSame(keyedStateHandleExp, keyedStateHandleAct);

    Assert.assertTrue(prioritizedManagedKeyedState_1.hasNext());
    current = prioritizedManagedKeyedState_1.next();
    keyedStateHandleExp = jmOperatorSubtaskState_1.getManagedKeyedState().iterator().next();
    keyedStateHandleAct = current.iterator().next();
    Assert.assertSame(keyedStateHandleExp, keyedStateHandleAct);
    Assert.assertFalse(prioritizedManagedKeyedState_1.hasNext());

    // checks for operator 2: exactly one alternative, the remote (JM) raw keyed handle.
    Iterator<StateObjectCollection<KeyedStateHandle>> prioritizedRawKeyedState_2 =
            prioritized_2.getPrioritizedRawKeyedState().iterator();

    Assert.assertTrue(prioritizedRawKeyedState_2.hasNext());
    current = prioritizedRawKeyedState_2.next();
    keyedStateHandleExp = jmOperatorSubtaskState_2.getRawKeyedState().iterator().next();
    keyedStateHandleAct = current.iterator().next();
    Assert.assertSame(keyedStateHandleExp, keyedStateHandleAct);
    Assert.assertFalse(prioritizedRawKeyedState_2.hasNext());
}
use of org.apache.flink.runtime.checkpoint.JobManagerTaskRestore in project flink by apache.
the class TaskStateManagerImplTest method testStateRetrievingWithFinishedOperator.
/**
 * Checks that a state manager created from a task restore whose snapshot is {@link
 * TaskStateSnapshot#FINISHED_ON_RESTORE} reports the task as deployed as finished.
 */
@Test
public void testStateRetrievingWithFinishedOperator() {
    final JobManagerTaskRestore finishedRestore =
            new JobManagerTaskRestore(2, TaskStateSnapshot.FINISHED_ON_RESTORE);

    // The fourth constructor argument is deliberately left null, as in the original test.
    final TaskStateManagerImpl manager =
            new TaskStateManagerImpl(
                    new JobID(),
                    new ExecutionAttemptID(),
                    new TestTaskLocalStateStore(),
                    null,
                    finishedRestore,
                    new TestCheckpointResponder());

    Assert.assertTrue(manager.isTaskDeployedAsFinished());
}
use of org.apache.flink.runtime.checkpoint.JobManagerTaskRestore in project flink by apache.
the class StreamTaskStateInitializerImplTest method testWithRestore.
/**
 * Builds a task restore that carries managed/raw operator state, managed/raw keyed state and
 * channel state for a single operator, asks the state initializer for that operator's {@link
 * StreamOperatorStateContext}, and checks what the context exposes.
 */
@SuppressWarnings("unchecked")
@Test
public void testWithRestore() throws Exception {
    // Spied backend whose two factory methods only hand out Mockito mocks.
    StateBackend spiedBackend =
            spy(
                    new StateBackend() {
                        @Override
                        public <K> AbstractKeyedStateBackend<K> createKeyedStateBackend(
                                Environment env,
                                JobID jobID,
                                String operatorIdentifier,
                                TypeSerializer<K> keySerializer,
                                int numberOfKeyGroups,
                                KeyGroupRange keyGroupRange,
                                TaskKvStateRegistry kvStateRegistry,
                                TtlTimeProvider ttlTimeProvider,
                                MetricGroup metricGroup,
                                @Nonnull Collection<KeyedStateHandle> stateHandles,
                                CloseableRegistry cancelStreamRegistry)
                                throws Exception {
                            return mock(AbstractKeyedStateBackend.class);
                        }

                        @Override
                        public OperatorStateBackend createOperatorStateBackend(
                                Environment env,
                                String operatorIdentifier,
                                @Nonnull Collection<OperatorStateHandle> stateHandles,
                                CloseableRegistry cancelStreamRegistry)
                                throws Exception {
                            return mock(OperatorStateBackend.class);
                        }
                    });

    OperatorID restoredOperatorId = new OperatorID(47L, 11L);
    TaskStateSnapshot restoredSnapshot = new TaskStateSnapshot();

    // Fixed seed; every dummy handle below draws from this one generator, so the handles are
    // created in the same order as they are attached to the builder.
    Random rnd = new Random(0x42);

    // Managed operator state: one entry "a" with two offsets.
    OperatorStreamStateHandle managedOperatorState =
            new OperatorStreamStateHandle(
                    Collections.singletonMap(
                            "a",
                            new OperatorStateHandle.StateMetaInfo(
                                    new long[] {0, 10}, SPLIT_DISTRIBUTE)),
                    CheckpointTestUtils.createDummyStreamStateHandle(rnd, null));

    // Raw operator state: one entry "_default_" with three offsets.
    OperatorStreamStateHandle rawOperatorState =
            new OperatorStreamStateHandle(
                    Collections.singletonMap(
                            "_default_",
                            new OperatorStateHandle.StateMetaInfo(
                                    new long[] {0, 20, 30}, SPLIT_DISTRIBUTE)),
                    CheckpointTestUtils.createDummyStreamStateHandle(rnd, null));

    OperatorSubtaskState restoredSubtaskState =
            OperatorSubtaskState.builder()
                    .setManagedOperatorState(managedOperatorState)
                    .setRawOperatorState(rawOperatorState)
                    .setManagedKeyedState(
                            CheckpointTestUtils.createDummyKeyGroupStateHandle(rnd, null))
                    .setRawKeyedState(
                            CheckpointTestUtils.createDummyKeyGroupStateHandle(rnd, null))
                    .setInputChannelState(singleton(createNewInputChannelStateHandle(10, rnd)))
                    .setResultSubpartitionState(
                            singleton(createNewResultSubpartitionStateHandle(10, rnd)))
                    .build();
    restoredSnapshot.putSubtaskStateByOperatorID(restoredOperatorId, restoredSubtaskState);

    JobManagerTaskRestore restore = new JobManagerTaskRestore(42L, restoredSnapshot);
    StreamTaskStateInitializer stateInitializer =
            streamTaskStateManager(spiedBackend, restore, false);

    // Mocked operator that only answers with the restored operator's ID.
    AbstractStreamOperator<?> operatorMock = mock(AbstractStreamOperator.class);
    when(operatorMock.getOperatorID()).thenReturn(restoredOperatorId);

    TypeSerializer<?> keySerializer = new IntSerializer();
    CloseableRegistry registry = new CloseableRegistry();

    StreamOperatorStateContext restoredContext =
            stateInitializer.streamOperatorStateContext(
                    operatorMock.getOperatorID(),
                    operatorMock.getClass().getSimpleName(),
                    new TestProcessingTimeService(),
                    operatorMock,
                    keySerializer,
                    registry,
                    new UnregisteredMetricsGroup(),
                    1.0,
                    false);

    OperatorStateBackend operatorBackend = restoredContext.operatorStateBackend();
    CheckpointableKeyedStateBackend<?> keyedBackend = restoredContext.keyedStateBackend();
    InternalTimeServiceManager<?> timerManager = restoredContext.internalTimerServiceManager();
    CloseableIterable<KeyGroupStatePartitionStreamProvider> rawKeyedInputs =
            restoredContext.rawKeyedStateInputs();
    CloseableIterable<StatePartitionStreamProvider> rawOperatorInputs =
            restoredContext.rawOperatorStateInputs();

    Assert.assertTrue("Expected the context to be restored", restoredContext.isRestored());
    Assert.assertEquals(OptionalLong.of(42L), restoredContext.getRestoredCheckpointId());

    Assert.assertNotNull(operatorBackend);
    Assert.assertNotNull(keyedBackend);
    // this is deactivated on purpose so that it does not attempt to consume the raw keyed
    // state.
    Assert.assertNull(timerManager);
    Assert.assertNotNull(rawKeyedInputs);
    Assert.assertNotNull(rawOperatorInputs);

    // The loops only count elements; the providers themselves are not inspected.
    int rawKeyedInputCount = 0;
    for (KeyGroupStatePartitionStreamProvider ignoredKeyedInput : rawKeyedInputs) {
        ++rawKeyedInputCount;
    }
    Assert.assertEquals(1, rawKeyedInputCount);

    int rawOperatorInputCount = 0;
    for (StatePartitionStreamProvider ignoredOperatorInput : rawOperatorInputs) {
        ++rawOperatorInputCount;
    }
    Assert.assertEquals(3, rawOperatorInputCount);

    checkCloseablesRegistered(
            registry, operatorBackend, keyedBackend, rawKeyedInputs, rawOperatorInputs);
}
Aggregations