use of org.apache.flink.runtime.state.OperatorStateHandle in project flink by apache.
the class CheckpointCoordinatorRestoringTest method testRestoreLatestCheckpointedStateWithChangingParallelism.
/**
* Tests the checkpoint restoration with changing parallelism of job vertex with partitioned
* state.
*/
private void testRestoreLatestCheckpointedStateWithChangingParallelism(boolean scaleOut) throws Exception {
final JobVertexID jobVertexID1 = new JobVertexID();
final JobVertexID jobVertexID2 = new JobVertexID();
int parallelism1 = 3;
int parallelism2 = scaleOut ? 2 : 13;
int maxParallelism1 = 42;
int maxParallelism2 = 13;
int newParallelism2 = scaleOut ? 13 : 2;
CompletedCheckpointStore completedCheckpointStore = new EmbeddedCompletedCheckpointStore();
final ExecutionGraph graph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(jobVertexID1, parallelism1, maxParallelism1).addJobVertex(jobVertexID2, parallelism2, maxParallelism2).build();
final ExecutionJobVertex jobVertex1 = graph.getJobVertex(jobVertexID1);
final ExecutionJobVertex jobVertex2 = graph.getJobVertex(jobVertexID2);
// set up the coordinator and validate the initial state
CheckpointCoordinator coord = new CheckpointCoordinatorBuilder().setExecutionGraph(graph).setCompletedCheckpointStore(completedCheckpointStore).setTimer(manuallyTriggeredScheduledExecutor).build();
// trigger the checkpoint
coord.triggerCheckpoint(false);
manuallyTriggeredScheduledExecutor.triggerAll();
assertEquals(1, coord.getPendingCheckpoints().size());
long checkpointId = Iterables.getOnlyElement(coord.getPendingCheckpoints().keySet());
List<KeyGroupRange> keyGroupPartitions1 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism1, parallelism1);
List<KeyGroupRange> keyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, parallelism2);
// vertex 1
for (int index = 0; index < jobVertex1.getParallelism(); index++) {
OperatorStateHandle opStateBackend = generatePartitionableStateHandle(jobVertexID1, index, 2, 8, false);
KeyGroupsStateHandle keyedStateBackend = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), false);
KeyGroupsStateHandle keyedStateRaw = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), true);
OperatorSubtaskState operatorSubtaskState = OperatorSubtaskState.builder().setManagedOperatorState(opStateBackend).setManagedKeyedState(keyedStateBackend).setRawKeyedState(keyedStateRaw).setInputChannelState(StateObjectCollection.singleton(createNewInputChannelStateHandle(3, new Random()))).build();
TaskStateSnapshot taskOperatorSubtaskStates = new TaskStateSnapshot();
taskOperatorSubtaskStates.putSubtaskStateByOperatorID(OperatorID.fromJobVertexID(jobVertexID1), operatorSubtaskState);
AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(graph.getJobID(), jobVertex1.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), taskOperatorSubtaskStates);
coord.receiveAcknowledgeMessage(acknowledgeCheckpoint, TASK_MANAGER_LOCATION_INFO);
}
// vertex 2
final List<ChainedStateHandle<OperatorStateHandle>> expectedOpStatesBackend = new ArrayList<>(jobVertex2.getParallelism());
final List<ChainedStateHandle<OperatorStateHandle>> expectedOpStatesRaw = new ArrayList<>(jobVertex2.getParallelism());
for (int index = 0; index < jobVertex2.getParallelism(); index++) {
KeyGroupsStateHandle keyedStateBackend = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), false);
KeyGroupsStateHandle keyedStateRaw = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), true);
OperatorStateHandle opStateBackend = generatePartitionableStateHandle(jobVertexID2, index, 2, 8, false);
OperatorStateHandle opStateRaw = generatePartitionableStateHandle(jobVertexID2, index, 2, 8, true);
expectedOpStatesBackend.add(new ChainedStateHandle<>(singletonList(opStateBackend)));
expectedOpStatesRaw.add(new ChainedStateHandle<>(singletonList(opStateRaw)));
OperatorSubtaskState operatorSubtaskState = OperatorSubtaskState.builder().setManagedOperatorState(opStateBackend).setRawOperatorState(opStateRaw).setManagedKeyedState(keyedStateBackend).setRawKeyedState(keyedStateRaw).build();
TaskStateSnapshot taskOperatorSubtaskStates = new TaskStateSnapshot();
taskOperatorSubtaskStates.putSubtaskStateByOperatorID(OperatorID.fromJobVertexID(jobVertexID2), operatorSubtaskState);
AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(graph.getJobID(), jobVertex2.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), taskOperatorSubtaskStates);
coord.receiveAcknowledgeMessage(acknowledgeCheckpoint, TASK_MANAGER_LOCATION_INFO);
}
List<CompletedCheckpoint> completedCheckpoints = coord.getSuccessfulCheckpoints();
assertEquals(1, completedCheckpoints.size());
List<KeyGroupRange> newKeyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, newParallelism2);
// rescale vertex 2
final ExecutionGraph newGraph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(jobVertexID1, parallelism1, maxParallelism1).addJobVertex(jobVertexID2, newParallelism2, maxParallelism2).build();
final ExecutionJobVertex newJobVertex1 = newGraph.getJobVertex(jobVertexID1);
final ExecutionJobVertex newJobVertex2 = newGraph.getJobVertex(jobVertexID2);
// set up the coordinator and validate the initial state
CheckpointCoordinator newCoord = new CheckpointCoordinatorBuilder().setExecutionGraph(newGraph).setCompletedCheckpointStore(completedCheckpointStore).setTimer(manuallyTriggeredScheduledExecutor).build();
Set<ExecutionJobVertex> tasks = new HashSet<>();
tasks.add(newJobVertex1);
tasks.add(newJobVertex2);
assertTrue(newCoord.restoreLatestCheckpointedStateToAll(tasks, false));
// verify the restored state
verifyStateRestore(jobVertexID1, newJobVertex1, keyGroupPartitions1);
List<List<Collection<OperatorStateHandle>>> actualOpStatesBackend = new ArrayList<>(newJobVertex2.getParallelism());
List<List<Collection<OperatorStateHandle>>> actualOpStatesRaw = new ArrayList<>(newJobVertex2.getParallelism());
for (int i = 0; i < newJobVertex2.getParallelism(); i++) {
List<OperatorIDPair> operatorIDs = newJobVertex2.getOperatorIDs();
KeyGroupsStateHandle originalKeyedStateBackend = generateKeyGroupState(jobVertexID2, newKeyGroupPartitions2.get(i), false);
KeyGroupsStateHandle originalKeyedStateRaw = generateKeyGroupState(jobVertexID2, newKeyGroupPartitions2.get(i), true);
JobManagerTaskRestore taskRestore = newJobVertex2.getTaskVertices()[i].getCurrentExecutionAttempt().getTaskRestore();
Assert.assertEquals(1L, taskRestore.getRestoreCheckpointId());
TaskStateSnapshot taskStateHandles = taskRestore.getTaskStateSnapshot();
final int headOpIndex = operatorIDs.size() - 1;
List<Collection<OperatorStateHandle>> allParallelManagedOpStates = new ArrayList<>(operatorIDs.size());
List<Collection<OperatorStateHandle>> allParallelRawOpStates = new ArrayList<>(operatorIDs.size());
for (int idx = 0; idx < operatorIDs.size(); ++idx) {
OperatorID operatorID = operatorIDs.get(idx).getGeneratedOperatorID();
OperatorSubtaskState opState = taskStateHandles.getSubtaskStateByOperatorID(operatorID);
Collection<OperatorStateHandle> opStateBackend = opState.getManagedOperatorState();
Collection<OperatorStateHandle> opStateRaw = opState.getRawOperatorState();
allParallelManagedOpStates.add(opStateBackend);
allParallelRawOpStates.add(opStateRaw);
if (idx == headOpIndex) {
Collection<KeyedStateHandle> keyedStateBackend = opState.getManagedKeyedState();
Collection<KeyedStateHandle> keyGroupStateRaw = opState.getRawKeyedState();
compareKeyedState(singletonList(originalKeyedStateBackend), keyedStateBackend);
compareKeyedState(singletonList(originalKeyedStateRaw), keyGroupStateRaw);
}
}
actualOpStatesBackend.add(allParallelManagedOpStates);
actualOpStatesRaw.add(allParallelRawOpStates);
}
comparePartitionableState(expectedOpStatesBackend, actualOpStatesBackend);
comparePartitionableState(expectedOpStatesRaw, actualOpStatesRaw);
}
use of org.apache.flink.runtime.state.OperatorStateHandle in project flink by apache.
the class StreamTaskStateInitializerImplTest method testWithRestore.
@SuppressWarnings("unchecked")
@Test
public void testWithRestore() throws Exception {
StateBackend mockingBackend = spy(new StateBackend() {
@Override
public <K> AbstractKeyedStateBackend<K> createKeyedStateBackend(Environment env, JobID jobID, String operatorIdentifier, TypeSerializer<K> keySerializer, int numberOfKeyGroups, KeyGroupRange keyGroupRange, TaskKvStateRegistry kvStateRegistry, TtlTimeProvider ttlTimeProvider, MetricGroup metricGroup, @Nonnull Collection<KeyedStateHandle> stateHandles, CloseableRegistry cancelStreamRegistry) throws Exception {
return mock(AbstractKeyedStateBackend.class);
}
@Override
public OperatorStateBackend createOperatorStateBackend(Environment env, String operatorIdentifier, @Nonnull Collection<OperatorStateHandle> stateHandles, CloseableRegistry cancelStreamRegistry) throws Exception {
return mock(OperatorStateBackend.class);
}
});
OperatorID operatorID = new OperatorID(47L, 11L);
TaskStateSnapshot taskStateSnapshot = new TaskStateSnapshot();
Random random = new Random(0x42);
OperatorSubtaskState operatorSubtaskState = OperatorSubtaskState.builder().setManagedOperatorState(new OperatorStreamStateHandle(Collections.singletonMap("a", new OperatorStateHandle.StateMetaInfo(new long[] { 0, 10 }, SPLIT_DISTRIBUTE)), CheckpointTestUtils.createDummyStreamStateHandle(random, null))).setRawOperatorState(new OperatorStreamStateHandle(Collections.singletonMap("_default_", new OperatorStateHandle.StateMetaInfo(new long[] { 0, 20, 30 }, SPLIT_DISTRIBUTE)), CheckpointTestUtils.createDummyStreamStateHandle(random, null))).setManagedKeyedState(CheckpointTestUtils.createDummyKeyGroupStateHandle(random, null)).setRawKeyedState(CheckpointTestUtils.createDummyKeyGroupStateHandle(random, null)).setInputChannelState(singleton(createNewInputChannelStateHandle(10, random))).setResultSubpartitionState(singleton(createNewResultSubpartitionStateHandle(10, random))).build();
taskStateSnapshot.putSubtaskStateByOperatorID(operatorID, operatorSubtaskState);
JobManagerTaskRestore jobManagerTaskRestore = new JobManagerTaskRestore(42L, taskStateSnapshot);
StreamTaskStateInitializer streamTaskStateManager = streamTaskStateManager(mockingBackend, jobManagerTaskRestore, false);
AbstractStreamOperator<?> streamOperator = mock(AbstractStreamOperator.class);
when(streamOperator.getOperatorID()).thenReturn(operatorID);
TypeSerializer<?> typeSerializer = new IntSerializer();
CloseableRegistry closeableRegistry = new CloseableRegistry();
StreamOperatorStateContext stateContext = streamTaskStateManager.streamOperatorStateContext(streamOperator.getOperatorID(), streamOperator.getClass().getSimpleName(), new TestProcessingTimeService(), streamOperator, typeSerializer, closeableRegistry, new UnregisteredMetricsGroup(), 1.0, false);
OperatorStateBackend operatorStateBackend = stateContext.operatorStateBackend();
CheckpointableKeyedStateBackend<?> keyedStateBackend = stateContext.keyedStateBackend();
InternalTimeServiceManager<?> timeServiceManager = stateContext.internalTimerServiceManager();
CloseableIterable<KeyGroupStatePartitionStreamProvider> keyedStateInputs = stateContext.rawKeyedStateInputs();
CloseableIterable<StatePartitionStreamProvider> operatorStateInputs = stateContext.rawOperatorStateInputs();
Assert.assertTrue("Expected the context to be restored", stateContext.isRestored());
Assert.assertEquals(OptionalLong.of(42L), stateContext.getRestoredCheckpointId());
Assert.assertNotNull(operatorStateBackend);
Assert.assertNotNull(keyedStateBackend);
// this is deactivated on purpose so that it does not attempt to consume the raw keyed
// state.
Assert.assertNull(timeServiceManager);
Assert.assertNotNull(keyedStateInputs);
Assert.assertNotNull(operatorStateInputs);
int count = 0;
for (KeyGroupStatePartitionStreamProvider keyedStateInput : keyedStateInputs) {
++count;
}
Assert.assertEquals(1, count);
count = 0;
for (StatePartitionStreamProvider operatorStateInput : operatorStateInputs) {
++count;
}
Assert.assertEquals(3, count);
checkCloseablesRegistered(closeableRegistry, operatorStateBackend, keyedStateBackend, keyedStateInputs, operatorStateInputs);
}
use of org.apache.flink.runtime.state.OperatorStateHandle in project flink by apache.
the class BackendRestorerProcedureTest method testExceptionThrownIfAllRestoresFailed.
/**
* Tests if there is an exception if all restore attempts are exhausted and failed.
*/
@Test
public void testExceptionThrownIfAllRestoresFailed() throws Exception {
CloseableRegistry closeableRegistry = new CloseableRegistry();
OperatorStateHandle firstFailHandle = mock(OperatorStateHandle.class);
OperatorStateHandle secondFailHandle = mock(OperatorStateHandle.class);
OperatorStateHandle thirdFailHandle = mock(OperatorStateHandle.class);
List<StateObjectCollection<OperatorStateHandle>> sortedRestoreOptions = Arrays.asList(new StateObjectCollection<>(Collections.singletonList(firstFailHandle)), new StateObjectCollection<>(Collections.singletonList(secondFailHandle)), new StateObjectCollection<>(Collections.singletonList(thirdFailHandle)));
BackendRestorerProcedure<OperatorStateBackend, OperatorStateHandle> restorerProcedure = new BackendRestorerProcedure<>(backendSupplier, closeableRegistry, "test op state backend");
try {
restorerProcedure.createAndRestore(sortedRestoreOptions);
Assert.fail();
} catch (Exception ignore) {
}
verify(firstFailHandle).openInputStream();
verify(secondFailHandle).openInputStream();
verify(thirdFailHandle).openInputStream();
}
use of org.apache.flink.runtime.state.OperatorStateHandle in project flink by apache.
the class BackendRestorerProcedureTest method testCanBeCanceledViaRegistry.
/**
* Test that the restore can be stopped via the provided closeable registry.
*/
@Test
public void testCanBeCanceledViaRegistry() throws Exception {
CloseableRegistry closeableRegistry = new CloseableRegistry();
OneShotLatch waitForBlock = new OneShotLatch();
OneShotLatch unblock = new OneShotLatch();
OperatorStateHandle blockingRestoreHandle = mock(OperatorStateHandle.class);
when(blockingRestoreHandle.openInputStream()).thenReturn(new BlockingFSDataInputStream(waitForBlock, unblock));
List<StateObjectCollection<OperatorStateHandle>> sortedRestoreOptions = Collections.singletonList(new StateObjectCollection<>(Collections.singletonList(blockingRestoreHandle)));
BackendRestorerProcedure<OperatorStateBackend, OperatorStateHandle> restorerProcedure = new BackendRestorerProcedure<>(backendSupplier, closeableRegistry, "test op state backend");
AtomicReference<Exception> exceptionReference = new AtomicReference<>(null);
Thread restoreThread = new Thread(() -> {
try {
restorerProcedure.createAndRestore(sortedRestoreOptions);
} catch (Exception e) {
exceptionReference.set(e);
}
});
restoreThread.start();
waitForBlock.await();
closeableRegistry.close();
unblock.trigger();
restoreThread.join();
Exception exception = exceptionReference.get();
Assert.assertTrue(exception instanceof FlinkException);
}
Aggregations