Use of org.apache.flink.runtime.state.KeyedStateHandle in the Apache Flink project.
Class HeapSnapshotStrategy, method asyncSnapshot.
/**
 * Creates the supplier that runs the asynchronous part of a snapshot and writes the keyed state
 * held by {@code syncPartResource} into a checkpoint stream.
 *
 * <p>If the resources carry no state meta info, the returned supplier yields
 * {@link SnapshotResult#empty()} without opening a stream. Otherwise the supplier writes the
 * serialization proxy first and then, for every key group of {@code keyGroupRange}, the key-group
 * id followed by each state's id and entries for that key group.
 *
 * @param syncPartResource resources captured for this snapshot (meta info, key serializer, state
 *     ids and state snapshots)
 * @param checkpointId id forwarded to the duplicating stream when local recovery is used
 * @param timestamp not used by this implementation
 * @param streamFactory factory used to open the checkpoint output stream
 * @param checkpointOptions options of the checkpoint; savepoints never use the duplicating stream
 * @return supplier that performs the write and produces the resulting keyed state handle
 */
@Override
public SnapshotResultSupplier<KeyedStateHandle> asyncSnapshot(HeapSnapshotResources<K> syncPartResource, long checkpointId, long timestamp, @Nonnull CheckpointStreamFactory streamFactory, @Nonnull CheckpointOptions checkpointOptions) {
    final List<StateMetaInfoSnapshot> metaInfoSnapshots = syncPartResource.getMetaInfoSnapshots();
    if (metaInfoSnapshots.isEmpty()) {
        // Nothing registered, so there is nothing to write.
        return registry -> SnapshotResult.empty();
    }

    // The proxy records whether a compression decorator other than the uncompressed one is used.
    final boolean usesCompression = !Objects.equals(UncompressedStreamCompressionDecorator.INSTANCE, keyGroupCompressionDecorator);
    // Note carried over from the original code: get a serialized form already at state
    // registration time in the future.
    final KeyedBackendSerializationProxy<K> serializationProxy = new KeyedBackendSerializationProxy<>(syncPartResource.getKeySerializer(), metaInfoSnapshots, usesCompression);

    // A duplicating stream is only used when local recovery is enabled and this is not a savepoint.
    final SupplierWithException<CheckpointStreamWithResultProvider, Exception> checkpointStreamSupplier;
    if (localRecoveryConfig.isLocalRecoveryEnabled() && !checkpointOptions.getCheckpointType().isSavepoint()) {
        checkpointStreamSupplier = () -> createDuplicatingStream(checkpointId, CheckpointedStateScope.EXCLUSIVE, streamFactory, localRecoveryConfig.getLocalStateDirectoryProvider().orElseThrow(LocalRecoveryConfig.localRecoveryNotEnabled()));
    } else {
        checkpointStreamSupplier = () -> createSimpleStream(CheckpointedStateScope.EXCLUSIVE, streamFactory);
    }

    return snapshotCloseableRegistry -> {
        final Map<StateUID, Integer> stateNamesToId = syncPartResource.getStateNamesToId();
        final Map<StateUID, StateSnapshot> cowStateStableSnapshots = syncPartResource.getCowStateStableSnapshots();

        final CheckpointStreamWithResultProvider streamWithResultProvider = checkpointStreamSupplier.get();
        // Registered so that the registry can close the stream while the write is in progress.
        snapshotCloseableRegistry.registerCloseable(streamWithResultProvider);

        final CheckpointStateOutputStream localStream = streamWithResultProvider.getCheckpointOutputStream();
        final DataOutputViewStreamWrapper outView = new DataOutputViewStreamWrapper(localStream);
        serializationProxy.write(outView);

        final int numberOfKeyGroups = keyGroupRange.getNumberOfKeyGroups();
        final long[] keyGroupRangeOffsets = new long[numberOfKeyGroups];
        for (int pos = 0; pos < numberOfKeyGroups; pos++) {
            final int keyGroupId = keyGroupRange.getKeyGroupId(pos);
            // Remember where this key group starts in the stream before writing anything for it.
            keyGroupRangeOffsets[pos] = localStream.getPos();
            outView.writeInt(keyGroupId);

            for (Map.Entry<StateUID, StateSnapshot> entry : cowStateStableSnapshots.entrySet()) {
                final StateSnapshot.StateKeyGroupWriter keyGroupWriter = entry.getValue().getKeyGroupWriter();
                // Leaving the try block closes only the compression stream obtained from the decorator.
                try (OutputStream compressedOut = keyGroupCompressionDecorator.decorateWithCompression(localStream)) {
                    final DataOutputViewStreamWrapper compressedView = new DataOutputViewStreamWrapper(compressedOut);
                    compressedView.writeShort(stateNamesToId.get(entry.getKey()));
                    keyGroupWriter.writeStateInKeyGroup(compressedView, keyGroupId);
                }
            }
        }

        // Unregistering fails if the stream is no longer registered; then there is no result.
        if (!snapshotCloseableRegistry.unregisterCloseable(streamWithResultProvider)) {
            throw new IOException("Stream already unregistered.");
        }
        final KeyGroupRangeOffsets kgOffs = new KeyGroupRangeOffsets(keyGroupRange, keyGroupRangeOffsets);
        final SnapshotResult<StreamStateHandle> result = streamWithResultProvider.closeAndFinalizeCheckpointStreamResult();
        return toKeyedStateHandleSnapshotResult(result, kgOffs, KeyGroupsStateHandle::new);
    };
}
Use of org.apache.flink.runtime.state.KeyedStateHandle in the Apache Flink project.
Class MetadataV3SerializerTest, method testSerializeChangelogStateBackendHandle.
/**
 * Serializes a changelog state backend handle with {@link MetadataV2V3SerializerBase}, reads it
 * back, and checks that the result is a {@link ChangelogStateBackendHandleImpl} whose
 * materialized state handles equal those of the original handle.
 *
 * @param fullSnapshot forwarded to {@code createChangelogStateBackendHandle} to choose the kind
 *     of handle under test
 * @throws IOException if writing to or reading from the in-memory streams fails
 */
private void testSerializeChangelogStateBackendHandle(boolean fullSnapshot) throws IOException {
    ChangelogStateBackendHandle handle = createChangelogStateBackendHandle(fullSnapshot);
    try (ByteArrayOutputStreamWithPos out = new ByteArrayOutputStreamWithPos()) {
        MetadataV2V3SerializerBase.serializeKeyedStateHandle(handle, new DataOutputStream(out));
        try (ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray())) {
            // No deserialization context is passed for this round trip.
            KeyedStateHandle deserialized = MetadataV2V3SerializerBase.deserializeKeyedStateHandle(new DataInputStream(in), null);
            assertTrue(deserialized instanceof ChangelogStateBackendHandleImpl);
            // Expected value (the original handle) comes first so that a failure message labels
            // the two sides correctly.
            assertEquals(handle.getMaterializedStateHandles(), ((ChangelogStateBackendHandleImpl) deserialized).getMaterializedStateHandles());
        }
    }
}
Use of org.apache.flink.runtime.state.KeyedStateHandle in the Apache Flink project.
Class CheckpointCoordinatorRestoringTest, method testStateRecoveryWithTopologyChange.
/**
 * Verifies that checkpointed state is restored to the right operators when the job topology
 * changes between the checkpoint and the restore.
 *
 * <p>Old topology: [operator1,operator2] * parallelism1 -> [operator3,operator4] * parallelism2
 *
 * <p>New topology: [operator5,operator1,operator2] * newParallelism1 -> [operator3,operator6] *
 * newParallelism2
 *
 * @param scaleType selects newParallelism2: 20 for INCREASE_PARALLELISM, 8 for
 *     DECREASE_PARALLELISM, otherwise the same value as parallelism2
 * @throws Exception if building the graph, restoring, or comparing state fails
 */
public void testStateRecoveryWithTopologyChange(TestScaleType scaleType) throws Exception {
/*
 * Old topology
 * CHAIN(op1 -> op2) * parallelism1 -> CHAIN(op3 -> op4) * parallelism2
 */
Tuple2<JobVertexID, OperatorID> id1 = generateIDPair();
Tuple2<JobVertexID, OperatorID> id2 = generateIDPair();
int parallelism1 = 10;
int maxParallelism1 = 64;
Tuple2<JobVertexID, OperatorID> id3 = generateIDPair();
Tuple2<JobVertexID, OperatorID> id4 = generateIDPair();
int parallelism2 = 10;
int maxParallelism2 = 64;
// key-group ranges of the old vertex2 subtasks, used to generate the checkpointed keyed state
List<KeyGroupRange> keyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, parallelism2);
// checkpointed state per operator ID; this map becomes the content of the completed checkpoint
Map<OperatorID, OperatorState> operatorStates = new HashMap<>();
// prepare vertex1 state
// op1 and op2 only receive managed and raw operator state, no keyed state
for (Tuple2<JobVertexID, OperatorID> id : Arrays.asList(id1, id2)) {
OperatorState taskState = new OperatorState(id.f1, parallelism1, maxParallelism1);
operatorStates.put(id.f1, taskState);
for (int index = 0; index < taskState.getParallelism(); index++) {
OperatorSubtaskState subtaskState = OperatorSubtaskState.builder().setManagedOperatorState(generatePartitionableStateHandle(id.f0, index, 2, 8, false)).setRawOperatorState(generatePartitionableStateHandle(id.f0, index, 2, 8, true)).build();
taskState.putState(index, subtaskState);
}
}
// expected operator state per operator of vertex2: entry 0 belongs to id3, entry 1 to id4
List<List<ChainedStateHandle<OperatorStateHandle>>> expectedManagedOperatorStates = new ArrayList<>();
List<List<ChainedStateHandle<OperatorStateHandle>>> expectedRawOperatorStates = new ArrayList<>();
// prepare vertex2 state
for (Tuple2<JobVertexID, OperatorID> id : Arrays.asList(id3, id4)) {
OperatorState operatorState = new OperatorState(id.f1, parallelism2, maxParallelism2);
operatorStates.put(id.f1, operatorState);
List<ChainedStateHandle<OperatorStateHandle>> expectedManagedOperatorState = new ArrayList<>();
List<ChainedStateHandle<OperatorStateHandle>> expectedRawOperatorState = new ArrayList<>();
expectedManagedOperatorStates.add(expectedManagedOperatorState);
expectedRawOperatorStates.add(expectedRawOperatorState);
for (int index = 0; index < operatorState.getParallelism(); index++) {
final OperatorSubtaskState.Builder stateBuilder = OperatorSubtaskState.builder();
OperatorStateHandle subManagedOperatorState = generateChainedPartitionableStateHandle(id.f0, index, 2, 8, false).get(0);
OperatorStateHandle subRawOperatorState = generateChainedPartitionableStateHandle(id.f0, index, 2, 8, true).get(0);
// keyed state (managed and raw) is only attached to op3; both checks use the same condition
if (id.f0.equals(id3.f0)) {
stateBuilder.setManagedKeyedState(generateKeyGroupState(id.f0, keyGroupPartitions2.get(index), false));
}
if (id.f0.equals(id3.f0)) {
stateBuilder.setRawKeyedState(generateKeyGroupState(id.f0, keyGroupPartitions2.get(index), true));
}
expectedManagedOperatorState.add(ChainedStateHandle.wrapSingleHandle(subManagedOperatorState));
expectedRawOperatorState.add(ChainedStateHandle.wrapSingleHandle(subRawOperatorState));
OperatorSubtaskState subtaskState = stateBuilder.setManagedOperatorState(subManagedOperatorState).setRawOperatorState(subRawOperatorState).build();
operatorState.putState(index, subtaskState);
}
}
/*
 * New topology
 * CHAIN(op5 -> op1 -> op2) * newParallelism1 -> CHAIN(op3 -> op6) * newParallelism2
 */
Tuple2<JobVertexID, OperatorID> id5 = generateIDPair();
int newParallelism1 = 10;
Tuple2<JobVertexID, OperatorID> id6 = generateIDPair();
int newParallelism2 = parallelism2;
if (scaleType == TestScaleType.INCREASE_PARALLELISM) {
newParallelism2 = 20;
} else if (scaleType == TestScaleType.DECREASE_PARALLELISM) {
newParallelism2 = 8;
}
// key-group ranges of the new vertex2 subtasks, used to build the expected keyed state after restore
List<KeyGroupRange> newKeyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, newParallelism2);
// The operator IDs are listed as (op2, op1, op5) and (op6, op3), i.e. the reverse of the chain
// order in the comment above. The index-based lookups below rely on getOperatorIDs() returning
// them in this order: the last entry is treated as the head operator.
ExecutionGraph newGraph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(id5.f0, newParallelism1, maxParallelism1, Stream.of(id2.f1, id1.f1, id5.f1).map(OperatorIDPair::generatedIDOnly).collect(Collectors.toList()), true).addJobVertex(id3.f0, newParallelism2, maxParallelism2, Stream.of(id6.f1, id3.f1).map(OperatorIDPair::generatedIDOnly).collect(Collectors.toList()), true).build();
ExecutionJobVertex newJobVertex1 = newGraph.getJobVertex(id5.f0);
ExecutionJobVertex newJobVertex2 = newGraph.getJobVertex(id3.f0);
Set<ExecutionJobVertex> tasks = new HashSet<>();
tasks.add(newJobVertex1);
tasks.add(newJobVertex2);
// completed checkpoint with ID 2 holding the state of the old topology
CompletedCheckpoint completedCheckpoint = new CompletedCheckpoint(newGraph.getJobID(), 2, System.currentTimeMillis(), System.currentTimeMillis() + 3000, operatorStates, Collections.<MasterState>emptyList(), CheckpointProperties.forCheckpoint(CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION), new TestCompletedCheckpointStorageLocation());
// set up the coordinator and validate the initial state
SharedStateRegistry sharedStateRegistry = SharedStateRegistry.DEFAULT_FACTORY.create(Executors.directExecutor(), emptyList());
CheckpointCoordinator coord = new CheckpointCoordinatorBuilder().setExecutionGraph(newGraph).setCompletedCheckpointStore(storeFor(sharedStateRegistry, () -> {
}, completedCheckpoint)).setTimer(manuallyTriggeredScheduledExecutor).build();
// restore the checkpoint into the vertices of the new graph
coord.restoreLatestCheckpointedStateToAll(tasks, true);
// verify vertex1: every subtask must have been restored from checkpoint 2
for (int i = 0; i < newJobVertex1.getParallelism(); i++) {
final List<OperatorIDPair> operatorIDs = newJobVertex1.getOperatorIDs();
JobManagerTaskRestore taskRestore = newJobVertex1.getTaskVertices()[i].getCurrentExecutionAttempt().getTaskRestore();
Assert.assertEquals(2L, taskRestore.getRestoreCheckpointId());
TaskStateSnapshot stateSnapshot = taskRestore.getTaskStateSnapshot();
// head operator of vertex1 is the last list entry (op5); it must carry no keyed state
OperatorSubtaskState headOpState = stateSnapshot.getSubtaskStateByOperatorID(operatorIDs.get(operatorIDs.size() - 1).getGeneratedOperatorID());
assertTrue(headOpState.getManagedKeyedState().isEmpty());
assertTrue(headOpState.getRawKeyedState().isEmpty());
// operator5
// new operator without checkpointed state: operator state must be empty
{
int operatorIndexInChain = 2;
OperatorSubtaskState opState = stateSnapshot.getSubtaskStateByOperatorID(operatorIDs.get(operatorIndexInChain).getGeneratedOperatorID());
assertTrue(opState.getManagedOperatorState().isEmpty());
assertTrue(opState.getRawOperatorState().isEmpty());
}
// operator1
// must receive exactly one managed and one raw handle whose content matches what was generated for id1
{
int operatorIndexInChain = 1;
OperatorSubtaskState opState = stateSnapshot.getSubtaskStateByOperatorID(operatorIDs.get(operatorIndexInChain).getGeneratedOperatorID());
OperatorStateHandle expectedManagedOpState = generatePartitionableStateHandle(id1.f0, i, 2, 8, false);
OperatorStateHandle expectedRawOpState = generatePartitionableStateHandle(id1.f0, i, 2, 8, true);
Collection<OperatorStateHandle> managedOperatorState = opState.getManagedOperatorState();
assertEquals(1, managedOperatorState.size());
assertTrue(CommonTestUtils.isStreamContentEqual(expectedManagedOpState.openInputStream(), managedOperatorState.iterator().next().openInputStream()));
Collection<OperatorStateHandle> rawOperatorState = opState.getRawOperatorState();
assertEquals(1, rawOperatorState.size());
assertTrue(CommonTestUtils.isStreamContentEqual(expectedRawOpState.openInputStream(), rawOperatorState.iterator().next().openInputStream()));
}
// operator2
// same checks as for operator1, against the handles generated for id2
{
int operatorIndexInChain = 0;
OperatorSubtaskState opState = stateSnapshot.getSubtaskStateByOperatorID(operatorIDs.get(operatorIndexInChain).getGeneratedOperatorID());
OperatorStateHandle expectedManagedOpState = generatePartitionableStateHandle(id2.f0, i, 2, 8, false);
OperatorStateHandle expectedRawOpState = generatePartitionableStateHandle(id2.f0, i, 2, 8, true);
Collection<OperatorStateHandle> managedOperatorState = opState.getManagedOperatorState();
assertEquals(1, managedOperatorState.size());
assertTrue(CommonTestUtils.isStreamContentEqual(expectedManagedOpState.openInputStream(), managedOperatorState.iterator().next().openInputStream()));
Collection<OperatorStateHandle> rawOperatorState = opState.getRawOperatorState();
assertEquals(1, rawOperatorState.size());
assertTrue(CommonTestUtils.isStreamContentEqual(expectedRawOpState.openInputStream(), rawOperatorState.iterator().next().openInputStream()));
}
}
// verify vertex2: collect the restored operator state of op3 per subtask for the comparison at the end
List<List<Collection<OperatorStateHandle>>> actualManagedOperatorStates = new ArrayList<>(newJobVertex2.getParallelism());
List<List<Collection<OperatorStateHandle>>> actualRawOperatorStates = new ArrayList<>(newJobVertex2.getParallelism());
for (int i = 0; i < newJobVertex2.getParallelism(); i++) {
final List<OperatorIDPair> operatorIDs = newJobVertex2.getOperatorIDs();
JobManagerTaskRestore taskRestore = newJobVertex2.getTaskVertices()[i].getCurrentExecutionAttempt().getTaskRestore();
Assert.assertEquals(2L, taskRestore.getRestoreCheckpointId());
TaskStateSnapshot stateSnapshot = taskRestore.getTaskStateSnapshot();
// operator 3
{
int operatorIndexInChain = 1;
OperatorSubtaskState opState = stateSnapshot.getSubtaskStateByOperatorID(operatorIDs.get(operatorIndexInChain).getGeneratedOperatorID());
List<Collection<OperatorStateHandle>> actualSubManagedOperatorState = new ArrayList<>(1);
actualSubManagedOperatorState.add(opState.getManagedOperatorState());
List<Collection<OperatorStateHandle>> actualSubRawOperatorState = new ArrayList<>(1);
actualSubRawOperatorState.add(opState.getRawOperatorState());
actualManagedOperatorStates.add(actualSubManagedOperatorState);
actualRawOperatorStates.add(actualSubRawOperatorState);
}
// operator 6
// new operator without checkpointed state: operator state must be empty
{
int operatorIndexInChain = 0;
OperatorSubtaskState opState = stateSnapshot.getSubtaskStateByOperatorID(operatorIDs.get(operatorIndexInChain).getGeneratedOperatorID());
assertTrue(opState.getManagedOperatorState().isEmpty());
assertTrue(opState.getRawOperatorState().isEmpty());
}
// expected keyed state is regenerated for the key-group range of new subtask i and compared
// with the keyed state of the head operator (last list entry, op3)
KeyGroupsStateHandle originalKeyedStateBackend = generateKeyGroupState(id3.f0, newKeyGroupPartitions2.get(i), false);
KeyGroupsStateHandle originalKeyedStateRaw = generateKeyGroupState(id3.f0, newKeyGroupPartitions2.get(i), true);
OperatorSubtaskState headOpState = stateSnapshot.getSubtaskStateByOperatorID(operatorIDs.get(operatorIDs.size() - 1).getGeneratedOperatorID());
Collection<KeyedStateHandle> keyedStateBackend = headOpState.getManagedKeyedState();
Collection<KeyedStateHandle> keyGroupStateRaw = headOpState.getRawKeyedState();
compareKeyedState(singletonList(originalKeyedStateBackend), keyedStateBackend);
compareKeyedState(singletonList(originalKeyedStateRaw), keyGroupStateRaw);
}
// only entry 0 of the expected lists (the state generated for id3) is compared with the restored state
comparePartitionableState(expectedManagedOperatorStates.get(0), actualManagedOperatorStates);
comparePartitionableState(expectedRawOperatorStates.get(0), actualRawOperatorStates);
}
Use of org.apache.flink.runtime.state.KeyedStateHandle in the Apache Flink project.
Class CheckpointCoordinatorTestingUtils, method compareKeyedState.
/**
 * Asserts that the actual keyed state handles together hold the same per-key-group content as
 * the first expected handle.
 *
 * <p>Only the first element of {@code expectPartitionedKeyGroupState} is examined. Every actual
 * handle must be a {@link KeyGroupsStateHandle}, and the key-group counts of all actual handles
 * must add up to the count of the expected handle. For each key group of the expected handle, the
 * value deserialized at its offset is compared with the value deserialized from every actual
 * handle whose range contains that key group.
 *
 * @param expectPartitionedKeyGroupState expected handles; the first one is the reference
 * @param actualPartitionedKeyGroupState handles produced by the code under test
 * @throws Exception if a stream cannot be opened or read, or a value cannot be deserialized
 */
static void compareKeyedState(Collection<KeyGroupsStateHandle> expectPartitionedKeyGroupState, Collection<? extends KeyedStateHandle> actualPartitionedKeyGroupState) throws Exception {
    final KeyGroupsStateHandle expectedHandle = expectPartitionedKeyGroupState.iterator().next();

    // First pass: type check and key-group count of the actual handles.
    int actualTotalKeyGroups = 0;
    for (KeyedStateHandle actual : actualPartitionedKeyGroupState) {
        assertTrue(actual instanceof KeyGroupsStateHandle);
        actualTotalKeyGroups += actual.getKeyGroupRange().getNumberOfKeyGroups();
    }
    assertEquals(expectedHandle.getKeyGroupRange().getNumberOfKeyGroups(), actualTotalKeyGroups);

    final ClassLoader classLoader = Thread.currentThread().getContextClassLoader();

    // Second pass: compare the content key group by key group.
    try (FSDataInputStream expectedStream = expectedHandle.openInputStream()) {
        for (int groupId : expectedHandle.getKeyGroupRange()) {
            expectedStream.seek(expectedHandle.getOffsetForKeyGroup(groupId));
            final int expectedState = InstantiationUtil.deserializeObject(expectedStream, classLoader);

            for (KeyedStateHandle candidate : actualPartitionedKeyGroupState) {
                assertTrue(candidate instanceof KeyGroupsStateHandle);
                final KeyGroupsStateHandle actualHandle = (KeyGroupsStateHandle) candidate;
                if (!actualHandle.getKeyGroupRange().contains(groupId)) {
                    // This handle does not cover the key group under inspection.
                    continue;
                }
                final long actualOffset = actualHandle.getOffsetForKeyGroup(groupId);
                try (FSDataInputStream actualStream = actualHandle.openInputStream()) {
                    actualStream.seek(actualOffset);
                    final int actualState = InstantiationUtil.deserializeObject(actualStream, classLoader);
                    assertEquals(expectedState, actualState);
                }
            }
        }
    }
}
Use of org.apache.flink.runtime.state.KeyedStateHandle in the Apache Flink project.
Class MetadataV2V3SerializerBase, method deserializeSubtaskState.
/**
 * Reads one {@link OperatorSubtaskState} from the stream.
 *
 * <p>The fields are consumed in this order: managed operator state and raw operator state (each
 * preceded by an int flag, where a non-zero value means a handle follows), managed keyed state,
 * raw keyed state, input channel state, and result subpartition state. A keyed state handle that
 * deserializes to {@code null} is not set on the result.
 *
 * @param dis stream positioned at the start of a serialized subtask state
 * @param context deserialization context handed to the handle deserializers; may be null
 * @return the subtask state assembled from the values read
 * @throws IOException if reading from the stream fails
 */
protected OperatorSubtaskState deserializeSubtaskState(DataInputStream dis, @Nullable DeserializationContext context) throws IOException {
    final OperatorSubtaskState.Builder builder = OperatorSubtaskState.builder();

    // Managed operator state: present only when the leading flag is non-zero.
    if (dis.readInt() != 0) {
        builder.setManagedOperatorState(deserializeOperatorStateHandle(dis, context));
    }

    // Raw operator state: same flag-then-handle layout.
    if (dis.readInt() != 0) {
        builder.setRawOperatorState(deserializeOperatorStateHandle(dis, context));
    }

    // Keyed state handles are always read; a null result means there is none to set.
    final KeyedStateHandle managedKeyed = deserializeKeyedStateHandle(dis, context);
    if (managedKeyed != null) {
        builder.setManagedKeyedState(managedKeyed);
    }

    final KeyedStateHandle rawKeyed = deserializeKeyedStateHandle(dis, context);
    if (rawKeyed != null) {
        builder.setRawKeyedState(rawKeyed);
    }

    builder.setInputChannelState(deserializeInputChannelStateHandle(dis, context));
    builder.setResultSubpartitionState(deserializeResultSubpartitionStateHandle(dis, context));

    return builder.build();
}
Aggregations