Use of org.apache.flink.runtime.state.OperatorStateHandle in project flink by apache.
From the class CheckpointCoordinatorTest, the method testRestoreLatestCheckpointedStateWithChangingParallelism:
/**
 * Tests checkpoint restoration when the parallelism of a job vertex with partitioned state
 * changes: vertex 2 is scaled out from 2 to 13 subtasks when {@code scaleOut} is true, and
 * scaled in from 13 to 2 otherwise.
 *
 * @throws Exception
 */
private void testRestoreLatestCheckpointedStateWithChangingParallelism(boolean scaleOut) throws Exception {
    final JobID jid = new JobID();
    final long timestamp = System.currentTimeMillis();
    final JobVertexID jobVertexID1 = new JobVertexID();
    final JobVertexID jobVertexID2 = new JobVertexID();
    int parallelism1 = 3;
    int parallelism2 = scaleOut ? 2 : 13;
    int maxParallelism1 = 42;
    int maxParallelism2 = 13;
    int newParallelism2 = scaleOut ? 13 : 2;

    final ExecutionJobVertex jobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, maxParallelism1);
    final ExecutionJobVertex jobVertex2 = mockExecutionJobVertex(jobVertexID2, parallelism2, maxParallelism2);

    List<ExecutionVertex> allExecutionVertices = new ArrayList<>(parallelism1 + parallelism2);
    allExecutionVertices.addAll(Arrays.asList(jobVertex1.getTaskVertices()));
    allExecutionVertices.addAll(Arrays.asList(jobVertex2.getTaskVertices()));
    ExecutionVertex[] arrayExecutionVertices = allExecutionVertices.toArray(new ExecutionVertex[allExecutionVertices.size()]);

    // set up the coordinator and validate the initial state
    CheckpointCoordinator coord = new CheckpointCoordinator(
        jid,
        600000,
        600000,
        0,
        Integer.MAX_VALUE,
        ExternalizedCheckpointSettings.none(),
        arrayExecutionVertices,
        arrayExecutionVertices,
        arrayExecutionVertices,
        new StandaloneCheckpointIDCounter(),
        new StandaloneCompletedCheckpointStore(1),
        null,
        Executors.directExecutor());

    // trigger the checkpoint
    coord.triggerCheckpoint(timestamp, false);

    assertTrue(coord.getPendingCheckpoints().keySet().size() == 1);
    long checkpointId = Iterables.getOnlyElement(coord.getPendingCheckpoints().keySet());
    CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointId, 0L);

    List<KeyGroupRange> keyGroupPartitions1 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism1, parallelism1);
    List<KeyGroupRange> keyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, parallelism2);

    // vertex 1: acknowledge with legacy, managed operator, and keyed state (managed + raw)
    for (int index = 0; index < jobVertex1.getParallelism(); index++) {
        ChainedStateHandle<StreamStateHandle> valueSizeTuple = generateStateForVertex(jobVertexID1, index);
        ChainedStateHandle<OperatorStateHandle> opStateBackend = generateChainedPartitionableStateHandle(jobVertexID1, index, 2, 8, false);
        KeyGroupsStateHandle keyedStateBackend = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), false);
        KeyGroupsStateHandle keyedStateRaw = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), true);
        SubtaskState checkpointStateHandles = new SubtaskState(valueSizeTuple, opStateBackend, null, keyedStateBackend, keyedStateRaw);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(
            jid,
            jobVertex1.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(),
            checkpointId,
            new CheckpointMetrics(),
            checkpointStateHandles);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }

    // vertex 2: no legacy state, but both managed and raw operator state, which will be repartitioned
    final List<ChainedStateHandle<OperatorStateHandle>> expectedOpStatesBackend = new ArrayList<>(jobVertex2.getParallelism());
    final List<ChainedStateHandle<OperatorStateHandle>> expectedOpStatesRaw = new ArrayList<>(jobVertex2.getParallelism());
    for (int index = 0; index < jobVertex2.getParallelism(); index++) {
        KeyGroupsStateHandle keyedStateBackend = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), false);
        KeyGroupsStateHandle keyedStateRaw = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), true);
        ChainedStateHandle<OperatorStateHandle> opStateBackend = generateChainedPartitionableStateHandle(jobVertexID2, index, 2, 8, false);
        ChainedStateHandle<OperatorStateHandle> opStateRaw = generateChainedPartitionableStateHandle(jobVertexID2, index, 2, 8, true);
        expectedOpStatesBackend.add(opStateBackend);
        expectedOpStatesRaw.add(opStateRaw);
        SubtaskState checkpointStateHandles = new SubtaskState(
            new ChainedStateHandle<>(Collections.<StreamStateHandle>singletonList(null)),
            opStateBackend,
            opStateRaw,
            keyedStateBackend,
            keyedStateRaw);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(
            jid,
            jobVertex2.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(),
            checkpointId,
            new CheckpointMetrics(),
            checkpointStateHandles);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }

    List<CompletedCheckpoint> completedCheckpoints = coord.getSuccessfulCheckpoints();
    assertEquals(1, completedCheckpoints.size());

    Map<JobVertexID, ExecutionJobVertex> tasks = new HashMap<>();
    List<KeyGroupRange> newKeyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, newParallelism2);

    final ExecutionJobVertex newJobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, maxParallelism1);
    // rescale vertex 2
    final ExecutionJobVertex newJobVertex2 = mockExecutionJobVertex(jobVertexID2, newParallelism2, maxParallelism2);
    tasks.put(jobVertexID1, newJobVertex1);
    tasks.put(jobVertexID2, newJobVertex2);
    coord.restoreLatestCheckpointedState(tasks, true, false);

    // verify the restored state
    verifyStateRestore(jobVertexID1, newJobVertex1, keyGroupPartitions1);
    List<List<Collection<OperatorStateHandle>>> actualOpStatesBackend = new ArrayList<>(newJobVertex2.getParallelism());
    List<List<Collection<OperatorStateHandle>>> actualOpStatesRaw = new ArrayList<>(newJobVertex2.getParallelism());
    for (int i = 0; i < newJobVertex2.getParallelism(); i++) {
        KeyGroupsStateHandle originalKeyedStateBackend = generateKeyGroupState(jobVertexID2, newKeyGroupPartitions2.get(i), false);
        KeyGroupsStateHandle originalKeyedStateRaw = generateKeyGroupState(jobVertexID2, newKeyGroupPartitions2.get(i), true);
        TaskStateHandles taskStateHandles = newJobVertex2.getTaskVertices()[i].getCurrentExecutionAttempt().getTaskStateHandles();
        ChainedStateHandle<StreamStateHandle> operatorState = taskStateHandles.getLegacyOperatorState();
        List<Collection<OperatorStateHandle>> opStateBackend = taskStateHandles.getManagedOperatorState();
        List<Collection<OperatorStateHandle>> opStateRaw = taskStateHandles.getRawOperatorState();
        Collection<KeyGroupsStateHandle> keyGroupStateBackend = taskStateHandles.getManagedKeyedState();
        Collection<KeyGroupsStateHandle> keyGroupStateRaw = taskStateHandles.getRawKeyedState();
        actualOpStatesBackend.add(opStateBackend);
        actualOpStatesRaw.add(opStateRaw);
        assertNull(operatorState);
        compareKeyedState(Collections.singletonList(originalKeyedStateBackend), keyGroupStateBackend);
        compareKeyedState(Collections.singletonList(originalKeyedStateRaw), keyGroupStateRaw);
    }
    comparePartitionableState(expectedOpStatesBackend, actualOpStatesBackend);
    comparePartitionableState(expectedOpStatesRaw, actualOpStatesRaw);
}
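The rescaling assertions above lean on how createKeyGroupPartitions slices the key space. A minimal standalone sketch of that arithmetic (plain integer math mirroring Flink's key-group range assignment; the method name below is ours, not Flink API):

    // Sketch: split maxParallelism key groups into contiguous, near-even ranges,
    // one [start, end] pair (both inclusive) per subtask.
    static int[][] sketchKeyGroupPartitions(int maxParallelism, int parallelism) {
        int[][] ranges = new int[parallelism][2];
        for (int i = 0; i < parallelism; i++) {
            ranges[i][0] = (i * maxParallelism + parallelism - 1) / parallelism;
            ranges[i][1] = ((i + 1) * maxParallelism - 1) / parallelism;
        }
        return ranges;
    }

For maxParallelism2 = 13 this yields [0, 6] and [7, 12] at parallelism 2, and thirteen single-key-group ranges at parallelism 13, which is why the test can regenerate the expected keyed state for each new subtask directly from newKeyGroupPartitions2.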
Use of org.apache.flink.runtime.state.OperatorStateHandle in project flink by apache.
From the class CheckpointCoordinatorTest, the method generateChainedPartitionableStateHandle:
private static ChainedStateHandle<OperatorStateHandle> generateChainedPartitionableStateHandle(Map<String, List<? extends Serializable>> states) throws IOException {
    List<List<? extends Serializable>> namedStateSerializables = new ArrayList<>(states.size());
    for (Map.Entry<String, List<? extends Serializable>> entry : states.entrySet()) {
        namedStateSerializables.add(entry.getValue());
    }

    Tuple2<byte[], List<long[]>> serializationWithOffsets = serializeTogetherAndTrackOffsets(namedStateSerializables);

    Map<String, OperatorStateHandle.StateMetaInfo> offsetsMap = new HashMap<>(states.size());
    int idx = 0;
    for (Map.Entry<String, List<? extends Serializable>> entry : states.entrySet()) {
        offsetsMap.put(
            entry.getKey(),
            new OperatorStateHandle.StateMetaInfo(serializationWithOffsets.f1.get(idx), OperatorStateHandle.Mode.SPLIT_DISTRIBUTE));
        ++idx;
    }

    ByteStreamStateHandle streamStateHandle = new TestByteStreamStateHandleDeepCompare(
        String.valueOf(UUID.randomUUID()),
        serializationWithOffsets.f0);
    OperatorStateHandle operatorStateHandle = new OperatorStateHandle(offsetsMap, streamStateHandle);
    return ChainedStateHandle.wrapSingleHandle(operatorStateHandle);
}
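To make the offset bookkeeping concrete, a hypothetical call (state names and values invented for illustration):

    // Hypothetical usage: two named states serialized into one byte stream.
    Map<String, List<? extends Serializable>> states = new HashMap<>();
    states.put("bufferedElements", Arrays.asList(1, 2, 3));
    states.put("pendingOffsets", Arrays.asList(42L, 43L));
    ChainedStateHandle<OperatorStateHandle> handle = generateChainedPartitionableStateHandle(states);
    // The resulting OperatorStateHandle maps each state name to a StateMetaInfo whose
    // offsets point into the shared byte stream, all in SPLIT_DISTRIBUTE mode, so a
    // repartitioner can later hand out individual partitions of each named state.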
Use of org.apache.flink.runtime.state.OperatorStateHandle in project flink by apache.
From the class CheckpointCoordinatorTest, the method verifyStateRestore:
public static void verifyStateRestore(JobVertexID jobVertexID, ExecutionJobVertex executionJobVertex, List<KeyGroupRange> keyGroupPartitions) throws Exception {
    for (int i = 0; i < executionJobVertex.getParallelism(); i++) {
        TaskStateHandles taskStateHandles = executionJobVertex.getTaskVertices()[i].getCurrentExecutionAttempt().getTaskStateHandles();

        ChainedStateHandle<StreamStateHandle> expectNonPartitionedState = generateStateForVertex(jobVertexID, i);
        ChainedStateHandle<StreamStateHandle> actualNonPartitionedState = taskStateHandles.getLegacyOperatorState();
        assertTrue(CommonTestUtils.isSteamContentEqual(
            expectNonPartitionedState.get(0).openInputStream(),
            actualNonPartitionedState.get(0).openInputStream()));

        ChainedStateHandle<OperatorStateHandle> expectedOpStateBackend = generateChainedPartitionableStateHandle(jobVertexID, i, 2, 8, false);
        List<Collection<OperatorStateHandle>> actualPartitionableState = taskStateHandles.getManagedOperatorState();
        assertTrue(CommonTestUtils.isSteamContentEqual(
            expectedOpStateBackend.get(0).openInputStream(),
            actualPartitionableState.get(0).iterator().next().openInputStream()));

        KeyGroupsStateHandle expectPartitionedKeyGroupState = generateKeyGroupState(jobVertexID, keyGroupPartitions.get(i), false);
        Collection<KeyGroupsStateHandle> actualPartitionedKeyGroupState = taskStateHandles.getManagedKeyedState();
        compareKeyedState(Collections.singletonList(expectPartitionedKeyGroupState), actualPartitionedKeyGroupState);
    }
}
Use of org.apache.flink.runtime.state.OperatorStateHandle in project flink by apache.
From the class CheckpointCoordinatorTest, the method doTestPartitionableStateRepartitioning:
private void doTestPartitionableStateRepartitioning(Random r, int oldParallelism, int newParallelism, int numNamedStates, int maxPartitionsPerState) {
    // build random operator state for the old parallelism: per subtask, a handle with
    // numNamedStates named states, each with 1..maxPartitionsPerState partition offsets
    List<OperatorStateHandle> previousParallelOpInstanceStates = new ArrayList<>(oldParallelism);
    for (int i = 0; i < oldParallelism; ++i) {
        Path fakePath = new Path("/fake-" + i);
        Map<String, OperatorStateHandle.StateMetaInfo> namedStatesToOffsets = new HashMap<>();
        int off = 0;
        for (int s = 0; s < numNamedStates; ++s) {
            long[] offs = new long[1 + r.nextInt(maxPartitionsPerState)];
            for (int o = 0; o < offs.length; ++o) {
                offs[o] = off;
                ++off;
            }
            OperatorStateHandle.Mode mode = r.nextInt(10) == 0 ?
                OperatorStateHandle.Mode.BROADCAST : OperatorStateHandle.Mode.SPLIT_DISTRIBUTE;
            namedStatesToOffsets.put("State-" + s, new OperatorStateHandle.StateMetaInfo(offs, mode));
        }
        previousParallelOpInstanceStates.add(new OperatorStateHandle(namedStatesToOffsets, new FileStateHandle(fakePath, -1)));
    }

    // compute the expected result: broadcast states are replicated to every new subtask,
    // split-distribute states are handed out exactly once
    Map<StreamStateHandle, Map<String, List<Long>>> expected = new HashMap<>();
    int expectedTotalPartitions = 0;
    for (OperatorStateHandle psh : previousParallelOpInstanceStates) {
        Map<String, OperatorStateHandle.StateMetaInfo> offsMap = psh.getStateNameToPartitionOffsets();
        Map<String, List<Long>> offsMapWithList = new HashMap<>(offsMap.size());
        for (Map.Entry<String, OperatorStateHandle.StateMetaInfo> e : offsMap.entrySet()) {
            long[] offs = e.getValue().getOffsets();
            int replication = e.getValue().getDistributionMode().equals(OperatorStateHandle.Mode.BROADCAST) ? newParallelism : 1;
            expectedTotalPartitions += replication * offs.length;
            List<Long> offsList = new ArrayList<>(offs.length);
            for (int i = 0; i < offs.length; ++i) {
                for (int p = 0; p < replication; ++p) {
                    offsList.add(offs[i]);
                }
            }
            offsMapWithList.put(e.getKey(), offsList);
        }
        expected.put(psh.getDelegateStateHandle(), offsMapWithList);
    }

    OperatorStateRepartitioner repartitioner = RoundRobinOperatorStateRepartitioner.INSTANCE;
    List<Collection<OperatorStateHandle>> pshs = repartitioner.repartitionState(previousParallelOpInstanceStates, newParallelism);

    // collect the actual assignment and track the min/max partition count per new subtask
    Map<StreamStateHandle, Map<String, List<Long>>> actual = new HashMap<>();
    int minCount = Integer.MAX_VALUE;
    int maxCount = 0;
    int actualTotalPartitions = 0;
    for (int p = 0; p < newParallelism; ++p) {
        int partitionCount = 0;
        Collection<OperatorStateHandle> pshc = pshs.get(p);
        for (OperatorStateHandle sh : pshc) {
            for (Map.Entry<String, OperatorStateHandle.StateMetaInfo> namedState : sh.getStateNameToPartitionOffsets().entrySet()) {
                Map<String, List<Long>> stateToOffsets = actual.get(sh.getDelegateStateHandle());
                if (stateToOffsets == null) {
                    stateToOffsets = new HashMap<>();
                    actual.put(sh.getDelegateStateHandle(), stateToOffsets);
                }
                List<Long> actualOffs = stateToOffsets.get(namedState.getKey());
                if (actualOffs == null) {
                    actualOffs = new ArrayList<>();
                    stateToOffsets.put(namedState.getKey(), actualOffs);
                }
                long[] add = namedState.getValue().getOffsets();
                for (int i = 0; i < add.length; ++i) {
                    actualOffs.add(add[i]);
                }
                partitionCount += namedState.getValue().getOffsets().length;
            }
        }
        minCount = Math.min(minCount, partitionCount);
        maxCount = Math.max(maxCount, partitionCount);
        actualTotalPartitions += partitionCount;
    }

    // sort the collected offsets so that `actual` is comparable with `expected`
    for (Map<String, List<Long>> v : actual.values()) {
        for (List<Long> l : v.values()) {
            Collections.sort(l);
        }
    }

    int maxLoadDiff = maxCount - minCount;
    Assert.assertTrue("Difference in partition load is > 1 : " + maxLoadDiff, maxLoadDiff <= 1);
    Assert.assertEquals(expectedTotalPartitions, actualTotalPartitions);
    Assert.assertEquals(expected, actual);
}
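The two count assertions encode the repartitioner's contract: every partition is assigned (broadcast ones once per new subtask) and the assignment stays balanced. A standalone sketch of why round-robin keeps the per-subtask spread within one (illustrative numbers, plain arithmetic rather than Flink API):

    // 3 old subtasks, each with one SPLIT_DISTRIBUTE state of 4 partitions -> 12 in total,
    // dealt round-robin onto 5 new subtasks.
    int totalPartitions = 3 * 4;
    int newParallelism = 5;
    int[] perSubtask = new int[newParallelism];
    for (int p = 0; p < totalPartitions; p++) {
        perSubtask[p % newParallelism]++;
    }
    // perSubtask is {3, 3, 2, 2, 2}: maxCount - minCount == 1, so maxLoadDiff <= 1 holds.
    // A BROADCAST state instead contributes all of its partitions to every new subtask,
    // which is why expectedTotalPartitions multiplies by the replication factor above.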
Use of org.apache.flink.runtime.state.OperatorStateHandle in project flink by apache.
From the class CheckpointCoordinatorTest, the method testRestoreLatestCheckpointedState:
/**
 * Tests that the checkpointed partitioned and non-partitioned state is assigned properly to
 * the {@link Execution} upon recovery.
 *
 * @throws Exception
 */
@Test
public void testRestoreLatestCheckpointedState() throws Exception {
    final JobID jid = new JobID();
    final long timestamp = System.currentTimeMillis();
    final JobVertexID jobVertexID1 = new JobVertexID();
    final JobVertexID jobVertexID2 = new JobVertexID();
    int parallelism1 = 3;
    int parallelism2 = 2;
    int maxParallelism1 = 42;
    int maxParallelism2 = 13;

    final ExecutionJobVertex jobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, maxParallelism1);
    final ExecutionJobVertex jobVertex2 = mockExecutionJobVertex(jobVertexID2, parallelism2, maxParallelism2);

    List<ExecutionVertex> allExecutionVertices = new ArrayList<>(parallelism1 + parallelism2);
    allExecutionVertices.addAll(Arrays.asList(jobVertex1.getTaskVertices()));
    allExecutionVertices.addAll(Arrays.asList(jobVertex2.getTaskVertices()));
    ExecutionVertex[] arrayExecutionVertices = allExecutionVertices.toArray(new ExecutionVertex[allExecutionVertices.size()]);

    // set up the coordinator and validate the initial state
    CheckpointCoordinator coord = new CheckpointCoordinator(
        jid,
        600000,
        600000,
        0,
        Integer.MAX_VALUE,
        ExternalizedCheckpointSettings.none(),
        arrayExecutionVertices,
        arrayExecutionVertices,
        arrayExecutionVertices,
        new StandaloneCheckpointIDCounter(),
        new StandaloneCompletedCheckpointStore(1),
        null,
        Executors.directExecutor());

    // trigger the checkpoint
    coord.triggerCheckpoint(timestamp, false);

    assertTrue(coord.getPendingCheckpoints().keySet().size() == 1);
    long checkpointId = Iterables.getOnlyElement(coord.getPendingCheckpoints().keySet());
    CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointId, 0L);

    List<KeyGroupRange> keyGroupPartitions1 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism1, parallelism1);
    List<KeyGroupRange> keyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, parallelism2);

    for (int index = 0; index < jobVertex1.getParallelism(); index++) {
        ChainedStateHandle<StreamStateHandle> nonPartitionedState = generateStateForVertex(jobVertexID1, index);
        ChainedStateHandle<OperatorStateHandle> partitionableState = generateChainedPartitionableStateHandle(jobVertexID1, index, 2, 8, false);
        KeyGroupsStateHandle partitionedKeyGroupState = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), false);
        SubtaskState checkpointStateHandles = new SubtaskState(nonPartitionedState, partitionableState, null, partitionedKeyGroupState, null);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(
            jid,
            jobVertex1.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(),
            checkpointId,
            new CheckpointMetrics(),
            checkpointStateHandles);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }

    for (int index = 0; index < jobVertex2.getParallelism(); index++) {
        ChainedStateHandle<StreamStateHandle> nonPartitionedState = generateStateForVertex(jobVertexID2, index);
        ChainedStateHandle<OperatorStateHandle> partitionableState = generateChainedPartitionableStateHandle(jobVertexID2, index, 2, 8, false);
        KeyGroupsStateHandle partitionedKeyGroupState = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), false);
        SubtaskState checkpointStateHandles = new SubtaskState(nonPartitionedState, partitionableState, null, partitionedKeyGroupState, null);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(
            jid,
            jobVertex2.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(),
            checkpointId,
            new CheckpointMetrics(),
            checkpointStateHandles);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }

    List<CompletedCheckpoint> completedCheckpoints = coord.getSuccessfulCheckpoints();
    assertEquals(1, completedCheckpoints.size());

    Map<JobVertexID, ExecutionJobVertex> tasks = new HashMap<>();
    tasks.put(jobVertexID1, jobVertex1);
    tasks.put(jobVertexID2, jobVertex2);
    coord.restoreLatestCheckpointedState(tasks, true, false);

    // verify the restored state
    verifyStateRestore(jobVertexID1, jobVertex1, keyGroupPartitions1);
    verifyStateRestore(jobVertexID2, jobVertex2, keyGroupPartitions2);
}
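Reading the SubtaskState construction across both tests, the five constructor arguments appear in this order. The annotation below reuses the loop locals from the test above; the parameter names are inferred from the call sites, so treat them as descriptive rather than authoritative:

    // Argument order as used in these tests (names are ours, inferred from call sites):
    new SubtaskState(
        nonPartitionedState,      // ChainedStateHandle<StreamStateHandle>: legacy, non-partitioned state
        partitionableState,       // ChainedStateHandle<OperatorStateHandle>: managed operator state
        null,                     // raw operator state, null when the test does not exercise it
        partitionedKeyGroupState, // KeyGroupsStateHandle: managed keyed state
        null);                    // raw keyed state, null when the test does not exercise it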