Use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.
The class CheckpointCoordinatorTest, method verifyStateRestore.
public static void verifyStateRestore(JobVertexID jobVertexID, ExecutionJobVertex executionJobVertex, List<KeyGroupRange> keyGroupPartitions) throws Exception {
    for (int i = 0; i < executionJobVertex.getParallelism(); i++) {
        TaskStateHandles taskStateHandles = executionJobVertex.getTaskVertices()[i].getCurrentExecutionAttempt().getTaskStateHandles();

        // non-partitioned (legacy) operator state must match byte for byte
        ChainedStateHandle<StreamStateHandle> expectNonPartitionedState = generateStateForVertex(jobVertexID, i);
        ChainedStateHandle<StreamStateHandle> actualNonPartitionedState = taskStateHandles.getLegacyOperatorState();
        assertTrue(CommonTestUtils.isSteamContentEqual(expectNonPartitionedState.get(0).openInputStream(), actualNonPartitionedState.get(0).openInputStream()));

        // managed operator (partitionable) state
        ChainedStateHandle<OperatorStateHandle> expectedOpStateBackend = generateChainedPartitionableStateHandle(jobVertexID, i, 2, 8, false);
        List<Collection<OperatorStateHandle>> actualPartitionableState = taskStateHandles.getManagedOperatorState();
        assertTrue(CommonTestUtils.isSteamContentEqual(expectedOpStateBackend.get(0).openInputStream(), actualPartitionableState.get(0).iterator().next().openInputStream()));

        // managed keyed state for this subtask's key-group range
        KeyGroupsStateHandle expectPartitionedKeyGroupState = generateKeyGroupState(jobVertexID, keyGroupPartitions.get(i), false);
        Collection<KeyGroupsStateHandle> actualPartitionedKeyGroupState = taskStateHandles.getManagedKeyedState();
        compareKeyedState(Collections.singletonList(expectPartitionedKeyGroupState), actualPartitionedKeyGroupState);
    }
}
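The assertions above lean on CommonTestUtils.isSteamContentEqual to compare the contents behind two state handles. As a rough illustration of what such a byte-wise comparison involves, here is a minimal stand-alone sketch using only java.io, not the Flink implementation:

// Hypothetical helper: returns true iff both streams yield exactly the same byte sequence.
public static boolean isStreamContentEqual(InputStream in1, InputStream in2) throws IOException {
    int b1;
    int b2;
    do {
        b1 = in1.read();
        b2 = in2.read();
        if (b1 != b2) {
            // differing byte, or one stream ended before the other
            return false;
        }
    } while (b1 != -1); // both streams hit end-of-stream together
    return true;
}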
Use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.
The class CheckpointCoordinatorTest, method doTestPartitionableStateRepartitioning.
private void doTestPartitionableStateRepartitioning(Random r, int oldParallelism, int newParallelism, int numNamedStates, int maxPartitionsPerState) {
    List<OperatorStateHandle> previousParallelOpInstanceStates = new ArrayList<>(oldParallelism);

    // create random named states with consecutive offsets for each old parallel instance
    for (int i = 0; i < oldParallelism; ++i) {
        Path fakePath = new Path("/fake-" + i);
        Map<String, OperatorStateHandle.StateMetaInfo> namedStatesToOffsets = new HashMap<>();
        int off = 0;
        for (int s = 0; s < numNamedStates; ++s) {
            long[] offs = new long[1 + r.nextInt(maxPartitionsPerState)];
            for (int o = 0; o < offs.length; ++o) {
                offs[o] = off;
                ++off;
            }
            OperatorStateHandle.Mode mode = r.nextInt(10) == 0 ? OperatorStateHandle.Mode.BROADCAST : OperatorStateHandle.Mode.SPLIT_DISTRIBUTE;
            namedStatesToOffsets.put("State-" + s, new OperatorStateHandle.StateMetaInfo(offs, mode));
        }
        previousParallelOpInstanceStates.add(new OperatorStateHandle(namedStatesToOffsets, new FileStateHandle(fakePath, -1)));
    }

    // build the expected result: BROADCAST states are replicated to every new instance,
    // SPLIT_DISTRIBUTE states keep exactly one copy of each offset
    Map<StreamStateHandle, Map<String, List<Long>>> expected = new HashMap<>();
    int expectedTotalPartitions = 0;
    for (OperatorStateHandle psh : previousParallelOpInstanceStates) {
        Map<String, OperatorStateHandle.StateMetaInfo> offsMap = psh.getStateNameToPartitionOffsets();
        Map<String, List<Long>> offsMapWithList = new HashMap<>(offsMap.size());
        for (Map.Entry<String, OperatorStateHandle.StateMetaInfo> e : offsMap.entrySet()) {
            long[] offs = e.getValue().getOffsets();
            int replication = e.getValue().getDistributionMode().equals(OperatorStateHandle.Mode.BROADCAST) ? newParallelism : 1;
            expectedTotalPartitions += replication * offs.length;
            List<Long> offsList = new ArrayList<>(offs.length);
            for (int i = 0; i < offs.length; ++i) {
                for (int p = 0; p < replication; ++p) {
                    offsList.add(offs[i]);
                }
            }
            offsMapWithList.put(e.getKey(), offsList);
        }
        expected.put(psh.getDelegateStateHandle(), offsMapWithList);
    }

    // repartition to the new parallelism
    OperatorStateRepartitioner repartitioner = RoundRobinOperatorStateRepartitioner.INSTANCE;
    List<Collection<OperatorStateHandle>> pshs = repartitioner.repartitionState(previousParallelOpInstanceStates, newParallelism);

    // collect the actual assignment and track the per-instance partition counts
    Map<StreamStateHandle, Map<String, List<Long>>> actual = new HashMap<>();
    int minCount = Integer.MAX_VALUE;
    int maxCount = 0;
    int actualTotalPartitions = 0;
    for (int p = 0; p < newParallelism; ++p) {
        int partitionCount = 0;
        Collection<OperatorStateHandle> pshc = pshs.get(p);
        for (OperatorStateHandle sh : pshc) {
            for (Map.Entry<String, OperatorStateHandle.StateMetaInfo> namedState : sh.getStateNameToPartitionOffsets().entrySet()) {
                Map<String, List<Long>> stateToOffsets = actual.get(sh.getDelegateStateHandle());
                if (stateToOffsets == null) {
                    stateToOffsets = new HashMap<>();
                    actual.put(sh.getDelegateStateHandle(), stateToOffsets);
                }
                List<Long> actualOffs = stateToOffsets.get(namedState.getKey());
                if (actualOffs == null) {
                    actualOffs = new ArrayList<>();
                    stateToOffsets.put(namedState.getKey(), actualOffs);
                }
                long[] add = namedState.getValue().getOffsets();
                for (int i = 0; i < add.length; ++i) {
                    actualOffs.add(add[i]);
                }
                partitionCount += namedState.getValue().getOffsets().length;
            }
        }
        minCount = Math.min(minCount, partitionCount);
        maxCount = Math.max(maxCount, partitionCount);
        actualTotalPartitions += partitionCount;
    }

    // sort the collected offsets so the comparison is order-independent
    for (Map<String, List<Long>> v : actual.values()) {
        for (List<Long> l : v.values()) {
            Collections.sort(l);
        }
    }

    int maxLoadDiff = maxCount - minCount;
    Assert.assertTrue("Difference in partition load is > 1 : " + maxLoadDiff, maxLoadDiff <= 1);
    Assert.assertEquals(expectedTotalPartitions, actualTotalPartitions);
    Assert.assertEquals(expected, actual);
}
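For orientation, here is a minimal sketch of driving the repartitioner directly, outside the randomized test above. The handle construction mirrors the test (a FileStateHandle with a fake path and placeholder size -1); the state name "buffered-elements", the two offsets per handle, and the parallelism change from 2 to 3 are illustrative values, not fixtures from the test:

// two old parallel instances, each with one SPLIT_DISTRIBUTE state of two partitions
List<OperatorStateHandle> oldStates = new ArrayList<>();
for (int i = 0; i < 2; i++) {
    Map<String, OperatorStateHandle.StateMetaInfo> meta = new HashMap<>();
    meta.put("buffered-elements", new OperatorStateHandle.StateMetaInfo(new long[] { 0L, 64L }, OperatorStateHandle.Mode.SPLIT_DISTRIBUTE));
    oldStates.add(new OperatorStateHandle(meta, new FileStateHandle(new Path("/fake-" + i), -1)));
}

// redistribute the 4 partitions round-robin over 3 new instances;
// with 4 partitions and 3 instances, no instance should hold more than 2
List<Collection<OperatorStateHandle>> newStates = RoundRobinOperatorStateRepartitioner.INSTANCE.repartitionState(oldStates, 3);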
Use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.
The class CheckpointCoordinatorTest, method testRestoreLatestCheckpointedState.
/**
 * Tests that the checkpointed partitioned and non-partitioned state is assigned properly to
 * the {@link Execution} upon recovery.
 */
@Test
public void testRestoreLatestCheckpointedState() throws Exception {
    final JobID jid = new JobID();
    final long timestamp = System.currentTimeMillis();
    final JobVertexID jobVertexID1 = new JobVertexID();
    final JobVertexID jobVertexID2 = new JobVertexID();
    int parallelism1 = 3;
    int parallelism2 = 2;
    int maxParallelism1 = 42;
    int maxParallelism2 = 13;

    final ExecutionJobVertex jobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, maxParallelism1);
    final ExecutionJobVertex jobVertex2 = mockExecutionJobVertex(jobVertexID2, parallelism2, maxParallelism2);

    List<ExecutionVertex> allExecutionVertices = new ArrayList<>(parallelism1 + parallelism2);
    allExecutionVertices.addAll(Arrays.asList(jobVertex1.getTaskVertices()));
    allExecutionVertices.addAll(Arrays.asList(jobVertex2.getTaskVertices()));
    ExecutionVertex[] arrayExecutionVertices = allExecutionVertices.toArray(new ExecutionVertex[allExecutionVertices.size()]);

    // set up the coordinator and validate the initial state
    CheckpointCoordinator coord = new CheckpointCoordinator(jid, 600000, 600000, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), arrayExecutionVertices, arrayExecutionVertices, arrayExecutionVertices, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), null, Executors.directExecutor());

    // trigger the checkpoint
    coord.triggerCheckpoint(timestamp, false);
    assertTrue(coord.getPendingCheckpoints().keySet().size() == 1);
    long checkpointId = Iterables.getOnlyElement(coord.getPendingCheckpoints().keySet());
    CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointId, 0L);

    List<KeyGroupRange> keyGroupPartitions1 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism1, parallelism1);
    List<KeyGroupRange> keyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, parallelism2);

    // generate state for each subtask of the first vertex and acknowledge the checkpoint
    for (int index = 0; index < jobVertex1.getParallelism(); index++) {
        ChainedStateHandle<StreamStateHandle> nonPartitionedState = generateStateForVertex(jobVertexID1, index);
        ChainedStateHandle<OperatorStateHandle> partitionableState = generateChainedPartitionableStateHandle(jobVertexID1, index, 2, 8, false);
        KeyGroupsStateHandle partitionedKeyGroupState = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), false);
        SubtaskState checkpointStateHandles = new SubtaskState(nonPartitionedState, partitionableState, null, partitionedKeyGroupState, null);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex1.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }

    // same for the second vertex
    for (int index = 0; index < jobVertex2.getParallelism(); index++) {
        ChainedStateHandle<StreamStateHandle> nonPartitionedState = generateStateForVertex(jobVertexID2, index);
        ChainedStateHandle<OperatorStateHandle> partitionableState = generateChainedPartitionableStateHandle(jobVertexID2, index, 2, 8, false);
        KeyGroupsStateHandle partitionedKeyGroupState = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), false);
        SubtaskState checkpointStateHandles = new SubtaskState(nonPartitionedState, partitionableState, null, partitionedKeyGroupState, null);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex2.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }

    List<CompletedCheckpoint> completedCheckpoints = coord.getSuccessfulCheckpoints();
    assertEquals(1, completedCheckpoints.size());

    Map<JobVertexID, ExecutionJobVertex> tasks = new HashMap<>();
    tasks.put(jobVertexID1, jobVertex1);
    tasks.put(jobVertexID2, jobVertex2);
    coord.restoreLatestCheckpointedState(tasks, true, false);

    // verify the restored state
    verifyStateRestore(jobVertexID1, jobVertex1, keyGroupPartitions1);
    verifyStateRestore(jobVertexID2, jobVertex2, keyGroupPartitions2);
}
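The test relies on StateAssignmentOperation.createKeyGroupPartitions to split the key-group space evenly across subtasks as contiguous ranges. As a rough, hypothetical stand-alone illustration of that kind of even split (not the runtime implementation), dividing maxParallelism1 = 42 key groups over parallelism1 = 3 subtasks might look like this:

// hypothetical sketch of a contiguous even split of key groups
int maxParallelism = 42;
int parallelism = 3;
for (int operatorIndex = 0; operatorIndex < parallelism; operatorIndex++) {
    int start = (operatorIndex * maxParallelism + parallelism - 1) / parallelism;
    int end = ((operatorIndex + 1) * maxParallelism - 1) / parallelism;
    System.out.println("subtask " + operatorIndex + " -> key groups [" + start + ", " + end + "]");
}
// prints [0, 13], [14, 27], [28, 41]: 14 key groups per subtask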
Use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.
The class CheckpointStateRestoreTest, method testNonRestoredState.
/**
 * Tests that the allow non restored state flag is correctly handled.
 *
 * The flag only applies for state that is part of the checkpoint.
 */
@Test
public void testNonRestoredState() throws Exception {
    // --- (1) Create tasks to restore checkpoint with ---
    JobVertexID jobVertexId1 = new JobVertexID();
    JobVertexID jobVertexId2 = new JobVertexID();

    // 1st JobVertex
    ExecutionVertex vertex11 = mockExecutionVertex(mockExecution(), jobVertexId1, 0, 3);
    ExecutionVertex vertex12 = mockExecutionVertex(mockExecution(), jobVertexId1, 1, 3);
    ExecutionVertex vertex13 = mockExecutionVertex(mockExecution(), jobVertexId1, 2, 3);
    // 2nd JobVertex
    ExecutionVertex vertex21 = mockExecutionVertex(mockExecution(), jobVertexId2, 0, 2);
    ExecutionVertex vertex22 = mockExecutionVertex(mockExecution(), jobVertexId2, 1, 2);

    ExecutionJobVertex jobVertex1 = mockExecutionJobVertex(jobVertexId1, new ExecutionVertex[] { vertex11, vertex12, vertex13 });
    ExecutionJobVertex jobVertex2 = mockExecutionJobVertex(jobVertexId2, new ExecutionVertex[] { vertex21, vertex22 });

    Map<JobVertexID, ExecutionJobVertex> tasks = new HashMap<>();
    tasks.put(jobVertexId1, jobVertex1);
    tasks.put(jobVertexId2, jobVertex2);

    CheckpointCoordinator coord = new CheckpointCoordinator(new JobID(), Integer.MAX_VALUE, Integer.MAX_VALUE, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), new ExecutionVertex[] {}, new ExecutionVertex[] {}, new ExecutionVertex[] {}, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), null, Executors.directExecutor());

    ChainedStateHandle<StreamStateHandle> serializedState = CheckpointCoordinatorTest.generateChainedStateHandle(new SerializableObject());

    // --- (2) Checkpoint misses state for a jobVertex (should work) ---
    Map<JobVertexID, TaskState> checkpointTaskStates = new HashMap<>();
    {
        TaskState taskState = new TaskState(jobVertexId1, 3, 3, 1);
        taskState.putState(0, new SubtaskState(serializedState, null, null, null, null));
        taskState.putState(1, new SubtaskState(serializedState, null, null, null, null));
        taskState.putState(2, new SubtaskState(serializedState, null, null, null, null));
        checkpointTaskStates.put(jobVertexId1, taskState);
    }
    CompletedCheckpoint checkpoint = new CompletedCheckpoint(new JobID(), 0, 1, 2, new HashMap<>(checkpointTaskStates));
    coord.getCheckpointStore().addCheckpoint(checkpoint);
    coord.restoreLatestCheckpointedState(tasks, true, false);
    coord.restoreLatestCheckpointedState(tasks, true, true);

    // --- (3) JobVertex missing for task state that is part of the checkpoint ---
    JobVertexID newJobVertexID = new JobVertexID();
    // There is no task for this
    {
        TaskState taskState = new TaskState(jobVertexId1, 1, 1, 1);
        taskState.putState(0, new SubtaskState(serializedState, null, null, null, null));
        checkpointTaskStates.put(newJobVertexID, taskState);
    }
    checkpoint = new CompletedCheckpoint(new JobID(), 1, 2, 3, new HashMap<>(checkpointTaskStates));
    coord.getCheckpointStore().addCheckpoint(checkpoint);

    // (i) Allow non restored state (should succeed)
    coord.restoreLatestCheckpointedState(tasks, true, true);

    // (ii) Don't allow non restored state (should fail)
    try {
        coord.restoreLatestCheckpointedState(tasks, true, false);
        fail("Did not throw the expected Exception.");
    } catch (IllegalStateException ignored) {
    }
}
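Conceptually, the restore path enforces the flag like this: every TaskState in the checkpoint must either map to a known ExecutionJobVertex or be explicitly allowed to go unrestored. A minimal, hypothetical sketch of that check, using the test's variable names (this is an illustration of the observed behavior, not the CheckpointCoordinator code):

// hypothetical illustration of the allow-non-restored-state semantics
for (Map.Entry<JobVertexID, TaskState> entry : checkpointTaskStates.entrySet()) {
    ExecutionJobVertex vertex = tasks.get(entry.getKey());
    if (vertex == null && !allowNonRestoredState) {
        // state exists in the checkpoint but no task can take it
        throw new IllegalStateException("No execution job vertex for job vertex ID " + entry.getKey());
    }
}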
Use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.
The class MigrationV0ToV1Test, method testSavepointMigrationV0ToV1.
/**
 * Simple test of savepoint methods.
 */
@Test
public void testSavepointMigrationV0ToV1() throws Exception {
    String target = tmp.getRoot().getAbsolutePath();
    assertEquals(0, tmp.getRoot().listFiles().length);

    long checkpointId = ThreadLocalRandom.current().nextLong(Integer.MAX_VALUE);
    int numTaskStates = 4;
    int numSubtaskStates = 16;

    Collection<org.apache.flink.migration.runtime.checkpoint.TaskState> expected = createTaskStatesOld(numTaskStates, numSubtaskStates);
    SavepointV0 savepoint = new SavepointV0(checkpointId, expected);
    assertEquals(SavepointV0.VERSION, savepoint.getVersion());
    assertEquals(checkpointId, savepoint.getCheckpointId());
    assertEquals(expected, savepoint.getOldTaskStates());
    assertFalse(savepoint.getOldTaskStates().isEmpty());

    Exception latestException = null;
    Path path = null;
    FSDataOutputStream fdos = null;
    FileSystem fs = null;
    try {
        // Try to create a FS output stream
        for (int attempt = 0; attempt < 10; attempt++) {
            path = new Path(target, FileUtils.getRandomFilename("savepoint-"));
            if (fs == null) {
                fs = FileSystem.get(path.toUri());
            }
            try {
                fdos = fs.create(path, false);
                break;
            } catch (Exception e) {
                latestException = e;
            }
        }
        if (fdos == null) {
            throw new IOException("Failed to create file output stream at " + path, latestException);
        }

        try (DataOutputStream dos = new DataOutputStream(fdos)) {
            dos.writeInt(SavepointStore.MAGIC_NUMBER);
            dos.writeInt(savepoint.getVersion());
            SavepointV0Serializer.INSTANCE.serializeOld(savepoint, dos);
        }

        ClassLoader cl = Thread.currentThread().getContextClassLoader();
        Savepoint sp = SavepointStore.loadSavepoint(path.toString(), cl);

        int t = 0;
        for (TaskState taskState : sp.getTaskStates()) {
            for (int p = 0; p < taskState.getParallelism(); ++p) {
                SubtaskState subtaskState = taskState.getState(p);
                ChainedStateHandle<StreamStateHandle> legacyOperatorState = subtaskState.getLegacyOperatorState();
                for (int c = 0; c < legacyOperatorState.getLength(); ++c) {
                    StreamStateHandle stateHandle = legacyOperatorState.get(c);
                    try (InputStream is = stateHandle.openInputStream()) {
                        Tuple4<Integer, Integer, Integer, Integer> expTestState = new Tuple4<>(0, t, p, c);
                        Tuple4<Integer, Integer, Integer, Integer> actTestState;
                        // check function state
                        if (p % 4 != 0) {
                            assertEquals(1, is.read());
                            actTestState = InstantiationUtil.deserializeObject(is, cl);
                            assertEquals(expTestState, actTestState);
                        } else {
                            assertEquals(0, is.read());
                        }
                        // check operator state
                        expTestState.f0 = 1;
                        actTestState = InstantiationUtil.deserializeObject(is, cl);
                        assertEquals(expTestState, actTestState);
                    }
                }

                // check keyed state
                KeyGroupsStateHandle keyGroupsStateHandle = subtaskState.getManagedKeyedState();
                if (t % 3 != 0) {
                    assertEquals(1, keyGroupsStateHandle.getNumberOfKeyGroups());
                    assertEquals(p, keyGroupsStateHandle.getGroupRangeOffsets().getKeyGroupRange().getStartKeyGroup());
                    ByteStreamStateHandle stateHandle = (ByteStreamStateHandle) keyGroupsStateHandle.getDelegateStateHandle();
                    HashMap<String, KvStateSnapshot<?, ?, ?, ?>> testKeyedState = MigrationInstantiationUtil.deserializeObject(stateHandle.getData(), cl);
                    assertEquals(2, testKeyedState.size());
                    for (KvStateSnapshot<?, ?, ?, ?> snapshot : testKeyedState.values()) {
                        MemValueState.Snapshot<?, ?, ?> castedSnapshot = (MemValueState.Snapshot<?, ?, ?>) snapshot;
                        byte[] data = castedSnapshot.getData();
                        assertEquals(t, data[0]);
                        assertEquals(p, data[1]);
                    }
                } else {
                    assertEquals(null, keyGroupsStateHandle);
                }
            }
            ++t;
        }
        savepoint.dispose();
    } finally {
        // Dispose
        SavepointStore.removeSavepointFile(path.toString());
    }
}
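The on-disk layout written by the test is simply a magic number, a version integer, and the version-specific payload. A minimal, hypothetical sketch of reading that header back, reusing the fs and path variables from the test (the real dispatch to a versioned serializer happens inside SavepointStore.loadSavepoint):

// hypothetical illustration of parsing the savepoint header written above
try (DataInputStream dis = new DataInputStream(fs.open(path))) {
    int magic = dis.readInt();
    if (magic != SavepointStore.MAGIC_NUMBER) {
        throw new IOException("Not a savepoint file: bad magic number " + magic);
    }
    int version = dis.readInt();
    // a serializer matching 'version' would now deserialize the payload
}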