use of org.apache.flink.runtime.state.SharedStateRegistry in project flink by apache.
the class CheckpointCoordinatorRestoringTest method testStateRecoveryWithTopologyChange.
/**
* old topology. [operator1,operator2] * parallelism1 -> [operator3,operator4] * parallelism2
*
* <p>new topology
*
* <p>[operator5,operator1,operator3] * newParallelism1 -> [operator3, operator6] *
* newParallelism2
*/
public void testStateRecoveryWithTopologyChange(TestScaleType scaleType) throws Exception {
/*
* Old topology
* CHAIN(op1 -> op2) * parallelism1 -> CHAIN(op3 -> op4) * parallelism2
*/
Tuple2<JobVertexID, OperatorID> id1 = generateIDPair();
Tuple2<JobVertexID, OperatorID> id2 = generateIDPair();
int parallelism1 = 10;
int maxParallelism1 = 64;
Tuple2<JobVertexID, OperatorID> id3 = generateIDPair();
Tuple2<JobVertexID, OperatorID> id4 = generateIDPair();
int parallelism2 = 10;
int maxParallelism2 = 64;
List<KeyGroupRange> keyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, parallelism2);
Map<OperatorID, OperatorState> operatorStates = new HashMap<>();
// prepare vertex1 state
for (Tuple2<JobVertexID, OperatorID> id : Arrays.asList(id1, id2)) {
OperatorState taskState = new OperatorState(id.f1, parallelism1, maxParallelism1);
operatorStates.put(id.f1, taskState);
for (int index = 0; index < taskState.getParallelism(); index++) {
OperatorSubtaskState subtaskState = OperatorSubtaskState.builder().setManagedOperatorState(generatePartitionableStateHandle(id.f0, index, 2, 8, false)).setRawOperatorState(generatePartitionableStateHandle(id.f0, index, 2, 8, true)).build();
taskState.putState(index, subtaskState);
}
}
List<List<ChainedStateHandle<OperatorStateHandle>>> expectedManagedOperatorStates = new ArrayList<>();
List<List<ChainedStateHandle<OperatorStateHandle>>> expectedRawOperatorStates = new ArrayList<>();
// prepare vertex2 state
for (Tuple2<JobVertexID, OperatorID> id : Arrays.asList(id3, id4)) {
OperatorState operatorState = new OperatorState(id.f1, parallelism2, maxParallelism2);
operatorStates.put(id.f1, operatorState);
List<ChainedStateHandle<OperatorStateHandle>> expectedManagedOperatorState = new ArrayList<>();
List<ChainedStateHandle<OperatorStateHandle>> expectedRawOperatorState = new ArrayList<>();
expectedManagedOperatorStates.add(expectedManagedOperatorState);
expectedRawOperatorStates.add(expectedRawOperatorState);
for (int index = 0; index < operatorState.getParallelism(); index++) {
final OperatorSubtaskState.Builder stateBuilder = OperatorSubtaskState.builder();
OperatorStateHandle subManagedOperatorState = generateChainedPartitionableStateHandle(id.f0, index, 2, 8, false).get(0);
OperatorStateHandle subRawOperatorState = generateChainedPartitionableStateHandle(id.f0, index, 2, 8, true).get(0);
if (id.f0.equals(id3.f0)) {
stateBuilder.setManagedKeyedState(generateKeyGroupState(id.f0, keyGroupPartitions2.get(index), false));
}
if (id.f0.equals(id3.f0)) {
stateBuilder.setRawKeyedState(generateKeyGroupState(id.f0, keyGroupPartitions2.get(index), true));
}
expectedManagedOperatorState.add(ChainedStateHandle.wrapSingleHandle(subManagedOperatorState));
expectedRawOperatorState.add(ChainedStateHandle.wrapSingleHandle(subRawOperatorState));
OperatorSubtaskState subtaskState = stateBuilder.setManagedOperatorState(subManagedOperatorState).setRawOperatorState(subRawOperatorState).build();
operatorState.putState(index, subtaskState);
}
}
/*
* New topology
* CHAIN(op5 -> op1 -> op2) * newParallelism1 -> CHAIN(op3 -> op6) * newParallelism2
*/
Tuple2<JobVertexID, OperatorID> id5 = generateIDPair();
int newParallelism1 = 10;
Tuple2<JobVertexID, OperatorID> id6 = generateIDPair();
int newParallelism2 = parallelism2;
if (scaleType == TestScaleType.INCREASE_PARALLELISM) {
newParallelism2 = 20;
} else if (scaleType == TestScaleType.DECREASE_PARALLELISM) {
newParallelism2 = 8;
}
List<KeyGroupRange> newKeyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, newParallelism2);
ExecutionGraph newGraph = new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder().addJobVertex(id5.f0, newParallelism1, maxParallelism1, Stream.of(id2.f1, id1.f1, id5.f1).map(OperatorIDPair::generatedIDOnly).collect(Collectors.toList()), true).addJobVertex(id3.f0, newParallelism2, maxParallelism2, Stream.of(id6.f1, id3.f1).map(OperatorIDPair::generatedIDOnly).collect(Collectors.toList()), true).build();
ExecutionJobVertex newJobVertex1 = newGraph.getJobVertex(id5.f0);
ExecutionJobVertex newJobVertex2 = newGraph.getJobVertex(id3.f0);
Set<ExecutionJobVertex> tasks = new HashSet<>();
tasks.add(newJobVertex1);
tasks.add(newJobVertex2);
CompletedCheckpoint completedCheckpoint = new CompletedCheckpoint(newGraph.getJobID(), 2, System.currentTimeMillis(), System.currentTimeMillis() + 3000, operatorStates, Collections.<MasterState>emptyList(), CheckpointProperties.forCheckpoint(CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION), new TestCompletedCheckpointStorageLocation());
// set up the coordinator and validate the initial state
SharedStateRegistry sharedStateRegistry = SharedStateRegistry.DEFAULT_FACTORY.create(Executors.directExecutor(), emptyList());
CheckpointCoordinator coord = new CheckpointCoordinatorBuilder().setExecutionGraph(newGraph).setCompletedCheckpointStore(storeFor(sharedStateRegistry, () -> {
}, completedCheckpoint)).setTimer(manuallyTriggeredScheduledExecutor).build();
coord.restoreLatestCheckpointedStateToAll(tasks, true);
for (int i = 0; i < newJobVertex1.getParallelism(); i++) {
final List<OperatorIDPair> operatorIDs = newJobVertex1.getOperatorIDs();
JobManagerTaskRestore taskRestore = newJobVertex1.getTaskVertices()[i].getCurrentExecutionAttempt().getTaskRestore();
Assert.assertEquals(2L, taskRestore.getRestoreCheckpointId());
TaskStateSnapshot stateSnapshot = taskRestore.getTaskStateSnapshot();
OperatorSubtaskState headOpState = stateSnapshot.getSubtaskStateByOperatorID(operatorIDs.get(operatorIDs.size() - 1).getGeneratedOperatorID());
assertTrue(headOpState.getManagedKeyedState().isEmpty());
assertTrue(headOpState.getRawKeyedState().isEmpty());
// operator5
{
int operatorIndexInChain = 2;
OperatorSubtaskState opState = stateSnapshot.getSubtaskStateByOperatorID(operatorIDs.get(operatorIndexInChain).getGeneratedOperatorID());
assertTrue(opState.getManagedOperatorState().isEmpty());
assertTrue(opState.getRawOperatorState().isEmpty());
}
// operator1
{
int operatorIndexInChain = 1;
OperatorSubtaskState opState = stateSnapshot.getSubtaskStateByOperatorID(operatorIDs.get(operatorIndexInChain).getGeneratedOperatorID());
OperatorStateHandle expectedManagedOpState = generatePartitionableStateHandle(id1.f0, i, 2, 8, false);
OperatorStateHandle expectedRawOpState = generatePartitionableStateHandle(id1.f0, i, 2, 8, true);
Collection<OperatorStateHandle> managedOperatorState = opState.getManagedOperatorState();
assertEquals(1, managedOperatorState.size());
assertTrue(CommonTestUtils.isStreamContentEqual(expectedManagedOpState.openInputStream(), managedOperatorState.iterator().next().openInputStream()));
Collection<OperatorStateHandle> rawOperatorState = opState.getRawOperatorState();
assertEquals(1, rawOperatorState.size());
assertTrue(CommonTestUtils.isStreamContentEqual(expectedRawOpState.openInputStream(), rawOperatorState.iterator().next().openInputStream()));
}
// operator2
{
int operatorIndexInChain = 0;
OperatorSubtaskState opState = stateSnapshot.getSubtaskStateByOperatorID(operatorIDs.get(operatorIndexInChain).getGeneratedOperatorID());
OperatorStateHandle expectedManagedOpState = generatePartitionableStateHandle(id2.f0, i, 2, 8, false);
OperatorStateHandle expectedRawOpState = generatePartitionableStateHandle(id2.f0, i, 2, 8, true);
Collection<OperatorStateHandle> managedOperatorState = opState.getManagedOperatorState();
assertEquals(1, managedOperatorState.size());
assertTrue(CommonTestUtils.isStreamContentEqual(expectedManagedOpState.openInputStream(), managedOperatorState.iterator().next().openInputStream()));
Collection<OperatorStateHandle> rawOperatorState = opState.getRawOperatorState();
assertEquals(1, rawOperatorState.size());
assertTrue(CommonTestUtils.isStreamContentEqual(expectedRawOpState.openInputStream(), rawOperatorState.iterator().next().openInputStream()));
}
}
List<List<Collection<OperatorStateHandle>>> actualManagedOperatorStates = new ArrayList<>(newJobVertex2.getParallelism());
List<List<Collection<OperatorStateHandle>>> actualRawOperatorStates = new ArrayList<>(newJobVertex2.getParallelism());
for (int i = 0; i < newJobVertex2.getParallelism(); i++) {
final List<OperatorIDPair> operatorIDs = newJobVertex2.getOperatorIDs();
JobManagerTaskRestore taskRestore = newJobVertex2.getTaskVertices()[i].getCurrentExecutionAttempt().getTaskRestore();
Assert.assertEquals(2L, taskRestore.getRestoreCheckpointId());
TaskStateSnapshot stateSnapshot = taskRestore.getTaskStateSnapshot();
// operator 3
{
int operatorIndexInChain = 1;
OperatorSubtaskState opState = stateSnapshot.getSubtaskStateByOperatorID(operatorIDs.get(operatorIndexInChain).getGeneratedOperatorID());
List<Collection<OperatorStateHandle>> actualSubManagedOperatorState = new ArrayList<>(1);
actualSubManagedOperatorState.add(opState.getManagedOperatorState());
List<Collection<OperatorStateHandle>> actualSubRawOperatorState = new ArrayList<>(1);
actualSubRawOperatorState.add(opState.getRawOperatorState());
actualManagedOperatorStates.add(actualSubManagedOperatorState);
actualRawOperatorStates.add(actualSubRawOperatorState);
}
// operator 6
{
int operatorIndexInChain = 0;
OperatorSubtaskState opState = stateSnapshot.getSubtaskStateByOperatorID(operatorIDs.get(operatorIndexInChain).getGeneratedOperatorID());
assertTrue(opState.getManagedOperatorState().isEmpty());
assertTrue(opState.getRawOperatorState().isEmpty());
}
KeyGroupsStateHandle originalKeyedStateBackend = generateKeyGroupState(id3.f0, newKeyGroupPartitions2.get(i), false);
KeyGroupsStateHandle originalKeyedStateRaw = generateKeyGroupState(id3.f0, newKeyGroupPartitions2.get(i), true);
OperatorSubtaskState headOpState = stateSnapshot.getSubtaskStateByOperatorID(operatorIDs.get(operatorIDs.size() - 1).getGeneratedOperatorID());
Collection<KeyedStateHandle> keyedStateBackend = headOpState.getManagedKeyedState();
Collection<KeyedStateHandle> keyGroupStateRaw = headOpState.getRawKeyedState();
compareKeyedState(singletonList(originalKeyedStateBackend), keyedStateBackend);
compareKeyedState(singletonList(originalKeyedStateRaw), keyGroupStateRaw);
}
comparePartitionableState(expectedManagedOperatorStates.get(0), actualManagedOperatorStates);
comparePartitionableState(expectedRawOperatorStates.get(0), actualRawOperatorStates);
}
use of org.apache.flink.runtime.state.SharedStateRegistry in project flink by apache.
the class CompletedCheckpointStoreTest method testGetAllCheckpoints.
/**
* Tests that all added checkpoints are returned.
*/
@Test
public void testGetAllCheckpoints() throws Exception {
SharedStateRegistry sharedStateRegistry = new SharedStateRegistryImpl();
CompletedCheckpointStore checkpoints = createRecoveredCompletedCheckpointStore(4);
TestCompletedCheckpoint[] expected = new TestCompletedCheckpoint[] { createCheckpoint(0, sharedStateRegistry), createCheckpoint(1, sharedStateRegistry), createCheckpoint(2, sharedStateRegistry), createCheckpoint(3, sharedStateRegistry) };
for (TestCompletedCheckpoint checkpoint : expected) {
checkpoints.addCheckpointAndSubsumeOldestOne(checkpoint, new CheckpointsCleaner(), () -> {
});
}
List<CompletedCheckpoint> actual = checkpoints.getAllCheckpoints();
assertEquals(expected.length, actual.size());
for (int i = 0; i < expected.length; i++) {
assertEquals(expected[i], actual.get(i));
}
}
use of org.apache.flink.runtime.state.SharedStateRegistry in project flink by apache.
the class CompletedCheckpointStoreTest method testDiscardAllCheckpoints.
/**
* Tests that all checkpoints are discarded (using the correct class loader).
*/
@Test
public void testDiscardAllCheckpoints() throws Exception {
SharedStateRegistry sharedStateRegistry = new SharedStateRegistryImpl();
CompletedCheckpointStore checkpoints = createRecoveredCompletedCheckpointStore(4);
TestCompletedCheckpoint[] expected = new TestCompletedCheckpoint[] { createCheckpoint(0, sharedStateRegistry), createCheckpoint(1, sharedStateRegistry), createCheckpoint(2, sharedStateRegistry), createCheckpoint(3, sharedStateRegistry) };
for (TestCompletedCheckpoint checkpoint : expected) {
checkpoints.addCheckpointAndSubsumeOldestOne(checkpoint, new CheckpointsCleaner(), () -> {
});
}
checkpoints.shutdown(JobStatus.FINISHED, new CheckpointsCleaner());
// Empty state
assertNull(checkpoints.getLatestCheckpoint());
assertEquals(0, checkpoints.getAllCheckpoints().size());
assertEquals(0, checkpoints.getNumberOfRetainedCheckpoints());
// All have been discarded
for (TestCompletedCheckpoint checkpoint : expected) {
// The ZooKeeper implementation discards asynchronously
checkpoint.awaitDiscard();
assertTrue(checkpoint.isDiscarded());
}
}
use of org.apache.flink.runtime.state.SharedStateRegistry in project flink by apache.
the class ZooKeeperCompletedCheckpointStoreTest method testDiscardingCheckpointsAtShutDown.
/**
* Tests that checkpoints are discarded when the completed checkpoint store is shut down with a
* globally terminal state.
*/
@Test
public void testDiscardingCheckpointsAtShutDown() throws Exception {
final SharedStateRegistry sharedStateRegistry = new SharedStateRegistryImpl();
final Configuration configuration = new Configuration();
configuration.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, zooKeeperResource.getConnectString());
final CuratorFrameworkWithUnhandledErrorListener curatorFrameworkWrapper = ZooKeeperUtils.startCuratorFramework(configuration, NoOpFatalErrorHandler.INSTANCE);
final CompletedCheckpointStore checkpointStore = createZooKeeperCheckpointStore(curatorFrameworkWrapper.asCuratorFramework());
try {
final CompletedCheckpointStoreTest.TestCompletedCheckpoint checkpoint1 = CompletedCheckpointStoreTest.createCheckpoint(0, sharedStateRegistry);
checkpointStore.addCheckpointAndSubsumeOldestOne(checkpoint1, new CheckpointsCleaner(), () -> {
});
assertThat(checkpointStore.getAllCheckpoints(), Matchers.contains(checkpoint1));
checkpointStore.shutdown(JobStatus.FINISHED, new CheckpointsCleaner());
// verify that the checkpoint is discarded
CompletedCheckpointStoreTest.verifyCheckpointDiscarded(checkpoint1);
} finally {
curatorFrameworkWrapper.close();
}
}
use of org.apache.flink.runtime.state.SharedStateRegistry in project flink by apache.
the class ZooKeeperCompletedCheckpointStoreITCase method testRecover.
// ---------------------------------------------------------------------------------------------
/**
* Tests that older checkpoints are not cleaned up right away when recovering. Only after
* another checkpoint has been completed the old checkpoints exceeding the number of checkpoints
* to retain will be removed.
*/
@Test
public void testRecover() throws Exception {
SharedStateRegistry sharedStateRegistry = new SharedStateRegistryImpl();
CompletedCheckpointStore checkpoints = createRecoveredCompletedCheckpointStore(3);
TestCompletedCheckpoint[] expected = new TestCompletedCheckpoint[] { createCheckpoint(0, sharedStateRegistry), createCheckpoint(1, sharedStateRegistry), createCheckpoint(2, sharedStateRegistry) };
// Add multiple checkpoints
checkpoints.addCheckpointAndSubsumeOldestOne(expected[0], new CheckpointsCleaner(), () -> {
});
checkpoints.addCheckpointAndSubsumeOldestOne(expected[1], new CheckpointsCleaner(), () -> {
});
checkpoints.addCheckpointAndSubsumeOldestOne(expected[2], new CheckpointsCleaner(), () -> {
});
verifyCheckpointRegistered(expected[0].getOperatorStates().values(), sharedStateRegistry);
verifyCheckpointRegistered(expected[1].getOperatorStates().values(), sharedStateRegistry);
verifyCheckpointRegistered(expected[2].getOperatorStates().values(), sharedStateRegistry);
// All three should be in ZK
assertEquals(3, ZOOKEEPER.getClient().getChildren().forPath(CHECKPOINT_PATH).size());
assertEquals(3, checkpoints.getNumberOfRetainedCheckpoints());
// Recover
sharedStateRegistry.close();
sharedStateRegistry = new SharedStateRegistryImpl();
assertEquals(3, ZOOKEEPER.getClient().getChildren().forPath(CHECKPOINT_PATH).size());
assertEquals(3, checkpoints.getNumberOfRetainedCheckpoints());
assertEquals(expected[2], checkpoints.getLatestCheckpoint());
List<CompletedCheckpoint> expectedCheckpoints = new ArrayList<>(3);
expectedCheckpoints.add(expected[1]);
expectedCheckpoints.add(expected[2]);
expectedCheckpoints.add(createCheckpoint(3, sharedStateRegistry));
checkpoints.addCheckpointAndSubsumeOldestOne(expectedCheckpoints.get(2), new CheckpointsCleaner(), () -> {
});
List<CompletedCheckpoint> actualCheckpoints = checkpoints.getAllCheckpoints();
assertEquals(expectedCheckpoints, actualCheckpoints);
for (CompletedCheckpoint actualCheckpoint : actualCheckpoints) {
verifyCheckpointRegistered(actualCheckpoint.getOperatorStates().values(), sharedStateRegistry);
}
}
Aggregations