Use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.
The class AccumulatingAlignedProcessingTimeWindowOperatorTest, method checkpointRestoreWithPendingWindowSlidingWithProcessFunction:
@Test
public void checkpointRestoreWithPendingWindowSlidingWithProcessFunction() {
    try {
        final int factor = 4;
        final int windowSlide = 50;
        final int windowSize = factor * windowSlide;

        // sliding window (200 msecs) every 50 msecs
        AccumulatingProcessingTimeWindowOperator<Integer, Integer, Integer> op =
            new AccumulatingProcessingTimeWindowOperator<>(
                validatingIdentityProcessFunction, identitySelector,
                IntSerializer.INSTANCE, IntSerializer.INSTANCE,
                windowSize, windowSlide);

        OneInputStreamOperatorTestHarness<Integer, Integer> testHarness =
            new OneInputStreamOperatorTestHarness<>(op);
        testHarness.setProcessingTime(0);
        testHarness.setup();
        testHarness.open();

        // inject some elements
        final int numElements = 1000;
        final int numElementsFirst = 700;
        for (int i = 0; i < numElementsFirst; i++) {
            testHarness.processElement(new StreamRecord<>(i));
        }

        // draw a snapshot
        List<Integer> resultAtSnapshot = extractFromStreamRecords(testHarness.getOutput());
        int beforeSnapShot = testHarness.getOutput().size();
        StreamStateHandle state = testHarness.snapshotLegacy(1L, System.currentTimeMillis());
        int afterSnapShot = testHarness.getOutput().size();
        assertEquals("operator performed computation during snapshot", beforeSnapShot, afterSnapShot);
        assertTrue(resultAtSnapshot.size() <= factor * numElementsFirst);

        // inject the remaining elements - these should not influence the snapshot
        for (int i = numElementsFirst; i < numElements; i++) {
            testHarness.processElement(new StreamRecord<>(i));
        }
        testHarness.close();

        // re-create the operator and restore the state
        op = new AccumulatingProcessingTimeWindowOperator<>(
            validatingIdentityProcessFunction, identitySelector,
            IntSerializer.INSTANCE, IntSerializer.INSTANCE,
            windowSize, windowSlide);

        testHarness = new OneInputStreamOperatorTestHarness<>(op);
        testHarness.setup();
        testHarness.restore(state);
        testHarness.open();

        // inject the remaining elements again
        for (int i = numElementsFirst; i < numElements; i++) {
            testHarness.processElement(new StreamRecord<>(i));
        }

        // advance processing time so that all pending sliding windows fire
        testHarness.setProcessingTime(50);
        testHarness.setProcessingTime(100);
        testHarness.setProcessingTime(150);
        testHarness.setProcessingTime(200);
        testHarness.setProcessingTime(250);
        testHarness.setProcessingTime(300);
        testHarness.setProcessingTime(350);

        // get and verify the result
        List<Integer> finalResult = new ArrayList<>(resultAtSnapshot);
        List<Integer> finalPartialResult = extractFromStreamRecords(testHarness.getOutput());
        finalResult.addAll(finalPartialResult);
        assertEquals(factor * numElements, finalResult.size());

        // each element falls into exactly `factor` overlapping windows,
        // so the sorted output contains every value `factor` times
        Collections.sort(finalResult);
        for (int i = 0; i < factor * numElements; i++) {
            assertEquals(i / factor, finalResult.get(i).intValue());
        }

        testHarness.close();
        op.dispose();
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
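The helper extractFromStreamRecords(...) used above is not shown on this page. A minimal sketch of such a helper, assuming the harness output queue can also carry watermarks and other non-record elements (this sketch mirrors the call sites above and is not necessarily the original test utility):

@SuppressWarnings("unchecked")
private static <T> List<T> extractFromStreamRecords(Iterable<Object> input) {
    // Collect only the payloads of StreamRecords; skip watermarks etc.
    List<T> result = new ArrayList<>();
    for (Object element : input) {
        if (element instanceof StreamRecord) {
            result.add(((StreamRecord<T>) element).getValue());
        }
    }
    return result;
}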
Use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.
The class SavepointV0Serializer, method convertOperatorAndFunctionState:
/**
* This is public so that we can use it when restoring a legacy snapshot
* in {@code AbstractStreamOperatorTestHarness}.
*/
public static StreamStateHandle convertOperatorAndFunctionState(StreamTaskState streamTaskState) throws Exception {
    List<StreamStateHandle> mergeStateHandles = new ArrayList<>(4);

    StateHandle<Serializable> functionState = streamTaskState.getFunctionState();
    StateHandle<?> operatorState = streamTaskState.getOperatorState();

    // a signal handle tells the restoring side whether converted
    // function state follows in the merged stream
    if (null != functionState) {
        mergeStateHandles.add(SIGNAL_1);
        mergeStateHandles.add(convertStateHandle(functionState));
    } else {
        mergeStateHandles.add(SIGNAL_0);
    }

    if (null != operatorState) {
        mergeStateHandles.add(convertStateHandle(operatorState));
    }

    return new MigrationStreamStateHandle(new MultiStreamStateHandle(mergeStateHandles));
}
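SIGNAL_0 and SIGNAL_1 are defined elsewhere in SavepointV0Serializer and act as one-byte markers in the merged stream. A hedged sketch of how such marker handles can be declared (the handle names and exact byte values are assumptions, not copied from the Flink source):

// Assumed single-byte marker handles: 0 = no function state follows,
// 1 = converted function state follows in the merged stream.
private static final StreamStateHandle SIGNAL_0 =
    new ByteStreamStateHandle("SIGNAL_0", new byte[] { 0 });
private static final StreamStateHandle SIGNAL_1 =
    new ByteStreamStateHandle("SIGNAL_1", new byte[] { 1 });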
Use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.
The class SavepointV0Serializer, method convertKeyedBackendState:
/**
* This is public so that we can use it when restoring a legacy snapshot
* in {@code AbstractStreamOperatorTestHarness}.
*/
public static KeyGroupsStateHandle convertKeyedBackendState(
        HashMap<String, KvStateSnapshot<?, ?, ?, ?>> oldKeyedState,
        int parallelInstanceIdx,
        long checkpointID) throws Exception {

    if (null != oldKeyedState) {
        CheckpointStreamFactory checkpointStreamFactory = new MemCheckpointStreamFactory(MAX_SIZE);

        CheckpointStreamFactory.CheckpointStateOutputStream keyedStateOut =
            checkpointStreamFactory.createCheckpointStateOutputStream(checkpointID, 0L);
        try {
            final long offset = keyedStateOut.getPos();
            InstantiationUtil.serializeObject(keyedStateOut, oldKeyedState);
            StreamStateHandle streamStateHandle = keyedStateOut.closeAndGetHandle();
            // makes IOUtils.closeQuietly(...) ignore this
            keyedStateOut = null;
            if (null != streamStateHandle) {
                KeyGroupRangeOffsets keyGroupRangeOffsets =
                    new KeyGroupRangeOffsets(parallelInstanceIdx, parallelInstanceIdx, new long[] { offset });
                return new MigrationKeyGroupStateHandle(keyGroupRangeOffsets, streamStateHandle);
            }
        } finally {
            IOUtils.closeQuietly(keyedStateOut);
        }
    }
    return null;
}
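The inverse direction, reading the map back out of the resulting handle, would seek to the recorded key-group offset and deserialize. A minimal sketch under the assumptions above (the helper name readKeyedBackendState is hypothetical, not part of the Flink source):

// Hypothetical inverse of the conversion above: seek to the offset recorded
// for this subtask's single key group and deserialize the keyed-state map.
private static HashMap<String, KvStateSnapshot<?, ?, ?, ?>> readKeyedBackendState(
        KeyGroupsStateHandle handle,
        int parallelInstanceIdx,
        ClassLoader classLoader) throws Exception {

    try (FSDataInputStream in = handle.openInputStream()) {
        in.seek(handle.getOffsetForKeyGroup(parallelInstanceIdx));
        return InstantiationUtil.deserializeObject(in, classLoader);
    }
}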
Use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.
The class SavepointV0Serializer, method convertSubtaskState:
private org.apache.flink.runtime.checkpoint.SubtaskState convertSubtaskState(
        SubtaskState subtaskState,
        int parallelInstanceIdx,
        ClassLoader userClassLoader,
        long checkpointID) throws Exception {

    SerializedValue<StateHandle<?>> serializedValue = subtaskState.getState();
    StreamTaskStateList stateList = (StreamTaskStateList) serializedValue.deserializeValue(userClassLoader);
    StreamTaskState[] streamTaskStates = stateList.getState(userClassLoader);

    // fixed-size, null-filled list; slots are filled by chain index below
    List<StreamStateHandle> newChainStateList = Arrays.asList(new StreamStateHandle[streamTaskStates.length]);
    KeyGroupsStateHandle newKeyedState = null;

    for (int chainIdx = 0; chainIdx < streamTaskStates.length; ++chainIdx) {
        StreamTaskState streamTaskState = streamTaskStates[chainIdx];
        if (streamTaskState == null) {
            continue;
        }
        newChainStateList.set(chainIdx, convertOperatorAndFunctionState(streamTaskState));

        HashMap<String, KvStateSnapshot<?, ?, ?, ?>> oldKeyedState = streamTaskState.getKvStates();
        if (null != oldKeyedState) {
            Preconditions.checkState(null == newKeyedState, "Found more than one keyed state in chain");
            newKeyedState = convertKeyedBackendState(oldKeyedState, parallelInstanceIdx, checkpointID);
        }
    }

    ChainedStateHandle<StreamStateHandle> newChainedState = new ChainedStateHandle<>(newChainStateList);
    ChainedStateHandle<OperatorStateHandle> nopChain =
        new ChainedStateHandle<>(Arrays.asList(new OperatorStateHandle[newChainedState.getLength()]));

    return new org.apache.flink.runtime.checkpoint.SubtaskState(newChainedState, nopChain, nopChain, newKeyedState, null);
}
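Note the Arrays.asList(new StreamStateHandle[n]) idiom: it yields a fixed-size, null-filled list that supports set(index, ...) but not add(...), so stateless operators keep a null slot and indices stay aligned with the operator chain. A quick standalone demonstration of that behavior:

import java.util.Arrays;
import java.util.List;

public class FixedSizeListDemo {
    public static void main(String[] args) {
        // A list view backed by the array: fixed length, pre-filled with nulls.
        List<String> slots = Arrays.asList(new String[3]);

        slots.set(1, "converted");  // allowed: writes through to the backing array
        System.out.println(slots);  // prints [null, converted, null]

        // slots.add("x");          // would throw UnsupportedOperationException
    }
}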
Use of org.apache.flink.runtime.state.StreamStateHandle in project flink by apache.
The class CheckpointCoordinatorTest, method testRestoreLatestCheckpointedStateWithChangingParallelism:
/**
 * Tests checkpoint restoration with changing parallelism of a job vertex
 * with partitioned state.
 */
private void testRestoreLatestCheckpointedStateWithChangingParallelism(boolean scaleOut) throws Exception {
    final JobID jid = new JobID();
    final long timestamp = System.currentTimeMillis();
    final JobVertexID jobVertexID1 = new JobVertexID();
    final JobVertexID jobVertexID2 = new JobVertexID();

    int parallelism1 = 3;
    int parallelism2 = scaleOut ? 2 : 13;
    int maxParallelism1 = 42;
    int maxParallelism2 = 13;
    int newParallelism2 = scaleOut ? 13 : 2;

    final ExecutionJobVertex jobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, maxParallelism1);
    final ExecutionJobVertex jobVertex2 = mockExecutionJobVertex(jobVertexID2, parallelism2, maxParallelism2);

    List<ExecutionVertex> allExecutionVertices = new ArrayList<>(parallelism1 + parallelism2);
    allExecutionVertices.addAll(Arrays.asList(jobVertex1.getTaskVertices()));
    allExecutionVertices.addAll(Arrays.asList(jobVertex2.getTaskVertices()));

    ExecutionVertex[] arrayExecutionVertices =
        allExecutionVertices.toArray(new ExecutionVertex[allExecutionVertices.size()]);

    // set up the coordinator and validate the initial state
    CheckpointCoordinator coord = new CheckpointCoordinator(
        jid,
        600000,
        600000,
        0,
        Integer.MAX_VALUE,
        ExternalizedCheckpointSettings.none(),
        arrayExecutionVertices,
        arrayExecutionVertices,
        arrayExecutionVertices,
        new StandaloneCheckpointIDCounter(),
        new StandaloneCompletedCheckpointStore(1),
        null,
        Executors.directExecutor());

    // trigger the checkpoint
    coord.triggerCheckpoint(timestamp, false);
    assertTrue(coord.getPendingCheckpoints().keySet().size() == 1);
    long checkpointId = Iterables.getOnlyElement(coord.getPendingCheckpoints().keySet());
    CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointId, 0L);

    List<KeyGroupRange> keyGroupPartitions1 =
        StateAssignmentOperation.createKeyGroupPartitions(maxParallelism1, parallelism1);
    List<KeyGroupRange> keyGroupPartitions2 =
        StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, parallelism2);

    // vertex 1
    for (int index = 0; index < jobVertex1.getParallelism(); index++) {
        ChainedStateHandle<StreamStateHandle> valueSizeTuple = generateStateForVertex(jobVertexID1, index);
        ChainedStateHandle<OperatorStateHandle> opStateBackend =
            generateChainedPartitionableStateHandle(jobVertexID1, index, 2, 8, false);
        KeyGroupsStateHandle keyedStateBackend =
            generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), false);
        KeyGroupsStateHandle keyedStateRaw =
            generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), true);

        SubtaskState checkpointStateHandles =
            new SubtaskState(valueSizeTuple, opStateBackend, null, keyedStateBackend, keyedStateRaw);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(
            jid,
            jobVertex1.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(),
            checkpointId,
            new CheckpointMetrics(),
            checkpointStateHandles);

        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }

    // vertex 2
    final List<ChainedStateHandle<OperatorStateHandle>> expectedOpStatesBackend =
        new ArrayList<>(jobVertex2.getParallelism());
    final List<ChainedStateHandle<OperatorStateHandle>> expectedOpStatesRaw =
        new ArrayList<>(jobVertex2.getParallelism());
    for (int index = 0; index < jobVertex2.getParallelism(); index++) {
        KeyGroupsStateHandle keyedStateBackend =
            generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), false);
        KeyGroupsStateHandle keyedStateRaw =
            generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), true);
        ChainedStateHandle<OperatorStateHandle> opStateBackend =
            generateChainedPartitionableStateHandle(jobVertexID2, index, 2, 8, false);
        ChainedStateHandle<OperatorStateHandle> opStateRaw =
            generateChainedPartitionableStateHandle(jobVertexID2, index, 2, 8, true);
        expectedOpStatesBackend.add(opStateBackend);
        expectedOpStatesRaw.add(opStateRaw);

        SubtaskState checkpointStateHandles = new SubtaskState(
            new ChainedStateHandle<>(Collections.<StreamStateHandle>singletonList(null)),
            opStateBackend,
            opStateRaw,
            keyedStateBackend,
            keyedStateRaw);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(
            jid,
            jobVertex2.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(),
            checkpointId,
            new CheckpointMetrics(),
            checkpointStateHandles);

        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }

    List<CompletedCheckpoint> completedCheckpoints = coord.getSuccessfulCheckpoints();
    assertEquals(1, completedCheckpoints.size());

    Map<JobVertexID, ExecutionJobVertex> tasks = new HashMap<>();
    List<KeyGroupRange> newKeyGroupPartitions2 =
        StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, newParallelism2);

    final ExecutionJobVertex newJobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, maxParallelism1);
    // rescale vertex 2
    final ExecutionJobVertex newJobVertex2 = mockExecutionJobVertex(jobVertexID2, newParallelism2, maxParallelism2);

    tasks.put(jobVertexID1, newJobVertex1);
    tasks.put(jobVertexID2, newJobVertex2);
    coord.restoreLatestCheckpointedState(tasks, true, false);

    // verify the restored state
    verifyStateRestore(jobVertexID1, newJobVertex1, keyGroupPartitions1);
    List<List<Collection<OperatorStateHandle>>> actualOpStatesBackend =
        new ArrayList<>(newJobVertex2.getParallelism());
    List<List<Collection<OperatorStateHandle>>> actualOpStatesRaw =
        new ArrayList<>(newJobVertex2.getParallelism());
    for (int i = 0; i < newJobVertex2.getParallelism(); i++) {
        KeyGroupsStateHandle originalKeyedStateBackend =
            generateKeyGroupState(jobVertexID2, newKeyGroupPartitions2.get(i), false);
        KeyGroupsStateHandle originalKeyedStateRaw =
            generateKeyGroupState(jobVertexID2, newKeyGroupPartitions2.get(i), true);

        TaskStateHandles taskStateHandles =
            newJobVertex2.getTaskVertices()[i].getCurrentExecutionAttempt().getTaskStateHandles();
        ChainedStateHandle<StreamStateHandle> operatorState = taskStateHandles.getLegacyOperatorState();
        List<Collection<OperatorStateHandle>> opStateBackend = taskStateHandles.getManagedOperatorState();
        List<Collection<OperatorStateHandle>> opStateRaw = taskStateHandles.getRawOperatorState();
        Collection<KeyGroupsStateHandle> keyGroupStateBackend = taskStateHandles.getManagedKeyedState();
        Collection<KeyGroupsStateHandle> keyGroupStateRaw = taskStateHandles.getRawKeyedState();

        actualOpStatesBackend.add(opStateBackend);
        actualOpStatesRaw.add(opStateRaw);
        assertNull(operatorState);
        compareKeyedState(Collections.singletonList(originalKeyedStateBackend), keyGroupStateBackend);
        compareKeyedState(Collections.singletonList(originalKeyedStateRaw), keyGroupStateRaw);
    }

    comparePartitionableState(expectedOpStatesBackend, actualOpStatesBackend);
    comparePartitionableState(expectedOpStatesRaw, actualOpStatesRaw);
}
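StateAssignmentOperation.createKeyGroupPartitions(maxParallelism, parallelism) splits the key groups [0, maxParallelism) into parallelism contiguous ranges, which is what makes the rescaling in this test well defined. A sketch of that idea as an even split (an illustrative re-derivation; the rounding at slice boundaries may differ from Flink's actual KeyGroupRangeAssignment):

import java.util.ArrayList;
import java.util.List;

public class KeyGroupPartitionsSketch {

    // Split [0, maxParallelism) into `parallelism` contiguous inclusive
    // ranges whose sizes differ by at most one.
    static List<int[]> createKeyGroupPartitions(int maxParallelism, int parallelism) {
        List<int[]> result = new ArrayList<>(parallelism);
        for (int i = 0; i < parallelism; i++) {
            int start = (i * maxParallelism) / parallelism;          // inclusive
            int end = ((i + 1) * maxParallelism) / parallelism - 1;  // inclusive
            result.add(new int[] { start, end });
        }
        return result;
    }

    public static void main(String[] args) {
        // maxParallelism2 = 13 split across newParallelism2 = 2 subtasks:
        for (int[] range : createKeyGroupPartitions(13, 2)) {
            System.out.println("[" + range[0] + ", " + range[1] + "]");
        }
        // prints [0, 5] and [6, 12]
    }
}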