use of org.apache.flink.runtime.checkpoint.StateObjectCollection in project flink by apache.
the class AbstractStreamOperatorTestHarness method repartitionOperatorState.
/**
 * Reshapes the given state so that it only holds what belongs to one subtask after rescaling:
 * the keyed state handles that intersect the subtask's key-group range, and the operator state
 * handles that the repartitioner assigns to the subtask.
 *
 * @param operatorStateHandles the state to reshape; must not be {@code null}.
 * @param numKeyGroups the number of key groups that is split across {@code newParallelism}.
 * @param oldParallelism the parallelism the operator state was written with.
 * @param newParallelism the parallelism the state is redistributed to.
 * @param subtaskIndex the index of the subtask whose share of the state is returned.
 * @return a new {@link OperatorSubtaskState} holding only the state of the given subtask.
 */
public static OperatorSubtaskState repartitionOperatorState(final OperatorSubtaskState operatorStateHandles, final int numKeyGroups, final int oldParallelism, final int newParallelism, final int subtaskIndex) {
    Preconditions.checkNotNull(operatorStateHandles, "the previous operatorStateHandles should not be null.");

    // Keyed state: keep only the handles that overlap the key-group range of this subtask.
    KeyGroupRange ownRange = StateAssignmentOperation.createKeyGroupPartitions(numKeyGroups, newParallelism).get(subtaskIndex);

    List<KeyedStateHandle> ownManagedKeyed = new ArrayList<>();
    StateAssignmentOperation.extractIntersectingState(operatorStateHandles.getManagedKeyedState(), ownRange, ownManagedKeyed);

    List<KeyedStateHandle> ownRawKeyed = new ArrayList<>();
    StateAssignmentOperation.extractIntersectingState(operatorStateHandles.getRawKeyedState(), ownRange, ownRawKeyed);

    // Managed operator state: every handle is wrapped in its own single-element list before it
    // is handed to the repartitioner; the entry at subtaskIndex is this subtask's share.
    StateObjectCollection<OperatorStateHandle> allManaged = operatorStateHandles.getManagedOperatorState();
    Collection<OperatorStateHandle> ownManagedOperator;
    if (allManaged.isEmpty()) {
        ownManagedOperator = Collections.emptyList();
    } else {
        List<List<OperatorStateHandle>> managedPerHandle = new ArrayList<>();
        for (OperatorStateHandle stateHandle : allManaged) {
            managedPerHandle.add(Collections.singletonList(stateHandle));
        }
        ownManagedOperator = operatorStateRepartitioner.repartitionState(managedPerHandle, oldParallelism, newParallelism).get(subtaskIndex);
    }

    // Raw operator state: same procedure as for the managed operator state.
    StateObjectCollection<OperatorStateHandle> allRaw = operatorStateHandles.getRawOperatorState();
    Collection<OperatorStateHandle> ownRawOperator;
    if (allRaw.isEmpty()) {
        ownRawOperator = Collections.emptyList();
    } else {
        List<List<OperatorStateHandle>> rawPerHandle = new ArrayList<>();
        for (OperatorStateHandle stateHandle : allRaw) {
            rawPerHandle.add(Collections.singletonList(stateHandle));
        }
        ownRawOperator = operatorStateRepartitioner.repartitionState(rawPerHandle, oldParallelism, newParallelism).get(subtaskIndex);
    }

    return OperatorSubtaskState.builder()
            .setManagedOperatorState(new StateObjectCollection<>(nullToEmptyCollection(ownManagedOperator)))
            .setRawOperatorState(new StateObjectCollection<>(nullToEmptyCollection(ownRawOperator)))
            .setManagedKeyedState(new StateObjectCollection<>(nullToEmptyCollection(ownManagedKeyed)))
            .setRawKeyedState(new StateObjectCollection<>(nullToEmptyCollection(ownRawKeyed)))
            .build();
}
use of org.apache.flink.runtime.checkpoint.StateObjectCollection in project flink by apache.
the class AbstractStreamOperatorTestHarness method repackageState.
/**
 * Merges several {@link OperatorSubtaskState}s, each produced by calling {@link #snapshot(long,
 * long)} on a separate {@link AbstractStreamOperatorTestHarness} instance (one instance per
 * subtask), into one {@link OperatorSubtaskState}. This lets a test change its parallelism
 * freely, i.e. scale up as well as down.
 *
 * <p>The merged state still has to be passed through {@link
 * #repartitionOperatorState(OperatorSubtaskState, int, int, int, int)} so that it is reduced to
 * the key-group states of the local key-group range and the operator states assigned to the
 * local subtask. For a parallelism greater than one, the constructor {@link
 * #AbstractStreamOperatorTestHarness(StreamOperator, int, int, int)} has to be used.
 *
 * <p><b>NOTE: </b> every element of {@code handles} is assumed to come from a single task of a
 * single operator (i.e. chain length of one).
 *
 * <p>See {@link AbstractStreamOperatorTest#testStateAndTimerStateShufflingScalingDown()} for a
 * usage example.
 *
 * @param handles the partial states to merge.
 * @return {@code null} if no state is given, the state itself if exactly one is given, and
 *     otherwise a new state holding the handles of all given states.
 */
public static OperatorSubtaskState repackageState(OperatorSubtaskState... handles) throws Exception {
    final int partCount = handles.length;
    if (partCount == 0) {
        return null;
    }
    if (partCount == 1) {
        // Nothing to merge: hand back the only state unchanged.
        return handles[0];
    }

    List<OperatorStateHandle> allManagedOperator = new ArrayList<>(partCount);
    List<OperatorStateHandle> allRawOperator = new ArrayList<>(partCount);
    List<KeyedStateHandle> allManagedKeyed = new ArrayList<>(partCount);
    List<KeyedStateHandle> allRawKeyed = new ArrayList<>(partCount);

    // Append the four kinds of state of every partial state, in argument order.
    for (int i = 0; i < partCount; i++) {
        OperatorSubtaskState part = handles[i];
        allManagedOperator.addAll(part.getManagedOperatorState());
        allRawOperator.addAll(part.getRawOperatorState());
        allManagedKeyed.addAll(part.getManagedKeyedState());
        allRawKeyed.addAll(part.getRawKeyedState());
    }

    return OperatorSubtaskState.builder()
            .setManagedOperatorState(new StateObjectCollection<>(allManagedOperator))
            .setRawOperatorState(new StateObjectCollection<>(allRawOperator))
            .setManagedKeyedState(new StateObjectCollection<>(allManagedKeyed))
            .setRawKeyedState(new StateObjectCollection<>(allRawKeyed))
            .build();
}
use of org.apache.flink.runtime.checkpoint.StateObjectCollection in project flink by apache.
the class OperatorSnapshotUtil method readStateHandle.
/**
 * Reads an {@link OperatorSubtaskState} from the file at the given path.
 *
 * <p>The data is consumed in this order: a version number, one stream state handle that is
 * read and discarded, and then four count-prefixed lists (raw operator state, managed operator
 * state, raw keyed state, managed keyed state). A negative count leaves the corresponding list
 * {@code null}. Input channel state and result subpartition state are only read when the
 * version equals {@link MetadataV3Serializer#VERSION}; otherwise both are empty.
 *
 * @param path the path of the file to read.
 * @return the state that was read from the file.
 * @throws IOException if the file cannot be opened or read.
 * @throws ClassNotFoundException declared by the signature; no statement visible in this method
 *     throws it directly.
 */
public static OperatorSubtaskState readStateHandle(String path) throws IOException, ClassNotFoundException {
    try (DataInputStream dis = new DataInputStream(new FileInputStream(path))) {
        // required for backwards compatibility.
        final int version = dis.readInt();
        final boolean hasChannelState = version == MetadataV3Serializer.VERSION;

        // The result is unused, but the bytes still have to be consumed for compatibility.
        MetadataV3Serializer.deserializeStreamStateHandle(dis);

        final int rawOperatorCount = dis.readInt();
        List<OperatorStateHandle> rawOperatorState = null;
        if (rawOperatorCount >= 0) {
            rawOperatorState = new ArrayList<>();
            for (int remaining = rawOperatorCount; remaining > 0; remaining--) {
                rawOperatorState.add(MetadataV3Serializer.deserializeOperatorStateHandleUtil(dis));
            }
        }

        final int managedOperatorCount = dis.readInt();
        List<OperatorStateHandle> managedOperatorState = null;
        if (managedOperatorCount >= 0) {
            managedOperatorState = new ArrayList<>();
            for (int remaining = managedOperatorCount; remaining > 0; remaining--) {
                managedOperatorState.add(MetadataV3Serializer.deserializeOperatorStateHandleUtil(dis));
            }
        }

        final int rawKeyedCount = dis.readInt();
        List<KeyedStateHandle> rawKeyedState = null;
        if (rawKeyedCount >= 0) {
            rawKeyedState = new ArrayList<>();
            for (int remaining = rawKeyedCount; remaining > 0; remaining--) {
                rawKeyedState.add(MetadataV3Serializer.deserializeKeyedStateHandleUtil(dis));
            }
        }

        final int managedKeyedCount = dis.readInt();
        List<KeyedStateHandle> managedKeyedState = null;
        if (managedKeyedCount >= 0) {
            managedKeyedState = new ArrayList<>();
            for (int remaining = managedKeyedCount; remaining > 0; remaining--) {
                managedKeyedState.add(MetadataV3Serializer.deserializeKeyedStateHandleUtil(dis));
            }
        }

        // Channel state is only present in files written with the V3 format.
        final StateObjectCollection<InputChannelStateHandle> inputChannelStateHandles = hasChannelState ? MetadataV3Serializer.deserializeInputChannelStateHandle(dis) : StateObjectCollection.empty();
        final StateObjectCollection<ResultSubpartitionStateHandle> resultSubpartitionStateHandles = hasChannelState ? MetadataV3Serializer.INSTANCE.deserializeResultSubpartitionStateHandle(dis) : StateObjectCollection.empty();

        return OperatorSubtaskState.builder()
                .setManagedOperatorState(new StateObjectCollection<>(managedOperatorState))
                .setRawOperatorState(new StateObjectCollection<>(rawOperatorState))
                .setManagedKeyedState(new StateObjectCollection<>(managedKeyedState))
                .setRawKeyedState(new StateObjectCollection<>(rawKeyedState))
                .setInputChannelState(inputChannelStateHandles)
                .setResultSubpartitionState(resultSubpartitionStateHandles)
                .build();
    }
}
use of org.apache.flink.runtime.checkpoint.StateObjectCollection in project flink by apache.
the class TaskStateManagerImplTest method testStateReportingAndRetrieving.
/**
 * Tests reporting task state to the checkpoint responder and the local state store, and then
 * retrieving the prioritized (local first, remote second) state after a restore.
 */
@Test
public void testStateReportingAndRetrieving() {
    JobID jobID = new JobID();
    ExecutionAttemptID executionAttemptID = new ExecutionAttemptID();
    TestCheckpointResponder testCheckpointResponder = new TestCheckpointResponder();
    TestTaskLocalStateStore testTaskLocalStateStore = new TestTaskLocalStateStore();
    InMemoryStateChangelogStorage changelogStorage = new InMemoryStateChangelogStorage();
    // No restore information is passed here, so no operator is expected to report as restored.
    TaskStateManager taskStateManager = taskStateManager(jobID, executionAttemptID, testCheckpointResponder, null, testTaskLocalStateStore, changelogStorage);

    // ---------------------------------------- test reporting -----------------------------------------
    CheckpointMetaData checkpointMetaData = new CheckpointMetaData(74L, 11L);
    CheckpointMetrics checkpointMetrics = new CheckpointMetrics();
    TaskStateSnapshot jmTaskStateSnapshot = new TaskStateSnapshot();
    OperatorID operatorID_1 = new OperatorID(1L, 1L);
    OperatorID operatorID_2 = new OperatorID(2L, 2L);
    OperatorID operatorID_3 = new OperatorID(3L, 3L);
    Assert.assertFalse(taskStateManager.prioritizedOperatorState(operatorID_1).isRestored());
    Assert.assertFalse(taskStateManager.prioritizedOperatorState(operatorID_2).isRestored());
    Assert.assertFalse(taskStateManager.prioritizedOperatorState(operatorID_3).isRestored());
    KeyGroupRange keyGroupRange = new KeyGroupRange(0, 1);
    // Remote state of operator 1 has only managed keyed state.
    OperatorSubtaskState jmOperatorSubtaskState_1 = OperatorSubtaskState.builder().setManagedKeyedState(StateHandleDummyUtil.createNewKeyedStateHandle(keyGroupRange)).build();
    // Remote state of operator 2 has only raw keyed state.
    OperatorSubtaskState jmOperatorSubtaskState_2 = OperatorSubtaskState.builder().setRawKeyedState(StateHandleDummyUtil.createNewKeyedStateHandle(keyGroupRange)).build();
    jmTaskStateSnapshot.putSubtaskStateByOperatorID(operatorID_1, jmOperatorSubtaskState_1);
    jmTaskStateSnapshot.putSubtaskStateByOperatorID(operatorID_2, jmOperatorSubtaskState_2);
    TaskStateSnapshot tmTaskStateSnapshot = new TaskStateSnapshot();
    // Only operator 1 has a local alternative for the managed keyed state.
    OperatorSubtaskState tmOperatorSubtaskState_1 = OperatorSubtaskState.builder().setManagedKeyedState(StateHandleDummyUtil.createNewKeyedStateHandle(keyGroupRange)).build();
    tmTaskStateSnapshot.putSubtaskStateByOperatorID(operatorID_1, tmOperatorSubtaskState_1);
    taskStateManager.reportTaskStateSnapshots(checkpointMetaData, checkpointMetrics, jmTaskStateSnapshot, tmTaskStateSnapshot);
    TestCheckpointResponder.AcknowledgeReport acknowledgeReport = testCheckpointResponder.getAcknowledgeReports().get(0);
    // checks that the checkpoint responder and the local state store received state as
    // expected.
    Assert.assertEquals(checkpointMetaData.getCheckpointId(), acknowledgeReport.getCheckpointId());
    Assert.assertEquals(checkpointMetrics, acknowledgeReport.getCheckpointMetrics());
    Assert.assertEquals(executionAttemptID, acknowledgeReport.getExecutionAttemptID());
    Assert.assertEquals(jobID, acknowledgeReport.getJobID());
    Assert.assertEquals(jmTaskStateSnapshot, acknowledgeReport.getSubtaskState());
    Assert.assertEquals(tmTaskStateSnapshot, testTaskLocalStateStore.retrieveLocalState(checkpointMetaData.getCheckpointId()));

    // -------------------------------------- test prio retrieving ---------------------------------------
    JobManagerTaskRestore taskRestore = new JobManagerTaskRestore(checkpointMetaData.getCheckpointId(), acknowledgeReport.getSubtaskState());
    taskStateManager = taskStateManager(jobID, executionAttemptID, testCheckpointResponder, taskRestore, testTaskLocalStateStore, changelogStorage);
    // this has remote AND local managed keyed state.
    PrioritizedOperatorSubtaskState prioritized_1 = taskStateManager.prioritizedOperatorState(operatorID_1);
    // this has only remote raw keyed state.
    PrioritizedOperatorSubtaskState prioritized_2 = taskStateManager.prioritizedOperatorState(operatorID_2);
    // no state was reported for this operator.
    PrioritizedOperatorSubtaskState prioritized_3 = taskStateManager.prioritizedOperatorState(operatorID_3);
    // NOTE(review): operator 3 and the unknown operator below have no state, yet isRestored()
    // is asserted to be true - presumably the flag reflects that the task was created with
    // restore information rather than whether this operator has state; confirm against
    // TaskStateManagerImpl.
    Assert.assertTrue(prioritized_1.isRestored());
    Assert.assertTrue(prioritized_2.isRestored());
    Assert.assertTrue(prioritized_3.isRestored());
    Assert.assertTrue(taskStateManager.prioritizedOperatorState(new OperatorID()).isRestored());

    // checks for operator 1: the local alternative comes first, the remote state second.
    Iterator<StateObjectCollection<KeyedStateHandle>> prioritizedManagedKeyedState_1 = prioritized_1.getPrioritizedManagedKeyedState().iterator();
    Assert.assertTrue(prioritizedManagedKeyedState_1.hasNext());
    StateObjectCollection<KeyedStateHandle> current = prioritizedManagedKeyedState_1.next();
    KeyedStateHandle keyedStateHandleExp = tmOperatorSubtaskState_1.getManagedKeyedState().iterator().next();
    KeyedStateHandle keyedStateHandleAct = current.iterator().next();
    // The very same handle instance is expected, hence the identity (not equality) assertion.
    Assert.assertSame(keyedStateHandleExp, keyedStateHandleAct);
    Assert.assertTrue(prioritizedManagedKeyedState_1.hasNext());
    current = prioritizedManagedKeyedState_1.next();
    keyedStateHandleExp = jmOperatorSubtaskState_1.getManagedKeyedState().iterator().next();
    keyedStateHandleAct = current.iterator().next();
    Assert.assertSame(keyedStateHandleExp, keyedStateHandleAct);
    Assert.assertFalse(prioritizedManagedKeyedState_1.hasNext());

    // checks for operator 2: only the remote raw keyed state is available.
    Iterator<StateObjectCollection<KeyedStateHandle>> prioritizedRawKeyedState_2 = prioritized_2.getPrioritizedRawKeyedState().iterator();
    Assert.assertTrue(prioritizedRawKeyedState_2.hasNext());
    current = prioritizedRawKeyedState_2.next();
    keyedStateHandleExp = jmOperatorSubtaskState_2.getRawKeyedState().iterator().next();
    keyedStateHandleAct = current.iterator().next();
    Assert.assertSame(keyedStateHandleExp, keyedStateHandleAct);
    Assert.assertFalse(prioritizedRawKeyedState_2.hasNext());
}
use of org.apache.flink.runtime.checkpoint.StateObjectCollection in project flink by apache.
the class BackendRestorerProcedureTest method testExceptionThrownIfAllRestoresFailed.
/**
 * Checks that the restore procedure ends with an exception when none of the offered restore
 * alternatives can be restored, and that each alternative was actually attempted.
 */
@Test
public void testExceptionThrownIfAllRestoresFailed() throws Exception {
    CloseableRegistry registry = new CloseableRegistry();

    // Three unstubbed mock handles, each offered as a separate restore alternative.
    OperatorStateHandle failingFirst = mock(OperatorStateHandle.class);
    OperatorStateHandle failingSecond = mock(OperatorStateHandle.class);
    OperatorStateHandle failingThird = mock(OperatorStateHandle.class);

    StateObjectCollection<OperatorStateHandle> firstAlternative = new StateObjectCollection<>(Collections.singletonList(failingFirst));
    StateObjectCollection<OperatorStateHandle> secondAlternative = new StateObjectCollection<>(Collections.singletonList(failingSecond));
    StateObjectCollection<OperatorStateHandle> thirdAlternative = new StateObjectCollection<>(Collections.singletonList(failingThird));
    List<StateObjectCollection<OperatorStateHandle>> restoreAlternatives = Arrays.asList(firstAlternative, secondAlternative, thirdAlternative);

    BackendRestorerProcedure<OperatorStateBackend, OperatorStateHandle> procedure = new BackendRestorerProcedure<>(backendSupplier, registry, "test op state backend");

    try {
        procedure.createAndRestore(restoreAlternatives);
        // Reaching this line means no exception was thrown. Assert.fail() raises an
        // AssertionError, which is not an Exception and therefore passes the catch below.
        Assert.fail();
    } catch (Exception expected) {
        // The failure of all restore attempts is exactly what this test expects.
    }

    // Every alternative must have been tried before the procedure gave up.
    verify(failingFirst).openInputStream();
    verify(failingSecond).openInputStream();
    verify(failingThird).openInputStream();
}
Aggregations