use of org.apache.flink.runtime.jobgraph.OperatorInstanceID in project flink by apache.
the class StateAssignmentOperationTest method verifyAndCollectStateInfo.
// ------------------------------------------------------------------------
/**
 * Repartitions the managed operator state of the given operator and verifies, per new sub-task
 * and per distribution mode, that the number of partition offsets matches the expectation.
 * While doing so it collects, for every state name, how many operator state handles the new
 * sub-tasks would use in total for initialization.
 *
 * @param operatorState the operator state to repartition
 * @param operatorID the ID that {@code operatorState} is mapped to for the redistribution
 * @param oldParallelism parallelism before rescaling
 * @param newParallelism parallelism after rescaling
 * @param stateInfoCounts output map (state name -> number of handles carrying that state);
 *     updated in place
 */
private void verifyAndCollectStateInfo(OperatorState operatorState, OperatorID operatorID, int oldParallelism, int newParallelism, Map<String, Integer> stateInfoCounts) {
    final Map<OperatorInstanceID, List<OperatorStateHandle>> redistributed = new HashMap<>();
    StateAssignmentOperation.reDistributePartitionableStates(
            Collections.singletonMap(operatorID, operatorState),
            newParallelism,
            OperatorSubtaskState::getManagedOperatorState,
            RoundRobinOperatorStateRepartitioner.INSTANCE,
            redistributed);

    // SPLIT_DISTRIBUTE: when scaling up the state is split and re-distributed, so each part
    // goes to one task; when scaling down to 1 or not rescaling it is not re-distributed.
    final int expectedSplitDistributeLength = oldParallelism < newParallelism ? 1 : 2;

    // Check the repartitioned managed operator state of every new sub-task.
    for (List<OperatorStateHandle> handlesOfSubtask : redistributed.values()) {
        // mode -> (state name -> summed number of offsets over all handles of this sub-task)
        final EnumMap<OperatorStateHandle.Mode, Map<String, Integer>> offsetCountsByMode =
                new EnumMap<>(OperatorStateHandle.Mode.class);
        for (OperatorStateHandle.Mode mode : OperatorStateHandle.Mode.values()) {
            offsetCountsByMode.put(mode, new HashMap<>());
        }

        for (OperatorStateHandle handle : handlesOfSubtask) {
            handle.getStateNameToPartitionOffsets().forEach((stateName, metaInfo) -> {
                stateInfoCounts.merge(stateName, 1, Integer::sum);
                offsetCountsByMode
                        .get(metaInfo.getDistributionMode())
                        .merge(stateName, metaInfo.getOffsets().length, Integer::sum);
            });
        }

        // NOTE(review): the literal expectations below presumably mirror the state fixture
        // built by the callers of this helper - confirm there before changing them.
        offsetCountsByMode.forEach((mode, offsetCounts) -> {
            final int expectedLength;
            switch (mode) {
                case SPLIT_DISTRIBUTE:
                    expectedLength = expectedSplitDistributeLength;
                    break;
                case UNION:
                    // UNION: all to all
                    expectedLength = 2;
                    break;
                default:
                    // BROADCAST: so all to all
                    expectedLength = 3;
                    break;
            }
            for (int actualLength : offsetCounts.values()) {
                Assert.assertEquals(expectedLength, actualLength);
            }
        });
    }
}
use of org.apache.flink.runtime.jobgraph.OperatorInstanceID in project flink by apache.
the class StateAssignmentOperation method assignNonFinishedStateToTask.
/**
 * Collects the assigned state of one subtask for all given operators and registers it as the
 * initial state of the execution attempt.
 *
 * <p>For each operator the subtask state is looked up in {@code assignment} under the
 * instance ID built from {@code subTaskIndex} and the operator's generated ID, and is stored
 * in the task snapshot under that generated ID.
 *
 * @param assignment source of the per-instance subtask states
 * @param operatorIDs the operators whose state is gathered into the snapshot
 * @param subTaskIndex index of the subtask being restored
 * @param currentExecutionAttempt the execution that receives the restore information
 */
private void assignNonFinishedStateToTask(TaskStateAssignment assignment, List<OperatorIDPair> operatorIDs, int subTaskIndex, Execution currentExecutionAttempt) {
    final TaskStateSnapshot snapshot = new TaskStateSnapshot(operatorIDs.size(), false);

    operatorIDs.forEach(idPair -> {
        final OperatorSubtaskState stateOfSubtask =
                assignment.getSubtaskState(
                        OperatorInstanceID.of(subTaskIndex, idPair.getGeneratedOperatorID()));
        snapshot.putSubtaskStateByOperatorID(idPair.getGeneratedOperatorID(), stateOfSubtask);
    });

    // restoreCheckpointId is not declared in this method; it comes from the enclosing scope.
    currentExecutionAttempt.setInitialState(
            new JobManagerTaskRestore(restoreCheckpointId, snapshot));
}
use of org.apache.flink.runtime.jobgraph.OperatorInstanceID in project flink by apache.
the class StateAssignmentOperation method reDistributeKeyedStates.
/**
 * Re-assigns the keyed state of every operator in {@code stateAssignment.oldState} to the new
 * subtasks.
 *
 * <p>For each operator and each new subtask index, the managed and raw keyed state handles
 * computed by {@code reAssignSubKeyedStates} are stored in
 * {@code stateAssignment.subManagedKeyedState} and {@code stateAssignment.subRawKeyedState}
 * under the corresponding operator instance ID.
 *
 * @param keyGroupPartitions key-group ranges passed through to {@code reAssignSubKeyedStates}
 * @param stateAssignment holder of the old state, the new parallelism and the result maps
 */
private void reDistributeKeyedStates(List<KeyGroupRange> keyGroupPartitions, TaskStateAssignment stateAssignment) {
    stateAssignment.oldState.forEach((operatorID, operatorState) -> {
        final int oldParallelism = operatorState.getParallelism();
        for (int newSubtask = 0; newSubtask < stateAssignment.newParallelism; newSubtask++) {
            final OperatorInstanceID target = OperatorInstanceID.of(newSubtask, operatorID);
            // f0 is stored as managed keyed state, f1 as raw keyed state.
            final Tuple2<List<KeyedStateHandle>, List<KeyedStateHandle>> managedAndRaw =
                    reAssignSubKeyedStates(
                            operatorState,
                            keyGroupPartitions,
                            newSubtask,
                            stateAssignment.newParallelism,
                            oldParallelism);
            stateAssignment.subManagedKeyedState.put(target, managedAndRaw.f0);
            stateAssignment.subRawKeyedState.put(target, managedAndRaw.f1);
        }
    });
}
use of org.apache.flink.runtime.jobgraph.OperatorInstanceID in project flink by apache.
the class StateAssignmentOperation method reDistributeInputChannelStates.
/**
 * Redistributes the input channel state of the old execution to the subtasks of the new one
 * and records the result in {@code stateAssignment.inputChannelStates}.
 *
 * <p>Nothing happens when the assignment has no input state. If the old parallelism of the
 * input operator equals the parallelism of the execution job vertex, the per-subtask state is
 * taken over unchanged. Otherwise the state is repartitioned gate by gate using the rescale
 * mappings of each gate's input mapping.
 *
 * @param stateAssignment holder of the old state, the target vertex and the result map
 */
public void reDistributeInputChannelStates(TaskStateAssignment stateAssignment) {
    if (!stateAssignment.hasInputState) {
        return;
    }
    checkForUnsupportedToplogyChanges(stateAssignment.oldState, OperatorSubtaskState::getInputChannelState, stateAssignment.inputOperatorID);
    final ExecutionJobVertex executionJobVertex = stateAssignment.executionJobVertex;
    final List<IntermediateResult> inputs = executionJobVertex.getInputs();
    // check for rescaling: no rescaling = simple reassignment
    final OperatorState inputState = stateAssignment.oldState.get(stateAssignment.inputOperatorID);
    final List<List<InputChannelStateHandle>> inputOperatorState = splitBySubtasks(inputState, OperatorSubtaskState::getInputChannelState);
    if (inputState.getParallelism() == executionJobVertex.getParallelism()) {
        stateAssignment.inputChannelStates.putAll(toInstanceMap(stateAssignment.inputOperatorID, inputOperatorState));
        return;
    }
    // Rescaling: repartition the old state separately for every input gate, driven by the
    // rescale mappings of that gate.
    for (int gateIndex = 0; gateIndex < inputs.size(); gateIndex++) {
        final RescaleMappings mapping = stateAssignment.getInputMapping(gateIndex).getRescaleMappings();
        // With a single input all state belongs to that gate, so no filtering by gate index
        // is applied.
        final List<List<InputChannelStateHandle>> gateState = inputs.size() == 1 ? inputOperatorState : getPartitionState(inputOperatorState, InputChannelInfo::getGateIdx, gateIndex);
        // Diamond instead of a raw type: keeps the construction type-checked and matches
        // reDistributeResultSubpartitionStates.
        final MappingBasedRepartitioner<InputChannelStateHandle> repartitioner = new MappingBasedRepartitioner<>(mapping);
        final Map<OperatorInstanceID, List<InputChannelStateHandle>> repartitioned = applyRepartitioner(stateAssignment.inputOperatorID, repartitioner, gateState, inputOperatorState.size(), stateAssignment.newParallelism);
        addToSubtasks(stateAssignment.inputChannelStates, repartitioned);
    }
}
use of org.apache.flink.runtime.jobgraph.OperatorInstanceID in project flink by apache.
the class StateAssignmentOperation method reDistributeResultSubpartitionStates.
/**
 * Redistributes the result subpartition state of the old execution to the subtasks of the new
 * one and records the result in {@code assignment.resultSubpartitionStates}.
 *
 * <p>Nothing happens when the assignment has no output state. If the old parallelism of the
 * output operator equals the parallelism of the execution job vertex, the per-subtask state
 * is taken over unchanged. Otherwise the state is repartitioned once per produced data set
 * using the rescale mappings of the matching output mapping.
 *
 * @param assignment holder of the old state, the target vertex and the result map
 */
public <I, T extends AbstractChannelStateHandle<I>> void reDistributeResultSubpartitionStates(TaskStateAssignment assignment) {
    if (!assignment.hasOutputState) {
        return;
    }
    checkForUnsupportedToplogyChanges(assignment.oldState, OperatorSubtaskState::getResultSubpartitionState, assignment.outputOperatorID);

    final OperatorState oldOutputState = assignment.oldState.get(assignment.outputOperatorID);
    final List<List<ResultSubpartitionStateHandle>> stateBySubtask =
            splitBySubtasks(oldOutputState, OperatorSubtaskState::getResultSubpartitionState);
    final ExecutionJobVertex vertex = assignment.executionJobVertex;
    final List<IntermediateDataSet> producedDataSets = vertex.getJobVertex().getProducedDataSets();

    // Same parallelism: no rescaling, so the old per-subtask state is simply reassigned.
    if (oldOutputState.getParallelism() == vertex.getParallelism()) {
        assignment.resultSubpartitionStates.putAll(
                toInstanceMap(assignment.outputOperatorID, stateBySubtask));
        return;
    }

    // Rescaling: repartition the old state separately for every produced data set, driven by
    // the rescale mappings of that partition's output mapping.
    final int partitionCount = producedDataSets.size();
    for (int partition = 0; partition < partitionCount; partition++) {
        final List<List<ResultSubpartitionStateHandle>> stateOfPartition;
        if (partitionCount == 1) {
            // A single output owns all state, so no filtering by partition index is applied.
            stateOfPartition = stateBySubtask;
        } else {
            stateOfPartition =
                    getPartitionState(
                            stateBySubtask, ResultSubpartitionInfo::getPartitionIdx, partition);
        }

        final MappingBasedRepartitioner<ResultSubpartitionStateHandle> byMapping =
                new MappingBasedRepartitioner<>(
                        assignment.getOutputMapping(partition).getRescaleMappings());
        final Map<OperatorInstanceID, List<ResultSubpartitionStateHandle>> redistributed =
                applyRepartitioner(
                        assignment.outputOperatorID,
                        byMapping,
                        stateOfPartition,
                        stateBySubtask.size(),
                        vertex.getParallelism());
        addToSubtasks(assignment.resultSubpartitionStates, redistributed);
    }
}
Aggregations