Search in sources :

Example 6 with OperatorStateHandle

use of org.apache.flink.runtime.state.OperatorStateHandle in project flink by apache.

Class SavepointV1Serializer, method deserializeSubtaskState.

/**
 * Reads a single {@link SubtaskState} from the given stream.
 *
 * <p>The stream is consumed in this order: one long (ignored), a counted list of stream state
 * handles, two counted lists of operator state handles, and finally two key-group state handles.
 * The three lists are each wrapped in a {@link ChainedStateHandle} before being handed to the
 * {@link SubtaskState} constructor.
 *
 * @param dis the stream to read from
 * @return the reconstructed subtask state
 * @throws IOException if reading from {@code dis} fails
 */
private static SubtaskState deserializeSubtaskState(DataInputStream dis) throws IOException {
    // The duration field has been removed from SubtaskState; the value is still present in the
    // stream, so it is read purely to advance past it.
    dis.readLong();

    final int legacyCount = dis.readInt();
    final List<StreamStateHandle> legacyHandles = new ArrayList<>(legacyCount);
    for (int remaining = legacyCount; remaining > 0; --remaining) {
        legacyHandles.add(deserializeStreamStateHandle(dis));
    }

    final int backendCount = dis.readInt();
    final List<OperatorStateHandle> backendOpHandles = new ArrayList<>(backendCount);
    for (int remaining = backendCount; remaining > 0; --remaining) {
        backendOpHandles.add(deserializeOperatorStateHandle(dis));
    }

    final int streamCount = dis.readInt();
    final List<OperatorStateHandle> streamOpHandles = new ArrayList<>(streamCount);
    for (int remaining = streamCount; remaining > 0; --remaining) {
        streamOpHandles.add(deserializeOperatorStateHandle(dis));
    }

    final KeyGroupsStateHandle keyedFromBackend = deserializeKeyGroupStateHandle(dis);
    final KeyGroupsStateHandle keyedFromStream = deserializeKeyGroupStateHandle(dis);

    final ChainedStateHandle<StreamStateHandle> legacyChain = new ChainedStateHandle<>(legacyHandles);
    final ChainedStateHandle<OperatorStateHandle> backendOpChain = new ChainedStateHandle<>(backendOpHandles);
    final ChainedStateHandle<OperatorStateHandle> streamOpChain = new ChainedStateHandle<>(streamOpHandles);

    return new SubtaskState(legacyChain, backendOpChain, streamOpChain, keyedFromBackend, keyedFromStream);
}
Also used : StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) SubtaskState(org.apache.flink.runtime.checkpoint.SubtaskState) ArrayList(java.util.ArrayList) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) ChainedStateHandle(org.apache.flink.runtime.state.ChainedStateHandle)

Example 7 with OperatorStateHandle

use of org.apache.flink.runtime.state.OperatorStateHandle in project flink by apache.

Class SavepointV1Serializer, method serializeSubtaskState.

/**
 * Writes a single {@link SubtaskState} to the given stream.
 *
 * <p>Layout: one long with the constant {@code -1}, the legacy operator state as a counted list,
 * the managed operator state as a counted list, the raw operator state as a counted list, then
 * the managed and the raw keyed state. A {@code null} chained handle is written as a list of
 * length zero.
 *
 * @param subtaskState the state to write
 * @param dos the stream to write to
 * @throws IOException if writing to {@code dos} fails
 */
private static void serializeSubtaskState(SubtaskState subtaskState, DataOutputStream dos) throws IOException {
    // Fixed placeholder value occupying the first long of the record.
    dos.writeLong(-1L);

    final ChainedStateHandle<StreamStateHandle> legacyChain = subtaskState.getLegacyOperatorState();
    final int legacyCount;
    if (legacyChain == null) {
        legacyCount = 0;
    } else {
        legacyCount = legacyChain.getLength();
    }
    dos.writeInt(legacyCount);
    for (int idx = 0; idx < legacyCount; idx++) {
        serializeStreamStateHandle(legacyChain.get(idx), dos);
    }

    final ChainedStateHandle<OperatorStateHandle> managedOpChain = subtaskState.getManagedOperatorState();
    final int managedCount;
    if (managedOpChain == null) {
        managedCount = 0;
    } else {
        managedCount = managedOpChain.getLength();
    }
    dos.writeInt(managedCount);
    for (int idx = 0; idx < managedCount; idx++) {
        serializeOperatorStateHandle(managedOpChain.get(idx), dos);
    }

    final ChainedStateHandle<OperatorStateHandle> rawOpChain = subtaskState.getRawOperatorState();
    final int rawCount;
    if (rawOpChain == null) {
        rawCount = 0;
    } else {
        rawCount = rawOpChain.getLength();
    }
    dos.writeInt(rawCount);
    for (int idx = 0; idx < rawCount; idx++) {
        serializeOperatorStateHandle(rawOpChain.get(idx), dos);
    }

    // Keyed state is written last: managed first, then raw.
    serializeKeyGroupStateHandle(subtaskState.getManagedKeyedState(), dos);
    serializeKeyGroupStateHandle(subtaskState.getRawKeyedState(), dos);
}
Also used : StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle)

Example 8 with OperatorStateHandle

use of org.apache.flink.runtime.state.OperatorStateHandle in project flink by apache.

Class StateAssignmentOperation, method assignTaskStatesToOperatorInstances.

/**
 * Distributes the state stored in {@code taskState} over the subtasks of
 * {@code executionJobVertex} and installs it as the initial state of each subtask's current
 * execution attempt.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Collect, over all old subtasks, the managed and raw operator states per chain index and
 *       the managed and raw keyed state handles.</li>
 *   <li>Pass the collected operator states of every chain index through
 *       {@code applyRepartitioner} using the round-robin repartitioner.</li>
 *   <li>For every new subtask, assemble a {@link TaskStateHandles} from the legacy operator
 *       state (only when the parallelism is unchanged), the operator states picked for that
 *       subtask, and the keyed state (reused as-is when the parallelism is unchanged, otherwise
 *       selected via {@code getKeyGroupsStateHandles} for the subtask's key-group range).</li>
 * </ol>
 *
 * @param taskState the stored state of the task, indexed by old subtask index
 * @param executionJobVertex the job vertex whose subtasks receive the state
 */
private static void assignTaskStatesToOperatorInstances(TaskState taskState, ExecutionJobVertex executionJobVertex) {
    final int oldParallelism = taskState.getParallelism();
    final int newParallelism = executionJobVertex.getParallelism();
    // One key-group range per new subtask; only consulted below when the parallelism changed.
    List<KeyGroupRange> keyGroupPartitions = createKeyGroupPartitions(executionJobVertex.getMaxParallelism(), newParallelism);
    final int chainLength = taskState.getChainLength();
    // operator chain idx -> list of the stored op states from all parallel instances for this chain idx
    // (slots stay null until collectParallelStatesByChainOperator sees a non-null state for that idx)
    @SuppressWarnings("unchecked") List<OperatorStateHandle>[] parallelOpStatesBackend = new List[chainLength];
    @SuppressWarnings("unchecked") List<OperatorStateHandle>[] parallelOpStatesStream = new List[chainLength];
    List<KeyGroupsStateHandle> parallelKeyedStatesBackend = new ArrayList<>(oldParallelism);
    List<KeyGroupsStateHandle> parallelKeyedStateStream = new ArrayList<>(oldParallelism);
    // Phase 1: gather the state of every old subtask; subtasks without stored state are skipped.
    for (int p = 0; p < oldParallelism; ++p) {
        SubtaskState subtaskState = taskState.getState(p);
        if (null != subtaskState) {
            collectParallelStatesByChainOperator(parallelOpStatesBackend, subtaskState.getManagedOperatorState());
            collectParallelStatesByChainOperator(parallelOpStatesStream, subtaskState.getRawOperatorState());
            KeyGroupsStateHandle keyedStateBackend = subtaskState.getManagedKeyedState();
            if (null != keyedStateBackend) {
                parallelKeyedStatesBackend.add(keyedStateBackend);
            }
            KeyGroupsStateHandle keyedStateStream = subtaskState.getRawKeyedState();
            if (null != keyedStateStream) {
                parallelKeyedStateStream.add(keyedStateStream);
            }
        }
    }
    // operator chain index -> lists with collected states (one collection for each parallel subtasks)
    @SuppressWarnings("unchecked") List<Collection<OperatorStateHandle>>[] partitionedParallelStatesBackend = new List[chainLength];
    @SuppressWarnings("unchecked") List<Collection<OperatorStateHandle>>[] partitionedParallelStatesStream = new List[chainLength];
    //TODO here we can employ different redistribution strategies for state, e.g. union state.
    // For now we only offer round robin as the default.
    OperatorStateRepartitioner opStateRepartitioner = RoundRobinOperatorStateRepartitioner.INSTANCE;
    // Phase 2: redistribute operator state per chain index; applyRepartitioner returns null for
    // chain indices that collected no state, and that null is stored in the array as-is.
    for (int chainIdx = 0; chainIdx < chainLength; ++chainIdx) {
        List<OperatorStateHandle> chainOpParallelStatesBackend = parallelOpStatesBackend[chainIdx];
        List<OperatorStateHandle> chainOpParallelStatesStream = parallelOpStatesStream[chainIdx];
        partitionedParallelStatesBackend[chainIdx] = applyRepartitioner(opStateRepartitioner, chainOpParallelStatesBackend, oldParallelism, newParallelism);
        partitionedParallelStatesStream[chainIdx] = applyRepartitioner(opStateRepartitioner, chainOpParallelStatesStream, oldParallelism, newParallelism);
    }
    // Phase 3: build and install the initial state of every new subtask.
    for (int subTaskIdx = 0; subTaskIdx < newParallelism; ++subTaskIdx) {
        // non-partitioned state: only handed over when the parallelism is unchanged, otherwise left null
        ChainedStateHandle<StreamStateHandle> nonPartitionableState = null;
        if (oldParallelism == newParallelism) {
            if (taskState.getState(subTaskIdx) != null) {
                nonPartitionableState = taskState.getState(subTaskIdx).getLegacyOperatorState();
            }
        }
        // partitionable state
        @SuppressWarnings("unchecked") Collection<OperatorStateHandle>[] iab = new Collection[chainLength];
        @SuppressWarnings("unchecked") Collection<OperatorStateHandle>[] ias = new Collection[chainLength];
        // Arrays.asList yields fixed-size lists backed by the arrays above, pre-filled with null
        // for every chain index; set(...) below fills in the entries that have state.
        List<Collection<OperatorStateHandle>> operatorStateFromBackend = Arrays.asList(iab);
        List<Collection<OperatorStateHandle>> operatorStateFromStream = Arrays.asList(ias);
        for (int chainIdx = 0; chainIdx < partitionedParallelStatesBackend.length; ++chainIdx) {
            List<Collection<OperatorStateHandle>> redistributedOpStateBackend = partitionedParallelStatesBackend[chainIdx];
            List<Collection<OperatorStateHandle>> redistributedOpStateStream = partitionedParallelStatesStream[chainIdx];
            // NOTE(review): with unchanged parallelism and no broadcast state, applyRepartitioner
            // returns one entry per *collected* handle, and collectParallelStatesByChainOperator
            // skips null states. If some old subtask had no state for this chain index, the list
            // may be shorter than newParallelism or misaligned with subTaskIdx — confirm that
            // get(subTaskIdx) is always in range and picks the intended subtask's state.
            if (redistributedOpStateBackend != null) {
                operatorStateFromBackend.set(chainIdx, redistributedOpStateBackend.get(subTaskIdx));
            }
            if (redistributedOpStateStream != null) {
                operatorStateFromStream.set(chainIdx, redistributedOpStateStream.get(subTaskIdx));
            }
        }
        Execution currentExecutionAttempt = executionJobVertex.getTaskVertices()[subTaskIdx].getCurrentExecutionAttempt();
        List<KeyGroupsStateHandle> newKeyedStatesBackend;
        List<KeyGroupsStateHandle> newKeyedStateStream;
        if (oldParallelism == newParallelism) {
            // Unchanged parallelism: each subtask keeps its own keyed state handle, wrapped in a
            // singleton list; missing state is passed on as null.
            SubtaskState subtaskState = taskState.getState(subTaskIdx);
            if (subtaskState != null) {
                KeyGroupsStateHandle oldKeyedStatesBackend = subtaskState.getManagedKeyedState();
                KeyGroupsStateHandle oldKeyedStatesStream = subtaskState.getRawKeyedState();
                newKeyedStatesBackend = oldKeyedStatesBackend != null ? Collections.singletonList(oldKeyedStatesBackend) : null;
                newKeyedStateStream = oldKeyedStatesStream != null ? Collections.singletonList(oldKeyedStatesStream) : null;
            } else {
                newKeyedStatesBackend = null;
                newKeyedStateStream = null;
            }
        } else {
            // Changed parallelism: select keyed state handles for this subtask's key-group range
            // out of all collected handles.
            KeyGroupRange subtaskKeyGroupIds = keyGroupPartitions.get(subTaskIdx);
            newKeyedStatesBackend = getKeyGroupsStateHandles(parallelKeyedStatesBackend, subtaskKeyGroupIds);
            newKeyedStateStream = getKeyGroupsStateHandles(parallelKeyedStateStream, subtaskKeyGroupIds);
        }
        TaskStateHandles taskStateHandles = new TaskStateHandles(nonPartitionableState, operatorStateFromBackend, operatorStateFromStream, newKeyedStatesBackend, newKeyedStateStream);
        currentExecutionAttempt.setInitialState(taskStateHandles);
    }
}
Also used : KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) ArrayList(java.util.ArrayList) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) TaskStateHandles(org.apache.flink.runtime.state.TaskStateHandles) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) Execution(org.apache.flink.runtime.executiongraph.Execution) Collection(java.util.Collection) ArrayList(java.util.ArrayList) List(java.util.List) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle)

Example 9 with OperatorStateHandle

use of org.apache.flink.runtime.state.OperatorStateHandle in project flink by apache.

Class StateAssignmentOperation, method applyRepartitioner.

/**
 * Produces the per-subtask operator state collections for one operator of the chain.
 *
 * <p>If the parallelism changed, or if any of the given handles carries state whose distribution
 * mode is {@code BROADCAST}, the work is delegated to the repartitioner. Otherwise every handle
 * is wrapped in its own singleton collection, in input order, without repartitioning.
 *
 * @param opStateRepartitioner the repartitioner to delegate to
 * @param chainOpParallelStates the operator state handles collected for this chain operator, or
 *        {@code null} if there are none
 * @param oldParallelism the parallelism the state was written with
 * @param newParallelism the parallelism the state is restored with
 * @return one collection of handles per list entry, or {@code null} if
 *         {@code chainOpParallelStates} is {@code null}
 */
private static List<Collection<OperatorStateHandle>> applyRepartitioner(OperatorStateRepartitioner opStateRepartitioner, List<OperatorStateHandle> chainOpParallelStates, int oldParallelism, int newParallelism) {
    // Nothing collected for this chain operator.
    if (chainOpParallelStates == null) {
        return null;
    }

    // A change in parallelism always requires redistribution.
    if (oldParallelism != newParallelism) {
        return opStateRepartitioner.repartitionState(chainOpParallelStates, newParallelism);
    }

    // Same parallelism: hand every state back unchanged, unless broadcast state shows up.
    final List<Collection<OperatorStateHandle>> passThrough = new ArrayList<>(newParallelism);
    for (OperatorStateHandle handle : chainOpParallelStates) {
        final Map<String, OperatorStateHandle.StateMetaInfo> offsetsByStateName = handle.getStateNameToPartitionOffsets();
        for (OperatorStateHandle.StateMetaInfo info : offsetsByStateName.values()) {
            // Broadcast state rules out the shortcut; fall back to full repartitioning and
            // discard whatever has been gathered in passThrough so far.
            if (OperatorStateHandle.Mode.BROADCAST.equals(info.getDistributionMode())) {
                return opStateRepartitioner.repartitionState(chainOpParallelStates, newParallelism);
            }
        }
        passThrough.add(Collections.singletonList(handle));
    }
    return passThrough;
}
Also used : ArrayList(java.util.ArrayList) Collection(java.util.Collection) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle)

Example 10 with OperatorStateHandle

use of org.apache.flink.runtime.state.OperatorStateHandle in project flink by apache.

Class StateAssignmentOperation, method collectParallelStatesByChainOperator.

/**
 * Adds the operator states of one chained state handle to the per-operator collection lists.
 *
 * <p>For every index of {@code chainParallelOpStates}, the state at the same index of
 * {@code chainOpState} is appended to the list stored at that index, if that state is not
 * {@code null}. Lists are created on demand. A {@code null} {@code chainOpState} is a no-op.
 *
 * @param chainParallelOpStates array = chain ops, array[idx] = parallel states for this chain op
 *        (entries may be {@code null} until the first state for that index is added)
 * @param chainOpState the chained operator state to take the states from, may be {@code null}
 * @throws IllegalStateException presumably raised by {@code Preconditions.checkState} when
 *         {@code chainOpState} is shorter than {@code chainParallelOpStates} — confirm against
 *         the Preconditions implementation
 */
private static void collectParallelStatesByChainOperator(List<OperatorStateHandle>[] chainParallelOpStates, ChainedStateHandle<OperatorStateHandle> chainOpState) {
    if (chainOpState == null) {
        return;
    }

    final int handleLength = chainOpState.getLength();
    final int slotCount = chainParallelOpStates.length;
    // Every slot index is read from chainOpState below, so it must be at least as long.
    Preconditions.checkState(handleLength >= slotCount, "Found more states than operators in the chain. Chain length: " + handleLength + ", States: " + slotCount);

    for (int pos = 0; pos < slotCount; pos++) {
        final OperatorStateHandle handle = chainOpState.get(pos);
        if (handle == null) {
            continue;
        }
        // Lazily create the list for this chain position on first use.
        if (chainParallelOpStates[pos] == null) {
            chainParallelOpStates[pos] = new ArrayList<>();
        }
        chainParallelOpStates[pos].add(handle);
    }
}
Also used : OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle)

Aggregations

OperatorStateHandle (org.apache.flink.runtime.state.OperatorStateHandle)30 KeyGroupsStateHandle (org.apache.flink.runtime.state.KeyGroupsStateHandle)20 StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle)18 ArrayList (java.util.ArrayList)15 HashMap (java.util.HashMap)11 Collection (java.util.Collection)8 ByteStreamStateHandle (org.apache.flink.runtime.state.memory.ByteStreamStateHandle)8 Test (org.junit.Test)8 JobID (org.apache.flink.api.common.JobID)7 KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange)7 List (java.util.List)6 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)6 AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint)6 DeclineCheckpoint (org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint)6 Map (java.util.Map)5 CloseableRegistry (org.apache.flink.core.fs.CloseableRegistry)5 ChainedStateHandle (org.apache.flink.runtime.state.ChainedStateHandle)5 TaskStateHandles (org.apache.flink.runtime.state.TaskStateHandles)5 Configuration (org.apache.flink.configuration.Configuration)4 CheckpointOptions (org.apache.flink.runtime.checkpoint.CheckpointOptions)4