Search in sources:

Example 6 with OperatorStreamStateHandle

use of org.apache.flink.runtime.state.OperatorStreamStateHandle in project flink by apache.

In class StateAssignmentOperationTest, method testRepartitionBroadcastState.

/**
 * Rescaling check for an operator whose managed operator state is entirely in
 * {@code BROADCAST} mode.
 *
 * <p>Two subtask states (indices 0 and 1) are registered, each describing the state names
 * "t-5" and "t-6" with identical offsets but backed by separate byte handles. The populated
 * {@link OperatorState} is then handed to {@code verifyOneKindPartitionableStateRescale}.
 */
@Test
public void testRepartitionBroadcastState() {
    final OperatorStateHandle.Mode mode = OperatorStateHandle.Mode.BROADCAST;
    final OperatorID id = new OperatorID();
    final OperatorState state = new OperatorState(id, 2, 4);

    // State reported by subtask 0.
    final Map<String, OperatorStateHandle.StateMetaInfo> firstMeta = new HashMap<>(2);
    firstMeta.put("t-5", new OperatorStateHandle.StateMetaInfo(new long[] { 0, 10, 20 }, mode));
    firstMeta.put("t-6", new OperatorStateHandle.StateMetaInfo(new long[] { 30, 40, 50 }, mode));
    final ByteStreamStateHandle firstBytes = new ByteStreamStateHandle("test1", new byte[60]);
    final OperatorStateHandle firstHandle = new OperatorStreamStateHandle(firstMeta, firstBytes);
    state.putState(0, OperatorSubtaskState.builder().setManagedOperatorState(firstHandle).build());

    // State reported by subtask 1: same names and offsets, different backing handle.
    final Map<String, OperatorStateHandle.StateMetaInfo> secondMeta = new HashMap<>(2);
    secondMeta.put("t-5", new OperatorStateHandle.StateMetaInfo(new long[] { 0, 10, 20 }, mode));
    secondMeta.put("t-6", new OperatorStateHandle.StateMetaInfo(new long[] { 30, 40, 50 }, mode));
    final ByteStreamStateHandle secondBytes = new ByteStreamStateHandle("test2", new byte[60]);
    final OperatorStateHandle secondHandle = new OperatorStreamStateHandle(secondMeta, secondBytes);
    state.putState(1, OperatorSubtaskState.builder().setManagedOperatorState(secondHandle).build());

    verifyOneKindPartitionableStateRescale(state, id);
}
Also used : HashMap(java.util.HashMap) OperatorStreamStateHandle(org.apache.flink.runtime.state.OperatorStreamStateHandle) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) StateHandleDummyUtil.createNewOperatorStateHandle(org.apache.flink.runtime.checkpoint.StateHandleDummyUtil.createNewOperatorStateHandle) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) Test(org.junit.Test)

Example 7 with OperatorStreamStateHandle

use of org.apache.flink.runtime.state.OperatorStreamStateHandle in project flink by apache.

In class StateAssignmentOperationTest, method testRepartitionSplitDistributeStates.

/**
 * Rescaling check for an operator whose managed operator state is entirely in
 * {@code SPLIT_DISTRIBUTE} mode.
 *
 * <p>Subtask 0 contributes state name "t-1" and subtask 1 contributes state name "t-2", each
 * with two offsets. The populated {@link OperatorState} is then handed to
 * {@code verifyOneKindPartitionableStateRescale}.
 */
@Test
public void testRepartitionSplitDistributeStates() {
    final OperatorStateHandle.Mode mode = OperatorStateHandle.Mode.SPLIT_DISTRIBUTE;
    final OperatorID id = new OperatorID();
    final OperatorState state = new OperatorState(id, 2, 4);

    // State reported by subtask 0.
    final Map<String, OperatorStateHandle.StateMetaInfo> firstMeta = new HashMap<>(1);
    firstMeta.put("t-1", new OperatorStateHandle.StateMetaInfo(new long[] { 0, 10 }, mode));
    final ByteStreamStateHandle firstBytes = new ByteStreamStateHandle("test1", new byte[30]);
    final OperatorStateHandle firstHandle = new OperatorStreamStateHandle(firstMeta, firstBytes);
    state.putState(0, OperatorSubtaskState.builder().setManagedOperatorState(firstHandle).build());

    // State reported by subtask 1.
    final Map<String, OperatorStateHandle.StateMetaInfo> secondMeta = new HashMap<>(1);
    secondMeta.put("t-2", new OperatorStateHandle.StateMetaInfo(new long[] { 0, 15 }, mode));
    final ByteStreamStateHandle secondBytes = new ByteStreamStateHandle("test2", new byte[40]);
    final OperatorStateHandle secondHandle = new OperatorStreamStateHandle(secondMeta, secondBytes);
    state.putState(1, OperatorSubtaskState.builder().setManagedOperatorState(secondHandle).build());

    verifyOneKindPartitionableStateRescale(state, id);
}
Also used : HashMap(java.util.HashMap) OperatorStreamStateHandle(org.apache.flink.runtime.state.OperatorStreamStateHandle) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) StateHandleDummyUtil.createNewOperatorStateHandle(org.apache.flink.runtime.checkpoint.StateHandleDummyUtil.createNewOperatorStateHandle) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) Test(org.junit.Test)

Example 8 with OperatorStreamStateHandle

use of org.apache.flink.runtime.state.OperatorStreamStateHandle in project flink by apache.

In class StateHandleDummyUtil, method deepDummyCopy.

/**
 * Creates a deep copy of the given {@link OperatorStateHandle}.
 *
 * <p>The delegate state handle is cast to {@link ByteStreamStateHandle} and duplicated via
 * {@code cloneByteStreamStateHandle}. Every {@link OperatorStateHandle.StateMetaInfo} is rebuilt
 * around its own copy of the offsets array, so the returned handle does not alias the mutable
 * {@code long[]} held by {@code original}.
 *
 * @param original the handle to copy; may be {@code null}
 * @return a new {@link OperatorStreamStateHandle} with copied meta information and a copied
 *     delegate handle, or {@code null} if {@code original} is {@code null}
 * @throws ClassCastException if the delegate state handle of {@code original} is not a
 *     {@link ByteStreamStateHandle}
 */
public static OperatorStateHandle deepDummyCopy(OperatorStateHandle original) {
    if (original == null) {
        return null;
    }
    ByteStreamStateHandle stateHandleCopy = cloneByteStreamStateHandle((ByteStreamStateHandle) original.getDelegateStateHandle());
    Map<String, OperatorStateHandle.StateMetaInfo> offsets = original.getStateNameToPartitionOffsets();
    Map<String, OperatorStateHandle.StateMetaInfo> offsetsCopy = new HashMap<>(offsets.size());
    for (Map.Entry<String, OperatorStateHandle.StateMetaInfo> entry : offsets.entrySet()) {
        OperatorStateHandle.StateMetaInfo metaInfo = entry.getValue();
        // Copy the array itself; handing over the same long[] would leave the "copy" sharing
        // mutable state with the original. A null array is passed through unchanged so this
        // method does not introduce a failure the constructor would not raise on its own.
        long[] partitionOffsets = metaInfo.getOffsets();
        long[] partitionOffsetsCopy = partitionOffsets == null ? null : partitionOffsets.clone();
        OperatorStateHandle.StateMetaInfo metaInfoCopy = new OperatorStateHandle.StateMetaInfo(partitionOffsetsCopy, metaInfo.getDistributionMode());
        offsetsCopy.put(String.valueOf(entry.getKey()), metaInfoCopy);
    }
    return new OperatorStreamStateHandle(offsetsCopy, stateHandleCopy);
}
Also used : HashMap(java.util.HashMap) OperatorStreamStateHandle(org.apache.flink.runtime.state.OperatorStreamStateHandle) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) HashMap(java.util.HashMap) Map(java.util.Map)

Example 9 with OperatorStreamStateHandle

use of org.apache.flink.runtime.state.OperatorStreamStateHandle in project flink by apache.

In class RoundRobinOperatorStateRepartitioner, method repartitionBroadcastState.

/**
 * Assigns BROADCAST state to every new parallel instance.
 *
 * <p>For each entry of {@code mergeMapList} (one per new instance) and each state name, the
 * previous entry at position {@code instanceIndex % previousCount} is selected and its meta
 * information is registered under that name in the instance's merge map, keyed by the
 * underlying stream handle.
 *
 * @param broadcastState per state name, the previous (stream handle, meta info) pairs
 * @param mergeMapList one merge map per new parallel instance; updated in place
 */
private void repartitionBroadcastState(Map<String, List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>>> broadcastState, List<Map<StreamStateHandle, OperatorStateHandle>> mergeMapList) {
    final int targetParallelism = mergeMapList.size();
    for (int instance = 0; instance < targetParallelism; instance++) {
        final Map<StreamStateHandle, OperatorStateHandle> handlesOfInstance = mergeMapList.get(instance);
        for (Map.Entry<String, List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>>> namedState : broadcastState.entrySet()) {
            final List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>> previousEntries = namedState.getValue();
            // Wrap around the previous entries so every new instance receives one of them.
            final Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo> picked = previousEntries.get(instance % previousEntries.size());
            final StreamStateHandle delegate = picked.f0;

            // Reuse the handle already created for this stream handle, or create it on first use.
            OperatorStateHandle merged = handlesOfInstance.get(delegate);
            if (merged == null) {
                merged = new OperatorStreamStateHandle(new HashMap<>(broadcastState.size()), delegate);
                handlesOfInstance.put(delegate, merged);
            }
            merged.getStateNameToPartitionOffsets().put(namedState.getKey(), picked.f1);
        }
    }
}
Also used : OperatorStreamStateHandle(org.apache.flink.runtime.state.OperatorStreamStateHandle) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) OperatorStreamStateHandle(org.apache.flink.runtime.state.OperatorStreamStateHandle) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) List(java.util.List) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) EnumMap(java.util.EnumMap) HashMap(java.util.HashMap) Map(java.util.Map)

Example 10 with OperatorStreamStateHandle

use of org.apache.flink.runtime.state.OperatorStreamStateHandle in project flink by apache.

In class RoundRobinOperatorStateRepartitioner, method repartitionSplitState.

/**
 * Repartition SPLIT_DISTRIBUTE state.
 *
 * <p>For every state name, the partitions (one per offset) of all previous handles are counted
 * and handed out in order to the {@code newParallelism} instances: each instance receives
 * {@code totalPartitions / newParallelism} partitions, and the first
 * {@code totalPartitions % newParallelism} instances visited receive one more. The instance at
 * which the extra partitions ran out becomes the starting instance for the next state name.
 *
 * <p>Side effects on the arguments: each processed entry of {@code nameToDistributeState} has
 * its value set to {@code null}, and when {@code OPTIMIZE_MEMORY_USE} is set the {@code f1}
 * field of fully consumed tuples is cleared as well.
 *
 * @param nameToDistributeState per state name, the previous (stream handle, meta info) pairs;
 *     modified as described above
 * @param newParallelism number of instances to distribute to; used as a divisor, so a value of
 *     0 fails with an {@link ArithmeticException} as soon as a state name is processed
 * @param mergeMapList merge maps indexed by instance (indices {@code 0..newParallelism-1} are
 *     accessed); updated in place
 */
private void repartitionSplitState(Map<String, List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>>> nameToDistributeState, int newParallelism, List<Map<StreamStateHandle, OperatorStateHandle>> mergeMapList) {
    // Instance at which distribution starts; carried over from one state name to the next.
    int startParallelOp = 0;
    // Iterate all named states and repartition one named state at a time per iteration
    for (Map.Entry<String, List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>>> e : nameToDistributeState.entrySet()) {
        List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>> current = e.getValue();
        // Determine actual number of partitions for this named state
        // (the sum of the offset counts over all previous handles)
        int totalPartitions = 0;
        for (Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo> offsets : current) {
            totalPartitions += offsets.f1.getOffsets().length;
        }
        // Repartition the state across the parallel operator instances
        // lstIdx: index into 'current' of the handle currently being consumed
        int lstIdx = 0;
        // offsetIdx: first not-yet-assigned offset within that handle
        int offsetIdx = 0;
        int baseFraction = totalPartitions / newParallelism;
        int remainder = totalPartitions % newParallelism;
        int newStartParallelOp = startParallelOp;
        for (int i = 0; i < newParallelism; ++i) {
            // Preparation: calculate the actual index considering wrap around
            int parallelOpIdx = (i + startParallelOp) % newParallelism;
            // Now calculate the number of partitions we will assign to the parallel instance in
            // this round ...
            int numberOfPartitionsToAssign = baseFraction;
            // ... and distribute odd partitions while we still have some, one at a time
            if (remainder > 0) {
                ++numberOfPartitionsToAssign;
                --remainder;
            } else if (remainder == 0) {
                // We are out of odd partitions now and begin our next redistribution round with
                // the current parallel operator to ensure fair load balance.
                newStartParallelOp = parallelOpIdx;
                // Drop to -1 so that neither branch is taken again for this state name.
                --remainder;
            }
            while (numberOfPartitionsToAssign > 0) {
                Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo> handleWithOffsets = current.get(lstIdx);
                long[] offsets = handleWithOffsets.f1.getOffsets();
                // Offsets of the current handle that have not been assigned yet
                int remaining = offsets.length - offsetIdx;
                // Repartition offsets
                long[] offs;
                if (remaining > numberOfPartitionsToAssign) {
                    // The handle holds more than this instance needs: take a slice and stay on
                    // the same handle for the next instance.
                    offs = Arrays.copyOfRange(offsets, offsetIdx, offsetIdx + numberOfPartitionsToAssign);
                    offsetIdx += numberOfPartitionsToAssign;
                } else {
                    // The handle is used up by this instance: take everything that is left and
                    // advance to the next handle.
                    if (OPTIMIZE_MEMORY_USE) {
                        // Clear the reference to the meta info so it can be garbage collected
                        handleWithOffsets.f1 = null;
                    }
                    offs = Arrays.copyOfRange(offsets, offsetIdx, offsets.length);
                    offsetIdx = 0;
                    ++lstIdx;
                }
                // In the slice case 'remaining' exceeds the count still needed, so this goes
                // negative and ends the while loop.
                numberOfPartitionsToAssign -= remaining;
                // As a last step we merge partitions that use the same StreamStateHandle in a
                // single OperatorStateHandle
                Map<StreamStateHandle, OperatorStateHandle> mergeMap = mergeMapList.get(parallelOpIdx);
                OperatorStateHandle operatorStateHandle = mergeMap.get(handleWithOffsets.f0);
                if (operatorStateHandle == null) {
                    operatorStateHandle = new OperatorStreamStateHandle(new HashMap<>(nameToDistributeState.size()), handleWithOffsets.f0);
                    mergeMap.put(handleWithOffsets.f0, operatorStateHandle);
                }
                operatorStateHandle.getStateNameToPartitionOffsets().put(e.getKey(), new OperatorStateHandle.StateMetaInfo(offs, OperatorStateHandle.Mode.SPLIT_DISTRIBUTE));
            }
        }
        startParallelOp = newStartParallelOp;
        // Drop the processed list from the input map (presumably to release memory early,
        // like the OPTIMIZE_MEMORY_USE branch above; confirm against callers).
        e.setValue(null);
    }
}
Also used : HashMap(java.util.HashMap) OperatorStreamStateHandle(org.apache.flink.runtime.state.OperatorStreamStateHandle) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) OperatorStreamStateHandle(org.apache.flink.runtime.state.OperatorStreamStateHandle) Tuple2(org.apache.flink.api.java.tuple.Tuple2) ArrayList(java.util.ArrayList) List(java.util.List) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) EnumMap(java.util.EnumMap) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

OperatorStreamStateHandle (org.apache.flink.runtime.state.OperatorStreamStateHandle)19 OperatorStateHandle (org.apache.flink.runtime.state.OperatorStateHandle)16 HashMap (java.util.HashMap)14 ByteStreamStateHandle (org.apache.flink.runtime.state.memory.ByteStreamStateHandle)12 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)10 Test (org.junit.Test)7 ArrayList (java.util.ArrayList)6 OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState)6 StateHandleDummyUtil.createNewOperatorStateHandle (org.apache.flink.runtime.checkpoint.StateHandleDummyUtil.createNewOperatorStateHandle)6 Map (java.util.Map)5 TaskStateSnapshot (org.apache.flink.runtime.checkpoint.TaskStateSnapshot)5 StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle)5 List (java.util.List)4 KeyedStateHandle (org.apache.flink.runtime.state.KeyedStateHandle)4 EnumMap (java.util.EnumMap)3 Random (java.util.Random)3 JobID (org.apache.flink.api.common.JobID)3 JobManagerTaskRestore (org.apache.flink.runtime.checkpoint.JobManagerTaskRestore)3 KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange)3 CloseableRegistry (org.apache.flink.core.fs.CloseableRegistry)2