
Example 91 with List

Use of java.util.List in project groovy by apache.

The class Node, method build().

public void build(final GroovyObject builder, final Map namespaceMap, final Map<String, String> namespaceTagHints) {
    if (this.replacementNodeStack.empty()) {
        final Closure rest = new Closure(null) {

            public Object doCall(final Object o) {
                buildChildren(builder, namespaceMap, namespaceTagHints);
                return null;
            }
        };
        if (this.namespaceURI.length() == 0 && this.attributeNamespaces.isEmpty()) {
            builder.invokeMethod(this.name, new Object[] { this.attributes, rest });
        } else {
            final List newTags = new LinkedList();
            builder.getProperty("mkp");
            final List namespaces = (List) builder.invokeMethod("getNamespaces", new Object[] {});
            final Map current = (Map) namespaces.get(0);
            final Map pending = (Map) namespaces.get(1);
            if (this.attributeNamespaces.isEmpty()) {
                builder.getProperty(getTagFor(this.namespaceURI, current, pending, namespaceMap, namespaceTagHints, newTags, builder));
                builder.invokeMethod(this.name, new Object[] { this.attributes, rest });
            } else {
                final Map attributesWithNamespaces = new HashMap(this.attributes);
                for (Object key : this.attributes.keySet()) {
                    final Object attributeNamespaceURI = this.attributeNamespaces.get(key);
                    if (attributeNamespaceURI != null) {
                        attributesWithNamespaces.put(getTagFor(attributeNamespaceURI, current, pending, namespaceMap, namespaceTagHints, newTags, builder) + "$" + key, attributesWithNamespaces.remove(key));
                    }
                }
                builder.getProperty(getTagFor(this.namespaceURI, current, pending, namespaceMap, namespaceTagHints, newTags, builder));
                builder.invokeMethod(this.name, new Object[] { attributesWithNamespaces, rest });
            }
            // remove the new tags we had to define for this element
            if (!newTags.isEmpty()) {
                final Iterator iter = newTags.iterator();
                do {
                    pending.remove(iter.next());
                } while (iter.hasNext());
            }
        }
    } else {
        ((ReplacementNode) this.replacementNodeStack.peek()).build(builder, namespaceMap, namespaceTagHints);
    }
}
Also used: Closure (groovy.lang.Closure), HashMap (java.util.HashMap), Iterator (java.util.Iterator), GroovyObject (groovy.lang.GroovyObject), ArrayList (java.util.ArrayList), List (java.util.List), LinkedList (java.util.LinkedList), Map (java.util.Map)
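
The namespace branch above leans on getTagFor(...), which resolves a namespace URI to a tag prefix and records every prefix it has to mint in newTags, so the loop at the end can remove those temporary bindings from the pending scope again. Below is a minimal sketch of that resolution logic; it assumes prefix-to-URI maps and a made-up "tagN" naming scheme, while the real method also consults namespaceMap and namespaceTagHints.

import java.util.List;
import java.util.Map;

final class PrefixResolution {

    // Hypothetical, simplified stand-in for the getTagFor(...) used in build():
    // reuse a prefix already bound to the URI, otherwise mint a new one, bind it
    // in the pending scope, and remember it in newTags for later cleanup.
    static String getTagFor(Object namespaceURI, Map<String, Object> current,
                            Map<String, Object> pending, List<String> newTags) {
        for (Map.Entry<String, Object> entry : current.entrySet()) {
            if (namespaceURI.equals(entry.getValue())) {
                return entry.getKey();
            }
        }
        for (Map.Entry<String, Object> entry : pending.entrySet()) {
            if (namespaceURI.equals(entry.getValue())) {
                return entry.getKey();
            }
        }
        String tag = "tag" + newTags.size();   // assumed naming scheme
        pending.put(tag, namespaceURI);
        newTags.add(tag);                      // lets build() undo the binding afterwards
        return tag;
    }
}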

Example 92 with List

Use of java.util.List in project flink by apache.

The class StateAssignmentOperation, method assignTaskStatesToOperatorInstances().

private static void assignTaskStatesToOperatorInstances(TaskState taskState, ExecutionJobVertex executionJobVertex) {
    final int oldParallelism = taskState.getParallelism();
    final int newParallelism = executionJobVertex.getParallelism();
    List<KeyGroupRange> keyGroupPartitions = createKeyGroupPartitions(executionJobVertex.getMaxParallelism(), newParallelism);
    final int chainLength = taskState.getChainLength();
    // operator chain idx -> list of the stored op states from all parallel instances for this chain idx
    @SuppressWarnings("unchecked") List<OperatorStateHandle>[] parallelOpStatesBackend = new List[chainLength];
    @SuppressWarnings("unchecked") List<OperatorStateHandle>[] parallelOpStatesStream = new List[chainLength];
    List<KeyGroupsStateHandle> parallelKeyedStatesBackend = new ArrayList<>(oldParallelism);
    List<KeyGroupsStateHandle> parallelKeyedStateStream = new ArrayList<>(oldParallelism);
    for (int p = 0; p < oldParallelism; ++p) {
        SubtaskState subtaskState = taskState.getState(p);
        if (null != subtaskState) {
            collectParallelStatesByChainOperator(parallelOpStatesBackend, subtaskState.getManagedOperatorState());
            collectParallelStatesByChainOperator(parallelOpStatesStream, subtaskState.getRawOperatorState());
            KeyGroupsStateHandle keyedStateBackend = subtaskState.getManagedKeyedState();
            if (null != keyedStateBackend) {
                parallelKeyedStatesBackend.add(keyedStateBackend);
            }
            KeyGroupsStateHandle keyedStateStream = subtaskState.getRawKeyedState();
            if (null != keyedStateStream) {
                parallelKeyedStateStream.add(keyedStateStream);
            }
        }
    }
    // operator chain index -> lists with collected states (one collection for each parallel subtask)
    @SuppressWarnings("unchecked") List<Collection<OperatorStateHandle>>[] partitionedParallelStatesBackend = new List[chainLength];
    @SuppressWarnings("unchecked") List<Collection<OperatorStateHandle>>[] partitionedParallelStatesStream = new List[chainLength];
    // TODO: here we can employ different redistribution strategies for state, e.g. union state.
    // For now we only offer round robin as the default.
    OperatorStateRepartitioner opStateRepartitioner = RoundRobinOperatorStateRepartitioner.INSTANCE;
    for (int chainIdx = 0; chainIdx < chainLength; ++chainIdx) {
        List<OperatorStateHandle> chainOpParallelStatesBackend = parallelOpStatesBackend[chainIdx];
        List<OperatorStateHandle> chainOpParallelStatesStream = parallelOpStatesStream[chainIdx];
        partitionedParallelStatesBackend[chainIdx] = applyRepartitioner(opStateRepartitioner, chainOpParallelStatesBackend, oldParallelism, newParallelism);
        partitionedParallelStatesStream[chainIdx] = applyRepartitioner(opStateRepartitioner, chainOpParallelStatesStream, oldParallelism, newParallelism);
    }
    for (int subTaskIdx = 0; subTaskIdx < newParallelism; ++subTaskIdx) {
        // non-partitioned state
        ChainedStateHandle<StreamStateHandle> nonPartitionableState = null;
        if (oldParallelism == newParallelism) {
            if (taskState.getState(subTaskIdx) != null) {
                nonPartitionableState = taskState.getState(subTaskIdx).getLegacyOperatorState();
            }
        }
        // partitionable state
        @SuppressWarnings("unchecked") Collection<OperatorStateHandle>[] iab = new Collection[chainLength];
        @SuppressWarnings("unchecked") Collection<OperatorStateHandle>[] ias = new Collection[chainLength];
        List<Collection<OperatorStateHandle>> operatorStateFromBackend = Arrays.asList(iab);
        List<Collection<OperatorStateHandle>> operatorStateFromStream = Arrays.asList(ias);
        for (int chainIdx = 0; chainIdx < partitionedParallelStatesBackend.length; ++chainIdx) {
            List<Collection<OperatorStateHandle>> redistributedOpStateBackend = partitionedParallelStatesBackend[chainIdx];
            List<Collection<OperatorStateHandle>> redistributedOpStateStream = partitionedParallelStatesStream[chainIdx];
            if (redistributedOpStateBackend != null) {
                operatorStateFromBackend.set(chainIdx, redistributedOpStateBackend.get(subTaskIdx));
            }
            if (redistributedOpStateStream != null) {
                operatorStateFromStream.set(chainIdx, redistributedOpStateStream.get(subTaskIdx));
            }
        }
        Execution currentExecutionAttempt = executionJobVertex.getTaskVertices()[subTaskIdx].getCurrentExecutionAttempt();
        List<KeyGroupsStateHandle> newKeyedStatesBackend;
        List<KeyGroupsStateHandle> newKeyedStateStream;
        if (oldParallelism == newParallelism) {
            SubtaskState subtaskState = taskState.getState(subTaskIdx);
            if (subtaskState != null) {
                KeyGroupsStateHandle oldKeyedStatesBackend = subtaskState.getManagedKeyedState();
                KeyGroupsStateHandle oldKeyedStatesStream = subtaskState.getRawKeyedState();
                newKeyedStatesBackend = oldKeyedStatesBackend != null ? Collections.singletonList(oldKeyedStatesBackend) : null;
                newKeyedStateStream = oldKeyedStatesStream != null ? Collections.singletonList(oldKeyedStatesStream) : null;
            } else {
                newKeyedStatesBackend = null;
                newKeyedStateStream = null;
            }
        } else {
            KeyGroupRange subtaskKeyGroupIds = keyGroupPartitions.get(subTaskIdx);
            newKeyedStatesBackend = getKeyGroupsStateHandles(parallelKeyedStatesBackend, subtaskKeyGroupIds);
            newKeyedStateStream = getKeyGroupsStateHandles(parallelKeyedStateStream, subtaskKeyGroupIds);
        }
        TaskStateHandles taskStateHandles = new TaskStateHandles(nonPartitionableState, operatorStateFromBackend, operatorStateFromStream, newKeyedStatesBackend, newKeyedStateStream);
        currentExecutionAttempt.setInitialState(taskStateHandles);
    }
}
Also used: KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange), ArrayList (java.util.ArrayList), KeyGroupsStateHandle (org.apache.flink.runtime.state.KeyGroupsStateHandle), TaskStateHandles (org.apache.flink.runtime.state.TaskStateHandles), StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle), Execution (org.apache.flink.runtime.executiongraph.Execution), Collection (java.util.Collection), List (java.util.List), OperatorStateHandle (org.apache.flink.runtime.state.OperatorStateHandle)
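
The assignment starts from createKeyGroupPartitions(maxParallelism, newParallelism), which cuts the key-group space into one contiguous range per new subtask. A minimal sketch of such a partitioning, assuming an even split with the remainder spread over the first ranges (Range stands in for Flink's KeyGroupRange, and this is not necessarily Flink's exact formula):

import java.util.ArrayList;
import java.util.List;

final class KeyGroupPartitioning {

    // Stand-in for org.apache.flink.runtime.state.KeyGroupRange.
    record Range(int startKeyGroup, int endKeyGroup) { }

    // Cut [0, maxParallelism) into `parallelism` contiguous ranges; the first
    // (maxParallelism % parallelism) ranges are one key group larger.
    static List<Range> createKeyGroupPartitions(int maxParallelism, int parallelism) {
        List<Range> partitions = new ArrayList<>(parallelism);
        int base = maxParallelism / parallelism;
        int remainder = maxParallelism % parallelism;
        int start = 0;
        for (int i = 0; i < parallelism; i++) {
            int size = base + (i < remainder ? 1 : 0);
            partitions.add(new Range(start, start + size - 1));
            start += size;
        }
        return partitions;
    }

    public static void main(String[] args) {
        // 128 key groups over 3 subtasks: ranges [0..42], [43..85], [86..127]
        System.out.println(createKeyGroupPartitions(128, 3));
    }
}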

Example 93 with List

Use of java.util.List in project flink by apache.

The class RoundRobinOperatorStateRepartitioner, method repartition().

/**
 * Repartition all named states.
 */
private List<Map<StreamStateHandle, OperatorStateHandle>> repartition(GroupByStateNameResults nameToStateByMode, int parallelism) {
    // We will use this to merge w.r.t. StreamStateHandles for each parallel subtask inside the maps
    List<Map<StreamStateHandle, OperatorStateHandle>> mergeMapList = new ArrayList<>(parallelism);
    // Initialize
    for (int i = 0; i < parallelism; ++i) {
        mergeMapList.add(new HashMap<StreamStateHandle, OperatorStateHandle>());
    }
    // Start with the state handles we distribute round robin by splitting by offsets
    Map<String, List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>>> distributeNameToState = nameToStateByMode.getByMode(OperatorStateHandle.Mode.SPLIT_DISTRIBUTE);
    int startParallelOp = 0;
    // Iterate all named states and repartition one named state at a time per iteration
    for (Map.Entry<String, List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>>> e : distributeNameToState.entrySet()) {
        List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>> current = e.getValue();
        // Determine actual number of partitions for this named state
        int totalPartitions = 0;
        for (Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo> offsets : current) {
            totalPartitions += offsets.f1.getOffsets().length;
        }
        // Repartition the state across the parallel operator instances
        int lstIdx = 0;
        int offsetIdx = 0;
        int baseFraction = totalPartitions / parallelism;
        int remainder = totalPartitions % parallelism;
        int newStartParallelOp = startParallelOp;
        for (int i = 0; i < parallelism; ++i) {
            // Preparation: calculate the actual index considering wrap around
            int parallelOpIdx = (i + startParallelOp) % parallelism;
            // Now calculate the number of partitions we will assign to the parallel instance in this round ...
            int numberOfPartitionsToAssign = baseFraction;
            // ... and distribute odd partitions while we still have some, one at a time
            if (remainder > 0) {
                ++numberOfPartitionsToAssign;
                --remainder;
            } else if (remainder == 0) {
                // We are out of odd partitions now and begin our next redistribution round with the current
                // parallel operator to ensure fair load balance
                newStartParallelOp = parallelOpIdx;
                --remainder;
            }
            // Now start collecting the partitions for the parallel instance into this list
            List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>> parallelOperatorState = new ArrayList<>();
            while (numberOfPartitionsToAssign > 0) {
                Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo> handleWithOffsets = current.get(lstIdx);
                long[] offsets = handleWithOffsets.f1.getOffsets();
                int remaining = offsets.length - offsetIdx;
                // Repartition offsets
                long[] offs;
                if (remaining > numberOfPartitionsToAssign) {
                    offs = Arrays.copyOfRange(offsets, offsetIdx, offsetIdx + numberOfPartitionsToAssign);
                    offsetIdx += numberOfPartitionsToAssign;
                } else {
                    if (OPTIMIZE_MEMORY_USE) {
                        // GC
                        handleWithOffsets.f1 = null;
                    }
                    offs = Arrays.copyOfRange(offsets, offsetIdx, offsets.length);
                    offsetIdx = 0;
                    ++lstIdx;
                }
                parallelOperatorState.add(new Tuple2<>(handleWithOffsets.f0, new OperatorStateHandle.StateMetaInfo(offs, OperatorStateHandle.Mode.SPLIT_DISTRIBUTE)));
                numberOfPartitionsToAssign -= remaining;
                // As a last step we merge partitions that use the same StreamStateHandle in a single
                // OperatorStateHandle
                Map<StreamStateHandle, OperatorStateHandle> mergeMap = mergeMapList.get(parallelOpIdx);
                OperatorStateHandle operatorStateHandle = mergeMap.get(handleWithOffsets.f0);
                if (operatorStateHandle == null) {
                    operatorStateHandle = new OperatorStateHandle(new HashMap<String, OperatorStateHandle.StateMetaInfo>(), handleWithOffsets.f0);
                    mergeMap.put(handleWithOffsets.f0, operatorStateHandle);
                }
                operatorStateHandle.getStateNameToPartitionOffsets().put(e.getKey(), new OperatorStateHandle.StateMetaInfo(offs, OperatorStateHandle.Mode.SPLIT_DISTRIBUTE));
            }
        }
        startParallelOp = newStartParallelOp;
        e.setValue(null);
    }
    // Now we also add the state handles marked for broadcast to all parallel instances
    Map<String, List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>>> broadcastNameToState = nameToStateByMode.getByMode(OperatorStateHandle.Mode.BROADCAST);
    for (int i = 0; i < parallelism; ++i) {
        Map<StreamStateHandle, OperatorStateHandle> mergeMap = mergeMapList.get(i);
        for (Map.Entry<String, List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>>> e : broadcastNameToState.entrySet()) {
            List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>> current = e.getValue();
            for (Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo> handleWithMetaInfo : current) {
                OperatorStateHandle operatorStateHandle = mergeMap.get(handleWithMetaInfo.f0);
                if (operatorStateHandle == null) {
                    operatorStateHandle = new OperatorStateHandle(new HashMap<String, OperatorStateHandle.StateMetaInfo>(), handleWithMetaInfo.f0);
                    mergeMap.put(handleWithMetaInfo.f0, operatorStateHandle);
                }
                operatorStateHandle.getStateNameToPartitionOffsets().put(e.getKey(), handleWithMetaInfo.f1);
            }
        }
    }
    return mergeMapList;
}
Also used: HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), List (java.util.List), OperatorStateHandle (org.apache.flink.runtime.state.OperatorStateHandle), EnumMap (java.util.EnumMap), Map (java.util.Map)
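
The heart of the SPLIT_DISTRIBUTE loop above is the baseFraction/remainder arithmetic combined with the moving start offset: every parallel instance receives the base share, the first `remainder` instances of the round (counted from startParallelOp, wrapping around) receive one extra partition, and the next named state starts its round where the extras ran out. A self-contained illustration of just that arithmetic:

import java.util.Arrays;

final class RoundRobinShares {

    // How many partitions each of `parallelism` instances receives when
    // `totalPartitions` partitions are dealt out round robin, starting the round
    // at instance `startParallelOp` (mirrors the baseFraction/remainder logic above).
    static int[] partitionCounts(int totalPartitions, int parallelism, int startParallelOp) {
        int[] counts = new int[parallelism];
        int base = totalPartitions / parallelism;
        int remainder = totalPartitions % parallelism;
        for (int i = 0; i < parallelism; i++) {
            int idx = (i + startParallelOp) % parallelism;
            counts[idx] = base + (remainder > 0 ? 1 : 0);
            if (remainder > 0) {
                remainder--;
            }
        }
        return counts;
    }

    public static void main(String[] args) {
        // 7 partitions over 3 instances, round starting at instance 1 -> [2, 3, 2]
        System.out.println(Arrays.toString(partitionCounts(7, 3, 1)));
    }
}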

Example 94 with List

Use of java.util.List in project flink by apache.

The class ExecutionJobVertex, method computeLocalInputSplitsPerTask().

// --------------------------------------------------------------------------------------------
//  Static / pre-assigned input splits
// --------------------------------------------------------------------------------------------
private List<LocatableInputSplit>[] computeLocalInputSplitsPerTask(InputSplit[] splits) throws JobException {
    final int numSubTasks = getParallelism();
    // sanity check
    if (numSubTasks > splits.length) {
        throw new JobException("Strictly local assignment requires at least as many splits as subtasks.");
    }
    // group the splits by host while preserving order per host
    Map<String, List<LocatableInputSplit>> splitsByHost = new HashMap<String, List<LocatableInputSplit>>();
    for (InputSplit split : splits) {
        // check that split has exactly one local host
        if (!(split instanceof LocatableInputSplit)) {
            throw new JobException("Invalid InputSplit type " + split.getClass().getCanonicalName() + ". " + "Strictly local assignment requires LocatableInputSplit");
        }
        LocatableInputSplit lis = (LocatableInputSplit) split;
        if (lis.getHostnames() == null) {
            throw new JobException("LocatableInputSplit has no host information. " + "Strictly local assignment requires exactly one hostname for each LocatableInputSplit.");
        } else if (lis.getHostnames().length != 1) {
            throw new JobException("Strictly local assignment requires exactly one hostname for each LocatableInputSplit.");
        }
        String hostName = lis.getHostnames()[0];
        if (hostName == null) {
            throw new JobException("For strictly local input split assignment, no null host names are allowed.");
        }
        List<LocatableInputSplit> hostSplits = splitsByHost.get(hostName);
        if (hostSplits == null) {
            hostSplits = new ArrayList<LocatableInputSplit>();
            splitsByHost.put(hostName, hostSplits);
        }
        hostSplits.add(lis);
    }
    int numHosts = splitsByHost.size();
    if (numSubTasks < numHosts) {
        throw new JobException("Strictly local split assignment requires at least as " + "many parallel subtasks as distinct split hosts. Please increase the parallelism " + "of DataSource " + this.getJobVertex().getName() + " to at least " + numHosts + ".");
    }
    // get list of hosts in deterministic order
    List<String> hosts = new ArrayList<String>(splitsByHost.keySet());
    Collections.sort(hosts);
    @SuppressWarnings("unchecked") List<LocatableInputSplit>[] subTaskSplitAssignment = (List<LocatableInputSplit>[]) new List<?>[numSubTasks];
    final int subtasksPerHost = numSubTasks / numHosts;
    final int hostsWithOneMore = numSubTasks % numHosts;
    int subtaskNum = 0;
    // go over all hosts and distribute each host's input splits over the subtasks
    for (int hostNum = 0; hostNum < numHosts; hostNum++) {
        String host = hosts.get(hostNum);
        List<LocatableInputSplit> splitsOnHost = splitsByHost.get(host);
        int numSplitsOnHost = splitsOnHost.size();
        // the number of subtasks to split this over.
        // NOTE: if the host has few splits, some subtasks will not get anything.
        int subtasks = Math.min(numSplitsOnHost, hostNum < hostsWithOneMore ? subtasksPerHost + 1 : subtasksPerHost);
        int splitsPerSubtask = numSplitsOnHost / subtasks;
        int subtasksWithOneMore = numSplitsOnHost % subtasks;
        int splitnum = 0;
        // go over the subtasks and grab a subrange of the input splits
        for (int i = 0; i < subtasks; i++) {
            int numSplitsForSubtask = (i < subtasksWithOneMore ? splitsPerSubtask + 1 : splitsPerSubtask);
            List<LocatableInputSplit> splitList;
            if (numSplitsForSubtask == numSplitsOnHost) {
                splitList = splitsOnHost;
            } else {
                splitList = new ArrayList<LocatableInputSplit>(numSplitsForSubtask);
                for (int k = 0; k < numSplitsForSubtask; k++) {
                    splitList.add(splitsOnHost.get(splitnum++));
                }
            }
            subTaskSplitAssignment[subtaskNum++] = splitList;
        }
    }
    return subTaskSplitAssignment;
}
Also used: HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), JobException (org.apache.flink.runtime.JobException), LocatableInputSplit (org.apache.flink.core.io.LocatableInputSplit), List (java.util.List), InputSplit (org.apache.flink.core.io.InputSplit)
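
The group-by-host step predates Java 8 and uses the classic get/put-if-absent dance; with Map.computeIfAbsent the same grouping reads more compactly. A generic sketch, where hostOf stands in for lis.getHostnames()[0]:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;

final class GroupSplitsByHost {

    // Same grouping as in computeLocalInputSplitsPerTask, order per host preserved,
    // expressed with Java 8's Map.computeIfAbsent.
    static <T> Map<String, List<T>> groupByHost(List<T> splits, Function<T, String> hostOf) {
        Map<String, List<T>> splitsByHost = new HashMap<>();
        for (T split : splits) {
            splitsByHost.computeIfAbsent(hostOf.apply(split), host -> new ArrayList<>()).add(split);
        }
        return splitsByHost;
    }

    public static void main(String[] args) {
        // Grouping plain host names as a stand-in for splits,
        // e.g. {host1=[host1, host1], host2=[host2]} (HashMap iteration order may vary)
        System.out.println(groupByHost(Arrays.asList("host1", "host2", "host1"), Function.identity()));
    }
}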

Example 95 with List

Use of java.util.List in project flink by apache.

The class OutputSplitterITCase, method testOnMergedDataStream().

@SuppressWarnings("unchecked")
@Test
public void testOnMergedDataStream() throws Exception {
    TestListResultSink<Integer> splitterResultSink1 = new TestListResultSink<Integer>();
    TestListResultSink<Integer> splitterResultSink2 = new TestListResultSink<Integer>();
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.setBufferTimeout(1);
    DataStream<Integer> d1 = env.fromElements(0, 2, 4, 6, 8);
    DataStream<Integer> d2 = env.fromElements(1, 3, 5, 7, 9);
    d1 = d1.union(d2);
    d1.split(new OutputSelector<Integer>() {

        private static final long serialVersionUID = 8354166915727490130L;

        @Override
        public Iterable<String> select(Integer value) {
            List<String> s = new ArrayList<String>();
            if (value > 4) {
                s.add(">");
            } else {
                s.add("<");
            }
            return s;
        }
    }).select(">").addSink(splitterResultSink1);
    d1.split(new OutputSelector<Integer>() {

        private static final long serialVersionUID = -6822487543355994807L;

        @Override
        public Iterable<String> select(Integer value) {
            List<String> s = new ArrayList<String>();
            if (value % 3 == 0) {
                s.add("yes");
            } else {
                s.add("no");
            }
            return s;
        }
    }).select("yes").addSink(splitterResultSink2);
    env.execute();
    expectedSplitterResult.clear();
    expectedSplitterResult.addAll(Arrays.asList(5, 6, 7, 8, 9));
    assertEquals(expectedSplitterResult, splitterResultSink1.getSortedResult());
    expectedSplitterResult.clear();
    expectedSplitterResult.addAll(Arrays.asList(0, 3, 6, 9));
    assertEquals(expectedSplitterResult, splitterResultSink2.getSortedResult());
}
Also used: TestListResultSink (org.apache.flink.test.streaming.runtime.util.TestListResultSink), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), List (java.util.List), ArrayList (java.util.ArrayList), Test (org.junit.Test)
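
Because parallelism is 1, both selectors see the merged stream 0..9, so the expected sink contents can be checked with a plain loop. A small verification sketch of the two selection predicates:

import java.util.ArrayList;
import java.util.List;

final class SelectorCheck {

    public static void main(String[] args) {
        List<Integer> greaterThanFour = new ArrayList<>();   // what the ">" output receives
        List<Integer> divisibleByThree = new ArrayList<>();  // what the "yes" output receives
        for (int value = 0; value <= 9; value++) {
            if (value > 4) {
                greaterThanFour.add(value);
            }
            if (value % 3 == 0) {
                divisibleByThree.add(value);
            }
        }
        System.out.println(greaterThanFour);    // [5, 6, 7, 8, 9]
        System.out.println(divisibleByThree);   // [0, 3, 6, 9]
    }
}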

Aggregations

List (java.util.List): 19204
ArrayList (java.util.ArrayList): 12470
Test (org.junit.Test): 4025
HashMap (java.util.HashMap): 3622
Map (java.util.Map): 3242
IOException (java.io.IOException): 1670
Iterator (java.util.Iterator): 1563
LinkedList (java.util.LinkedList): 1336
HashSet (java.util.HashSet): 1189
Set (java.util.Set): 1151
File (java.io.File): 921
ImmutableList (com.google.common.collect.ImmutableList): 826
Collectors (java.util.stream.Collectors): 784
LinkedHashMap (java.util.LinkedHashMap): 540
Test (org.testng.annotations.Test): 527
Session (org.hibernate.Session): 521
Collection (java.util.Collection): 496
Collections (java.util.Collections): 474
ICompilationUnit (org.eclipse.jdt.core.ICompilationUnit): 471
IPackageFragment (org.eclipse.jdt.core.IPackageFragment): 453