Use of java.util.List in project groovy by apache.
The class Node, method build().
public void build(final GroovyObject builder, final Map namespaceMap, final Map<String, String> namespaceTagHints) {
if (this.replacementNodeStack.empty()) {
final Closure rest = new Closure(null) {
public Object doCall(final Object o) {
buildChildren(builder, namespaceMap, namespaceTagHints);
return null;
}
};
if (this.namespaceURI.length() == 0 && this.attributeNamespaces.isEmpty()) {
builder.invokeMethod(this.name, new Object[] { this.attributes, rest });
} else {
final List newTags = new LinkedList();
builder.getProperty("mkp");
final List namespaces = (List) builder.invokeMethod("getNamespaces", new Object[] {});
final Map current = (Map) namespaces.get(0);
final Map pending = (Map) namespaces.get(1);
if (this.attributeNamespaces.isEmpty()) {
builder.getProperty(getTagFor(this.namespaceURI, current, pending, namespaceMap, namespaceTagHints, newTags, builder));
builder.invokeMethod(this.name, new Object[] { this.attributes, rest });
} else {
final Map attributesWithNamespaces = new HashMap(this.attributes);
for (Object key : this.attributes.keySet()) {
final Object attributeNamespaceURI = this.attributeNamespaces.get(key);
if (attributeNamespaceURI != null) {
attributesWithNamespaces.put(getTagFor(attributeNamespaceURI, current, pending, namespaceMap, namespaceTagHints, newTags, builder) + "$" + key, attributesWithNamespaces.remove(key));
}
}
builder.getProperty(getTagFor(this.namespaceURI, current, pending, namespaceMap, namespaceTagHints, newTags, builder));
builder.invokeMethod(this.name, new Object[] { attributesWithNamespaces, rest });
}
// remove the new tags we had to define for this element
if (!newTags.isEmpty()) {
final Iterator iter = newTags.iterator();
do {
pending.remove(iter.next());
} while (iter.hasNext());
}
}
} else {
((ReplacementNode) this.replacementNodeStack.peek()).build(builder, namespaceMap, namespaceTagHints);
}
}
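A detail worth noting in the else branch: getTagFor registers any namespace prefix it has to define in the builder's pending map and records it in the local newTags list, and the final loop removes exactly those prefixes again once the element has been built. Below is a minimal, JDK-only sketch of that scoped-registration pattern; the names pending, newTags and the example URI are illustrative and not part of Groovy's API.
import java.util.*;

public class TemporaryTagScope {
    public static void main(String[] args) {
        Map<String, String> pending = new HashMap<>(); // prefix -> namespace URI
        List<String> newTags = new LinkedList<>();     // prefixes introduced for this element only
        String prefix = "tag0";
        // register the prefix only if it is not already known, and remember that we added it
        if (!pending.containsKey(prefix)) {
            pending.put(prefix, "http://example.org/ns");
            newTags.add(prefix);
        }
        // ... build the element using the registered prefixes ...
        // afterwards, remove exactly the prefixes this element introduced
        for (String tag : newTags) {
            pending.remove(tag);
        }
        System.out.println(pending); // {}
    }
}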
Use of java.util.List in project flink by apache.
The class StateAssignmentOperation, method assignTaskStatesToOperatorInstances().
private static void assignTaskStatesToOperatorInstances(TaskState taskState, ExecutionJobVertex executionJobVertex) {
final int oldParallelism = taskState.getParallelism();
final int newParallelism = executionJobVertex.getParallelism();
List<KeyGroupRange> keyGroupPartitions = createKeyGroupPartitions(executionJobVertex.getMaxParallelism(), newParallelism);
final int chainLength = taskState.getChainLength();
// operator chain idx -> list of the stored op states from all parallel instances for this chain idx
@SuppressWarnings("unchecked") List<OperatorStateHandle>[] parallelOpStatesBackend = new List[chainLength];
@SuppressWarnings("unchecked") List<OperatorStateHandle>[] parallelOpStatesStream = new List[chainLength];
List<KeyGroupsStateHandle> parallelKeyedStatesBackend = new ArrayList<>(oldParallelism);
List<KeyGroupsStateHandle> parallelKeyedStateStream = new ArrayList<>(oldParallelism);
for (int p = 0; p < oldParallelism; ++p) {
SubtaskState subtaskState = taskState.getState(p);
if (null != subtaskState) {
collectParallelStatesByChainOperator(parallelOpStatesBackend, subtaskState.getManagedOperatorState());
collectParallelStatesByChainOperator(parallelOpStatesStream, subtaskState.getRawOperatorState());
KeyGroupsStateHandle keyedStateBackend = subtaskState.getManagedKeyedState();
if (null != keyedStateBackend) {
parallelKeyedStatesBackend.add(keyedStateBackend);
}
KeyGroupsStateHandle keyedStateStream = subtaskState.getRawKeyedState();
if (null != keyedStateStream) {
parallelKeyedStateStream.add(keyedStateStream);
}
}
}
// operator chain index -> lists with collected states (one collection for each parallel subtasks)
@SuppressWarnings("unchecked") List<Collection<OperatorStateHandle>>[] partitionedParallelStatesBackend = new List[chainLength];
@SuppressWarnings("unchecked") List<Collection<OperatorStateHandle>>[] partitionedParallelStatesStream = new List[chainLength];
//TODO here we can employ different redistribution strategies for state, e.g. union state.
// For now we only offer round robin as the default.
OperatorStateRepartitioner opStateRepartitioner = RoundRobinOperatorStateRepartitioner.INSTANCE;
for (int chainIdx = 0; chainIdx < chainLength; ++chainIdx) {
List<OperatorStateHandle> chainOpParallelStatesBackend = parallelOpStatesBackend[chainIdx];
List<OperatorStateHandle> chainOpParallelStatesStream = parallelOpStatesStream[chainIdx];
partitionedParallelStatesBackend[chainIdx] = applyRepartitioner(opStateRepartitioner, chainOpParallelStatesBackend, oldParallelism, newParallelism);
partitionedParallelStatesStream[chainIdx] = applyRepartitioner(opStateRepartitioner, chainOpParallelStatesStream, oldParallelism, newParallelism);
}
for (int subTaskIdx = 0; subTaskIdx < newParallelism; ++subTaskIdx) {
// non-partitioned state
ChainedStateHandle<StreamStateHandle> nonPartitionableState = null;
if (oldParallelism == newParallelism) {
if (taskState.getState(subTaskIdx) != null) {
nonPartitionableState = taskState.getState(subTaskIdx).getLegacyOperatorState();
}
}
// partitionable state
@SuppressWarnings("unchecked") Collection<OperatorStateHandle>[] iab = new Collection[chainLength];
@SuppressWarnings("unchecked") Collection<OperatorStateHandle>[] ias = new Collection[chainLength];
List<Collection<OperatorStateHandle>> operatorStateFromBackend = Arrays.asList(iab);
List<Collection<OperatorStateHandle>> operatorStateFromStream = Arrays.asList(ias);
for (int chainIdx = 0; chainIdx < partitionedParallelStatesBackend.length; ++chainIdx) {
List<Collection<OperatorStateHandle>> redistributedOpStateBackend = partitionedParallelStatesBackend[chainIdx];
List<Collection<OperatorStateHandle>> redistributedOpStateStream = partitionedParallelStatesStream[chainIdx];
if (redistributedOpStateBackend != null) {
operatorStateFromBackend.set(chainIdx, redistributedOpStateBackend.get(subTaskIdx));
}
if (redistributedOpStateStream != null) {
operatorStateFromStream.set(chainIdx, redistributedOpStateStream.get(subTaskIdx));
}
}
Execution currentExecutionAttempt = executionJobVertex.getTaskVertices()[subTaskIdx].getCurrentExecutionAttempt();
List<KeyGroupsStateHandle> newKeyedStatesBackend;
List<KeyGroupsStateHandle> newKeyedStateStream;
if (oldParallelism == newParallelism) {
SubtaskState subtaskState = taskState.getState(subTaskIdx);
if (subtaskState != null) {
KeyGroupsStateHandle oldKeyedStatesBackend = subtaskState.getManagedKeyedState();
KeyGroupsStateHandle oldKeyedStatesStream = subtaskState.getRawKeyedState();
newKeyedStatesBackend = oldKeyedStatesBackend != null ? Collections.singletonList(oldKeyedStatesBackend) : null;
newKeyedStateStream = oldKeyedStatesStream != null ? Collections.singletonList(oldKeyedStatesStream) : null;
} else {
newKeyedStatesBackend = null;
newKeyedStateStream = null;
}
} else {
KeyGroupRange subtaskKeyGroupIds = keyGroupPartitions.get(subTaskIdx);
newKeyedStatesBackend = getKeyGroupsStateHandles(parallelKeyedStatesBackend, subtaskKeyGroupIds);
newKeyedStateStream = getKeyGroupsStateHandles(parallelKeyedStateStream, subtaskKeyGroupIds);
}
TaskStateHandles taskStateHandles = new TaskStateHandles(nonPartitionableState, operatorStateFromBackend, operatorStateFromStream, newKeyedStatesBackend, newKeyedStateStream);
currentExecutionAttempt.setInitialState(taskStateHandles);
}
}
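Rescaling of keyed state hinges on createKeyGroupPartitions, which splits the job's maxParallelism key groups into one contiguous KeyGroupRange per new subtask so that getKeyGroupsStateHandles can later pick the handles overlapping each range. The following is a rough, self-contained sketch of one way such contiguous ranges can be computed; it is illustrative only, and Flink's actual formula may differ.
import java.util.*;

public class KeyGroupPartitioning {

    // Split the key groups [0, maxParallelism) into contiguous, near-equal ranges,
    // one per subtask (start and end are inclusive). Illustrative only.
    static List<int[]> createKeyGroupPartitions(int maxParallelism, int parallelism) {
        List<int[]> ranges = new ArrayList<>(parallelism);
        for (int i = 0; i < parallelism; i++) {
            int start = (i * maxParallelism) / parallelism;
            int end = ((i + 1) * maxParallelism) / parallelism - 1;
            ranges.add(new int[] { start, end });
        }
        return ranges;
    }

    public static void main(String[] args) {
        for (int[] r : createKeyGroupPartitions(128, 3)) {
            System.out.println(r[0] + ".." + r[1]); // 0..41, 42..84, 85..127
        }
    }
}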
Use of java.util.List in project flink by apache.
The class RoundRobinOperatorStateRepartitioner, method repartition().
/**
* Repartition all named states.
*/
private List<Map<StreamStateHandle, OperatorStateHandle>> repartition(GroupByStateNameResults nameToStateByMode, int parallelism) {
// We will use this to merge w.r.t. StreamStateHandles for each parallel subtask inside the maps
List<Map<StreamStateHandle, OperatorStateHandle>> mergeMapList = new ArrayList<>(parallelism);
// Initialize
for (int i = 0; i < parallelism; ++i) {
mergeMapList.add(new HashMap<StreamStateHandle, OperatorStateHandle>());
}
// Start with the state handles we distribute round robin by splitting by offsets
Map<String, List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>>> distributeNameToState = nameToStateByMode.getByMode(OperatorStateHandle.Mode.SPLIT_DISTRIBUTE);
int startParallelOp = 0;
// Iterate all named states and repartition one named state at a time per iteration
for (Map.Entry<String, List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>>> e : distributeNameToState.entrySet()) {
List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>> current = e.getValue();
// Determine actual number of partitions for this named state
int totalPartitions = 0;
for (Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo> offsets : current) {
totalPartitions += offsets.f1.getOffsets().length;
}
// Repartition the state across the parallel operator instances
int lstIdx = 0;
int offsetIdx = 0;
int baseFraction = totalPartitions / parallelism;
int remainder = totalPartitions % parallelism;
int newStartParallelOp = startParallelOp;
for (int i = 0; i < parallelism; ++i) {
// Preparation: calculate the actual index considering wrap around
int parallelOpIdx = (i + startParallelOp) % parallelism;
// Now calculate the number of partitions we will assign to the parallel instance in this round ...
int numberOfPartitionsToAssign = baseFraction;
// ... and distribute odd partitions while we still have some, one at a time
if (remainder > 0) {
++numberOfPartitionsToAssign;
--remainder;
} else if (remainder == 0) {
// We are out of odd partitions now and begin our next redistribution round with the current
// parallel operator to ensure fair load balance
newStartParallelOp = parallelOpIdx;
--remainder;
}
// Now start collecting the partitions for the parallel instance into this list
List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>> parallelOperatorState = new ArrayList<>();
while (numberOfPartitionsToAssign > 0) {
Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo> handleWithOffsets = current.get(lstIdx);
long[] offsets = handleWithOffsets.f1.getOffsets();
int remaining = offsets.length - offsetIdx;
// Repartition offsets
long[] offs;
if (remaining > numberOfPartitionsToAssign) {
offs = Arrays.copyOfRange(offsets, offsetIdx, offsetIdx + numberOfPartitionsToAssign);
offsetIdx += numberOfPartitionsToAssign;
} else {
if (OPTIMIZE_MEMORY_USE) {
// GC
handleWithOffsets.f1 = null;
}
offs = Arrays.copyOfRange(offsets, offsetIdx, offsets.length);
offsetIdx = 0;
++lstIdx;
}
parallelOperatorState.add(new Tuple2<>(handleWithOffsets.f0, new OperatorStateHandle.StateMetaInfo(offs, OperatorStateHandle.Mode.SPLIT_DISTRIBUTE)));
numberOfPartitionsToAssign -= remaining;
// As a last step we merge partitions that use the same StreamStateHandle in a single
// OperatorStateHandle
Map<StreamStateHandle, OperatorStateHandle> mergeMap = mergeMapList.get(parallelOpIdx);
OperatorStateHandle operatorStateHandle = mergeMap.get(handleWithOffsets.f0);
if (operatorStateHandle == null) {
operatorStateHandle = new OperatorStateHandle(new HashMap<String, OperatorStateHandle.StateMetaInfo>(), handleWithOffsets.f0);
mergeMap.put(handleWithOffsets.f0, operatorStateHandle);
}
operatorStateHandle.getStateNameToPartitionOffsets().put(e.getKey(), new OperatorStateHandle.StateMetaInfo(offs, OperatorStateHandle.Mode.SPLIT_DISTRIBUTE));
}
}
startParallelOp = newStartParallelOp;
e.setValue(null);
}
// Now we also add the state handles marked for broadcast to all parallel instances
Map<String, List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>>> broadcastNameToState = nameToStateByMode.getByMode(OperatorStateHandle.Mode.BROADCAST);
for (int i = 0; i < parallelism; ++i) {
Map<StreamStateHandle, OperatorStateHandle> mergeMap = mergeMapList.get(i);
for (Map.Entry<String, List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>>> e : broadcastNameToState.entrySet()) {
List<Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo>> current = e.getValue();
for (Tuple2<StreamStateHandle, OperatorStateHandle.StateMetaInfo> handleWithMetaInfo : current) {
OperatorStateHandle operatorStateHandle = mergeMap.get(handleWithMetaInfo.f0);
if (operatorStateHandle == null) {
operatorStateHandle = new OperatorStateHandle(new HashMap<String, OperatorStateHandle.StateMetaInfo>(), handleWithMetaInfo.f0);
mergeMap.put(handleWithMetaInfo.f0, operatorStateHandle);
}
operatorStateHandle.getStateNameToPartitionOffsets().put(e.getKey(), handleWithMetaInfo.f1);
}
}
}
return mergeMapList;
}
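The heart of the round-robin split is the baseFraction/remainder arithmetic: each subtask visited in a round receives totalPartitions / parallelism partitions, and the first totalPartitions % parallelism subtasks visited receive one more; startParallelOp only rotates where the next named state starts handing out those extras. Here is a small standalone sketch of just the share computation, leaving that rotation out.
import java.util.Arrays;

public class RoundRobinShares {

    // How many partitions each of `parallelism` subtasks receives when
    // `totalPartitions` partitions are dealt out round robin (ignoring the
    // startParallelOp rotation used above).
    static int[] shares(int totalPartitions, int parallelism) {
        int base = totalPartitions / parallelism;
        int remainder = totalPartitions % parallelism;
        int[] result = new int[parallelism];
        for (int i = 0; i < parallelism; i++) {
            result[i] = base + (i < remainder ? 1 : 0);
        }
        return result;
    }

    public static void main(String[] args) {
        System.out.println(Arrays.toString(shares(10, 4))); // [3, 3, 2, 2]
    }
}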
Use of java.util.List in project flink by apache.
The class ExecutionJobVertex, method computeLocalInputSplitsPerTask().
// --------------------------------------------------------------------------------------------
// Static / pre-assigned input splits
// --------------------------------------------------------------------------------------------
private List<LocatableInputSplit>[] computeLocalInputSplitsPerTask(InputSplit[] splits) throws JobException {
final int numSubTasks = getParallelism();
// sanity check
if (numSubTasks > splits.length) {
throw new JobException("Strictly local assignment requires at least as many splits as subtasks.");
}
// group the splits by host while preserving order per host
Map<String, List<LocatableInputSplit>> splitsByHost = new HashMap<String, List<LocatableInputSplit>>();
for (InputSplit split : splits) {
// check that split has exactly one local host
if (!(split instanceof LocatableInputSplit)) {
throw new JobException("Invalid InputSplit type " + split.getClass().getCanonicalName() + ". " + "Strictly local assignment requires LocatableInputSplit");
}
LocatableInputSplit lis = (LocatableInputSplit) split;
if (lis.getHostnames() == null) {
throw new JobException("LocatableInputSplit has no host information. " + "Strictly local assignment requires exactly one hostname for each LocatableInputSplit.");
} else if (lis.getHostnames().length != 1) {
throw new JobException("Strictly local assignment requires exactly one hostname for each LocatableInputSplit.");
}
String hostName = lis.getHostnames()[0];
if (hostName == null) {
throw new JobException("For strictly local input split assignment, no null host names are allowed.");
}
List<LocatableInputSplit> hostSplits = splitsByHost.get(hostName);
if (hostSplits == null) {
hostSplits = new ArrayList<LocatableInputSplit>();
splitsByHost.put(hostName, hostSplits);
}
hostSplits.add(lis);
}
int numHosts = splitsByHost.size();
if (numSubTasks < numHosts) {
throw new JobException("Strictly local split assignment requires at least as " + "many parallel subtasks as distinct split hosts. Please increase the parallelism " + "of DataSource " + this.getJobVertex().getName() + " to at least " + numHosts + ".");
}
// get list of hosts in deterministic order
List<String> hosts = new ArrayList<String>(splitsByHost.keySet());
Collections.sort(hosts);
@SuppressWarnings("unchecked") List<LocatableInputSplit>[] subTaskSplitAssignment = (List<LocatableInputSplit>[]) new List<?>[numSubTasks];
final int subtasksPerHost = numSubTasks / numHosts;
final int hostsWithOneMore = numSubTasks % numHosts;
int subtaskNum = 0;
// go over all hosts and distribute their splits over the associated subtasks
for (int hostNum = 0; hostNum < numHosts; hostNum++) {
String host = hosts.get(hostNum);
List<LocatableInputSplit> splitsOnHost = splitsByHost.get(host);
int numSplitsOnHost = splitsOnHost.size();
// the number of subtasks to split this over.
// NOTE: if the host has few splits, some subtasks will not get anything.
int subtasks = Math.min(numSplitsOnHost, hostNum < hostsWithOneMore ? subtasksPerHost + 1 : subtasksPerHost);
int splitsPerSubtask = numSplitsOnHost / subtasks;
int subtasksWithOneMore = numSplitsOnHost % subtasks;
int splitnum = 0;
// go over the subtasks and grab a subrange of the input splits
for (int i = 0; i < subtasks; i++) {
int numSplitsForSubtask = (i < subtasksWithOneMore ? splitsPerSubtask + 1 : splitsPerSubtask);
List<LocatableInputSplit> splitList;
if (numSplitsForSubtask == numSplitsOnHost) {
splitList = splitsOnHost;
} else {
splitList = new ArrayList<LocatableInputSplit>(numSplitsForSubtask);
for (int k = 0; k < numSplitsForSubtask; k++) {
splitList.add(splitsOnHost.get(splitnum++));
}
}
subTaskSplitAssignment[subtaskNum++] = splitList;
}
}
return subTaskSplitAssignment;
}
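The grouping loop above builds splitsByHost with the classic get-then-put idiom; on Java 8 and later the same grouping is commonly written with Map.computeIfAbsent. A short sketch with made-up host and split names:
import java.util.*;

public class GroupByHost {
    public static void main(String[] args) {
        String[][] splits = { { "host-a", "split-0" }, { "host-b", "split-1" }, { "host-a", "split-2" } };
        // group split names by host while preserving insertion order per host
        Map<String, List<String>> splitsByHost = new HashMap<>();
        for (String[] split : splits) {
            splitsByHost.computeIfAbsent(split[0], k -> new ArrayList<>()).add(split[1]);
        }
        System.out.println(splitsByHost); // host-a: [split-0, split-2], host-b: [split-1] (map order not guaranteed)
    }
}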
Use of java.util.List in project flink by apache.
The class OutputSplitterITCase, method testOnMergedDataStream().
@SuppressWarnings("unchecked")
@Test
public void testOnMergedDataStream() throws Exception {
TestListResultSink<Integer> splitterResultSink1 = new TestListResultSink<Integer>();
TestListResultSink<Integer> splitterResultSink2 = new TestListResultSink<Integer>();
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
env.setBufferTimeout(1);
DataStream<Integer> d1 = env.fromElements(0, 2, 4, 6, 8);
DataStream<Integer> d2 = env.fromElements(1, 3, 5, 7, 9);
d1 = d1.union(d2);
d1.split(new OutputSelector<Integer>() {
private static final long serialVersionUID = 8354166915727490130L;
@Override
public Iterable<String> select(Integer value) {
List<String> s = new ArrayList<String>();
if (value > 4) {
s.add(">");
} else {
s.add("<");
}
return s;
}
}).select(">").addSink(splitterResultSink1);
d1.split(new OutputSelector<Integer>() {
private static final long serialVersionUID = -6822487543355994807L;
@Override
public Iterable<String> select(Integer value) {
List<String> s = new ArrayList<String>();
if (value % 3 == 0) {
s.add("yes");
} else {
s.add("no");
}
return s;
}
}).select("yes").addSink(splitterResultSink2);
env.execute();
expectedSplitterResult.clear();
expectedSplitterResult.addAll(Arrays.asList(5, 6, 7, 8, 9));
assertEquals(expectedSplitterResult, splitterResultSink1.getSortedResult());
expectedSplitterResult.clear();
expectedSplitterResult.addAll(Arrays.asList(0, 3, 6, 9));
assertEquals(expectedSplitterResult, splitterResultSink2.getSortedResult());
}
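The two OutputSelectors route every element of the merged stream by value: the first sends values greater than 4 to the ">" output, the second sends multiples of 3 to "yes". The plain-Java sketch below reproduces the expected contents asserted at the end of the test, purely for illustration.
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

public class SplitterExpectation {
    public static void main(String[] args) {
        List<Integer> merged = IntStream.rangeClosed(0, 9).boxed().collect(Collectors.toList());
        // what the first selector routes to ">" and the second routes to "yes"
        List<Integer> greaterThanFour = merged.stream().filter(v -> v > 4).collect(Collectors.toList());
        List<Integer> multiplesOfThree = merged.stream().filter(v -> v % 3 == 0).collect(Collectors.toList());
        System.out.println(greaterThanFour);  // [5, 6, 7, 8, 9]
        System.out.println(multiplesOfThree); // [0, 3, 6, 9]
    }
}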