Use of org.apache.flink.streaming.api.graph.StreamEdge in project flink by apache.
The class TwoInputStreamTask, method init().
@Override
public void init() throws Exception {
    StreamConfig configuration = getConfiguration();
    ClassLoader userClassLoader = getUserCodeClassLoader();
    TypeSerializer<IN1> inputDeserializer1 = configuration.getTypeSerializerIn1(userClassLoader);
    TypeSerializer<IN2> inputDeserializer2 = configuration.getTypeSerializerIn2(userClassLoader);
    int numberOfInputs = configuration.getNumberOfInputs();
    ArrayList<InputGate> inputList1 = new ArrayList<InputGate>();
    ArrayList<InputGate> inputList2 = new ArrayList<InputGate>();
    List<StreamEdge> inEdges = configuration.getInPhysicalEdges(userClassLoader);
    for (int i = 0; i < numberOfInputs; i++) {
        int inputType = inEdges.get(i).getTypeNumber();
        InputGate reader = getEnvironment().getInputGate(i);
        switch (inputType) {
            case 1:
                inputList1.add(reader);
                break;
            case 2:
                inputList2.add(reader);
                break;
            default:
                throw new RuntimeException("Invalid input type number: " + inputType);
        }
    }
    this.inputProcessor = new StreamTwoInputProcessor<>(
            inputList1, inputList2,
            inputDeserializer1, inputDeserializer2,
            this,
            configuration.getCheckpointMode(),
            getCheckpointLock(),
            getEnvironment().getIOManager(),
            getEnvironment().getTaskManagerInfo().getConfiguration(),
            getStreamStatusMaintainer(),
            this.headOperator);
    // make sure that stream tasks report their I/O statistics
    inputProcessor.setMetricGroup(getEnvironment().getMetricGroup().getIOMetricGroup());
}
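For context, this init path is exercised whenever a job feeds two streams into one operator. A minimal, hypothetical sketch of such a pipeline (the environment, element values, and functions are illustrative, not taken from the snippet above): the StreamEdge type numbers 1 and 2 seen in the switch correspond to the two inputs of the CoMapFunction.

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

DataStream<String> first = env.fromElements("a", "b");
DataStream<Integer> second = env.fromElements(1, 2);

// connect() creates a two-input operator; at runtime its task routes each
// input gate by the edge's type number, exactly as init() does above.
first.connect(second).map(new CoMapFunction<String, Integer, String>() {
    @Override
    public String map1(String value) throws Exception {
        return value; // records arriving on input 1
    }

    @Override
    public String map2(Integer value) throws Exception {
        return String.valueOf(value); // records arriving on input 2
    }
});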
Use of org.apache.flink.streaming.api.graph.StreamEdge in project flink by apache.
The class StreamGraphHasherV1, method generateNodeHash().
/**
* Generates a hash for the node and returns whether the operation was
* successful.
*
* @param node The node to generate the hash for
* @param hashFunction The hash function to use
* @param hashes The current state of generated hashes
* @return <code>true</code> if the node hash has been generated.
* <code>false</code> otherwise. If the operation is not successful, the
* hash needs to be generated at a later point when all inputs are available.
* @throws IllegalStateException If the node has a user-specified hash and is
* an intermediate node of a chain
*/
private boolean generateNodeHash(StreamNode node, HashFunction hashFunction, Map<Integer, byte[]> hashes, boolean isChainingEnabled) {
    // Check for user-specified ID
    String userSpecifiedHash = node.getTransformationUID();
    if (userSpecifiedHash == null) {
        // Check that all input nodes have their hashes computed
        for (StreamEdge inEdge : node.getInEdges()) {
            // If an input node has not been visited yet, this node will be
            // visited again at a later point, when all input
            // nodes have been visited and their hashes set.
            if (!hashes.containsKey(inEdge.getSourceId())) {
                return false;
            }
        }
        Hasher hasher = hashFunction.newHasher();
        byte[] hash = generateDeterministicHash(node, hasher, hashes, isChainingEnabled);
        if (hashes.put(node.getId(), hash) != null) {
            // Sanity check
            throw new IllegalStateException("Unexpected state. Tried to add node hash "
                    + "twice. This is probably a bug in the JobGraph generator.");
        }
        return true;
    } else {
        // A user-specified hash cannot be assigned to an intermediate node of a
        // chain, i.e. a node reached via a chainable in-edge. It is OK if the node
        // has chained outputs.
        for (StreamEdge inEdge : node.getInEdges()) {
            if (isChainable(inEdge, isChainingEnabled)) {
                throw new UnsupportedOperationException("Cannot assign user-specified hash "
                        + "to intermediate node in chain. This will be supported in future "
                        + "versions of Flink. As a work around start new chain at task "
                        + node.getOperatorName() + ".");
            }
        }
        Hasher hasher = hashFunction.newHasher();
        byte[] hash = generateUserSpecifiedHash(node, hasher);
        for (byte[] previousHash : hashes.values()) {
            if (Arrays.equals(previousHash, hash)) {
                throw new IllegalArgumentException("Hash collision on user-specified ID. "
                        + "Most likely cause is a non-unique ID. Please check that all IDs "
                        + "specified via `uid(String)` are unique.");
            }
        }
        if (hashes.put(node.getId(), hash) != null) {
            // Sanity check
            throw new IllegalStateException("Unexpected state. Tried to add node hash "
                    + "twice. This is probably a bug in the JobGraph generator.");
        }
        return true;
    }
}
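The user-specified hash checked above comes from uid(String) in the DataStream API. A minimal sketch showing where these IDs are set and why they must be unique (MySourceFunction and MyMapFunction are hypothetical placeholders):

// uid(String) sets the transformation UID that getTransformationUID() returns;
// generateUserSpecifiedHash() then hashes it instead of deriving a deterministic
// hash from the topology. Duplicate IDs trigger the hash-collision check above.
env.addSource(new MySourceFunction())   // placeholder source
        .uid("source-id")
        .map(new MyMapFunction())       // placeholder mapper
        .uid("mapper-id");              // must differ from every other uid in the job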
Use of org.apache.flink.streaming.api.graph.StreamEdge in project flink by apache.
The class DataStreamTest, method testUnion().
/**
* Tests union functionality. This ensures that self-unions and unions of streams
* with differing parallelism work.
*
* @throws Exception if the test fails
*/
@Test
public void testUnion() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    DataStream<Long> input1 = env.generateSequence(0, 0).map(new MapFunction<Long, Long>() {
        @Override
        public Long map(Long value) throws Exception {
            return null;
        }
    });

    DataStream<Long> selfUnion = input1.union(input1).map(new MapFunction<Long, Long>() {
        @Override
        public Long map(Long value) throws Exception {
            return null;
        }
    });

    DataStream<Long> input6 = env.generateSequence(0, 0).map(new MapFunction<Long, Long>() {
        @Override
        public Long map(Long value) throws Exception {
            return null;
        }
    });

    DataStream<Long> selfUnionDifferentPartition = input6.broadcast().union(input6).map(new MapFunction<Long, Long>() {
        @Override
        public Long map(Long value) throws Exception {
            return null;
        }
    });

    DataStream<Long> input2 = env.generateSequence(0, 0).map(new MapFunction<Long, Long>() {
        @Override
        public Long map(Long value) throws Exception {
            return null;
        }
    }).setParallelism(4);

    DataStream<Long> input3 = env.generateSequence(0, 0).map(new MapFunction<Long, Long>() {
        @Override
        public Long map(Long value) throws Exception {
            return null;
        }
    }).setParallelism(2);

    DataStream<Long> unionDifferingParallelism = input2.union(input3).map(new MapFunction<Long, Long>() {
        @Override
        public Long map(Long value) throws Exception {
            return null;
        }
    }).setParallelism(4);

    DataStream<Long> input4 = env.generateSequence(0, 0).map(new MapFunction<Long, Long>() {
        @Override
        public Long map(Long value) throws Exception {
            return null;
        }
    }).setParallelism(2);

    DataStream<Long> input5 = env.generateSequence(0, 0).map(new MapFunction<Long, Long>() {
        @Override
        public Long map(Long value) throws Exception {
            return null;
        }
    }).setParallelism(4);

    DataStream<Long> unionDifferingPartitioning = input4.broadcast().union(input5).map(new MapFunction<Long, Long>() {
        @Override
        public Long map(Long value) throws Exception {
            return null;
        }
    }).setParallelism(4);

    StreamGraph streamGraph = env.getStreamGraph();

    // verify self union
    assertTrue(streamGraph.getStreamNode(selfUnion.getId()).getInEdges().size() == 2);
    for (StreamEdge edge : streamGraph.getStreamNode(selfUnion.getId()).getInEdges()) {
        assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
    }

    // verify self union with different partitioners
    assertTrue(streamGraph.getStreamNode(selfUnionDifferentPartition.getId()).getInEdges().size() == 2);
    boolean hasForward = false;
    boolean hasBroadcast = false;
    for (StreamEdge edge : streamGraph.getStreamNode(selfUnionDifferentPartition.getId()).getInEdges()) {
        if (edge.getPartitioner() instanceof ForwardPartitioner) {
            hasForward = true;
        }
        if (edge.getPartitioner() instanceof BroadcastPartitioner) {
            hasBroadcast = true;
        }
    }
    assertTrue(hasForward && hasBroadcast);

    // verify union of streams with differing parallelism
    assertTrue(streamGraph.getStreamNode(unionDifferingParallelism.getId()).getInEdges().size() == 2);
    for (StreamEdge edge : streamGraph.getStreamNode(unionDifferingParallelism.getId()).getInEdges()) {
        if (edge.getSourceId() == input2.getId()) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
        } else if (edge.getSourceId() == input3.getId()) {
            assertTrue(edge.getPartitioner() instanceof RebalancePartitioner);
        } else {
            fail("Wrong input edge.");
        }
    }

    // verify union of streams with differing partitionings
    assertTrue(streamGraph.getStreamNode(unionDifferingPartitioning.getId()).getInEdges().size() == 2);
    for (StreamEdge edge : streamGraph.getStreamNode(unionDifferingPartitioning.getId()).getInEdges()) {
        if (edge.getSourceId() == input4.getId()) {
            assertTrue(edge.getPartitioner() instanceof BroadcastPartitioner);
        } else if (edge.getSourceId() == input5.getId()) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
        } else {
            fail("Wrong input edge.");
        }
    }
}
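Worth noting about the assertions above: union does not create a StreamNode of its own; it only adds a StreamEdge from each input to the downstream operator, which is why the test inspects the in-edges of the map node. A minimal sketch (stream names are illustrative, assuming the env from the test above):

DataStream<Long> a = env.generateSequence(0, 10);
DataStream<Long> b = env.generateSequence(0, 10);

// No new operator is created here; the next downstream node simply receives
// one in-edge per union input, each with its own partitioner.
DataStream<Long> merged = a.union(b);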
Use of org.apache.flink.streaming.api.graph.StreamEdge in project flink by apache.
The class OperatorChain, method createOutputCollector().
// ------------------------------------------------------------------------
//  initialization utilities
// ------------------------------------------------------------------------

private <T> Output<StreamRecord<T>> createOutputCollector(
        StreamTask<?, ?> containingTask,
        StreamConfig operatorConfig,
        Map<Integer, StreamConfig> chainedConfigs,
        ClassLoader userCodeClassloader,
        Map<StreamEdge, RecordWriterOutput<?>> streamOutputs,
        List<StreamOperator<?>> allOperators) {

    List<Tuple2<Output<StreamRecord<T>>, StreamEdge>> allOutputs = new ArrayList<>(4);

    // create collectors for the network outputs
    for (StreamEdge outputEdge : operatorConfig.getNonChainedOutputs(userCodeClassloader)) {
        @SuppressWarnings("unchecked")
        RecordWriterOutput<T> output = (RecordWriterOutput<T>) streamOutputs.get(outputEdge);
        allOutputs.add(new Tuple2<Output<StreamRecord<T>>, StreamEdge>(output, outputEdge));
    }

    // Create collectors for the chained outputs
    for (StreamEdge outputEdge : operatorConfig.getChainedOutputs(userCodeClassloader)) {
        int outputId = outputEdge.getTargetId();
        StreamConfig chainedOpConfig = chainedConfigs.get(outputId);
        Output<StreamRecord<T>> output = createChainedOperator(
                containingTask, chainedOpConfig, chainedConfigs,
                userCodeClassloader, streamOutputs, allOperators, outputEdge.getOutputTag());
        allOutputs.add(new Tuple2<>(output, outputEdge));
    }

    // if there are multiple outputs, or the outputs are directed, we need to
    // wrap them as one output
    List<OutputSelector<T>> selectors = operatorConfig.getOutputSelectors(userCodeClassloader);

    if (selectors == null || selectors.isEmpty()) {
        // simple path, no selector necessary
        if (allOutputs.size() == 1) {
            return allOutputs.get(0).f0;
        } else {
            // send to N outputs. Note that this includes the special case
            // of sending to zero outputs
            @SuppressWarnings({ "unchecked", "rawtypes" })
            Output<StreamRecord<T>>[] asArray = new Output[allOutputs.size()];
            for (int i = 0; i < allOutputs.size(); i++) {
                asArray[i] = allOutputs.get(i).f0;
            }

            // If the chaining outputs do not copy, we need to copy in the
            // broadcast output, otherwise multi-chaining would not work correctly.
            if (containingTask.getExecutionConfig().isObjectReuseEnabled()) {
                return new CopyingBroadcastingOutputCollector<>(asArray, this);
            } else {
                return new BroadcastingOutputCollector<>(asArray, this);
            }
        }
    } else {
        // If the chaining outputs do not copy, we need to copy in the
        // directed output, otherwise multi-chaining would not work correctly.
        if (containingTask.getExecutionConfig().isObjectReuseEnabled()) {
            return new CopyingDirectedOutput<>(selectors, allOutputs);
        } else {
            return new DirectedOutput<>(selectors, allOutputs);
        }
    }
}
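The selector branch corresponds to the legacy split/select API of the DataStream API from this era of Flink. A hedged sketch of how an OutputSelector ends up in the operator config (assuming a DataStream<Integer> named numbers):

// Each record is routed to the named outputs the selector returns; when a
// selector is present, createOutputCollector wraps all outputs in a DirectedOutput.
SplitStream<Integer> split = numbers.split(new OutputSelector<Integer>() {
    @Override
    public Iterable<String> select(Integer value) {
        return Collections.singletonList(value % 2 == 0 ? "even" : "odd");
    }
});
DataStream<Integer> evens = split.select("even");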
Use of org.apache.flink.streaming.api.graph.StreamEdge in project flink by apache.
The class StreamTaskTestHarness, method setupOutputForSingletonOperatorChain().
/**
 * Users of the test harness can call this utility method to set up the stream config
 * if there will only be a single operator to be tested. The method will set up the
 * outgoing network connection for the operator.
 *
 * <p>For more advanced test cases, such as testing chains of multiple operators with the harness,
 * please manually configure the stream config.
 */
public void setupOutputForSingletonOperatorChain() {
    streamConfig.setChainStart();
    streamConfig.setBufferTimeout(0);
    streamConfig.setTimeCharacteristic(TimeCharacteristic.EventTime);
    streamConfig.setOutputSelectors(Collections.<OutputSelector<?>>emptyList());
    streamConfig.setNumberOfOutputs(1);
    streamConfig.setTypeSerializerOut(outputSerializer);
    streamConfig.setVertexID(0);

    StreamOperator<OUT> dummyOperator = new AbstractStreamOperator<OUT>() {
        private static final long serialVersionUID = 1L;
    };

    List<StreamEdge> outEdgesInOrder = new LinkedList<StreamEdge>();
    StreamNode sourceVertexDummy = new StreamNode(null, 0, "group", dummyOperator, "source dummy", new LinkedList<OutputSelector<?>>(), SourceStreamTask.class);
    StreamNode targetVertexDummy = new StreamNode(null, 1, "group", dummyOperator, "target dummy", new LinkedList<OutputSelector<?>>(), SourceStreamTask.class);

    outEdgesInOrder.add(new StreamEdge(sourceVertexDummy, targetVertexDummy, 0, new LinkedList<String>(), new BroadcastPartitioner<Object>(), null));

    streamConfig.setOutEdgesInOrder(outEdgesInOrder);
    streamConfig.setNonChainedOutputs(outEdgesInOrder);
}
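A hypothetical usage sketch for this utility; the harness subclass and constructor arguments are illustrative, as they vary between Flink versions:

// set up a harness for a one-input task, then configure a single-operator
// chain with one broadcast output edge, as the method above does
OneInputStreamTaskTestHarness<String, String> harness = new OneInputStreamTaskTestHarness<>(
        new OneInputStreamTask<String, String>(),   // task under test (illustrative)
        BasicTypeInfo.STRING_TYPE_INFO,             // input type
        BasicTypeInfo.STRING_TYPE_INFO);            // output type
harness.setupOutputForSingletonOperatorChain();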