Search in sources :

Example 6 with StreamEdge

use of org.apache.flink.streaming.api.graph.StreamEdge in project flink by apache.

the class TwoInputStreamTask method init.

/**
 * Sets up the two-input processor of this task.
 *
 * <p>Every input gate of the environment is assigned to the first or the second
 * logical input, depending on the type number of the physical in-edge with the
 * same index. The two gate lists are then handed to a new
 * {@code StreamTwoInputProcessor}, which is also given the task's I/O metric group.
 *
 * @throws RuntimeException if an in-edge has a type number other than 1 or 2
 */
@Override
public void init() throws Exception {
    final StreamConfig config = getConfiguration();
    final ClassLoader classLoader = getUserCodeClassLoader();

    TypeSerializer<IN1> serializerIn1 = config.getTypeSerializerIn1(classLoader);
    TypeSerializer<IN2> serializerIn2 = config.getTypeSerializerIn2(classLoader);
    int inputCount = config.getNumberOfInputs();

    ArrayList<InputGate> gatesForInput1 = new ArrayList<>();
    ArrayList<InputGate> gatesForInput2 = new ArrayList<>();
    List<StreamEdge> physicalInEdges = config.getInPhysicalEdges(classLoader);

    // The i-th physical in-edge decides which logical input the i-th gate feeds.
    for (int gateIndex = 0; gateIndex < inputCount; gateIndex++) {
        int typeNumber = physicalInEdges.get(gateIndex).getTypeNumber();
        InputGate gate = getEnvironment().getInputGate(gateIndex);
        if (typeNumber == 1) {
            gatesForInput1.add(gate);
        } else if (typeNumber == 2) {
            gatesForInput2.add(gate);
        } else {
            throw new RuntimeException("Invalid input type number: " + typeNumber);
        }
    }

    this.inputProcessor = new StreamTwoInputProcessor<>(
            gatesForInput1,
            gatesForInput2,
            serializerIn1,
            serializerIn2,
            this,
            config.getCheckpointMode(),
            getCheckpointLock(),
            getEnvironment().getIOManager(),
            getEnvironment().getTaskManagerInfo().getConfiguration(),
            getStreamStatusMaintainer(),
            this.headOperator);

    // make sure that stream tasks report their I/O statistics
    inputProcessor.setMetricGroup(getEnvironment().getMetricGroup().getIOMetricGroup());
}
Also used : ArrayList(java.util.ArrayList) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) InputGate(org.apache.flink.runtime.io.network.partition.consumer.InputGate)

Example 7 with StreamEdge

use of org.apache.flink.streaming.api.graph.StreamEdge in project flink by apache.

the class StreamGraphHasherV1 method generateNodeHash.

/**
 * Generates a hash for the node and returns whether the operation was
 * successful.
 *
 * <p>If the node has no user-specified ID, the hash is produced by
 * {@code generateDeterministicHash}, which is only attempted once every
 * source node of the node's in-edges already has an entry in {@code hashes}.
 * Otherwise the hash is produced by {@code generateUserSpecifiedHash}.
 * On success the hash is stored in {@code hashes} under the node's ID.
 *
 * @param node              The node to generate the hash for
 * @param hashFunction      The hash function to use
 * @param hashes            The current state of generated hashes, keyed by node ID
 * @param isChainingEnabled Flag passed on to the chainability check and to the
 *                          deterministic hash generation
 * @return <code>true</code> if the node hash has been generated.
 * <code>false</code>, otherwise. If the operation is not successful, the
 * hash needs be generated at a later point when all input is available.
 * @throws UnsupportedOperationException If node has user-specified ID and one
 *                                       of its in-edges is chainable
 * @throws IllegalArgumentException      If the user-specified hash equals a hash
 *                                       that is already present in {@code hashes}
 * @throws IllegalStateException         If a hash is already registered for the node
 */
private boolean generateNodeHash(StreamNode node, HashFunction hashFunction, Map<Integer, byte[]> hashes, boolean isChainingEnabled) {
    // Check for user-specified ID
    String userSpecifiedHash = node.getTransformationUID();
    final byte[] hash;
    if (userSpecifiedHash == null) {
        // The deterministic hash is only generated once all input nodes have
        // their hashes computed; otherwise the caller has to retry later.
        for (StreamEdge inEdge : node.getInEdges()) {
            if (!hashes.containsKey(inEdge.getSourceId())) {
                return false;
            }
        }
        Hasher hasher = hashFunction.newHasher();
        hash = generateDeterministicHash(node, hasher, hashes, isChainingEnabled);
    } else {
        // A user-specified ID is rejected for a node whose input edge is
        // chainable, i.e. a node that is not the start of its chain.
        for (StreamEdge inEdge : node.getInEdges()) {
            if (isChainable(inEdge, isChainingEnabled)) {
                throw new UnsupportedOperationException("Cannot assign user-specified hash " + "to intermediate node in chain. This will be supported in future " + "versions of Flink. As a work around start new chain at task " + node.getOperatorName() + ".");
            }
        }
        Hasher hasher = hashFunction.newHasher();
        hash = generateUserSpecifiedHash(node, hasher);
        // The new hash must differ from every hash generated so far.
        for (byte[] previousHash : hashes.values()) {
            if (Arrays.equals(previousHash, hash)) {
                throw new IllegalArgumentException("Hash collision on user-specified ID. " + "Most likely cause is a non-unique ID. Please check that all IDs " + "specified via `uid(String)` are unique.");
            }
        }
    }
    // Register the hash (shared by both paths); a previous entry for the same
    // node is treated as an internal error.
    if (hashes.put(node.getId(), hash) != null) {
        // Sanity check
        throw new IllegalStateException("Unexpected state. Tried to add node hash " + "twice. This is probably a bug in the JobGraph generator.");
    }
    return true;
}
Also used : StreamGraphHasher(org.apache.flink.streaming.api.graph.StreamGraphHasher) Hasher(com.google.common.hash.Hasher) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) StringUtils.byteToHexString(org.apache.flink.util.StringUtils.byteToHexString)

Example 8 with StreamEdge

use of org.apache.flink.streaming.api.graph.StreamEdge in project flink by apache.

the class DataStreamTest method testUnion.

/**
 * Tests union functionality. This ensures that self-unions and unions of streams
 * with differing parallelism work.
 *
 * <p>Only the generated stream graph is inspected: for each union the number of
 * in-edges of the downstream map node and the partitioner of every in-edge are checked.
 *
 * @throws Exception
 */
@Test
public void testUnion() throws Exception {
    // Mapper used for every map step below; a fresh instance is created per call.
    class NullMapper implements MapFunction<Long, Long> {

        @Override
        public Long map(Long value) throws Exception {
            return null;
        }
    }

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    // union of a stream with itself
    DataStream<Long> selfSource = env.generateSequence(0, 0).map(new NullMapper());
    DataStream<Long> selfUnion = selfSource.union(selfSource).map(new NullMapper());

    // union of a stream with a broadcast version of itself
    DataStream<Long> mixedSelfSource = env.generateSequence(0, 0).map(new NullMapper());
    DataStream<Long> selfUnionDifferentPartition =
            mixedSelfSource.broadcast().union(mixedSelfSource).map(new NullMapper());

    // union of streams with parallelism 4 and 2
    DataStream<Long> parallelFour = env.generateSequence(0, 0).map(new NullMapper()).setParallelism(4);
    DataStream<Long> parallelTwo = env.generateSequence(0, 0).map(new NullMapper()).setParallelism(2);
    DataStream<Long> unionDifferingParallelism =
            parallelFour.union(parallelTwo).map(new NullMapper()).setParallelism(4);

    // union of a broadcast stream with a plain stream
    DataStream<Long> broadcastSide = env.generateSequence(0, 0).map(new NullMapper()).setParallelism(2);
    DataStream<Long> plainSide = env.generateSequence(0, 0).map(new NullMapper()).setParallelism(4);
    DataStream<Long> unionDifferingPartitioning =
            broadcastSide.broadcast().union(plainSide).map(new NullMapper()).setParallelism(4);

    StreamGraph streamGraph = env.getStreamGraph();

    // verify self union
    assertTrue(streamGraph.getStreamNode(selfUnion.getId()).getInEdges().size() == 2);
    for (StreamEdge inEdge : streamGraph.getStreamNode(selfUnion.getId()).getInEdges()) {
        assertTrue(inEdge.getPartitioner() instanceof ForwardPartitioner);
    }

    // verify self union with different partitioners
    assertTrue(streamGraph.getStreamNode(selfUnionDifferentPartition.getId()).getInEdges().size() == 2);
    boolean sawForward = false;
    boolean sawBroadcast = false;
    for (StreamEdge inEdge : streamGraph.getStreamNode(selfUnionDifferentPartition.getId()).getInEdges()) {
        sawForward |= inEdge.getPartitioner() instanceof ForwardPartitioner;
        sawBroadcast |= inEdge.getPartitioner() instanceof BroadcastPartitioner;
    }
    assertTrue(sawForward && sawBroadcast);

    // verify union of streams with differing parallelism
    assertTrue(streamGraph.getStreamNode(unionDifferingParallelism.getId()).getInEdges().size() == 2);
    for (StreamEdge inEdge : streamGraph.getStreamNode(unionDifferingParallelism.getId()).getInEdges()) {
        if (inEdge.getSourceId() == parallelFour.getId()) {
            assertTrue(inEdge.getPartitioner() instanceof ForwardPartitioner);
        } else if (inEdge.getSourceId() == parallelTwo.getId()) {
            assertTrue(inEdge.getPartitioner() instanceof RebalancePartitioner);
        } else {
            fail("Wrong input edge.");
        }
    }

    // verify union of streams with differing partitionings
    assertTrue(streamGraph.getStreamNode(unionDifferingPartitioning.getId()).getInEdges().size() == 2);
    for (StreamEdge inEdge : streamGraph.getStreamNode(unionDifferingPartitioning.getId()).getInEdges()) {
        if (inEdge.getSourceId() == broadcastSide.getId()) {
            assertTrue(inEdge.getPartitioner() instanceof BroadcastPartitioner);
        } else if (inEdge.getSourceId() == plainSide.getId()) {
            assertTrue(inEdge.getPartitioner() instanceof ForwardPartitioner);
        } else {
            fail("Wrong input edge.");
        }
    }
}
Also used : RebalancePartitioner(org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) CoFlatMapFunction(org.apache.flink.streaming.api.functions.co.CoFlatMapFunction) MapFunction(org.apache.flink.api.common.functions.MapFunction) CoMapFunction(org.apache.flink.streaming.api.functions.co.CoMapFunction) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) ExpectedException(org.junit.rules.ExpectedException) BroadcastPartitioner(org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner) StreamGraph(org.apache.flink.streaming.api.graph.StreamGraph) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) ForwardPartitioner(org.apache.flink.streaming.runtime.partitioner.ForwardPartitioner) Test(org.junit.Test)

Example 9 with StreamEdge

use of org.apache.flink.streaming.api.graph.StreamEdge in project flink by apache.

the class OperatorChain method createOutputCollector.

// ------------------------------------------------------------------------
//  initialization utilities
// ------------------------------------------------------------------------
/**
 * Creates the single {@code Output} that an operator emits its records to.
 *
 * <p>One output is collected per non-chained edge (looked up in {@code streamOutputs})
 * and one per chained edge (created through {@code createChainedOperator}). If output
 * selectors are configured, the outputs are wrapped in a directed output. Without
 * selectors, a single output is returned as is and any other number of outputs is
 * wrapped in a broadcasting collector.
 */
private <T> Output<StreamRecord<T>> createOutputCollector(StreamTask<?, ?> containingTask, StreamConfig operatorConfig, Map<Integer, StreamConfig> chainedConfigs, ClassLoader userCodeClassloader, Map<StreamEdge, RecordWriterOutput<?>> streamOutputs, List<StreamOperator<?>> allOperators) {
    List<Tuple2<Output<StreamRecord<T>>, StreamEdge>> outputsWithEdges = new ArrayList<>(4);

    // create collectors for the network outputs
    for (StreamEdge networkEdge : operatorConfig.getNonChainedOutputs(userCodeClassloader)) {
        @SuppressWarnings("unchecked") RecordWriterOutput<T> networkOutput = (RecordWriterOutput<T>) streamOutputs.get(networkEdge);
        outputsWithEdges.add(new Tuple2<Output<StreamRecord<T>>, StreamEdge>(networkOutput, networkEdge));
    }

    // Create collectors for the chained outputs
    for (StreamEdge chainedEdge : operatorConfig.getChainedOutputs(userCodeClassloader)) {
        StreamConfig targetConfig = chainedConfigs.get(chainedEdge.getTargetId());
        Output<StreamRecord<T>> chainedOutput = createChainedOperator(containingTask, targetConfig, chainedConfigs, userCodeClassloader, streamOutputs, allOperators, chainedEdge.getOutputTag());
        outputsWithEdges.add(new Tuple2<>(chainedOutput, chainedEdge));
    }

    // if there are multiple outputs, or the outputs are directed, we need to
    // wrap them as one output
    List<OutputSelector<T>> selectors = operatorConfig.getOutputSelectors(userCodeClassloader);
    boolean directed = selectors != null && !selectors.isEmpty();

    if (directed) {
        // The copying variant is chosen when object reuse is enabled;
        // otherwise multi-chaining would not work correctly.
        if (containingTask.getExecutionConfig().isObjectReuseEnabled()) {
            return new CopyingDirectedOutput<>(selectors, outputsWithEdges);
        }
        return new DirectedOutput<>(selectors, outputsWithEdges);
    }

    // simple path, no selector necessary
    if (outputsWithEdges.size() == 1) {
        return outputsWithEdges.get(0).f0;
    }

    // send to N outputs. Note that this includes the special case
    // of sending to zero outputs
    @SuppressWarnings({ "unchecked", "rawtypes" }) Output<StreamRecord<T>>[] broadcastTargets = new Output[outputsWithEdges.size()];
    int slot = 0;
    for (Tuple2<Output<StreamRecord<T>>, StreamEdge> entry : outputsWithEdges) {
        broadcastTargets[slot++] = entry.f0;
    }

    // The copying variant is chosen when object reuse is enabled;
    // otherwise multi-chaining would not work correctly.
    if (containingTask.getExecutionConfig().isObjectReuseEnabled()) {
        return new CopyingBroadcastingOutputCollector<>(broadcastTargets, this);
    }
    return new BroadcastingOutputCollector<>(broadcastTargets, this);
}
Also used : CopyingDirectedOutput(org.apache.flink.streaming.api.collector.selector.CopyingDirectedOutput) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) ArrayList(java.util.ArrayList) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) CopyingDirectedOutput(org.apache.flink.streaming.api.collector.selector.CopyingDirectedOutput) DirectedOutput(org.apache.flink.streaming.api.collector.selector.DirectedOutput) RecordWriterOutput(org.apache.flink.streaming.runtime.io.RecordWriterOutput) OutputSelector(org.apache.flink.streaming.api.collector.selector.OutputSelector) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Output(org.apache.flink.streaming.api.operators.Output) CopyingDirectedOutput(org.apache.flink.streaming.api.collector.selector.CopyingDirectedOutput) DirectedOutput(org.apache.flink.streaming.api.collector.selector.DirectedOutput) RecordWriterOutput(org.apache.flink.streaming.runtime.io.RecordWriterOutput)

Example 10 with StreamEdge

use of org.apache.flink.streaming.api.graph.StreamEdge in project flink by apache.

the class StreamTaskTestHarness method setupOutputForSingletonOperatorChain.

/**
 * Users of the test harness can call this utility method to setup the stream config
 * if there will only be a single operator to be tested. The method will setup the
 * outgoing network connection for the operator.
 *
 * <p>The stream config is marked as chain start and given exactly one non-chained
 * output edge, which connects two dummy nodes and uses a broadcast partitioner.
 *
 * <p>For more advanced test cases such as testing chains of multiple operators with the harness,
 * please manually configure the stream config.
 */
public void setupOutputForSingletonOperatorChain() {
    // basic settings of the single-operator chain
    streamConfig.setChainStart();
    streamConfig.setBufferTimeout(0);
    streamConfig.setTimeCharacteristic(TimeCharacteristic.EventTime);
    streamConfig.setOutputSelectors(Collections.<OutputSelector<?>>emptyList());
    streamConfig.setNumberOfOutputs(1);
    streamConfig.setTypeSerializerOut(outputSerializer);
    streamConfig.setVertexID(0);

    // operator shared by both dummy nodes
    StreamOperator<OUT> placeholderOperator = new AbstractStreamOperator<OUT>() {

        private static final long serialVersionUID = 1L;
    };

    StreamNode dummySource = new StreamNode(null, 0, "group", placeholderOperator, "source dummy", new LinkedList<OutputSelector<?>>(), SourceStreamTask.class);
    StreamNode dummyTarget = new StreamNode(null, 1, "group", placeholderOperator, "target dummy", new LinkedList<OutputSelector<?>>(), SourceStreamTask.class);
    StreamEdge networkEdge = new StreamEdge(dummySource, dummyTarget, 0, new LinkedList<String>(), new BroadcastPartitioner<Object>(), null);

    // the single edge is both the complete ordered edge list and the only non-chained output
    List<StreamEdge> networkEdges = new LinkedList<StreamEdge>();
    networkEdges.add(networkEdge);
    streamConfig.setOutEdgesInOrder(networkEdges);
    streamConfig.setNonChainedOutputs(networkEdges);
}
Also used : OutputSelector(org.apache.flink.streaming.api.collector.selector.OutputSelector) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) StreamNode(org.apache.flink.streaming.api.graph.StreamNode) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) LinkedList(java.util.LinkedList) BroadcastPartitioner(org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner)

Aggregations

StreamEdge (org.apache.flink.streaming.api.graph.StreamEdge)13 StreamNode (org.apache.flink.streaming.api.graph.StreamNode)8 BroadcastPartitioner (org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner)5 Test (org.junit.Test)5 OutputSelector (org.apache.flink.streaming.api.collector.selector.OutputSelector)4 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)4 StreamConfig (org.apache.flink.streaming.api.graph.StreamConfig)4 ArrayList (java.util.ArrayList)3 StreamGraph (org.apache.flink.streaming.api.graph.StreamGraph)3 ForwardPartitioner (org.apache.flink.streaming.runtime.partitioner.ForwardPartitioner)3 RebalancePartitioner (org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner)3 LinkedList (java.util.LinkedList)2 FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction)2 MapFunction (org.apache.flink.api.common.functions.MapFunction)2 Configuration (org.apache.flink.configuration.Configuration)2 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)2 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)2 CoFlatMapFunction (org.apache.flink.streaming.api.functions.co.CoFlatMapFunction)2 CoMapFunction (org.apache.flink.streaming.api.functions.co.CoMapFunction)2 AbstractStreamOperator (org.apache.flink.streaming.api.operators.AbstractStreamOperator)2