Search in sources:

Example 1 with BroadcastPartitioner

use of org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner in project flink by apache.

the class StreamGraphGeneratorTest method testVirtualTransformations2.

/**
 * Tests that virtual transformations behave correctly.
 *
 * <p>Checks that the output selector and the partitioning are both applied correctly when
 * they are set on a union of several streams.
 */
@Test
public void testVirtualTransformations2() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Integer> source = env.fromElements(1, 10);
    DataStream<Integer> rebalanceMap = source.rebalance().map(new NoOpIntMap());

    // three parallel branches that are unioned below
    DataStream<Integer> map1 = rebalanceMap.map(new NoOpIntMap());
    DataStream<Integer> map2 = rebalanceMap.map(new NoOpIntMap());
    DataStream<Integer> map3 = rebalanceMap.map(new NoOpIntMap());

    EvenOddOutputSelector selector = new EvenOddOutputSelector();

    // broadcast + split/select are applied once, on the union of all three branches
    SingleOutputStreamOperator<Integer> unionedMap = map1.union(map2)
            .union(map3)
            .broadcast()
            .split(selector)
            .select("foo")
            .map(new NoOpIntMap());
    unionedMap.addSink(new DiscardingSink<Integer>());

    StreamGraph graph = env.getStreamGraph();

    // verify that the properties are correctly set on every input operator of the union
    int[] unionInputIds = {map1.getId(), map2.getId(), map3.getId()};
    for (int inputId : unionInputIds) {
        assertTrue(graph.getStreamNode(inputId).getOutEdges().get(0).getPartitioner() instanceof BroadcastPartitioner);
        assertTrue(graph.getStreamNode(inputId).getOutEdges().get(0).getSelectedNames().get(0).equals("foo"));
        assertTrue(graph.getStreamNode(inputId).getOutputSelectors().contains(selector));
    }
}
Also used : NoOpIntMap(org.apache.flink.streaming.util.NoOpIntMap) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) EvenOddOutputSelector(org.apache.flink.streaming.util.EvenOddOutputSelector) BroadcastPartitioner(org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner) Test(org.junit.Test)

Example 2 with BroadcastPartitioner

use of org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner in project flink by apache.

the class StreamGraphGeneratorTest method testVirtualTransformations.

/**
 * Tests that virtual transformations behave correctly.
 *
 * <p>Builds a topology in which partitioning transformations are stacked and unioned, then
 * checks the partitioners recorded on the edges of the generated StreamGraph.
 */
@Test
public void testVirtualTransformations() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Integer> input = env.fromElements(1, 10);
    DataStream<Integer> rebalanced = input.rebalance().map(new NoOpIntMap());

    // stack several partitionings; only the one set last (broadcast) is expected to be used
    DataStream<Integer> lastPartitioningWins = rebalanced
            .forward()
            .global()
            .broadcast()
            .map(new NoOpIntMap());
    lastPartitioningWins.addSink(new DiscardingSink<>());

    // three named operators, each followed by a different partitioning
    DataStream<Integer> broadcastOp = rebalanced.map(new NoOpIntMap()).name("broadcast");
    DataStream<Integer> broadcastBranch = broadcastOp.broadcast();

    DataStream<Integer> globalOp = rebalanced.map(new NoOpIntMap()).name("global");
    DataStream<Integer> globalBranch = globalOp.global();

    DataStream<Integer> shuffleOp = rebalanced.map(new NoOpIntMap()).name("shuffle");
    DataStream<Integer> shuffleBranch = shuffleOp.shuffle();

    SingleOutputStreamOperator<Integer> unioned = broadcastBranch
            .union(globalBranch)
            .union(shuffleBranch)
            .map(new NoOpIntMap())
            .name("union");
    unioned.addSink(new DiscardingSink<>());

    StreamGraph graph = env.getStreamGraph();

    // the first map is fed through a rebalance partitioner
    assertTrue(
            graph.getStreamNode(rebalanced.getId()).getInEdges().get(0).getPartitioner()
                    instanceof RebalancePartitioner);

    // of the stacked partitionings only the last one shows up, and the edge starts at the first map
    assertTrue(
            graph.getStreamNode(lastPartitioningWins.getId()).getInEdges().get(0).getPartitioner()
                    instanceof BroadcastPartitioner);
    assertEquals(
            rebalanced.getId(),
            graph.getSourceVertex(graph.getStreamNode(lastPartitioningWins.getId()).getInEdges().get(0)).getId());

    // each union input keeps its own partitioning
    assertTrue(
            graph.getStreamNode(broadcastOp.getId()).getOutEdges().get(0).getPartitioner()
                    instanceof BroadcastPartitioner);
    assertTrue(
            graph.getStreamNode(globalOp.getId()).getOutEdges().get(0).getPartitioner()
                    instanceof GlobalPartitioner);
    assertTrue(
            graph.getStreamNode(shuffleOp.getId()).getOutEdges().get(0).getPartitioner()
                    instanceof ShufflePartitioner);
}
Also used : ShufflePartitioner(org.apache.flink.streaming.runtime.partitioner.ShufflePartitioner) GlobalPartitioner(org.apache.flink.streaming.runtime.partitioner.GlobalPartitioner) NoOpIntMap(org.apache.flink.streaming.util.NoOpIntMap) RebalancePartitioner(org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) BroadcastPartitioner(org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner) Test(org.junit.Test)

Example 3 with BroadcastPartitioner

use of org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner in project flink by apache.

the class IterateITCase method testmultipleHeadsTailsWithTailPartitioning.

/**
 * Builds an iteration with several heads and several differently partitioned tails and
 * checks the generated StreamGraph and JobGraph.
 *
 * <p>Verified properties: exactly one iteration source/sink pair exists, the pair has the
 * expected number of edges and equal parallelism, the head and tail edges carry the expected
 * partitioners, and the iteration source and sink job vertices share a co-location group.
 */
@Test
public void testmultipleHeadsTailsWithTailPartitioning() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Integer> source1 = env.fromElements(1, 2, 3, 4, 5).shuffle().map(noOpIntMap);
    DataStream<Integer> source2 = env.fromElements(1, 2, 3, 4, 5).map(noOpIntMap);
    IterativeStream<Integer> iter1 = source1.union(source2).iterate();
    DataStream<Integer> head1 = iter1.map(noOpIntMap).name("map1");
    DataStream<Integer> head2 = iter1.map(noOpIntMap).setParallelism(parallelism / 2).name("shuffle").rebalance();
    // Two further heads that only feed sinks; the returned sink handles are not needed.
    iter1.map(noOpIntMap).setParallelism(parallelism / 2).addSink(new ReceiveCheckNoOpSink<Integer>());
    iter1.map(noOpIntMap).addSink(new ReceiveCheckNoOpSink<Integer>());
    OutputTag<Integer> even = new OutputTag<Integer>("even") {
    };
    OutputTag<Integer> odd = new OutputTag<Integer>("odd") {
    };
    // Routes even values to the "even" side output and all other values to "odd".
    SingleOutputStreamOperator<Object> source3 = env.fromElements(1, 2, 3, 4, 5).map(noOpIntMap).name("split").process(new ProcessFunction<Integer, Object>() {

        @Override
        public void processElement(Integer value, Context ctx, Collector<Object> out) throws Exception {
            if (value % 2 == 0) {
                ctx.output(even, value);
            } else {
                ctx.output(odd, value);
            }
        }
    });
    // Close the loop with three tails: the "even" side output, a broadcast tail and a shuffled tail.
    iter1.closeWith(source3.getSideOutput(even).union(head1.map(noOpIntMap).name("bc").broadcast(), head2.map(noOpIntMap).shuffle()));
    StreamGraph graph = env.getStreamGraph();
    JobGraph jg = graph.getJobGraph();
    assertEquals(1, graph.getIterationSourceSinkPairs().size());
    Tuple2<StreamNode, StreamNode> sourceSinkPair = graph.getIterationSourceSinkPairs().iterator().next();
    StreamNode itSource = sourceSinkPair.f0;
    StreamNode itSink = sourceSinkPair.f1;
    assertEquals(4, itSource.getOutEdges().size());
    assertEquals(3, itSink.getInEdges().size());
    assertEquals(itSource.getParallelism(), itSink.getParallelism());
    // NOTE(review): the literal parallelism values 4 and 2 below presumably correspond to
    // `parallelism` and `parallelism / 2` — confirm against the field's value.
    for (StreamEdge edge : itSource.getOutEdges()) {
        if (graph.getTargetVertex(edge).getOperatorName().equals("map1")) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
            assertEquals(4, graph.getTargetVertex(edge).getParallelism());
        } else if (graph.getTargetVertex(edge).getOperatorName().equals("shuffle")) {
            assertTrue(edge.getPartitioner() instanceof RebalancePartitioner);
            assertEquals(2, graph.getTargetVertex(edge).getParallelism());
        }
    }
    for (StreamEdge edge : itSink.getInEdges()) {
        String tailName = graph.getSourceVertex(edge).getOperatorName();
        if (tailName.equals("split")) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
        } else if (tailName.equals("bc")) {
            assertTrue(edge.getPartitioner() instanceof BroadcastPartitioner);
        } else if (tailName.equals("shuffle")) {
            assertTrue(edge.getPartitioner() instanceof ShufflePartitioner);
        }
    }
    // Test co-location
    JobVertex itSource1 = null;
    JobVertex itSink1 = null;
    for (JobVertex vertex : jg.getVertices()) {
        if (vertex.getName().contains("IterationSource")) {
            itSource1 = vertex;
        } else if (vertex.getName().contains("IterationSink")) {
            itSink1 = vertex;
        }
    }
    // Fail with a clear message instead of a NullPointerException when a vertex is missing.
    assertTrue("No job vertex whose name contains \"IterationSource\" was found", itSource1 != null);
    assertTrue("No job vertex whose name contains \"IterationSink\" was found", itSink1 != null);
    assertTrue(itSource1.getCoLocationGroup() != null);
    assertTrue(itSink1.getCoLocationGroup() != null);
    assertEquals(itSource1.getCoLocationGroup(), itSink1.getCoLocationGroup());
}
Also used : RebalancePartitioner(org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner) OutputTag(org.apache.flink.util.OutputTag) StreamGraph(org.apache.flink.streaming.api.graph.StreamGraph) StreamNode(org.apache.flink.streaming.api.graph.StreamNode) ForwardPartitioner(org.apache.flink.streaming.runtime.partitioner.ForwardPartitioner) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) BroadcastPartitioner(org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ShufflePartitioner(org.apache.flink.streaming.runtime.partitioner.ShufflePartitioner) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 4 with BroadcastPartitioner

use of org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner in project flink by apache.

the class StreamConfigChainer method finishForSingletonOperatorChain.

/**
 * Finishes the configuration when head and tail config are the same object and
 * {@code chainIndex} is still 0.
 *
 * <p>Creates one dummy output edge per non-chained output (each using a
 * {@link BroadcastPartitioner}) and registers the edges, the chained task configs and the
 * output serializer on the head config.
 *
 * @param outputSerializer serializer set as the head config's output type serializer
 * @param <OUT> output type of the dummy operator attached to the dummy stream nodes
 * @return the owner of this chainer
 * @throws IllegalStateException presumably thrown by {@code checkState} when
 *     {@code chainIndex != 0} or the head and tail configs differ — confirm against the
 *     {@code checkState} implementation in use
 */
public <OUT> OWNER finishForSingletonOperatorChain(TypeSerializer<OUT> outputSerializer) {
    // The previous message told the caller to use this very method, which gave no hint
    // about the violated precondition.
    checkState(chainIndex == 0, "finishForSingletonOperatorChain requires chainIndex == 0");
    checkState(headConfig == tailConfig);
    // Placeholder operator; it only exists so that the dummy stream nodes can be constructed.
    StreamOperator<OUT> dummyOperator = new AbstractStreamOperator<OUT>() {

        private static final long serialVersionUID = 1L;
    };
    List<StreamEdge> outEdgesInOrder = new LinkedList<>();
    StreamNode sourceVertexDummy = new StreamNode(MAIN_NODE_ID, "group", null, dummyOperator, "source dummy", SourceStreamTask.class);
    for (int i = 0; i < numberOfNonChainedOutputs; ++i) {
        // Each dummy target gets its own id, offset from the main node id.
        StreamNode targetVertexDummy = new StreamNode(MAIN_NODE_ID + 1 + i, "group", null, dummyOperator, "target dummy", SourceStreamTask.class);
        outEdgesInOrder.add(new StreamEdge(sourceVertexDummy, targetVertexDummy, 0, new BroadcastPartitioner<>(), null));
    }
    headConfig.setVertexID(0);
    headConfig.setNumberOfOutputs(1);
    // Set once; the original repeated this call a second time with the same list.
    headConfig.setOutEdgesInOrder(outEdgesInOrder);
    headConfig.setNonChainedOutputs(outEdgesInOrder);
    headConfig.setTransitiveChainedTaskConfigs(chainedConfigs);
    headConfig.setTypeSerializerOut(outputSerializer);
    return owner;
}
Also used : StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) StreamNode(org.apache.flink.streaming.api.graph.StreamNode) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) LinkedList(java.util.LinkedList) BroadcastPartitioner(org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner)

Example 5 with BroadcastPartitioner

use of org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner in project flink by apache.

the class TwoInputStreamTaskTestHarness method initializeInputs.

/**
 * Creates one test input gate per configured gate, registers it with the mock environment
 * and records a matching physical in-edge in the stream config.
 *
 * <p>The entry of {@code inputGateAssignment} for a gate selects which of the two input
 * serializers the gate uses and is passed to the created {@link StreamEdge}.
 *
 * @throws IllegalStateException if a gate is assigned to anything other than input 1 or 2
 */
@Override
protected void initializeInputs() {
    inputGates = new StreamTestSingleInputGate[numInputGates];

    // Placeholder operator and nodes; they only serve as end points of the dummy edges.
    StreamOperator<IN1> dummyOperator = new AbstractStreamOperator<IN1>() {

        private static final long serialVersionUID = 1L;
    };
    StreamNode sourceVertexDummy = new StreamNode(0, "default group", null, dummyOperator, "source dummy", SourceStreamTask.class);
    StreamNode targetVertexDummy = new StreamNode(1, "default group", null, dummyOperator, "target dummy", SourceStreamTask.class);

    List<StreamEdge> physicalEdges = new LinkedList<>();
    for (int gateIndex = 0; gateIndex < numInputGates; gateIndex++) {
        final int assignedInput = inputGateAssignment[gateIndex];
        if (assignedInput == 1) {
            inputGates[gateIndex] = new StreamTestSingleInputGate<>(numInputChannelsPerGate, gateIndex, inputSerializer1, bufferSize);
        } else if (assignedInput == 2) {
            inputGates[gateIndex] = new StreamTestSingleInputGate<>(numInputChannelsPerGate, gateIndex, inputSerializer2, bufferSize);
        } else {
            throw new IllegalStateException("Wrong input gate assignment.");
        }
        // The assigned input (1 or 2) is the third constructor argument of the edge.
        physicalEdges.add(new StreamEdge(sourceVertexDummy, targetVertexDummy, assignedInput, new BroadcastPartitioner<>(), null));
        this.mockEnv.addInputGate(inputGates[gateIndex].getInputGate());
    }

    streamConfig.setInPhysicalEdges(physicalEdges);
    streamConfig.setNumberOfNetworkInputs(numInputGates);
    streamConfig.setupNetworkInputs(inputSerializer1, inputSerializer2);
}
Also used : StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) StreamTestSingleInputGate(org.apache.flink.runtime.io.network.partition.consumer.StreamTestSingleInputGate) StreamNode(org.apache.flink.streaming.api.graph.StreamNode) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) LinkedList(java.util.LinkedList) BroadcastPartitioner(org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner)

Aggregations

BroadcastPartitioner (org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner): 8, StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 5, StreamEdge (org.apache.flink.streaming.api.graph.StreamEdge): 5, Test (org.junit.Test): 5, StreamNode (org.apache.flink.streaming.api.graph.StreamNode): 4, RebalancePartitioner (org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner): 4, LinkedList (java.util.LinkedList): 3, AbstractStreamOperator (org.apache.flink.streaming.api.operators.AbstractStreamOperator): 3, ForwardPartitioner (org.apache.flink.streaming.runtime.partitioner.ForwardPartitioner): 3, ShufflePartitioner (org.apache.flink.streaming.runtime.partitioner.ShufflePartitioner): 3, InvalidProgramException (org.apache.flink.api.common.InvalidProgramException): 2, StreamGraph (org.apache.flink.streaming.api.graph.StreamGraph): 2, GlobalPartitioner (org.apache.flink.streaming.runtime.partitioner.GlobalPartitioner): 2, NoOpIntMap (org.apache.flink.streaming.util.NoOpIntMap): 2, FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction): 1, MapFunction (org.apache.flink.api.common.functions.MapFunction): 1, StreamTestSingleInputGate (org.apache.flink.runtime.io.network.partition.consumer.StreamTestSingleInputGate): 1, JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 1, JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 1, OperatorID (org.apache.flink.runtime.jobgraph.OperatorID): 1