Search in sources :

Example 6 with RebalancePartitioner

use of org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner in project flink by apache.

the class DataStreamTest method testUnion.

/**
 * Tests union functionality. This ensures that self-unions and unions of streams with differing
 * parallelism work.
 *
 * @throws Exception
 */
@Test
public void testUnion() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);
    DataStream<Long> input1 = env.generateSequence(0, 0).map(new MapFunction<Long, Long>() {

        @Override
        public Long map(Long value) throws Exception {
            return null;
        }
    });
    DataStream<Long> selfUnion = input1.union(input1).map(new MapFunction<Long, Long>() {

        @Override
        public Long map(Long value) throws Exception {
            return null;
        }
    });
    DataStream<Long> input6 = env.generateSequence(0, 0).map(new MapFunction<Long, Long>() {

        @Override
        public Long map(Long value) throws Exception {
            return null;
        }
    });
    DataStream<Long> selfUnionDifferentPartition = input6.broadcast().union(input6).map(new MapFunction<Long, Long>() {

        @Override
        public Long map(Long value) throws Exception {
            return null;
        }
    });
    DataStream<Long> input2 = env.generateSequence(0, 0).map(new MapFunction<Long, Long>() {

        @Override
        public Long map(Long value) throws Exception {
            return null;
        }
    }).setParallelism(4);
    DataStream<Long> input3 = env.generateSequence(0, 0).map(new MapFunction<Long, Long>() {

        @Override
        public Long map(Long value) throws Exception {
            return null;
        }
    }).setParallelism(2);
    DataStream<Long> unionDifferingParallelism = input2.union(input3).map(new MapFunction<Long, Long>() {

        @Override
        public Long map(Long value) throws Exception {
            return null;
        }
    }).setParallelism(4);
    DataStream<Long> input4 = env.generateSequence(0, 0).map(new MapFunction<Long, Long>() {

        @Override
        public Long map(Long value) throws Exception {
            return null;
        }
    }).setParallelism(2);
    DataStream<Long> input5 = env.generateSequence(0, 0).map(new MapFunction<Long, Long>() {

        @Override
        public Long map(Long value) throws Exception {
            return null;
        }
    }).setParallelism(4);
    DataStream<Long> unionDifferingPartitioning = input4.broadcast().union(input5).map(new MapFunction<Long, Long>() {

        @Override
        public Long map(Long value) throws Exception {
            return null;
        }
    }).setParallelism(4);
    StreamGraph streamGraph = getStreamGraph(env);
    // verify self union
    assertTrue(streamGraph.getStreamNode(selfUnion.getId()).getInEdges().size() == 2);
    for (StreamEdge edge : streamGraph.getStreamNode(selfUnion.getId()).getInEdges()) {
        assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
    }
    // verify self union with different partitioners
    assertTrue(streamGraph.getStreamNode(selfUnionDifferentPartition.getId()).getInEdges().size() == 2);
    boolean hasForward = false;
    boolean hasBroadcast = false;
    for (StreamEdge edge : streamGraph.getStreamNode(selfUnionDifferentPartition.getId()).getInEdges()) {
        if (edge.getPartitioner() instanceof ForwardPartitioner) {
            hasForward = true;
        }
        if (edge.getPartitioner() instanceof BroadcastPartitioner) {
            hasBroadcast = true;
        }
    }
    assertTrue(hasForward && hasBroadcast);
    // verify union of streams with differing parallelism
    assertTrue(streamGraph.getStreamNode(unionDifferingParallelism.getId()).getInEdges().size() == 2);
    for (StreamEdge edge : streamGraph.getStreamNode(unionDifferingParallelism.getId()).getInEdges()) {
        if (edge.getSourceId() == input2.getId()) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
        } else if (edge.getSourceId() == input3.getId()) {
            assertTrue(edge.getPartitioner() instanceof RebalancePartitioner);
        } else {
            fail("Wrong input edge.");
        }
    }
    // verify union of streams with differing partitionings
    assertTrue(streamGraph.getStreamNode(unionDifferingPartitioning.getId()).getInEdges().size() == 2);
    for (StreamEdge edge : streamGraph.getStreamNode(unionDifferingPartitioning.getId()).getInEdges()) {
        if (edge.getSourceId() == input4.getId()) {
            assertTrue(edge.getPartitioner() instanceof BroadcastPartitioner);
        } else if (edge.getSourceId() == input5.getId()) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
        } else {
            fail("Wrong input edge.");
        }
    }
}
Also used : RebalancePartitioner(org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) CoFlatMapFunction(org.apache.flink.streaming.api.functions.co.CoFlatMapFunction) MapFunction(org.apache.flink.api.common.functions.MapFunction) CoMapFunction(org.apache.flink.streaming.api.functions.co.CoMapFunction) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) ExpectedException(org.junit.rules.ExpectedException) BroadcastPartitioner(org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner) StreamGraph(org.apache.flink.streaming.api.graph.StreamGraph) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) ForwardPartitioner(org.apache.flink.streaming.runtime.partitioner.ForwardPartitioner) Test(org.junit.Test)

Example 7 with RebalancePartitioner

use of org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner in project flink by apache.

the class StreamGraphGeneratorTest method testResetBatchExchangeModeInStreamingExecution.

@Test
public void testResetBatchExchangeModeInStreamingExecution() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);
    PartitionTransformation<Integer> transformation = new PartitionTransformation<>(sourceDataStream.getTransformation(), new RebalancePartitioner<>(), StreamExchangeMode.BATCH);
    DataStream<Integer> partitionStream = new DataStream<>(env, transformation);
    partitionStream.map(value -> value).print();
    final StreamGraph streamGraph = env.getStreamGraph();
    Assertions.assertThat(streamGraph.getStreamEdges(1, 3)).hasSize(1).satisfies(e -> Assertions.assertThat(e.get(0).getExchangeMode()).isEqualTo(StreamExchangeMode.UNDEFINED));
}
Also used : Arrays(java.util.Arrays) Tuple2(org.apache.flink.api.java.tuple.Tuple2) BroadcastPartitioner(org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner) SlotSharingGroup(org.apache.flink.api.common.operators.SlotSharingGroup) KeyedBroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) ShufflePartitioner(org.apache.flink.streaming.runtime.partitioner.ShufflePartitioner) ChainingStrategy(org.apache.flink.streaming.api.operators.ChainingStrategy) ResourceSpec(org.apache.flink.api.common.operators.ResourceSpec) ManagedMemoryUseCase(org.apache.flink.core.memory.ManagedMemoryUseCase) Map(java.util.Map) TestLogger(org.apache.flink.util.TestLogger) Function(org.apache.flink.api.common.functions.Function) Assertions(org.assertj.core.api.Assertions) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) CoMapFunction(org.apache.flink.streaming.api.functions.co.CoMapFunction) PartitionTransformation(org.apache.flink.streaming.api.transformations.PartitionTransformation) SinkFunction(org.apache.flink.streaming.api.functions.sink.SinkFunction) StreamTask(org.apache.flink.streaming.runtime.tasks.StreamTask) Collection(java.util.Collection) ConnectedStreams(org.apache.flink.streaming.api.datastream.ConnectedStreams) TypeSafeMatcher(org.hamcrest.TypeSafeMatcher) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) GlobalPartitioner(org.apache.flink.streaming.runtime.partitioner.GlobalPartitioner) List(java.util.List) NoOpIntMap(org.apache.flink.streaming.util.NoOpIntMap) Matchers.equalTo(org.hamcrest.Matchers.equalTo) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) CheckpointConfig(org.apache.flink.streaming.api.environment.CheckpointConfig) Matchers.is(org.hamcrest.Matchers.is) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) MultipleInputTransformation(org.apache.flink.streaming.api.transformations.MultipleInputTransformation) IterativeStream(org.apache.flink.streaming.api.datastream.IterativeStream) BroadcastStream(org.apache.flink.streaming.api.datastream.BroadcastStream) AbstractUdfStreamOperator(org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator) StreamOperatorFactory(org.apache.flink.streaming.api.operators.StreamOperatorFactory) Watermark(org.apache.flink.streaming.api.watermark.Watermark) SavepointConfigOptions(org.apache.flink.runtime.jobgraph.SavepointConfigOptions) HashMap(java.util.HashMap) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) ArrayList(java.util.ArrayList) StreamPartitioner(org.apache.flink.streaming.runtime.partitioner.StreamPartitioner) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) Assertions.assertThatThrownBy(org.assertj.core.api.Assertions.assertThatThrownBy) Collector(org.apache.flink.util.Collector) Matchers.iterableWithSize(org.hamcrest.Matchers.iterableWithSize) Output(org.apache.flink.streaming.api.operators.Output) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) TestExpandingSink(org.apache.flink.streaming.util.TestExpandingSink) RebalancePartitioner(org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner) Description(org.hamcrest.Description) TwoInputStreamOperator(org.apache.flink.streaming.api.operators.TwoInputStreamOperator) DiscardingSink(org.apache.flink.streaming.api.functions.sink.DiscardingSink) Assert.assertNotNull(org.junit.Assert.assertNotNull) Configuration(org.apache.flink.configuration.Configuration) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) Assert.assertTrue(org.junit.Assert.assertTrue) StreamOperatorParameters(org.apache.flink.streaming.api.operators.StreamOperatorParameters) Test(org.junit.Test) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) DataStream(org.apache.flink.streaming.api.datastream.DataStream) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) FeatureMatcher(org.hamcrest.FeatureMatcher) StreamExchangeMode(org.apache.flink.streaming.api.transformations.StreamExchangeMode) BroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction) Matcher(org.hamcrest.Matcher) Transformation(org.apache.flink.api.dag.Transformation) LatencyMarker(org.apache.flink.streaming.runtime.streamrecord.LatencyMarker) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) OutputTypeConfigurable(org.apache.flink.streaming.api.operators.OutputTypeConfigurable) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) DataStream(org.apache.flink.streaming.api.datastream.DataStream) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) PartitionTransformation(org.apache.flink.streaming.api.transformations.PartitionTransformation) Test(org.junit.Test)

Example 8 with RebalancePartitioner

use of org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner in project flink by apache.

the class IterateITCase method testmultipleHeadsTailsSimple.

@Test
public void testmultipleHeadsTailsSimple() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Integer> source1 = env.fromElements(1, 2, 3, 4, 5).shuffle().map(noOpIntMap).name("ParallelizeMapShuffle");
    DataStream<Integer> source2 = env.fromElements(1, 2, 3, 4, 5).map(noOpIntMap).name("ParallelizeMapRebalance");
    IterativeStream<Integer> iter1 = source1.union(source2).iterate();
    DataStream<Integer> head1 = iter1.map(noOpIntMap).name("IterRebalanceMap").setParallelism(parallelism / 2);
    DataStream<Integer> head2 = iter1.map(noOpIntMap).name("IterForwardMap");
    DataStreamSink<Integer> head3 = iter1.map(noOpIntMap).setParallelism(parallelism / 2).addSink(new ReceiveCheckNoOpSink<Integer>());
    DataStreamSink<Integer> head4 = iter1.map(noOpIntMap).addSink(new ReceiveCheckNoOpSink<Integer>());
    OutputTag<Integer> even = new OutputTag<Integer>("even") {
    };
    OutputTag<Integer> odd = new OutputTag<Integer>("odd") {
    };
    SingleOutputStreamOperator<Object> source3 = env.fromElements(1, 2, 3, 4, 5).map(noOpIntMap).name("EvenOddSourceMap").process(new ProcessFunction<Integer, Object>() {

        @Override
        public void processElement(Integer value, Context ctx, Collector<Object> out) throws Exception {
            if (value % 2 == 0) {
                ctx.output(even, value);
            } else {
                ctx.output(odd, value);
            }
        }
    });
    iter1.closeWith(source3.getSideOutput(even).union(head1.rebalance().map(noOpIntMap).broadcast(), head2.shuffle()));
    StreamGraph graph = env.getStreamGraph();
    JobGraph jg = graph.getJobGraph();
    assertEquals(1, graph.getIterationSourceSinkPairs().size());
    Tuple2<StreamNode, StreamNode> sourceSinkPair = graph.getIterationSourceSinkPairs().iterator().next();
    StreamNode itSource = sourceSinkPair.f0;
    StreamNode itSink = sourceSinkPair.f1;
    assertEquals(4, itSource.getOutEdges().size());
    assertEquals(3, itSink.getInEdges().size());
    assertEquals(itSource.getParallelism(), itSink.getParallelism());
    for (StreamEdge edge : itSource.getOutEdges()) {
        if (graph.getTargetVertex(edge).getOperatorName().equals("IterRebalanceMap")) {
            assertTrue(edge.getPartitioner() instanceof RebalancePartitioner);
        } else if (graph.getTargetVertex(edge).getOperatorName().equals("IterForwardMap")) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
        }
    }
    for (StreamEdge edge : itSink.getInEdges()) {
        if (graph.getStreamNode(edge.getSourceId()).getOperatorName().equals("ParallelizeMapShuffle")) {
            assertTrue(edge.getPartitioner() instanceof ShufflePartitioner);
        }
        if (graph.getStreamNode(edge.getSourceId()).getOperatorName().equals("ParallelizeMapForward")) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
        }
        if (graph.getStreamNode(edge.getSourceId()).getOperatorName().equals("EvenOddSourceMap")) {
            assertTrue(edge.getPartitioner() instanceof ForwardPartitioner);
        }
    }
    // Test co-location
    JobVertex itSource1 = null;
    JobVertex itSink1 = null;
    for (JobVertex vertex : jg.getVertices()) {
        if (vertex.getName().contains("IterationSource")) {
            itSource1 = vertex;
        } else if (vertex.getName().contains("IterationSink")) {
            itSink1 = vertex;
        }
    }
    assertTrue(itSource1.getCoLocationGroup() != null);
    assertEquals(itSource1.getCoLocationGroup(), itSink1.getCoLocationGroup());
}
Also used : RebalancePartitioner(org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ShufflePartitioner(org.apache.flink.streaming.runtime.partitioner.ShufflePartitioner) OutputTag(org.apache.flink.util.OutputTag) StreamGraph(org.apache.flink.streaming.api.graph.StreamGraph) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) StreamNode(org.apache.flink.streaming.api.graph.StreamNode) ForwardPartitioner(org.apache.flink.streaming.runtime.partitioner.ForwardPartitioner) Test(org.junit.Test)

Aggregations

StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)8 RebalancePartitioner (org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner)8 Test (org.junit.Test)8 ForwardPartitioner (org.apache.flink.streaming.runtime.partitioner.ForwardPartitioner)6 BroadcastPartitioner (org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner)5 ShufflePartitioner (org.apache.flink.streaming.runtime.partitioner.ShufflePartitioner)5 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)4 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)4 ArrayList (java.util.ArrayList)3 Arrays (java.util.Arrays)3 Collections (java.util.Collections)3 HashMap (java.util.HashMap)3 List (java.util.List)3 Map (java.util.Map)3 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)3 FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction)3 MapFunction (org.apache.flink.api.common.functions.MapFunction)3 ResourceSpec (org.apache.flink.api.common.operators.ResourceSpec)3 BasicTypeInfo (org.apache.flink.api.common.typeinfo.BasicTypeInfo)3 TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)3