Search in sources :

Example 81 with SingleInputPlanNode

use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.

the class DataExchangeModeClosedBranchingTest method verifyBranchingJoiningPlan.

private void verifyBranchingJoiningPlan(ExecutionMode execMode, DataExchangeMode toMap, DataExchangeMode toReduceCombiner, DataExchangeMode toReduce, DataExchangeMode toFilter, DataExchangeMode toReduceSink, DataExchangeMode toJoin1, DataExchangeMode toJoin2, DataExchangeMode toOtherReduceCombiner, DataExchangeMode toOtherReduce, DataExchangeMode toFlatMap, DataExchangeMode toFlatMapSink, DataExchangeMode toCoGroup1, DataExchangeMode toCoGroup2, DataExchangeMode toCoGroupSink) {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setExecutionMode(execMode);
        DataSet<Tuple2<Long, Long>> data = env.fromElements(33L, 44L).map(new MapFunction<Long, Tuple2<Long, Long>>() {

            @Override
            public Tuple2<Long, Long> map(Long value) {
                return new Tuple2<Long, Long>(value, value);
            }
        });
        DataSet<Tuple2<Long, Long>> reduced = data.groupBy(0).reduce(new SelectOneReducer<Tuple2<Long, Long>>());
        reduced.output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("reduceSink");
        DataSet<Tuple2<Long, Long>> filtered = data.filter(new FilterFunction<Tuple2<Long, Long>>() {

            @Override
            public boolean filter(Tuple2<Long, Long> value) throws Exception {
                return false;
            }
        });
        DataSet<Tuple2<Long, Long>> joined = reduced.join(filtered).where(1).equalTo(1).with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());
        joined.flatMap(new IdentityFlatMapper<Tuple2<Long, Long>>()).output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("flatMapSink");
        joined.coGroup(filtered.groupBy(1).reduceGroup(new Top1GroupReducer<Tuple2<Long, Long>>())).where(0).equalTo(0).with(new DummyCoGroupFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>()).output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>()).name("cgSink");
        OptimizedPlan optPlan = compileNoStats(env.createProgramPlan());
        SinkPlanNode reduceSink = findSink(optPlan.getDataSinks(), "reduceSink");
        SinkPlanNode flatMapSink = findSink(optPlan.getDataSinks(), "flatMapSink");
        SinkPlanNode cgSink = findSink(optPlan.getDataSinks(), "cgSink");
        DualInputPlanNode coGroupNode = (DualInputPlanNode) cgSink.getPredecessor();
        DualInputPlanNode joinNode = (DualInputPlanNode) coGroupNode.getInput1().getSource();
        SingleInputPlanNode otherReduceNode = (SingleInputPlanNode) coGroupNode.getInput2().getSource();
        SingleInputPlanNode otherReduceCombinerNode = (SingleInputPlanNode) otherReduceNode.getPredecessor();
        SingleInputPlanNode reduceNode = (SingleInputPlanNode) joinNode.getInput1().getSource();
        SingleInputPlanNode reduceCombinerNode = (SingleInputPlanNode) reduceNode.getPredecessor();
        assertEquals(reduceNode, reduceSink.getPredecessor());
        SingleInputPlanNode filterNode = (SingleInputPlanNode) joinNode.getInput2().getSource();
        assertEquals(filterNode, otherReduceCombinerNode.getPredecessor());
        SingleInputPlanNode mapNode = (SingleInputPlanNode) filterNode.getPredecessor();
        assertEquals(mapNode, reduceCombinerNode.getPredecessor());
        SingleInputPlanNode flatMapNode = (SingleInputPlanNode) flatMapSink.getPredecessor();
        assertEquals(joinNode, flatMapNode.getPredecessor());
        // verify the data exchange modes
        assertEquals(toReduceSink, reduceSink.getInput().getDataExchangeMode());
        assertEquals(toFlatMapSink, flatMapSink.getInput().getDataExchangeMode());
        assertEquals(toCoGroupSink, cgSink.getInput().getDataExchangeMode());
        assertEquals(toCoGroup1, coGroupNode.getInput1().getDataExchangeMode());
        assertEquals(toCoGroup2, coGroupNode.getInput2().getDataExchangeMode());
        assertEquals(toJoin1, joinNode.getInput1().getDataExchangeMode());
        assertEquals(toJoin2, joinNode.getInput2().getDataExchangeMode());
        assertEquals(toOtherReduce, otherReduceNode.getInput().getDataExchangeMode());
        assertEquals(toOtherReduceCombiner, otherReduceCombinerNode.getInput().getDataExchangeMode());
        assertEquals(toFlatMap, flatMapNode.getInput().getDataExchangeMode());
        assertEquals(toFilter, filterNode.getInput().getDataExchangeMode());
        assertEquals(toReduce, reduceNode.getInput().getDataExchangeMode());
        assertEquals(toReduceCombiner, reduceCombinerNode.getInput().getDataExchangeMode());
        assertEquals(toMap, mapNode.getInput().getDataExchangeMode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode)

Example 82 with SingleInputPlanNode

use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.

the class DataExchangeModeForwardTest method verifySimpleForwardPlan.

private void verifySimpleForwardPlan(ExecutionMode execMode, DataExchangeMode toMap, DataExchangeMode toFilter, DataExchangeMode toKeyExtractor, DataExchangeMode toCombiner, DataExchangeMode toReduce, DataExchangeMode toSink) {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setExecutionMode(execMode);
        DataSet<String> dataSet = env.readTextFile("/never/accessed");
        dataSet.map(new MapFunction<String, Integer>() {

            @Override
            public Integer map(String value) {
                return 0;
            }
        }).filter(new FilterFunction<Integer>() {

            @Override
            public boolean filter(Integer value) {
                return false;
            }
        }).groupBy(new IdentityKeyExtractor<Integer>()).reduceGroup(new Top1GroupReducer<Integer>()).output(new DiscardingOutputFormat<Integer>());
        OptimizedPlan optPlan = compileNoStats(env.createProgramPlan());
        SinkPlanNode sinkNode = optPlan.getDataSinks().iterator().next();
        SingleInputPlanNode reduceNode = (SingleInputPlanNode) sinkNode.getPredecessor();
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getPredecessor();
        SingleInputPlanNode keyExtractorNode = (SingleInputPlanNode) combineNode.getPredecessor();
        SingleInputPlanNode filterNode = (SingleInputPlanNode) keyExtractorNode.getPredecessor();
        SingleInputPlanNode mapNode = (SingleInputPlanNode) filterNode.getPredecessor();
        assertEquals(toMap, mapNode.getInput().getDataExchangeMode());
        assertEquals(toFilter, filterNode.getInput().getDataExchangeMode());
        assertEquals(toKeyExtractor, keyExtractorNode.getInput().getDataExchangeMode());
        assertEquals(toCombiner, combineNode.getInput().getDataExchangeMode());
        assertEquals(toReduce, reduceNode.getInput().getDataExchangeMode());
        assertEquals(toSink, sinkNode.getInput().getDataExchangeMode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FilterFunction(org.apache.flink.api.common.functions.FilterFunction) Top1GroupReducer(org.apache.flink.optimizer.testfunctions.Top1GroupReducer) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode)

Example 83 with SingleInputPlanNode

use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.

the class DistinctAndGroupingOptimizerTest method testDistinctDestroysPartitioningOfNonDistinctFields.

@Test
public void testDistinctDestroysPartitioningOfNonDistinctFields() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(4);
        @SuppressWarnings("unchecked") DataSet<Tuple2<Long, Long>> data = env.fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L)).map(new IdentityMapper<Tuple2<Long, Long>>()).setParallelism(4);
        data.distinct(1).groupBy(0).sum(1).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();
        SingleInputPlanNode distinctReducer = (SingleInputPlanNode) combiner.getInput().getSource();
        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
        // reducer must repartition, because it works on a different field
        assertEquals(ShipStrategyType.PARTITION_HASH, reducer.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
        // distinct reducer is partitioned
        assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) IdentityMapper(org.apache.flink.optimizer.testfunctions.IdentityMapper) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 84 with SingleInputPlanNode

use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.

the class GroupReduceCompilationTest method testGroupedReduceWithFieldPositionKeyNonCombinable.

@Test
public void testGroupedReduceWithFieldPositionKeyNonCombinable() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class).name("source").setParallelism(6);
        data.groupBy(1).reduceGroup(new RichGroupReduceFunction<Tuple2<String, Double>, Tuple2<String, Double>>() {

            public void reduce(Iterable<Tuple2<String, Double>> values, Collector<Tuple2<String, Double>> out) {
            }
        }).name("reducer").output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // check wiring
        assertEquals(sourceNode, reduceNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());
        // check that both reduce and combiner have the same strategy
        assertEquals(DriverStrategy.SORTED_GROUP_REDUCE, reduceNode.getDriverStrategy());
        // check the keys
        assertEquals(new FieldList(1), reduceNode.getKeys(0));
        assertEquals(new FieldList(1), reduceNode.getInput().getLocalStrategyKeys());
        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used : RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Collector(org.apache.flink.util.Collector) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 85 with SingleInputPlanNode

use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.

the class GroupReduceCompilationTest method testAllReduceWithCombiner.

@Test
public void testAllReduceWithCombiner() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Long> data = env.generateSequence(1, 8000000).name("source");
        GroupReduceOperator<Long, Long> reduced = data.reduceGroup(new CombineReducer2()).name("reducer");
        reduced.setCombinable(true);
        reduced.output(new DiscardingOutputFormat<Long>()).name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
        // check wiring
        assertEquals(sourceNode, combineNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());
        // check that both reduce and combiner have the same strategy
        assertEquals(DriverStrategy.ALL_GROUP_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.ALL_GROUP_REDUCE_COMBINE, combineNode.getDriverStrategy());
        // check parallelism
        assertEquals(8, sourceNode.getParallelism());
        assertEquals(8, combineNode.getParallelism());
        assertEquals(1, reduceNode.getParallelism());
        assertEquals(1, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Aggregations

SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)104 Test (org.junit.Test)83 OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)81 Plan (org.apache.flink.api.common.Plan)73 SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)72 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)71 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)38 Channel (org.apache.flink.optimizer.plan.Channel)32 SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode)32 FieldList (org.apache.flink.api.common.operators.util.FieldList)31 DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode)28 DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat)26 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)18 JobGraphGenerator (org.apache.flink.optimizer.plantranslate.JobGraphGenerator)16 NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode)14 PlanNode (org.apache.flink.optimizer.plan.PlanNode)14 IdentityGroupReducerCombinable (org.apache.flink.optimizer.testfunctions.IdentityGroupReducerCombinable)14 IdentityMapper (org.apache.flink.optimizer.testfunctions.IdentityMapper)14 GlobalProperties (org.apache.flink.optimizer.dataproperties.GlobalProperties)13 LocalProperties (org.apache.flink.optimizer.dataproperties.LocalProperties)13