Search in sources :

Example 31 with SingleInputPlanNode

use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.

the class DistinctCompilationTest method testDistinctWithCombineHint.

/**
 * Compiles a distinct operation that carries a {@code CombineHint.HASH} hint and inspects the
 * optimized plan: the combiner must sit between source and reducer, the two nodes must use the
 * expected driver strategies, both must key on fields (0, 1), and the combiner must run with the
 * source's parallelism while reducer and sink run with the environment's parallelism.
 */
@Test
public void testDistinctWithCombineHint() {
    try {
        ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
        environment.setParallelism(8);

        // source runs with parallelism 6, everything else inherits 8 from the environment
        DataSet<Tuple2<String, Double>> input = environment
                .readCsvFile("file:///will/never/be/read")
                .types(String.class, Double.class)
                .name("source")
                .setParallelism(6);
        input.distinct()
                .setCombineHint(CombineHint.HASH)
                .name("reducer")
                .output(new DiscardingOutputFormat<Tuple2<String, Double>>())
                .name("sink");

        Plan plan = environment.createProgramPlan();
        OptimizedPlan optimized = compileNoStats(plan);
        OptimizerPlanNodeResolver nodes = getOptimizerPlanNodeResolver(optimized);

        // look up the nodes that were named in the program
        SourcePlanNode source = nodes.getNode("source");
        SingleInputPlanNode reducer = nodes.getNode("reducer");
        SinkPlanNode sink = nodes.getNode("sink");

        // the combiner is whatever feeds the reducer
        SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();

        // wiring: source -> combiner -> reducer -> sink
        assertEquals(source, combiner.getInput().getSource());
        assertEquals(reducer, sink.getInput().getSource());

        // driver strategies: sorted reduce for the reducer, hashed partial reduce for the combiner
        assertEquals(DriverStrategy.SORTED_REDUCE, reducer.getDriverStrategy());
        assertEquals(DriverStrategy.HASHED_PARTIAL_REDUCE, combiner.getDriverStrategy());

        // keys: both tuple fields everywhere
        FieldList expectedKeys = new FieldList(0, 1);
        assertEquals(expectedKeys, reducer.getKeys(0));
        assertEquals(expectedKeys, combiner.getKeys(0));
        assertEquals(expectedKeys, reducer.getInput().getLocalStrategyKeys());

        // parallelism: combiner follows the source, reducer and sink follow the environment
        assertEquals(6, source.getParallelism());
        assertEquals(6, combiner.getParallelism());
        assertEquals(8, reducer.getParallelism());
        assertEquals(8, sink.getParallelism());
    } catch (Exception failure) {
        // report the problem on stderr and turn it into a test failure
        System.err.println(failure.getMessage());
        failure.printStackTrace();
        fail(failure.getClass().getSimpleName() + " in test: " + failure.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 32 with SingleInputPlanNode

use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.

the class AllReduceProperties method instantiate.

/**
 * Creates the plan node for an all-reduce. With a FORWARD ship strategy on {@code in} the
 * reducer is attached directly to the channel; otherwise a combiner node is placed on the
 * sending side and the reducer is attached behind it, inheriting the ship and local strategy
 * settings of the original channel.
 *
 * @param in the channel delivering the input
 * @param node the optimizer node being instantiated (cast to {@code ReduceNode} when a
 *     combiner is needed)
 * @return the plan node executing the reduce
 */
@Override
public SingleInputPlanNode instantiate(Channel in, SingleInputNode node) {
    final boolean locallyConnected = in.getShipStrategy() == ShipStrategyType.FORWARD;
    final String operatorName = node.getOperator().getName();
    final String reducerName = "Reduce (" + operatorName + ")";

    if (locallyConnected) {
        // no data shipping involved: the reducer consumes the channel as it is
        return new SingleInputPlanNode(node, reducerName, in, DriverStrategy.ALL_REDUCE);
    }

    // data is shipped: pre-aggregate with a combiner that is forward-connected to the source
    Channel combinerInput = new Channel(in.getSource());
    combinerInput.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);

    // the combiner runs with the parallelism of the node producing the input
    ReduceNode combinerNode = ((ReduceNode) node).getCombinerUtilityNode();
    combinerNode.setParallelism(in.getSource().getParallelism());

    SingleInputPlanNode combiner = new SingleInputPlanNode(
            combinerNode, "Combine (" + operatorName + ")", combinerInput, DriverStrategy.ALL_REDUCE);
    combiner.setCosts(new Costs(0, 0));
    combiner.initProperties(combinerInput.getGlobalProperties(), combinerInput.getLocalProperties());

    // the channel into the reducer copies the strategies of the original input channel
    Channel reducerInput = new Channel(combiner);
    reducerInput.setShipStrategy(
            in.getShipStrategy(),
            in.getShipStrategyKeys(),
            in.getShipStrategySortOrder(),
            in.getDataExchangeMode());
    reducerInput.setLocalStrategy(
            in.getLocalStrategy(), in.getLocalStrategyKeys(), in.getLocalStrategySortOrder());

    return new SingleInputPlanNode(node, reducerName, reducerInput, DriverStrategy.ALL_REDUCE);
}
Also used : SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) ReduceNode(org.apache.flink.optimizer.dag.ReduceNode) Costs(org.apache.flink.optimizer.costs.Costs) Channel(org.apache.flink.optimizer.plan.Channel)

Example 33 with SingleInputPlanNode

use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.

the class PartialGroupProperties method instantiate.

/**
 * Creates a sorted group-combine plan node on top of the given channel.
 *
 * @param in the channel delivering the input; its local strategy keys and sort order become
 *     the combiner's sorting key info
 * @param node the optimizer node whose operator is wrapped in a fresh {@code GroupReduceNode}
 * @return the combiner plan node
 */
@Override
public SingleInputPlanNode instantiate(Channel in, SingleInputNode node) {
    // build a dedicated node for the combiner, running with the parallelism of the input's source
    GroupReduceOperatorBase<?, ?, ?> operator = (GroupReduceOperatorBase<?, ?, ?>) node.getOperator();
    GroupReduceNode combinerNode = new GroupReduceNode(operator);
    combinerNode.setParallelism(in.getSource().getParallelism());

    String combinerName = "Combine(" + node.getOperator().getName() + ")";
    SingleInputPlanNode result =
            new SingleInputPlanNode(combinerNode, combinerName, in, DriverStrategy.SORTED_GROUP_COMBINE);

    // slot 0: sorting key info, taken from the channel's local strategy
    result.setDriverKeyInfo(in.getLocalStrategyKeys(), in.getLocalStrategySortOrder(), 0);
    // slot 1: grouping comparator key info
    result.setDriverKeyInfo(this.keyList, 1);

    return result;
}
Also used : SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) GroupReduceNode(org.apache.flink.optimizer.dag.GroupReduceNode)

Example 34 with SingleInputPlanNode

use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.

the class GroupCombineProperties method instantiate.

/**
 * Creates the plan node for a group-combine. The given node is first set to the parallelism
 * of the input's source and is then wrapped in a sorted group-combine plan node.
 *
 * @param in the channel delivering the input
 * @param node the optimizer node being instantiated; its parallelism is overwritten
 * @return the group-combine plan node
 */
@Override
public SingleInputPlanNode instantiate(Channel in, SingleInputNode node) {
    node.setParallelism(in.getSource().getParallelism());

    // reuse the combine strategy also used in the group reduce
    final String planNodeName = "GroupCombine (" + node.getOperator().getName() + ")";
    final SingleInputPlanNode combineNode = new SingleInputPlanNode(
            node, planNodeName, in, DriverStrategy.SORTED_GROUP_COMBINE, this.keyList);

    // slot 0: sorting comparator key info, taken from this descriptor's ordering
    combineNode.setDriverKeyInfo(
            this.ordering.getInvolvedIndexes(), this.ordering.getFieldSortDirections(), 0);
    // slot 1: grouping comparator key info
    combineNode.setDriverKeyInfo(this.keyList, 1);

    return combineNode;
}
Also used : SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode)

Example 35 with SingleInputPlanNode

use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.

the class DataExchangeModeOpenBranchingTest method verifyBranchigPlan.

/**
 * Builds a program in which one mapped data set branches into three sinks (via a filter, via
 * a join, and directly), compiles it under the given execution mode, and asserts the data
 * exchange mode of every channel in the optimized plan.
 *
 * @param execMode execution mode set on the environment's config
 * @param toMap expected exchange mode on the mapper's input
 * @param toFilter expected exchange mode on the filter's input
 * @param toFilterSink expected exchange mode on the input of sink "sink1"
 * @param toJoin1 expected exchange mode on the join's first input
 * @param toJoin2 expected exchange mode on the join's second input
 * @param toJoinSink expected exchange mode on the input of sink "sink2"
 * @param toDirectSink expected exchange mode on the input of sink "sink3"
 */
private void verifyBranchigPlan(ExecutionMode execMode, DataExchangeMode toMap, DataExchangeMode toFilter, DataExchangeMode toFilterSink, DataExchangeMode toJoin1, DataExchangeMode toJoin2, DataExchangeMode toJoinSink, DataExchangeMode toDirectSink) {
    try {
        ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
        environment.getConfig().setExecutionMode(execMode);

        // the shared data set: every sequence value paired with itself
        MapFunction<Long, Tuple2<Long, Long>> pairWithSelf = new MapFunction<Long, Tuple2<Long, Long>>() {

            @Override
            public Tuple2<Long, Long> map(Long value) {
                return new Tuple2<Long, Long>(value, value);
            }
        };
        DataSet<Tuple2<Long, Long>> branched = environment.generateSequence(1, 100000).map(pairWithSelf);

        // branch 1: through a filter that rejects every record
        FilterFunction<Tuple2<Long, Long>> rejectAll = new FilterFunction<Tuple2<Long, Long>>() {

            @Override
            public boolean filter(Tuple2<Long, Long> value) {
                return false;
            }
        };
        branched.filter(rejectAll)
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>())
                .name("sink1");

        // branch 2: joined with a single-element data set before reaching the sink
        branched.join(environment.fromElements(new Tuple2<Long, Long>(1L, 2L)))
                .where(1)
                .equalTo(0)
                .output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>())
                .name("sink2");

        // branch 3: straight into a sink
        branched.output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("sink3");

        OptimizedPlan optimized = compileNoStats(environment.createProgramPlan());

        // locate the three sinks and walk back to the operators in front of them
        SinkPlanNode sinkAfterFilter = findSink(optimized.getDataSinks(), "sink1");
        SinkPlanNode sinkAfterJoin = findSink(optimized.getDataSinks(), "sink2");
        SinkPlanNode sinkAfterMap = findSink(optimized.getDataSinks(), "sink3");

        SingleInputPlanNode filter = (SingleInputPlanNode) sinkAfterFilter.getPredecessor();
        SingleInputPlanNode mapper = (SingleInputPlanNode) filter.getPredecessor();
        DualInputPlanNode join = (DualInputPlanNode) sinkAfterJoin.getPredecessor();

        // the same mapper node must feed the join's first input and the direct sink
        assertEquals(mapper, join.getInput1().getSource());
        assertEquals(mapper, sinkAfterMap.getPredecessor());

        // exchange modes on the sink inputs
        assertEquals(toFilterSink, sinkAfterFilter.getInput().getDataExchangeMode());
        assertEquals(toJoinSink, sinkAfterJoin.getInput().getDataExchangeMode());
        assertEquals(toDirectSink, sinkAfterMap.getInput().getDataExchangeMode());

        // exchange modes on the operator inputs
        assertEquals(toMap, mapper.getInput().getDataExchangeMode());
        assertEquals(toFilter, filter.getInput().getDataExchangeMode());
        assertEquals(toJoin1, join.getInput1().getDataExchangeMode());
        assertEquals(toJoin2, join.getInput2().getDataExchangeMode());
    } catch (Exception failure) {
        // print the trace and turn the exception into a test failure
        failure.printStackTrace();
        fail(failure.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode)

Aggregations

SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode) 104, Test (org.junit.Test) 83, OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan) 81, Plan (org.apache.flink.api.common.Plan) 73, SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode) 72, ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment) 71, Tuple2 (org.apache.flink.api.java.tuple.Tuple2) 38, Channel (org.apache.flink.optimizer.plan.Channel) 32, SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode) 32, FieldList (org.apache.flink.api.common.operators.util.FieldList) 31, DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode) 28, DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat) 26, InvalidProgramException (org.apache.flink.api.common.InvalidProgramException) 18, JobGraphGenerator (org.apache.flink.optimizer.plantranslate.JobGraphGenerator) 16, NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode) 14, PlanNode (org.apache.flink.optimizer.plan.PlanNode) 14, IdentityGroupReducerCombinable (org.apache.flink.optimizer.testfunctions.IdentityGroupReducerCombinable) 14, IdentityMapper (org.apache.flink.optimizer.testfunctions.IdentityMapper) 14, GlobalProperties (org.apache.flink.optimizer.dataproperties.GlobalProperties) 13, LocalProperties (org.apache.flink.optimizer.dataproperties.LocalProperties) 13