Search in sources :

Example 76 with SinkPlanNode

use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.

the class GroupReduceCompilationTest method testAllReduceWithCombiner.

@Test
public void testAllReduceWithCombiner() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Long> data = env.generateSequence(1, 8000000).name("source");
        GroupReduceOperator<Long, Long> reduced = data.reduceGroup(new CombineReducer2()).name("reducer");
        reduced.setCombinable(true);
        reduced.output(new DiscardingOutputFormat<Long>()).name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
        // check wiring
        assertEquals(sourceNode, combineNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());
        // check that both reduce and combiner have the same strategy
        assertEquals(DriverStrategy.ALL_GROUP_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.ALL_GROUP_REDUCE_COMBINE, combineNode.getDriverStrategy());
        // check parallelism
        assertEquals(8, sourceNode.getParallelism());
        assertEquals(8, combineNode.getParallelism());
        assertEquals(1, reduceNode.getParallelism());
        assertEquals(1, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 77 with SinkPlanNode

use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.

the class GroupReduceCompilationTest method testGroupedReduceWithFieldPositionKeyNonCombinable.

@Test
public void testGroupedReduceWithFieldPositionKeyNonCombinable() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class).name("source").setParallelism(6);
        data.groupBy(1).reduceGroup(new RichGroupReduceFunction<Tuple2<String, Double>, Tuple2<String, Double>>() {

            public void reduce(Iterable<Tuple2<String, Double>> values, Collector<Tuple2<String, Double>> out) {
            }
        }).name("reducer").output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // check wiring
        assertEquals(sourceNode, reduceNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());
        // check that both reduce and combiner have the same strategy
        assertEquals(DriverStrategy.SORTED_GROUP_REDUCE, reduceNode.getDriverStrategy());
        // check the keys
        assertEquals(new FieldList(1), reduceNode.getKeys(0));
        assertEquals(new FieldList(1), reduceNode.getInput().getLocalStrategyKeys());
        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used : RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Collector(org.apache.flink.util.Collector) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 78 with SinkPlanNode

use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.

the class GroupReduceCompilationTest method testAllGroupReduceNoCombiner.

@Test
public void testAllGroupReduceNoCombiner() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Double> data = env.fromElements(0.2, 0.3, 0.4, 0.5).name("source");
        data.reduceGroup(new RichGroupReduceFunction<Double, Double>() {

            public void reduce(Iterable<Double> values, Collector<Double> out) {
            }
        }).name("reducer").output(new DiscardingOutputFormat<Double>()).name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // the all-reduce has no combiner, when the parallelism of the input is one
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // check wiring
        assertEquals(sourceNode, reduceNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());
        // check that reduce has the right strategy
        assertEquals(DriverStrategy.ALL_GROUP_REDUCE, reduceNode.getDriverStrategy());
        // check parallelism
        assertEquals(1, sourceNode.getParallelism());
        assertEquals(1, reduceNode.getParallelism());
        assertEquals(1, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used : RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Collector(org.apache.flink.util.Collector) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 79 with SinkPlanNode

use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.

the class JoinTranslationTest method createPlanAndGetJoinNode.

private DualInputPlanNode createPlanAndGetJoinNode(JoinHint hint) {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Long> i1 = env.generateSequence(1, 1000);
    DataSet<Long> i2 = env.generateSequence(1, 1000);
    i1.join(i2, hint).where(new IdentityKeySelector<Long>()).equalTo(new IdentityKeySelector<Long>()).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    Plan plan = env.createProgramPlan();
    // set statistics to the sources
    plan.accept(new Visitor<Operator<?>>() {

        @Override
        public boolean preVisit(Operator<?> visitable) {
            if (visitable instanceof GenericDataSourceBase) {
                GenericDataSourceBase<?, ?> source = (GenericDataSourceBase<?, ?>) visitable;
                setSourceStatistics(source, 10000000, 1000);
            }
            return true;
        }

        @Override
        public void postVisit(Operator<?> visitable) {
        }
    });
    OptimizedPlan op = compileWithStats(plan);
    return (DualInputPlanNode) ((SinkPlanNode) op.getDataSinks().iterator().next()).getInput().getSource();
}
Also used : Operator(org.apache.flink.api.common.operators.Operator) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) GenericDataSourceBase(org.apache.flink.api.common.operators.GenericDataSourceBase) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode)

Example 80 with SinkPlanNode

use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.

the class PartitionOperatorTest method testPartitionCustomOperatorPreservesFields.

@Test
public void testPartitionCustomOperatorPreservesFields() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, Long>> data = env.fromCollection(Collections.singleton(new Tuple2<>(0L, 0L)));
        data.partitionCustom(new Partitioner<Long>() {

            public int partition(Long key, int numPartitions) {
                return key.intValue();
            }
        }, 1).groupBy(1).reduceGroup(new IdentityGroupReducerCombinable<Tuple2<Long, Long>>()).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode partitioner = (SingleInputPlanNode) reducer.getInput().getSource();
        assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, partitioner.getInput().getShipStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple2(org.apache.flink.api.java.tuple.Tuple2) IdentityGroupReducerCombinable(org.apache.flink.optimizer.testfunctions.IdentityGroupReducerCombinable) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Partitioner(org.apache.flink.api.common.functions.Partitioner) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)

Aggregations

SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)153 OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)146 Plan (org.apache.flink.api.common.Plan)139 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)139 Test (org.junit.Test)138 SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)72 DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode)67 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)66 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)53 SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode)52 FieldSet (org.apache.flink.api.common.operators.util.FieldSet)24 DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat)24 GlobalProperties (org.apache.flink.optimizer.dataproperties.GlobalProperties)24 LocalProperties (org.apache.flink.optimizer.dataproperties.LocalProperties)24 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)23 FieldList (org.apache.flink.api.common.operators.util.FieldList)23 WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode)16 IdentityGroupReducerCombinable (org.apache.flink.optimizer.testfunctions.IdentityGroupReducerCombinable)16 IdentityMapper (org.apache.flink.optimizer.testfunctions.IdentityMapper)16 Channel (org.apache.flink.optimizer.plan.Channel)13