Search in sources:

Example 81 with SinkPlanNode

use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.

the class DistinctAndGroupingOptimizerTest method testDistinctDestroysPartitioningOfNonDistinctFields.

/**
 * Compiles {@code distinct(1).groupBy(0).sum(1)} and checks the ship strategies chosen for the
 * resulting plan.
 *
 * <p>The plan is walked backwards from the sink: sink, grouping reducer, combiner, distinct
 * reducer. The grouping reducer is expected to hash-repartition its input because it groups on
 * a different field than the one the distinct operated on.
 */
@Test
public void testDistinctDestroysPartitioningOfNonDistinctFields() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(4);

        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Long, Long>> data = env
                .fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L))
                .map(new IdentityMapper<Tuple2<Long, Long>>())
                .setParallelism(4);

        // distinct on field 1, then group on field 0
        data.distinct(1)
                .groupBy(0)
                .sum(1)
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        // walk the plan from the sink back towards the source
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();
        SingleInputPlanNode distinctReducer = (SingleInputPlanNode) combiner.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
        // reducer must repartition, because it works on a different field
        assertEquals(ShipStrategyType.PARTITION_HASH, reducer.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
        // distinct reducer is partitioned
        assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy());
    } catch (Exception e) {
        // Attach the original exception as the cause so the failure report keeps its type and
        // stack trace; a message-only failure is empty when getMessage() returns null.
        throw new AssertionError(e.getClass().getSimpleName() + " in test: " + e.getMessage(), e);
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) IdentityMapper(org.apache.flink.optimizer.testfunctions.IdentityMapper) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 82 with SinkPlanNode

use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.

the class IterationCompilerTest method testWorksetIterationWithUnionRoot.

/**
 * Compiles a delta iteration whose two {@code closeWith} arguments are each the union of two
 * identity-mapped worksets.
 *
 * <p>Checks that the nodes at the root of both step-function results, and the union nodes
 * feeding them, are on the dynamic path with a cost weight of at least 1, and that the
 * optimized plan can be translated by the {@code JobGraphGenerator}.
 */
@Test
public void testWorksetIterationWithUnionRoot() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(43);

        // The mapper is never executed; the plan is only compiled, so its result is irrelevant.
        DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20)
                .map(new MapFunction<Long, Tuple2<Long, Long>>() {

                    @Override
                    public Tuple2<Long, Long> map(Long value) {
                        return null;
                    }
                });

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 100, 0);

        // Both closeWith arguments are unions, so a union sits at the root of each result.
        // Built in the same order as the original single-expression call.
        DataSet<Tuple2<Long, Long>> firstUnion = iter.getWorkset()
                .map(new IdentityMapper<Tuple2<Long, Long>>())
                .union(iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>()));
        DataSet<Tuple2<Long, Long>> secondUnion = iter.getWorkset()
                .map(new IdentityMapper<Tuple2<Long, Long>>())
                .union(iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>()));

        iter.closeWith(firstUnion, secondUnion)
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        WorksetIterationPlanNode iterNode = (WorksetIterationPlanNode) sink.getInput().getSource();

        // make sure that the root is part of the dynamic path
        // the "NoOp"s that come after the unions.
        SingleInputPlanNode nextWorksetNoop = (SingleInputPlanNode) iterNode.getNextWorkSetPlanNode();
        SingleInputPlanNode solutionDeltaNoop = (SingleInputPlanNode) iterNode.getSolutionSetDeltaPlanNode();
        NAryUnionPlanNode nextWorksetUnion = (NAryUnionPlanNode) nextWorksetNoop.getInput().getSource();
        NAryUnionPlanNode solutionDeltaUnion = (NAryUnionPlanNode) solutionDeltaNoop.getInput().getSource();

        assertTrue(nextWorksetNoop.isOnDynamicPath());
        assertTrue(nextWorksetNoop.getCostWeight() >= 1);
        assertTrue(solutionDeltaNoop.isOnDynamicPath());
        assertTrue(solutionDeltaNoop.getCostWeight() >= 1);
        assertTrue(nextWorksetUnion.isOnDynamicPath());
        assertTrue(nextWorksetUnion.getCostWeight() >= 1);
        assertTrue(solutionDeltaUnion.isOnDynamicPath());
        assertTrue(solutionDeltaUnion.getCostWeight() >= 1);

        // see that the jobgraph generator can translate this
        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        // Attach the original exception as the cause so the failure report keeps its type and
        // stack trace; a message-only failure is empty when getMessage() returns null.
        throw new AssertionError(e.getClass().getSimpleName() + " in test: " + e.getMessage(), e);
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) IdentityMapper(org.apache.flink.optimizer.testfunctions.IdentityMapper) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 83 with SinkPlanNode

use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.

the class IterationCompilerTest method testIterationWithUnionRoot.

/**
 * Compiles a bulk iteration whose step function result is the union of two identity-mapped
 * partial solutions.
 *
 * <p>Checks that the root of the step function and the union node feeding it are on the
 * dynamic path with a cost weight of at least 1, and that the optimized plan can be translated
 * by the {@code JobGraphGenerator}.
 */
@Test
public void testIterationWithUnionRoot() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(43);

        IterativeDataSet<Long> iteration = env.generateSequence(-4, 1000).iterate(100);

        iteration
                .closeWith(iteration.map(new IdentityMapper<Long>())
                        .union(iteration.map(new IdentityMapper<Long>())))
                .output(new DiscardingOutputFormat<Long>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        BulkIterationPlanNode iterNode = (BulkIterationPlanNode) sink.getInput().getSource();

        // make sure that the root is part of the dynamic path
        // the "NoOp" that comes after the union.
        SingleInputPlanNode noop = (SingleInputPlanNode) iterNode.getRootOfStepFunction();
        NAryUnionPlanNode union = (NAryUnionPlanNode) noop.getInput().getSource();

        assertTrue(noop.isOnDynamicPath());
        assertTrue(noop.getCostWeight() >= 1);
        assertTrue(union.isOnDynamicPath());
        assertTrue(union.getCostWeight() >= 1);

        // see that the jobgraph generator can translate this
        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        // Attach the original exception as the cause so the failure report keeps its type and
        // stack trace; a message-only failure is empty when getMessage() returns null.
        throw new AssertionError(e.getClass().getSimpleName() + " in test: " + e.getMessage(), e);
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) IdentityMapper(org.apache.flink.optimizer.testfunctions.IdentityMapper) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) Test(org.junit.Test)

Example 84 with SinkPlanNode

use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.

the class ReduceCompilationTest method testGroupedReduceWithFieldPositionKey.

/**
 * Compiles a reduce grouped by tuple field position 1 and checks the optimized plan.
 *
 * <p>Verified properties: the combiner sits between source and reducer, the reducer feeds the
 * sink, the driver strategies are {@code SORTED_PARTIAL_REDUCE} / {@code SORTED_REDUCE}, all
 * key lists are field 1, and the source and combiner run with parallelism 6 while the reducer
 * and sink run with parallelism 8.
 */
@Test
public void testGroupedReduceWithFieldPositionKey() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read")
                .types(String.class, Double.class)
                .name("source")
                .setParallelism(6);

        // The reduce function is never executed; the plan is only compiled.
        data.groupBy(1)
                .reduce(new RichReduceFunction<Tuple2<String, Double>>() {

                    @Override
                    public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2) {
                        return null;
                    }
                })
                .name("reducer")
                .output(new DiscardingOutputFormat<Tuple2<String, Double>>())
                .name("sink");

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);

        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");

        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();

        // check wiring
        assertEquals(sourceNode, combineNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());

        // check the strategies
        assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.SORTED_PARTIAL_REDUCE, combineNode.getDriverStrategy());

        // check the keys
        assertEquals(new FieldList(1), reduceNode.getKeys(0));
        assertEquals(new FieldList(1), combineNode.getKeys(0));
        assertEquals(new FieldList(1), reduceNode.getInput().getLocalStrategyKeys());

        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(6, combineNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        // Same failure message as before, but with the original exception attached as the
        // cause so the test report carries the stack trace instead of only stderr.
        throw new AssertionError(e.getClass().getSimpleName() + " in test: " + e.getMessage(), e);
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) RichReduceFunction(org.apache.flink.api.common.functions.RichReduceFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 85 with SinkPlanNode

use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.

the class ReduceCompilationTest method testGroupedReduceWithHint.

/**
 * Compiles a reduce grouped by a {@code KeySelector} with {@code CombineHint.HASH} and checks
 * the optimized plan.
 *
 * <p>Verified properties: a key extractor precedes the combiner and a key projector precedes
 * the sink, the combiner uses {@code HASHED_PARTIAL_REDUCE} while the reducer uses
 * {@code SORTED_REDUCE}, all key lists are field 0, and the nodes up to the combiner run with
 * parallelism 6 while the reducer, projector and sink run with parallelism 8.
 */
@Test
public void testGroupedReduceWithHint() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read")
                .types(String.class, Double.class)
                .name("source")
                .setParallelism(6);

        // The reduce function is never executed; the plan is only compiled.
        data.groupBy(new KeySelector<Tuple2<String, Double>, String>() {

                    @Override
                    public String getKey(Tuple2<String, Double> value) {
                        return value.f0;
                    }
                })
                .reduce(new RichReduceFunction<Tuple2<String, Double>>() {

                    @Override
                    public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2) {
                        return null;
                    }
                })
                .setCombineHint(CombineHint.HASH)
                .name("reducer")
                .output(new DiscardingOutputFormat<Tuple2<String, Double>>())
                .name("sink");

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);

        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");

        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();

        // get the key extractors and projectors
        SingleInputPlanNode keyExtractor = (SingleInputPlanNode) combineNode.getInput().getSource();
        SingleInputPlanNode keyProjector = (SingleInputPlanNode) sinkNode.getInput().getSource();

        // check wiring
        assertEquals(sourceNode, keyExtractor.getInput().getSource());
        // keyProjector is read from the sink's input above, so the sink-side check on its own
        // is always true; also tie the projector to the reducer it is expected to consume.
        assertEquals(reduceNode, keyProjector.getInput().getSource());
        assertEquals(keyProjector, sinkNode.getInput().getSource());

        // check the strategies
        assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.HASHED_PARTIAL_REDUCE, combineNode.getDriverStrategy());

        // check the keys
        assertEquals(new FieldList(0), reduceNode.getKeys(0));
        assertEquals(new FieldList(0), combineNode.getKeys(0));
        assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());

        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(6, keyExtractor.getParallelism());
        assertEquals(6, combineNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, keyProjector.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        // Same failure message as before, but with the original exception attached as the
        // cause so the test report carries the stack trace instead of only stderr.
        throw new AssertionError(e.getClass().getSimpleName() + " in test: " + e.getMessage(), e);
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) KeySelector(org.apache.flink.api.java.functions.KeySelector) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Aggregations

SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)153 OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)146 Plan (org.apache.flink.api.common.Plan)139 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)139 Test (org.junit.Test)138 SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)72 DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode)67 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)66 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)53 SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode)52 FieldSet (org.apache.flink.api.common.operators.util.FieldSet)24 DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat)24 GlobalProperties (org.apache.flink.optimizer.dataproperties.GlobalProperties)24 LocalProperties (org.apache.flink.optimizer.dataproperties.LocalProperties)24 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)23 FieldList (org.apache.flink.api.common.operators.util.FieldList)23 WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode)16 IdentityGroupReducerCombinable (org.apache.flink.optimizer.testfunctions.IdentityGroupReducerCombinable)16 IdentityMapper (org.apache.flink.optimizer.testfunctions.IdentityMapper)16 Channel (org.apache.flink.optimizer.plan.Channel)13