Example 41 with SourcePlanNode

Use of org.apache.flink.optimizer.plan.SourcePlanNode in project flink by apache.

From the class ReduceCompilationTest, method testGroupedReduceWithHint.

@Test
public void testGroupedReduceWithHint() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class).name("source").setParallelism(6);
        data.groupBy(new KeySelector<Tuple2<String, Double>, String>() {

            @Override
            public String getKey(Tuple2<String, Double> value) {
                return value.f0;
            }
        }).reduce(new RichReduceFunction<Tuple2<String, Double>>() {

            @Override
            public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2) {
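                // never executed: this test only compiles the plan, it does not run it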
                return null;
            }
        }).setCombineHint(CombineHint.HASH).name("reducer").output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
        // get the key extractors and projectors
        SingleInputPlanNode keyExtractor = (SingleInputPlanNode) combineNode.getInput().getSource();
        SingleInputPlanNode keyProjector = (SingleInputPlanNode) sinkNode.getInput().getSource();
        // check wiring
        assertEquals(sourceNode, keyExtractor.getInput().getSource());
        assertEquals(keyProjector, sinkNode.getInput().getSource());
        // check the strategies
        assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.HASHED_PARTIAL_REDUCE, combineNode.getDriverStrategy());
        // check the keys
        assertEquals(new FieldList(0), reduceNode.getKeys(0));
        assertEquals(new FieldList(0), combineNode.getKeys(0));
        assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());
        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(6, keyExtractor.getParallelism());
        assertEquals(6, combineNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, keyProjector.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) KeySelector(org.apache.flink.api.java.functions.KeySelector) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)
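
Worth noting: the key-extractor and key-projector nodes resolved above exist because the test groups with a KeySelector, which makes the optimizer wrap each record into a (key, value) pair before the combiner and strip the key again before the sink. A minimal sketch, not part of the test, of the field-position alternative that compiles without those two extra nodes (class name and sample data are illustrative):

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.operators.base.ReduceOperatorBase.CombineHint;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.api.java.tuple.Tuple2;

public class GroupByFieldPositionSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<String, Double>> data =
                env.fromElements(new Tuple2<>("a", 1.0), new Tuple2<>("a", 2.0));
        // Grouping on the tuple field position needs no key-extracting mapper
        // in front of the combiner and no key projector in front of the sink.
        data.groupBy(0)
                .reduce(new ReduceFunction<Tuple2<String, Double>>() {
                    @Override
                    public Tuple2<String, Double> reduce(
                            Tuple2<String, Double> v1, Tuple2<String, Double> v2) {
                        return new Tuple2<>(v1.f0, v1.f1 + v2.f1);
                    }
                })
                .setCombineHint(CombineHint.HASH)
                .output(new DiscardingOutputFormat<Tuple2<String, Double>>());
        env.execute("group-by-field-position sketch");
    }
}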

Example 42 with SourcePlanNode

Use of org.apache.flink.optimizer.plan.SourcePlanNode in project flink by apache.

From the class ReduceCompilationTest, method testAllReduceNoCombiner.

@Test
public void testAllReduceNoCombiner() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Double> data = env.fromElements(0.2, 0.3, 0.4, 0.5).name("source");
        data.reduce(new RichReduceFunction<Double>() {

            @Override
            public Double reduce(Double value1, Double value2) {
                return value1 + value2;
            }
        }).name("reducer").output(new DiscardingOutputFormat<Double>()).name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // the all-reduce has no combiner when the parallelism of the input is one
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // check wiring
        assertEquals(sourceNode, reduceNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());
        // check parallelism
        assertEquals(1, sourceNode.getParallelism());
        assertEquals(1, reduceNode.getParallelism());
        assertEquals(1, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used : SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) RichReduceFunction(org.apache.flink.api.common.functions.RichReduceFunction) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) Test(org.junit.Test)
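
The absent combiner follows from fromElements producing a parallelism-1 source: with a single input partition there is nothing to pre-aggregate locally. A minimal sketch, not from the test class, of the contrasting setup with a parallel source, where the optimizer can place a partial all-reduce in front of the final single-task reducer (class name and sequence bounds are arbitrary):

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;

public class ParallelAllReduceSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        // generateSequence creates a source that can run in parallel, unlike
        // fromElements, which is always parallelism 1.
        DataSet<Long> data = env.generateSequence(1, 1_000_000).name("source");
        data.reduce(new ReduceFunction<Long>() {
                    @Override
                    public Long reduce(Long v1, Long v2) {
                        return v1 + v2;
                    }
                })
                .name("reducer")
                .output(new DiscardingOutputFormat<Long>())
                .name("sink");
        env.execute("parallel all-reduce sketch");
    }
}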

Example 43 with SourcePlanNode

Use of org.apache.flink.optimizer.plan.SourcePlanNode in project flink by apache.

From the class ReduceCompilationTest, method testGroupedReduceWithoutCombiner.

/**
 * Test program compilation when the Reduce's combiner has been excluded by setting {@code
 * CombineHint.NONE}.
 */
@Test
public void testGroupedReduceWithoutCombiner() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(8);
    DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class).name("source").setParallelism(6);
    data.groupBy(0).reduce(new RichReduceFunction<Tuple2<String, Double>>() {

        @Override
        public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2) {
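            // never executed: this test only compiles the plan, it does not run it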
            return null;
        }
    }).setCombineHint(CombineHint.NONE).name("reducer").output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
    Plan p = env.createProgramPlan();
    OptimizedPlan op = compileNoStats(p);
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
    // get the original nodes
    SourcePlanNode sourceNode = resolver.getNode("source");
    SingleInputPlanNode reduceNode = resolver.getNode("reducer");
    SinkPlanNode sinkNode = resolver.getNode("sink");
    // check wiring
    assertEquals(sourceNode, reduceNode.getInput().getSource());
    // check the strategies
    assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
    // check the keys
    assertEquals(new FieldList(0), reduceNode.getKeys(0));
    assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());
    // check parallelism
    assertEquals(6, sourceNode.getParallelism());
    assertEquals(8, reduceNode.getParallelism());
    assertEquals(8, sinkNode.getParallelism());
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)
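
For orientation, CombineHint offers four settings, of which the examples above exercise HASH and NONE. A minimal standalone sketch, assuming the same DataSet API (class name and sample data are illustrative), summarizing the options:

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.operators.base.ReduceOperatorBase.CombineHint;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.api.java.tuple.Tuple2;

public class CombineHintSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<String, Double>> data =
                env.fromElements(new Tuple2<>("a", 1.0), new Tuple2<>("a", 2.0));
        ReduceFunction<Tuple2<String, Double>> sum =
                new ReduceFunction<Tuple2<String, Double>>() {
                    @Override
                    public Tuple2<String, Double> reduce(
                            Tuple2<String, Double> v1, Tuple2<String, Double> v2) {
                        return new Tuple2<>(v1.f0, v1.f1 + v2.f1);
                    }
                };
        // CombineHint.OPTIMIZER_CHOOSES: the default, the optimizer decides
        // CombineHint.SORT:              sort-based combiner
        // CombineHint.HASH:              hash-based combiner (HASHED_PARTIAL_REDUCE above)
        // CombineHint.NONE:              no combiner; records shuffle unaggregated
        data.groupBy(0)
                .reduce(sum)
                .setCombineHint(CombineHint.SORT)
                .output(new DiscardingOutputFormat<Tuple2<String, Double>>());
        env.execute("combine-hint sketch");
    }
}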

Example 44 with SourcePlanNode

Use of org.apache.flink.optimizer.plan.SourcePlanNode in project flink by apache.

From the class UnionReplacementTest, method testConsecutiveUnionsWithHashPartitioning.

/**
 * Checks that a plan with consecutive UNIONs followed by PartitionByHash is correctly
 * translated.
 *
 * <p>The program can be illustrated as follows:
 *
 * <pre>
 * Src1 -\
 *        >-> Union12--<
 * Src2 -/              \
 *                       >-> Union123 -> PartitionByHash -> Output
 * Src3 ----------------/
 * </pre>
 *
 * <p>In the resulting plan, the hash partitioning (ShipStrategyType.PARTITION_HASH) must be
 * pushed to the inputs of the unions (Src1, Src2, Src3).
 */
@Test
public void testConsecutiveUnionsWithHashPartitioning() throws Exception {
    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src3 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> union12 = src1.union(src2);
    DataSet<Tuple2<Long, Long>> union123 = union12.union(src3);
    union123.partitionByHash(1).output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("out");
    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);
    SingleInputPlanNode sink = resolver.getNode("out");
    // check partitioning is correct
    assertEquals("Sink input should be hash partitioned.", PartitioningProperty.HASH_PARTITIONED, sink.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Sink input should be hash partitioned on 1.", new FieldList(1), sink.getInput().getGlobalProperties().getPartitioningFields());
    SingleInputPlanNode partitioner = (SingleInputPlanNode) sink.getInput().getSource();
    assertEquals(DriverStrategy.UNARY_NO_OP, partitioner.getDriverStrategy());
    assertEquals("Partitioner input should be hash partitioned.", PartitioningProperty.HASH_PARTITIONED, partitioner.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Partitioner input should be hash partitioned on 1.", new FieldList(1), partitioner.getInput().getGlobalProperties().getPartitioningFields());
    assertEquals("Partitioner input channel should be forwarding", ShipStrategyType.FORWARD, partitioner.getInput().getShipStrategy());
    NAryUnionPlanNode union = (NAryUnionPlanNode) partitioner.getInput().getSource();
    // all union inputs should be hash partitioned
    for (Channel c : union.getInputs()) {
        assertEquals("Union input should be hash partitioned", PartitioningProperty.HASH_PARTITIONED, c.getGlobalProperties().getPartitioning());
        assertEquals("Union input channel should be hash partitioning", ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
        assertTrue("Union input should be data source", c.getSource() instanceof SourcePlanNode);
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Channel(org.apache.flink.optimizer.plan.Channel) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) Test(org.junit.Test)
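
To inspect this pushdown without the optimizer test harness, the plan can also be dumped as JSON through ExecutionEnvironment.getExecutionPlan(). A minimal sketch of the same three-way union program (the dump should show the hash-partition ship strategy on the channels feeding the union; class name is illustrative):

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.api.java.tuple.Tuple2;

public class UnionPlanDumpSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
        DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(0L, 0L));
        DataSet<Tuple2<Long, Long>> src3 = env.fromElements(new Tuple2<>(0L, 0L));
        src1.union(src2)
                .union(src3)
                .partitionByHash(1)
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>())
                .name("out");
        // Prints the plan as JSON, including the strategy chosen for each
        // channel, instead of executing the program.
        System.out.println(env.getExecutionPlan());
    }
}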

Example 45 with SourcePlanNode

Use of org.apache.flink.optimizer.plan.SourcePlanNode in project flink by apache.

From the class DistinctCompilationTest, method testDistinctPlain.

@Test
public void testDistinctPlain() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class).name("source").setParallelism(6);
        data.distinct().name("reducer").output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
        // check wiring
        assertEquals(sourceNode, combineNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());
        // check that both reduce and combiner have the same strategy
        assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.SORTED_PARTIAL_REDUCE, combineNode.getDriverStrategy());
        // check the keys
        assertEquals(new FieldList(0, 1), reduceNode.getKeys(0));
        assertEquals(new FieldList(0, 1), combineNode.getKeys(0));
        assertEquals(new FieldList(0, 1), reduceNode.getInput().getLocalStrategyKeys());
        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(6, combineNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)
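
Since distinct() without arguments deduplicates on every tuple field, both reducer and combiner key on FieldList(0, 1) above. A minimal sketch, not part of the test, of the field-subset variant, which would key on FieldList(0) instead (class name and sample data are illustrative):

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.api.java.tuple.Tuple2;

public class DistinctOnFieldSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<String, Double>> data =
                env.fromElements(new Tuple2<>("a", 1.0), new Tuple2<>("a", 2.0));
        // distinct(0) deduplicates on the first field only; one arbitrary
        // record is kept per distinct key.
        data.distinct(0)
                .name("reducer")
                .output(new DiscardingOutputFormat<Tuple2<String, Double>>())
                .name("sink");
        env.execute("distinct-on-field sketch");
    }
}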

Aggregations

SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode): 61 usages
Test (org.junit.Test): 55 usages
SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 51 usages
OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 48 usages
Plan (org.apache.flink.api.common.Plan): 45 usages
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 45 usages
SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode): 33 usages
GlobalProperties (org.apache.flink.optimizer.dataproperties.GlobalProperties): 31 usages
LocalProperties (org.apache.flink.optimizer.dataproperties.LocalProperties): 31 usages
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 30 usages
FieldSet (org.apache.flink.api.common.operators.util.FieldSet): 27 usages
FieldList (org.apache.flink.api.common.operators.util.FieldList): 18 usages
DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat): 18 usages
Channel (org.apache.flink.optimizer.plan.Channel): 17 usages
Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 11 usages
DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode): 11 usages
WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode): 8 usages
RequestedGlobalProperties (org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties): 7 usages
RequestedLocalProperties (org.apache.flink.optimizer.dataproperties.RequestedLocalProperties): 7 usages
FeedbackPropertiesMeetRequirementsReport (org.apache.flink.optimizer.plan.PlanNode.FeedbackPropertiesMeetRequirementsReport): 7 usages