Search in sources :

Example 86 with SinkPlanNode

use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.

the class ReduceCompilationTest method testAllReduceNoCombiner.

@Test
public void testAllReduceNoCombiner() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Double> data = env.fromElements(0.2, 0.3, 0.4, 0.5).name("source");
        data.reduce(new RichReduceFunction<Double>() {

            @Override
            public Double reduce(Double value1, Double value2) {
                return value1 + value2;
            }
        }).name("reducer").output(new DiscardingOutputFormat<Double>()).name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // the all-reduce has no combiner, when the parallelism of the input is one
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // check wiring
        assertEquals(sourceNode, reduceNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());
        // check parallelism
        assertEquals(1, sourceNode.getParallelism());
        assertEquals(1, reduceNode.getParallelism());
        assertEquals(1, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used : SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) RichReduceFunction(org.apache.flink.api.common.functions.RichReduceFunction) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)

Example 87 with SinkPlanNode

use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.

the class ReduceCompilationTest method testGroupedReduceWithoutCombiner.

/**
 * Test program compilation when the Reduce's combiner has been excluded by setting {@code
 * CombineHint.NONE}.
 */
@Test
public void testGroupedReduceWithoutCombiner() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(8);
    DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class).name("source").setParallelism(6);
    data.groupBy(0).reduce(new RichReduceFunction<Tuple2<String, Double>>() {

        @Override
        public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2) {
            return null;
        }
    }).setCombineHint(CombineHint.NONE).name("reducer").output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
    Plan p = env.createProgramPlan();
    OptimizedPlan op = compileNoStats(p);
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
    // get the original nodes
    SourcePlanNode sourceNode = resolver.getNode("source");
    SingleInputPlanNode reduceNode = resolver.getNode("reducer");
    SinkPlanNode sinkNode = resolver.getNode("sink");
    // check wiring
    assertEquals(sourceNode, reduceNode.getInput().getSource());
    // check the strategies
    assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
    // check the keys
    assertEquals(new FieldList(0), reduceNode.getKeys(0));
    assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());
    // check parallelism
    assertEquals(6, sourceNode.getParallelism());
    assertEquals(8, reduceNode.getParallelism());
    assertEquals(8, sinkNode.getParallelism());
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 88 with SinkPlanNode

use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.

the class ReplicatingDataSourceTest method checkJoinWithReplicatedSourceInputBehindMap.

/**
 * Tests join program with replicated data source behind map.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMap() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));
    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);
    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1.map(new IdMap()).join(source2).where("*").equalTo("*").writeAsText("/some/newpath");
    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
    // check the optimized Plan
    // when join should have forward strategy on both sides
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();
    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
Also used : Path(org.apache.flink.core.fs.Path) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) ShipStrategyType(org.apache.flink.runtime.operators.shipping.ShipStrategyType) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) ReplicatingInputFormat(org.apache.flink.api.common.io.ReplicatingInputFormat) Tuple1(org.apache.flink.api.java.tuple.Tuple1) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 89 with SinkPlanNode

use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.

the class ReplicatingDataSourceTest method checkJoinWithReplicatedSourceInputBehindFilter.

/**
 * Tests join program with replicated data source behind filter.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindFilter() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));
    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);
    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1.filter(new NoFilter()).join(source2).where("*").equalTo("*").writeAsText("/some/newpath");
    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
    // check the optimized Plan
    // when join should have forward strategy on both sides
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();
    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
Also used : Path(org.apache.flink.core.fs.Path) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) ShipStrategyType(org.apache.flink.runtime.operators.shipping.ShipStrategyType) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) ReplicatingInputFormat(org.apache.flink.api.common.io.ReplicatingInputFormat) Tuple1(org.apache.flink.api.java.tuple.Tuple1) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 90 with SinkPlanNode

use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.

the class ReplicatingDataSourceTest method checkJoinWithReplicatedSourceInputBehindMultiMaps.

/**
 * Tests join program with replicated data source behind multiple map ops.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMultiMaps() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));
    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);
    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1.filter(new NoFilter()).mapPartition(new IdPMap()).flatMap(new IdFlatMap()).map(new IdMap()).join(source2).where("*").equalTo("*").writeAsText("/some/newpath");
    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
    // check the optimized Plan
    // when join should have forward strategy on both sides
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();
    ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
    ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
Also used : Path(org.apache.flink.core.fs.Path) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) ShipStrategyType(org.apache.flink.runtime.operators.shipping.ShipStrategyType) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) ReplicatingInputFormat(org.apache.flink.api.common.io.ReplicatingInputFormat) Tuple1(org.apache.flink.api.java.tuple.Tuple1) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Aggregations

SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)153 OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)146 Plan (org.apache.flink.api.common.Plan)139 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)139 Test (org.junit.Test)138 SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)72 DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode)67 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)66 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)53 SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode)52 FieldSet (org.apache.flink.api.common.operators.util.FieldSet)24 DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat)24 GlobalProperties (org.apache.flink.optimizer.dataproperties.GlobalProperties)24 LocalProperties (org.apache.flink.optimizer.dataproperties.LocalProperties)24 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)23 FieldList (org.apache.flink.api.common.operators.util.FieldList)23 WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode)16 IdentityGroupReducerCombinable (org.apache.flink.optimizer.testfunctions.IdentityGroupReducerCombinable)16 IdentityMapper (org.apache.flink.optimizer.testfunctions.IdentityMapper)16 Channel (org.apache.flink.optimizer.plan.Channel)13