Search in sources :

Example 41 with PlanNode

use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.

the class UnionPropertyPropagationTest method testUnion2.

/**
 * Unions {@code NUM_INPUTS} flat-mapped copies of one text source, groups the union on
 * field 0 and sums field 1, then inspects the optimized plan.
 *
 * <p>Expectations checked by the visitor: the group-reduce node reads from the union
 * through a FORWARD channel, and the union has exactly {@code NUM_INPUTS} inputs, each
 * a flat map shipped with PARTITION_HASH.
 */
@Test
public void testUnion2() {
    final int NUM_INPUTS = 4;
    // Construct the plan: multiple flat maps over the same source, all unioned,
    // and the "unioned" data set is then grouped and aggregated.
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<String> source = env.readTextFile(IN_FILE);
    DataSet<Tuple2<String, Integer>> lastUnion = source.flatMap(new DummyFlatMap());
    for (int i = 1; i < NUM_INPUTS; i++) {
        lastUnion = lastUnion.union(source.flatMap(new DummyFlatMap()));
    }
    DataSet<Tuple2<String, Integer>> result = lastUnion.groupBy(0).aggregate(Aggregations.SUM, 1);
    result.writeAsText(OUT_FILE);
    // Create and optimize the plan
    Plan plan = env.createProgramPlan("Test union on new java-api");
    OptimizedPlan oPlan = compileNoStats(plan);
    JobGraphGenerator jobGen = new JobGraphGenerator();
    // Compile plan to verify that no error is thrown
    jobGen.compileJobGraph(oPlan);
    oPlan.accept(new Visitor<PlanNode>() {

        @Override
        public boolean preVisit(PlanNode visitable) {
            /*
             * Test on the union output connection:
             * the union must sit directly under the group operator and the
             * ship strategy between them must be FORWARD.
             */
            if (visitable instanceof SingleInputPlanNode && visitable.getProgramOperator() instanceof GroupReduceOperatorBase) {
                final Channel inConn = ((SingleInputPlanNode) visitable).getInput();
                // assertEquals reports expected vs. actual strategy on failure
                Assert.assertEquals("Union should just forward the Partitioning", ShipStrategyType.FORWARD, inConn.getShipStrategy());
                Assert.assertTrue("Union Node should be under Group operator", inConn.getSource() instanceof NAryUnionPlanNode);
            }
            /*
             * Test on the union input connections:
             * there must be NUM_INPUTS input connections, all from flat map
             * operators, each with its own partitioning strategy (hash).
             */
            if (visitable instanceof NAryUnionPlanNode) {
                int numberInputs = 0;
                for (Channel inConn : visitable.getInputs()) {
                    PlanNode inNode = inConn.getSource();
                    Assert.assertTrue("Input of Union should be FlatMapOperators", inNode.getProgramOperator() instanceof FlatMapOperatorBase);
                    Assert.assertEquals("Shipment strategy under union should partition the data", ShipStrategyType.PARTITION_HASH, inConn.getShipStrategy());
                    numberInputs++;
                }
                Assert.assertEquals("NAryUnion should have " + NUM_INPUTS + " inputs", NUM_INPUTS, numberInputs);
                // Do not descend below the union; its inputs were checked above
                return false;
            }
            return true;
        }

        @Override
        public void postVisit(PlanNode visitable) {
            // Nothing to verify after the children have been visited
        }
    });
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FlatMapOperatorBase(org.apache.flink.api.common.operators.base.FlatMapOperatorBase) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) Iterator(java.util.Iterator) Test(org.junit.Test)

Example 42 with PlanNode

use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.

the class UnionPropertyPropagationTest method testUnion1.

/**
 * Groups and reduces two generated sequences separately, unions the two results and
 * groups/reduces the union again. The visitor then asserts that every input channel
 * of the first group-reduce node it encounters uses the FORWARD ship strategy.
 */
@Test
public void testUnion1() {
    // Build the program: two reduced sources, unioned and reduced once more
    ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    environment.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Long> firstSource = environment.generateSequence(0, 1);
    DataSet<Long> secondSource = environment.generateSequence(0, 1);

    DataSet<Long> firstReduced = firstSource.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>());
    DataSet<Long> secondReduced = secondSource.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>());

    DataSet<Long> unioned = firstReduced.union(secondReduced);
    unioned.groupBy("*")
            .reduceGroup(new IdentityGroupReducer<Long>())
            .output(new DiscardingOutputFormat<Long>());

    Plan programPlan = environment.createProgramPlan();
    OptimizedPlan optimized = compileNoStats(programPlan);

    // Generating the job graph must not throw
    JobGraphGenerator generator = new JobGraphGenerator();
    generator.compileJobGraph(optimized);

    optimized.accept(new Visitor<PlanNode>() {

        @Override
        public boolean preVisit(PlanNode visitable) {
            final boolean isGroupReduce = visitable instanceof SingleInputPlanNode
                    && visitable.getProgramOperator() instanceof GroupReduceOperatorBase;
            if (!isGroupReduce) {
                // Keep descending until a group-reduce node is found
                return true;
            }

            final String message = "Reduce should just forward the input if it is already partitioned";
            for (Channel channel : visitable.getInputs()) {
                Assert.assertTrue(message, channel.getShipStrategy() == ShipStrategyType.FORWARD);
            }
            // Only the last reduce node is checked, so stop descending here
            return false;
        }

        @Override
        public void postVisit(PlanNode visitable) {
            // Intentionally empty
        }
    });
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) IdentityGroupReducer(org.apache.flink.optimizer.testfunctions.IdentityGroupReducer) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) Test(org.junit.Test)

Example 43 with PlanNode

use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.

the class SemanticPropertiesAPIToPlanTest method forwardFieldsTestJoin.

/**
 * Reads two CSV inputs, maps and reduces each of them with forwarded-field
 * annotations, and joins the results on the fields the annotations forward to.
 * The visitor asserts that both inputs of the inner join use the FORWARD ship strategy.
 */
@Test
public void forwardFieldsTestJoin() {
    ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Integer, Integer>> rawLeft =
            environment.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
    DataSet<Tuple3<Integer, Integer, Integer>> rawRight =
            environment.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);

    // Left side: grouped on field 0, which the reducer forwards to field 1
    DataSet<Tuple3<Integer, Integer, Integer>> left = rawLeft
            .map(new MockMapper()).withForwardedFields("*")
            .groupBy(0)
            .reduce(new MockReducer()).withForwardedFields("f0->f1");

    // Right side: grouped on field 1, which the reducer forwards to field 2
    DataSet<Tuple3<Integer, Integer, Integer>> right = rawRight
            .map(new MockMapper()).withForwardedFields("*")
            .groupBy(1)
            .reduce(new MockReducer()).withForwardedFields("f1->f2");

    DataSet<Tuple3<Integer, Integer, Integer>> joined =
            left.join(right).where(1).equalTo(2).with(new MockJoin());
    joined.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());

    Plan programPlan = environment.createProgramPlan();
    OptimizedPlan optimized = compileWithStats(programPlan);

    optimized.accept(new Visitor<PlanNode>() {

        @Override
        public boolean preVisit(PlanNode visitable) {
            final boolean isInnerJoin = visitable instanceof DualInputPlanNode
                    && visitable.getProgramOperator() instanceof InnerJoinOperatorBase;
            if (!isInnerJoin) {
                // Keep descending until the join node is found
                return true;
            }

            final DualInputPlanNode join = (DualInputPlanNode) visitable;
            final Channel firstInput = join.getInput1();
            final Channel secondInput = join.getInput2();
            final String message = "Join should just forward the input if it is already partitioned";
            Assert.assertTrue(message, firstInput.getShipStrategy() == ShipStrategyType.FORWARD);
            Assert.assertTrue(message, secondInput.getShipStrategy() == ShipStrategyType.FORWARD);
            // The join has been checked; no need to look below it
            return false;
        }

        @Override
        public void postVisit(PlanNode visitable) {
            // Intentionally empty
        }
    });
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple3(org.apache.flink.api.java.tuple.Tuple3) InnerJoinOperatorBase(org.apache.flink.api.common.operators.base.InnerJoinOperatorBase) Test(org.junit.Test)

Aggregations

PlanNode (org.apache.flink.optimizer.plan.PlanNode)43 SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)27 DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode)25 Channel (org.apache.flink.optimizer.plan.Channel)24 SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)22 WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode)21 CompilerException (org.apache.flink.optimizer.CompilerException)16 NamedChannel (org.apache.flink.optimizer.plan.NamedChannel)16 OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)15 BulkIterationPlanNode (org.apache.flink.optimizer.plan.BulkIterationPlanNode)14 BulkPartialSolutionPlanNode (org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode)13 IterationPlanNode (org.apache.flink.optimizer.plan.IterationPlanNode)13 NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode)13 SolutionSetPlanNode (org.apache.flink.optimizer.plan.SolutionSetPlanNode)13 SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode)13 WorksetPlanNode (org.apache.flink.optimizer.plan.WorksetPlanNode)13 Plan (org.apache.flink.api.common.Plan)12 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)12 Test (org.junit.Test)12 ArrayList (java.util.ArrayList)11