Example 61 with SingleInputPlanNode

Use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.

From the class UnionPropertyPropagationTest, method testUnion1.

@Test
public void testUnion1() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Long> sourceA = env.generateSequence(0, 1);
    DataSet<Long> sourceB = env.generateSequence(0, 1);
    DataSet<Long> redA = sourceA.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>());
    DataSet<Long> redB = sourceB.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>());
    redA.union(redB).groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).output(new DiscardingOutputFormat<Long>());
    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileNoStats(plan);
    JobGraphGenerator jobGen = new JobGraphGenerator();
    // Compile plan to verify that no error is thrown
    jobGen.compileJobGraph(oPlan);
    oPlan.accept(new Visitor<PlanNode>() {

        @Override
        public boolean preVisit(PlanNode visitable) {
            if (visitable instanceof SingleInputPlanNode && visitable.getProgramOperator() instanceof GroupReduceOperatorBase) {
                for (Channel inConn : visitable.getInputs()) {
                    Assert.assertEquals("Reduce should just forward the input if it is already partitioned", ShipStrategyType.FORWARD, inConn.getShipStrategy());
                }
                // only check the top-most reduce node, then stop descending
                return false;
            }
            return true;
        }

        @Override
        public void postVisit(PlanNode visitable) {
            // do nothing
        }
    });
}
Also used: ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), Channel (org.apache.flink.optimizer.plan.Channel), Plan (org.apache.flink.api.common.Plan), OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan), SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode), PlanNode (org.apache.flink.optimizer.plan.PlanNode), NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode), JobGraphGenerator (org.apache.flink.optimizer.plantranslate.JobGraphGenerator), IdentityGroupReducer (org.apache.flink.optimizer.testfunctions.IdentityGroupReducer), GroupReduceOperatorBase (org.apache.flink.api.common.operators.base.GroupReduceOperatorBase), Test (org.junit.Test)
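For comparison with the later examples: where the operator under test carries a name, the same check can be written without a Visitor by resolving the named node. A minimal sketch, assuming the final reduceGroup had been given the hypothetical name "reducer" via .name("reducer"), and reusing the same test-base helpers (compileNoStats, getOptimizerPlanNodeResolver):

// hypothetical variant: requires ...reduceGroup(...).name("reducer")... in the plan above
OptimizedPlan oPlan2 = compileNoStats(env.createProgramPlan());
OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan2);
SingleInputPlanNode reducer = resolver.getNode("reducer");
// the union inputs are already partitioned, so the reduce input must be forwarded
assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());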

Example 62 with SingleInputPlanNode

Use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.

From the class UnionReplacementTest, method testConsecutiveUnionsWithRangePartitioning.

/**
	 *
	 * Checks that a plan with consecutive UNIONs followed by PARTITION_RANGE is correctly translated.
	 *
	 * The program can be illustrated as follows:
	 *
	 * Src1 -\
	 *        >-> Union12--<
	 * Src2 -/              \
	 *                       >-> Union123 -> PartitionByRange -> Output
	 * Src3 ----------------/
	 *
	 * In the resulting plan, the range partitioning must be
	 * pushed to the inputs of the unions (Src1, Src2, Src3).
	 *
	 */
@Test
public void testConsecutiveUnionsWithRangePartitioning() throws Exception {
    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src3 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> union12 = src1.union(src2);
    DataSet<Tuple2<Long, Long>> union123 = union12.union(src3);
    union123.partitionByRange(1).output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("out");
    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);
    SingleInputPlanNode sink = resolver.getNode("out");
    // check partitioning is correct
    assertEquals("Sink input should be range partitioned.", PartitioningProperty.RANGE_PARTITIONED, sink.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Sink input should be range partitioned on 1", new Ordering(1, null, Order.ASCENDING), sink.getInput().getGlobalProperties().getPartitioningOrdering());
    SingleInputPlanNode partitioner = (SingleInputPlanNode) sink.getInput().getSource();
    assertEquals(DriverStrategy.UNARY_NO_OP, partitioner.getDriverStrategy());
    assertEquals("Partitioner input should be range partitioned.", PartitioningProperty.RANGE_PARTITIONED, partitioner.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Partitioner input should be range partitioned on 1", new Ordering(1, null, Order.ASCENDING), partitioner.getInput().getGlobalProperties().getPartitioningOrdering());
    assertEquals("Partitioner input channel should be forwarding", ShipStrategyType.FORWARD, partitioner.getInput().getShipStrategy());
    NAryUnionPlanNode union = (NAryUnionPlanNode) partitioner.getInput().getSource();
    // all union inputs should be range partitioned
    for (Channel c : union.getInputs()) {
        assertEquals("Union input should be range partitioned", PartitioningProperty.RANGE_PARTITIONED, c.getGlobalProperties().getPartitioning());
        assertEquals("Union input channel should be forwarded", ShipStrategyType.FORWARD, c.getShipStrategy());
        // range partitioning is executed as custom partitioning with prior sampling
        SingleInputPlanNode partitionMap = (SingleInputPlanNode) c.getSource();
        assertEquals(DriverStrategy.MAP, partitionMap.getDriverStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, partitionMap.getInput().getShipStrategy());
    }
}
Also used: SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode), NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode), ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), Channel (org.apache.flink.optimizer.plan.Channel), Ordering (org.apache.flink.api.common.operators.Ordering), DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat), OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan), Test (org.junit.Test)
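As the comment in the loop above notes, range partitioning is implemented as sampling plus custom partitioning: a sampling step estimates the key distribution and derives per-channel boundary values, and each record is then routed to the channel whose range contains its key. A conceptual sketch of the boundary routing, with hypothetical boundary values (this is not Flink's internal code):

// channel upper bounds as the sampling step might produce them (hypothetical)
static final long[] BOUNDARIES = { 10L, 20L, 30L };

static int targetChannel(long key) {
    int pos = java.util.Arrays.binarySearch(BOUNDARIES, key);
    // an exact hit maps to that channel; otherwise use the insertion point
    return pos >= 0 ? pos : -(pos + 1);
}
// e.g. targetChannel(15L) == 1: keys in (10, 20] all land on channel 1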

Example 63 with SingleInputPlanNode

Use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.

From the class UnionReplacementTest, method testConsecutiveUnionsWithHashPartitioning.

/**
	 *
	 * Checks that a plan with consecutive UNIONs followed by PartitionByHash is correctly translated.
	 *
	 * The program can be illustrated as follows:
	 *
	 * Src1 -\
	 *        >-> Union12--<
	 * Src2 -/              \
	 *                       >-> Union123 -> PartitionByHash -> Output
	 * Src3 ----------------/
	 *
	 * In the resulting plan, the hash partitioning (ShipStrategyType.PARTITION_HASH) must be
	 * pushed to the inputs of the unions (Src1, Src2, Src3).
	 *
	 */
@Test
public void testConsecutiveUnionsWithHashPartitioning() throws Exception {
    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src3 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> union12 = src1.union(src2);
    DataSet<Tuple2<Long, Long>> union123 = union12.union(src3);
    union123.partitionByHash(1).output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("out");
    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);
    SingleInputPlanNode sink = resolver.getNode("out");
    // check partitioning is correct
    assertEquals("Sink input should be hash partitioned.", PartitioningProperty.HASH_PARTITIONED, sink.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Sink input should be hash partitioned on 1.", new FieldList(1), sink.getInput().getGlobalProperties().getPartitioningFields());
    SingleInputPlanNode partitioner = (SingleInputPlanNode) sink.getInput().getSource();
    assertEquals(DriverStrategy.UNARY_NO_OP, partitioner.getDriverStrategy());
    assertEquals("Partitioner input should be hash partitioned.", PartitioningProperty.HASH_PARTITIONED, partitioner.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Partitioner input should be hash partitioned on 1.", new FieldList(1), partitioner.getInput().getGlobalProperties().getPartitioningFields());
    assertEquals("Partitioner input channel should be forwarding", ShipStrategyType.FORWARD, partitioner.getInput().getShipStrategy());
    NAryUnionPlanNode union = (NAryUnionPlanNode) partitioner.getInput().getSource();
    // all union inputs should be hash partitioned
    for (Channel c : union.getInputs()) {
        assertEquals("Union input should be hash partitioned", PartitioningProperty.HASH_PARTITIONED, c.getGlobalProperties().getPartitioning());
        assertEquals("Union input channel should be hash partitioning", ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
        assertTrue("Union input should be data source", c.getSource() instanceof SourcePlanNode);
    }
}
Also used: ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), Channel (org.apache.flink.optimizer.plan.Channel), DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat), OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan), FieldList (org.apache.flink.api.common.operators.util.FieldList), SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode), NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode), Test (org.junit.Test)
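Hash partitioning, unlike range partitioning, needs no sampling step: each record is routed by a hash of its key fields, so all records with equal keys meet on the same channel. A conceptual sketch of the routing (Flink's actual implementation uses different hash mixing, but the routing idea is the same):

static int targetChannel(Object key, int numChannels) {
    int h = key.hashCode();
    // spread the high bits, then take a non-negative modulo
    h ^= h >>> 16;
    return (h & Integer.MAX_VALUE) % numChannels;
}
// equal keys always map to the same channel, which is what makes
// downstream key-based operations on the partitioned data correct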

Example 64 with SingleInputPlanNode

Use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.

From the class WorksetIterationsRecordApiCompilerTest, method testRecordApiWithDirectSoltionSetUpdate.

@Test
public void testRecordApiWithDirectSoltionSetUpdate() {
    Plan plan = getTestPlan(true, false);
    OptimizedPlan oPlan;
    try {
        oPlan = compileNoStats(plan);
    } catch (CompilerException ce) {
        ce.printStackTrace();
        fail("The pact compiler is unable to compile this plan correctly.");
        // unreachable, but keeps the compiler happy about oPlan being assigned
        return;
    }
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
    DualInputPlanNode joinWithInvariantNode = resolver.getNode(JOIN_WITH_INVARIANT_NAME);
    DualInputPlanNode joinWithSolutionSetNode = resolver.getNode(JOIN_WITH_SOLUTION_SET);
    SingleInputPlanNode worksetReducer = resolver.getNode(NEXT_WORKSET_REDUCER_NAME);
    // The iteration preserves partitioning in the reducer, so the initial partitioning
    // happens outside the loop and the in-loop partitioning sits before the final reducer.
    // verify joinWithInvariant
    assertEquals(ShipStrategyType.FORWARD, joinWithInvariantNode.getInput1().getShipStrategy());
    assertEquals(ShipStrategyType.PARTITION_HASH, joinWithInvariantNode.getInput2().getShipStrategy());
    assertEquals(list0, joinWithInvariantNode.getKeysForInput1());
    assertEquals(list0, joinWithInvariantNode.getKeysForInput2());
    // verify joinWithSolutionSet
    assertEquals(ShipStrategyType.FORWARD, joinWithSolutionSetNode.getInput1().getShipStrategy());
    assertEquals(ShipStrategyType.FORWARD, joinWithSolutionSetNode.getInput2().getShipStrategy());
    // verify reducer
    assertEquals(ShipStrategyType.FORWARD, worksetReducer.getInput().getShipStrategy());
    assertEquals(list0, worksetReducer.getKeys(0));
    // verify solution delta
    assertEquals(1, joinWithSolutionSetNode.getOutgoingChannels().size());
    assertEquals(ShipStrategyType.FORWARD, joinWithSolutionSetNode.getOutgoingChannels().get(0).getShipStrategy());
    new JobGraphGenerator().compileJobGraph(oPlan);
}
Also used: DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode), SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode), JobGraphGenerator (org.apache.flink.optimizer.plantranslate.JobGraphGenerator), Plan (org.apache.flink.api.common.Plan), OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan), Test (org.junit.Test)
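The plan under test is built by a getTestPlan() helper that is not shown here. For orientation, a minimal sketch of the general program shape it models: a workset (delta) iteration whose solution set is keyed on field 0, using DeltaIteration (org.apache.flink.api.java.operators.DeltaIteration) and JoinFunction (org.apache.flink.api.common.functions.JoinFunction). The data, constants, and join logic below are assumptions for illustration, not the helper's actual code:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<Long, Long>> initialSolution = env.fromElements(new Tuple2<>(0L, 0L));
DataSet<Tuple2<Long, Long>> initialWorkset = env.fromElements(new Tuple2<>(0L, 1L));

// solution set keyed on field 0, at most 10 supersteps (hypothetical values)
DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
        initialSolution.iterateDelta(initialWorkset, 10, 0);

// join the workset with the solution set on the key field
DataSet<Tuple2<Long, Long>> delta = iteration.getWorkset()
        .join(iteration.getSolutionSet())
        .where(0).equalTo(0)
        .with(new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
            @Override
            public Tuple2<Long, Long> join(Tuple2<Long, Long> ws, Tuple2<Long, Long> ss) {
                return new Tuple2<>(ws.f0, Math.min(ws.f1, ss.f1));
            }
        });

// the delta updates the solution set directly and also feeds the next workset
iteration.closeWith(delta, delta)
        .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

Because the workset stays partitioned on the key across supersteps, the solution set join and the workset reducer can both receive FORWARD inputs, which is what the assertions above verify.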

Example 65 with SingleInputPlanNode

Use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.

From the class BinaryCustomPartitioningCompatibilityTest, method testCompatiblePartitioningJoin.

@Test
public void testCompatiblePartitioningJoin() {
    try {
        final Partitioner<Long> partitioner = new Partitioner<Long>() {

            @Override
            public int partition(Long key, int numPartitions) {
                return 0;
            }
        };
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, Long>> input1 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
        DataSet<Tuple3<Long, Long, Long>> input2 = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));
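        // note: the same Partitioner instance is used for both inputs below, which is
        // what lets the optimizer treat them as co-partitioned and forward both join inputs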
        input1.partitionCustom(partitioner, 1).join(input2.partitionCustom(partitioner, 0)).where(1).equalTo(0).output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple3<Long, Long, Long>>>());
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        DualInputPlanNode join = (DualInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode partitioner1 = (SingleInputPlanNode) join.getInput1().getSource();
        SingleInputPlanNode partitioner2 = (SingleInputPlanNode) join.getInput2().getSource();
        assertEquals(ShipStrategyType.FORWARD, join.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, join.getInput2().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, partitioner1.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, partitioner2.getInput().getShipStrategy());
        assertEquals(partitioner, partitioner1.getInput().getPartitioner());
        assertEquals(partitioner, partitioner2.getInput().getPartitioner());
        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), Plan (org.apache.flink.api.common.Plan), OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan), DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode), SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), JobGraphGenerator (org.apache.flink.optimizer.plantranslate.JobGraphGenerator), Tuple3 (org.apache.flink.api.java.tuple.Tuple3), SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode), Partitioner (org.apache.flink.api.common.functions.Partitioner), Test (org.junit.Test)

Aggregations

SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode): 104 uses
Test (org.junit.Test): 83 uses
OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 81 uses
Plan (org.apache.flink.api.common.Plan): 73 uses
SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 72 uses
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 71 uses
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 38 uses
Channel (org.apache.flink.optimizer.plan.Channel): 32 uses
SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode): 32 uses
FieldList (org.apache.flink.api.common.operators.util.FieldList): 31 uses
DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode): 28 uses
DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat): 26 uses
InvalidProgramException (org.apache.flink.api.common.InvalidProgramException): 18 uses
JobGraphGenerator (org.apache.flink.optimizer.plantranslate.JobGraphGenerator): 16 uses
NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode): 14 uses
PlanNode (org.apache.flink.optimizer.plan.PlanNode): 14 uses
IdentityGroupReducerCombinable (org.apache.flink.optimizer.testfunctions.IdentityGroupReducerCombinable): 14 uses
IdentityMapper (org.apache.flink.optimizer.testfunctions.IdentityMapper): 14 uses
GlobalProperties (org.apache.flink.optimizer.dataproperties.GlobalProperties): 13 uses
LocalProperties (org.apache.flink.optimizer.dataproperties.LocalProperties): 13 uses