
Example 16 with NAryUnionPlanNode

Use of org.apache.flink.optimizer.plan.NAryUnionPlanNode in project flink by apache.

From class UnionReplacementTest, method testConsecutiveUnionsWithBroadcast.

/**
 * Checks that a plan with consecutive UNIONs followed by a broadcast-forward JOIN is
 * correctly translated.
 *
 * The program can be illustrated as follows:
 *
 * Src1 -\
 *        >-> Union12--<
 * Src2 -/              \
 *                       >-> Union123 --> bc-fwd-Join -> Output
 * Src3 ----------------/             /
 *                                   /
 * Src4 ----------------------------/
 *
 * In the resulting plan, the broadcasting must be pushed down to the inputs of the
 * unions (Src1, Src2, Src3).
 */
@Test
public void testConsecutiveUnionsWithBroadcast() throws Exception {
    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src3 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src4 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> union12 = src1.union(src2);
    DataSet<Tuple2<Long, Long>> union123 = union12.union(src3);
    union123.join(src4, JoinOperatorBase.JoinHint.BROADCAST_HASH_FIRST)
            .where(0).equalTo(0).name("join")
            .output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>())
            .name("out");
    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);
    DualInputPlanNode join = resolver.getNode("join");
    // check that the first input of the join is broadcast
    assertEquals("First join input should be fully replicated.", PartitioningProperty.FULL_REPLICATION, join.getInput1().getGlobalProperties().getPartitioning());
    NAryUnionPlanNode union = (NAryUnionPlanNode) join.getInput1().getSource();
    // check that all union inputs are broadcast
    for (Channel c : union.getInputs()) {
        assertEquals("Union input should be fully replicated", PartitioningProperty.FULL_REPLICATION, c.getGlobalProperties().getPartitioning());
        assertEquals("Union input channel should be broadcasting", ShipStrategyType.BROADCAST, c.getShipStrategy());
    }
}
Also used: DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode), NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode), ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), Channel (org.apache.flink.optimizer.plan.Channel), DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat), OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan), Test (org.junit.Test)
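
The per-channel loop above is a pattern that recurs throughout these examples: locate the NAryUnionPlanNode, then verify each of its input Channels. A minimal sketch of a reusable assertion helper, assuming JUnit 4 and the same optimizer plan classes used in the test (the PlanAssertions class and assertAllInputsShippedAs method are hypothetical, not part of Flink):

import org.apache.flink.optimizer.plan.Channel;
import org.apache.flink.optimizer.plan.PlanNode;
import org.apache.flink.runtime.operators.shipping.ShipStrategyType;

import static org.junit.Assert.assertEquals;

public final class PlanAssertions {

    private PlanAssertions() {}

    /** Asserts that every input channel of the given plan node uses the expected ship strategy. */
    public static void assertAllInputsShippedAs(PlanNode node, ShipStrategyType expected) {
        for (Channel c : node.getInputs()) {
            assertEquals("Unexpected ship strategy on an input channel",
                    expected, c.getShipStrategy());
        }
    }
}

With such a helper, the loop in the test body would collapse to assertAllInputsShippedAs(union, ShipStrategyType.BROADCAST).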

Example 17 with NAryUnionPlanNode

Use of org.apache.flink.optimizer.plan.NAryUnionPlanNode in project flink by apache.

From class UnionReplacementTest, method testConsecutiveUnionsWithHashPartitioning.

/**
 * Checks that a plan with consecutive UNIONs followed by PartitionByHash is correctly
 * translated.
 *
 * The program can be illustrated as follows:
 *
 * Src1 -\
 *        >-> Union12--<
 * Src2 -/              \
 *                       >-> Union123 -> PartitionByHash -> Output
 * Src3 ----------------/
 *
 * In the resulting plan, the hash partitioning (ShipStrategyType.PARTITION_HASH) must be
 * pushed down to the inputs of the unions (Src1, Src2, Src3).
 */
@Test
public void testConsecutiveUnionsWithHashPartitioning() throws Exception {
    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src3 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> union12 = src1.union(src2);
    DataSet<Tuple2<Long, Long>> union123 = union12.union(src3);
    union123.partitionByHash(1).output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("out");
    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);
    SingleInputPlanNode sink = resolver.getNode("out");
    // check partitioning is correct
    assertEquals("Sink input should be hash partitioned.", PartitioningProperty.HASH_PARTITIONED, sink.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Sink input should be hash partitioned on 1.", new FieldList(1), sink.getInput().getGlobalProperties().getPartitioningFields());
    SingleInputPlanNode partitioner = (SingleInputPlanNode) sink.getInput().getSource();
    assertEquals("Partitioner should be a no-op driver.", DriverStrategy.UNARY_NO_OP, partitioner.getDriverStrategy());
    assertEquals("Partitioner input should be hash partitioned.", PartitioningProperty.HASH_PARTITIONED, partitioner.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Partitioner input should be hash partitioned on 1.", new FieldList(1), partitioner.getInput().getGlobalProperties().getPartitioningFields());
    assertEquals("Partitioner input channel should be forwarding", ShipStrategyType.FORWARD, partitioner.getInput().getShipStrategy());
    NAryUnionPlanNode union = (NAryUnionPlanNode) partitioner.getInput().getSource();
    // all union inputs should be hash partitioned
    for (Channel c : union.getInputs()) {
        assertEquals("Union input should be hash partitioned", PartitioningProperty.HASH_PARTITIONED, c.getGlobalProperties().getPartitioning());
        assertEquals("Union input channel should be hash partitioning", ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
        assertTrue("Union input should be data source", c.getSource() instanceof SourcePlanNode);
    }
}
Also used: ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), Channel (org.apache.flink.optimizer.plan.Channel), DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat), OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan), FieldList (org.apache.flink.api.common.operators.util.FieldList), SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode), NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode), Test (org.junit.Test)
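
The assertions above pin down a specific plan shape: a no-op partitioner whose input channel is FORWARD from the union, with the PARTITION_HASH channels sitting directly on the three sources. When a test like this fails, dumping the channel structure helps; here is a small sketch using only the plan API calls already seen in these tests (the PlanPrinter class itself is hypothetical, not part of Flink):

import org.apache.flink.optimizer.plan.Channel;
import org.apache.flink.optimizer.plan.OptimizedPlan;
import org.apache.flink.optimizer.plan.PlanNode;
import org.apache.flink.optimizer.plan.SinkPlanNode;

public final class PlanPrinter {

    private PlanPrinter() {}

    /** Prints every channel of the optimized plan, walking from each sink towards the sources. */
    public static void printShipStrategies(OptimizedPlan plan) {
        for (SinkPlanNode sink : plan.getDataSinks()) {
            print(sink, 0);
        }
    }

    private static void print(PlanNode node, int depth) {
        for (Channel c : node.getInputs()) {
            StringBuilder line = new StringBuilder();
            for (int i = 0; i < depth; i++) {
                line.append("  ");
            }
            line.append(c.getShipStrategy())
                    .append(" <- ")
                    .append(c.getSource().getClass().getSimpleName());
            System.out.println(line);
            print(c.getSource(), depth + 1);
        }
    }
}

For the plan in this test, the output should show the PARTITION_HASH channels attached directly to the source nodes below the union.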

Example 18 with NAryUnionPlanNode

Use of org.apache.flink.optimizer.plan.NAryUnionPlanNode in project flink by apache.

From class IterationCompilerTest, method testWorksetIterationWithUnionRoot.

@Test
public void testWorksetIterationWithUnionRoot() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(43);
        DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20).map(new MapFunction<Long, Tuple2<Long, Long>>() {

            @Override
            public Tuple2<Long, Long> map(Long value) {
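                // data content is irrelevant here; the test only inspects the plan shape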
                return null;
            }
        });
        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 100, 0);
        DataSet<Tuple2<Long, Long>> solutionSetDelta = iter.getWorkset()
                .map(new IdentityMapper<Tuple2<Long, Long>>())
                .union(iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>()));
        DataSet<Tuple2<Long, Long>> nextWorkset = iter.getWorkset()
                .map(new IdentityMapper<Tuple2<Long, Long>>())
                .union(iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>()));
        iter.closeWith(solutionSetDelta, nextWorkset)
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        WorksetIterationPlanNode iterNode = (WorksetIterationPlanNode) sink.getInput().getSource();
        // make sure that the roots (the "NoOp"s that come after the unions)
        // and the unions themselves are part of the dynamic path
        SingleInputPlanNode nextWorksetNoop = (SingleInputPlanNode) iterNode.getNextWorkSetPlanNode();
        SingleInputPlanNode solutionDeltaNoop = (SingleInputPlanNode) iterNode.getSolutionSetDeltaPlanNode();
        NAryUnionPlanNode nextWorksetUnion = (NAryUnionPlanNode) nextWorksetNoop.getInput().getSource();
        NAryUnionPlanNode solutionDeltaUnion = (NAryUnionPlanNode) solutionDeltaNoop.getInput().getSource();
        assertTrue(nextWorksetNoop.isOnDynamicPath());
        assertTrue(nextWorksetNoop.getCostWeight() >= 1);
        assertTrue(solutionDeltaNoop.isOnDynamicPath());
        assertTrue(solutionDeltaNoop.getCostWeight() >= 1);
        assertTrue(nextWorksetUnion.isOnDynamicPath());
        assertTrue(nextWorksetUnion.getCostWeight() >= 1);
        assertTrue(solutionDeltaUnion.isOnDynamicPath());
        assertTrue(solutionDeltaUnion.getCostWeight() >= 1);
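        // see that the job graph generator can translate this plan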
        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode), Plan (org.apache.flink.api.common.Plan), OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan), SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode), NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode), IdentityMapper (org.apache.flink.optimizer.testfunctions.IdentityMapper), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), JobGraphGenerator (org.apache.flink.optimizer.plantranslate.JobGraphGenerator), SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode), Test (org.junit.Test)
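
The eight assertions at the end apply one rule to four nodes: anything inside the step function must be on the dynamic path and carry a cost weight of at least 1. A hedged sketch of a helper capturing that rule, using only PlanNode methods already exercised above (the class and method names are hypothetical):

import org.apache.flink.optimizer.plan.PlanNode;

import static org.junit.Assert.assertTrue;

public final class DynamicPathAssertions {

    private DynamicPathAssertions() {}

    /** Asserts that a node of the step function is on the dynamic path with a positive cost weight. */
    public static void assertOnDynamicPath(PlanNode node) {
        assertTrue(node.getClass().getSimpleName() + " should be on the dynamic path",
                node.isOnDynamicPath());
        assertTrue(node.getClass().getSimpleName() + " should have a cost weight >= 1",
                node.getCostWeight() >= 1);
    }
}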

Example 19 with NAryUnionPlanNode

Use of org.apache.flink.optimizer.plan.NAryUnionPlanNode in project flink by apache.

From class IterationCompilerTest, method testIterationWithUnionRoot.

@Test
public void testIterationWithUnionRoot() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(43);
        IterativeDataSet<Long> iteration = env.generateSequence(-4, 1000).iterate(100);
        iteration.closeWith(
                iteration.map(new IdentityMapper<Long>())
                        .union(iteration.map(new IdentityMapper<Long>())))
                .output(new DiscardingOutputFormat<Long>());
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        BulkIterationPlanNode iterNode = (BulkIterationPlanNode) sink.getInput().getSource();
        // make sure that the root of the step function (the "NoOp" that comes
        // after the union) and the union itself are part of the dynamic path
        SingleInputPlanNode noop = (SingleInputPlanNode) iterNode.getRootOfStepFunction();
        NAryUnionPlanNode union = (NAryUnionPlanNode) noop.getInput().getSource();
        assertTrue(noop.isOnDynamicPath());
        assertTrue(noop.getCostWeight() >= 1);
        assertTrue(union.isOnDynamicPath());
        assertTrue(union.getCostWeight() >= 1);
        // see that the jobgraph generator can translate this
        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), Plan (org.apache.flink.api.common.Plan), OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan), SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode), NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode), IdentityMapper (org.apache.flink.optimizer.testfunctions.IdentityMapper), JobGraphGenerator (org.apache.flink.optimizer.plantranslate.JobGraphGenerator), SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode), BulkIterationPlanNode (org.apache.flink.optimizer.plan.BulkIterationPlanNode), Test (org.junit.Test)
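
Both iteration tests end by handing the optimized plan to the JobGraphGenerator. For orientation, here is a self-contained sketch of the compile pipeline these tests exercise, running the optimizer directly instead of going through the test base's compileNoStats() helper (the Optimizer construction shown is one plausible setup, not necessarily the tests' exact configuration):

import org.apache.flink.api.common.Plan;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.api.java.operators.IterativeDataSet;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.optimizer.DataStatistics;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.costs.DefaultCostEstimator;
import org.apache.flink.optimizer.plan.OptimizedPlan;
import org.apache.flink.optimizer.plantranslate.JobGraphGenerator;

public class CompilePipelineSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // same shape as the test: a bulk iteration whose root is a union
        IterativeDataSet<Long> iteration = env.generateSequence(-4, 1000).iterate(100);
        iteration.closeWith(
                iteration.map(new IdMap()).union(iteration.map(new IdMap())))
                .output(new DiscardingOutputFormat<Long>());

        // DataSet program -> Plan -> OptimizedPlan -> JobGraph
        Plan plan = env.createProgramPlan();
        OptimizedPlan optimized =
                new Optimizer(new DataStatistics(), new DefaultCostEstimator(), new Configuration())
                        .compile(plan);
        new JobGraphGenerator().compileJobGraph(optimized);
    }

    private static class IdMap implements MapFunction<Long, Long> {
        @Override
        public Long map(Long value) {
            return value;
        }
    }
}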

Aggregations

NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode): 19 usages
Channel (org.apache.flink.optimizer.plan.Channel): 17 usages
SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode): 17 usages
BulkIterationPlanNode (org.apache.flink.optimizer.plan.BulkIterationPlanNode): 11 usages
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 10 usages
OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 10 usages
SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 10 usages
SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode): 10 usages
Test (org.junit.Test): 10 usages
DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode): 9 usages
WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode): 9 usages
BulkPartialSolutionPlanNode (org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode): 8 usages
SolutionSetPlanNode (org.apache.flink.optimizer.plan.SolutionSetPlanNode): 8 usages
WorksetPlanNode (org.apache.flink.optimizer.plan.WorksetPlanNode): 8 usages
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 7 usages
CompilerException (org.apache.flink.optimizer.CompilerException): 7 usages
PlanNode (org.apache.flink.optimizer.plan.PlanNode): 7 usages
IterationPlanNode (org.apache.flink.optimizer.plan.IterationPlanNode): 6 usages
NamedChannel (org.apache.flink.optimizer.plan.NamedChannel): 6 usages
JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 6 usages