Example 11 with Channel

Use of org.apache.flink.optimizer.plan.Channel in project flink by apache.

The class IterationsCompilerTest, method testTwoIterationsDirectlyChained.

@Test
public void testTwoIterationsDirectlyChained() throws Exception {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Tuple2<Long, Long>> verticesWithInitialId = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
        DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
        DataSet<Tuple2<Long, Long>> bulkResult = doBulkIteration(verticesWithInitialId, edges);
        DataSet<Tuple2<Long, Long>> depResult = doDeltaIteration(bulkResult, edges);
        depResult.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        assertEquals(1, op.getDataSinks().size());
        assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof WorksetIterationPlanNode);
        WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
        BulkIterationPlanNode bipn = (BulkIterationPlanNode) wipn.getInput1().getSource();
        // the hash partitioning has been pushed out of the delta iteration into the bulk iteration
        assertEquals(ShipStrategyType.FORWARD, wipn.getInput1().getShipStrategy());
        // since the partitioning was pushed into the bulk iteration, its step function has to establish the hash partitioning
        for (Channel c : bipn.getRootOfStepFunction().getInputs()) {
            assertEquals(ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
        }
        assertEquals(DataExchangeMode.BATCH, wipn.getInput1().getDataExchangeMode());
        assertEquals(DataExchangeMode.BATCH, wipn.getInput2().getDataExchangeMode());
        assertEquals(TempMode.NONE, wipn.getInput1().getTempMode());
        assertEquals(TempMode.NONE, wipn.getInput2().getTempMode());
        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) Test(org.junit.Test)
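
The test relies on the helper methods doBulkIteration and doDeltaIteration, which are defined elsewhere in IterationsCompilerTest and not shown on this page. A minimal sketch of what such helpers could look like, assuming a connected-components-style step function; the join and aggregation logic below is illustrative, not Flink's actual test code:

import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.aggregation.Aggregations;
import org.apache.flink.api.java.operators.DeltaIteration;
import org.apache.flink.api.java.operators.IterativeDataSet;
import org.apache.flink.api.java.tuple.Tuple2;

// hypothetical reconstruction; the real helpers live in IterationsCompilerTest
public static DataSet<Tuple2<Long, Long>> doBulkIteration(DataSet<Tuple2<Long, Long>> vertices, DataSet<Tuple2<Long, Long>> edges) {
    // open a bulk iteration over the vertex set (at most 20 supersteps)
    IterativeDataSet<Tuple2<Long, Long>> iteration = vertices.iterate(20);
    // propagate each vertex's value to its neighbors and keep the minimum per vertex
    DataSet<Tuple2<Long, Long>> next = iteration.join(edges).where(0).equalTo(0).with(new PropagateJoin()).groupBy(0).aggregate(Aggregations.MIN, 1);
    return iteration.closeWith(next);
}

public static DataSet<Tuple2<Long, Long>> doDeltaIteration(DataSet<Tuple2<Long, Long>> vertices, DataSet<Tuple2<Long, Long>> edges) {
    // solution set and initial workset are both the vertex set, keyed on field 0
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = vertices.iterateDelta(vertices, 100, 0);
    DataSet<Tuple2<Long, Long>> delta = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new PropagateJoin()).groupBy(0).aggregate(Aggregations.MIN, 1);
    // the delta updates the solution set and also feeds the next workset
    return iteration.closeWith(delta, delta);
}

// emits (edge target, vertex value) for every matching edge
public static class PropagateJoin implements JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> {
    @Override
    public Tuple2<Long, Long> join(Tuple2<Long, Long> vertex, Tuple2<Long, Long> edge) {
        return new Tuple2<>(edge.f1, vertex.f1);
    }
}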

Example 12 with Channel

Use of org.apache.flink.optimizer.plan.Channel in project flink by apache.

The class ParallelismChangeTest, method checkPropertyHandlingWithTwoInputs.

/**
 * Checks that re-partitioning happens when the inputs of a two-input contract have different parallelisms.
 *
 * Test Plan:
 * <pre>
 *
 * (source) -> reduce -\
 *                      Match -> (sink)
 * (source) -> reduce -/
 *
 * </pre>
 */
@Test
public void checkPropertyHandlingWithTwoInputs() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Long> set1 = env.generateSequence(0, 1).setParallelism(5);
    DataSet<Long> set2 = env.generateSequence(0, 1).setParallelism(7);
    DataSet<Long> reduce1 = set1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).withForwardedFields("*").setParallelism(5);
    DataSet<Long> reduce2 = set2.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).withForwardedFields("*").setParallelism(7);
    reduce1.join(reduce2).where("*").equalTo("*")
            .with(new IdentityJoiner<Long>()).setParallelism(5)
            .output(new DiscardingOutputFormat<Long>()).setParallelism(5);
    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
    JobGraphGenerator jobGen = new JobGraphGenerator();
    // compile the plan to verify that no error is thrown
    jobGen.compileJobGraph(oPlan);
    oPlan.accept(new Visitor<PlanNode>() {

        @Override
        public boolean preVisit(PlanNode visitable) {
            if (visitable instanceof DualInputPlanNode) {
                DualInputPlanNode node = (DualInputPlanNode) visitable;
                Channel c1 = node.getInput1();
                Channel c2 = node.getInput2();
                Assert.assertEquals("Incompatible shipping strategy chosen for match", ShipStrategyType.FORWARD, c1.getShipStrategy());
                Assert.assertEquals("Incompatible shipping strategy chosen for match", ShipStrategyType.PARTITION_HASH, c2.getShipStrategy());
                return false;
            }
            return true;
        }

        @Override
        public void postVisit(PlanNode visitable) {
        // DO NOTHING
        }
    });
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) IdentityJoiner(org.apache.flink.optimizer.testfunctions.IdentityJoiner) Test(org.junit.Test)
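
IdentityGroupReducer and IdentityJoiner come from org.apache.flink.optimizer.testfunctions and are not shown here. A minimal sketch of such identity functions, assuming only that they forward records unchanged (which is what the withForwardedFields("*") hints at the call sites rely on); each class would live in its own file:

import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.util.Collector;

// forwards every record of a group unchanged
public class IdentityGroupReducer<T> implements GroupReduceFunction<T, T> {
    @Override
    public void reduce(Iterable<T> values, Collector<T> out) {
        for (T value : values) {
            out.collect(value);
        }
    }
}

// returns the left record of each matched pair unchanged
public class IdentityJoiner<T> implements JoinFunction<T, T, T> {
    @Override
    public T join(T first, T second) {
        return first;
    }
}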

Example 13 with Channel

Use of org.apache.flink.optimizer.plan.Channel in project flink by apache.

The class SemanticPropertiesAPIToPlanTest, method forwardFieldsTestMapReduce.

@Test
public void forwardFieldsTestMapReduce() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<Integer, Integer, Integer>> set = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
    set = set.map(new MockMapper()).withForwardedFields("*")
            .groupBy(0)
            .reduce(new MockReducer()).withForwardedFields("f0->f1")
            .map(new MockMapper()).withForwardedFields("*")
            .groupBy(1)
            .reduce(new MockReducer()).withForwardedFields("*");
    set.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());
    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileWithStats(plan);
    oPlan.accept(new Visitor<PlanNode>() {

        @Override
        public boolean preVisit(PlanNode visitable) {
            if (visitable instanceof SingleInputPlanNode && visitable.getProgramOperator() instanceof ReduceOperatorBase) {
                for (Channel input : visitable.getInputs()) {
                    GlobalProperties gprops = visitable.getGlobalProperties();
                    LocalProperties lprops = visitable.getLocalProperties();
                    Assert.assertTrue("Reduce should just forward the input if it is already partitioned", input.getShipStrategy() == ShipStrategyType.FORWARD);
                    Assert.assertTrue("Wrong GlobalProperties on Reducer", gprops.isPartitionedOnFields(new FieldSet(1)));
                    Assert.assertTrue("Wrong GlobalProperties on Reducer", gprops.getPartitioning() == PartitioningProperty.HASH_PARTITIONED);
                    Assert.assertTrue("Wrong LocalProperties on Reducer", lprops.getGroupedFields().contains(1));
                }
            }
            if (visitable instanceof SingleInputPlanNode && visitable.getProgramOperator() instanceof MapOperatorBase) {
                for (Channel input : visitable.getInputs()) {
                    GlobalProperties gprops = visitable.getGlobalProperties();
                    LocalProperties lprops = visitable.getLocalProperties();
                    Assert.assertTrue("Map should just forward the input if it is already partitioned", input.getShipStrategy() == ShipStrategyType.FORWARD);
                    Assert.assertTrue("Wrong GlobalProperties on Mapper", gprops.isPartitionedOnFields(new FieldSet(1)));
                    Assert.assertTrue("Wrong GlobalProperties on Mapper", gprops.getPartitioning() == PartitioningProperty.HASH_PARTITIONED);
                    Assert.assertTrue("Wrong LocalProperties on Mapper", lprops.getGroupedFields().contains(1));
                }
                return false;
            }
            return true;
        }

        @Override
        public void postVisit(PlanNode visitable) {
        }
    });
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) ReduceOperatorBase(org.apache.flink.api.common.operators.base.ReduceOperatorBase) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) FieldSet(org.apache.flink.api.common.operators.util.FieldSet) MapOperatorBase(org.apache.flink.api.common.operators.base.MapOperatorBase) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) GlobalProperties(org.apache.flink.optimizer.dataproperties.GlobalProperties) Tuple3(org.apache.flink.api.java.tuple.Tuple3) LocalProperties(org.apache.flink.optimizer.dataproperties.LocalProperties) Test(org.junit.Test)
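
MockMapper and MockReducer are private helpers of SemanticPropertiesAPIToPlanTest that this page omits. Since the plan is only compiled and never executed, their bodies do not influence the optimizer; only the withForwardedFields annotations at the call sites do. A plausible sketch, assuming identity-style behavior:

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple3;

// identity map; the forwarded-fields declarations are made at the call sites
public static class MockMapper implements MapFunction<Tuple3<Integer, Integer, Integer>, Tuple3<Integer, Integer, Integer>> {
    @Override
    public Tuple3<Integer, Integer, Integer> map(Tuple3<Integer, Integer, Integer> value) {
        return value;
    }
}

// pairwise reduce; never executed in this compile-only test
public static class MockReducer implements ReduceFunction<Tuple3<Integer, Integer, Integer>> {
    @Override
    public Tuple3<Integer, Integer, Integer> reduce(Tuple3<Integer, Integer, Integer> value1, Tuple3<Integer, Integer, Integer> value2) {
        return value1;
    }
}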

Example 14 with Channel

Use of org.apache.flink.optimizer.plan.Channel in project flink by apache.

The class UnionReplacementTest, method testConsecutiveUnionsWithRebalance.

/**
 * Checks that a plan with consecutive UNIONs followed by REBALANCE is correctly translated.
 *
 * The program can be illustrated as follows:
 *
 * Src1 -\
 *        >-> Union12--<
 * Src2 -/              \
 *                       >-> Union123 -> Rebalance -> Output
 * Src3 ----------------/
 *
 * In the resulting plan, the Rebalance (ShipStrategyType.PARTITION_FORCED_REBALANCE) must be
 * pushed to the inputs of the unions (Src1, Src2, Src3).
 */
@Test
public void testConsecutiveUnionsWithRebalance() throws Exception {
    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src3 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> union12 = src1.union(src2);
    DataSet<Tuple2<Long, Long>> union123 = union12.union(src3);
    union123.rebalance().output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("out");
    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);
    SingleInputPlanNode sink = resolver.getNode("out");
    // check partitioning is correct
    assertEquals("Sink input should be force rebalanced.", PartitioningProperty.FORCED_REBALANCED, sink.getInput().getGlobalProperties().getPartitioning());
    SingleInputPlanNode partitioner = (SingleInputPlanNode) sink.getInput().getSource();
    assertTrue(partitioner.getDriverStrategy() == DriverStrategy.UNARY_NO_OP);
    assertEquals("Partitioner input should be force rebalanced.", PartitioningProperty.FORCED_REBALANCED, partitioner.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Partitioner input channel should be forwarding", ShipStrategyType.FORWARD, partitioner.getInput().getShipStrategy());
    NAryUnionPlanNode union = (NAryUnionPlanNode) partitioner.getInput().getSource();
    // all union inputs should be force rebalanced
    for (Channel c : union.getInputs()) {
        assertEquals("Union input should be force rebalanced", PartitioningProperty.FORCED_REBALANCED, c.getGlobalProperties().getPartitioning());
        assertEquals("Union input channel should be rebalancing", ShipStrategyType.PARTITION_FORCED_REBALANCE, c.getShipStrategy());
        assertTrue("Union input should be data source", c.getSource() instanceof SourcePlanNode);
    }
}
Also used : SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Channel(org.apache.flink.optimizer.plan.Channel) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)
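
The per-channel loops in Examples 11 and 14 repeat the same pattern; a small reusable helper, hypothetical rather than part of Flink's test utilities, that asserts one ship strategy on every input channel of a plan node:

import org.apache.flink.optimizer.plan.Channel;
import org.apache.flink.optimizer.plan.PlanNode;
import org.apache.flink.runtime.operators.shipping.ShipStrategyType;
import static org.junit.Assert.assertEquals;

// hypothetical helper: verify that every input channel of a node uses the expected strategy
public static void assertAllInputsShippedAs(PlanNode node, ShipStrategyType expected) {
    for (Channel c : node.getInputs()) {
        assertEquals("Unexpected ship strategy on an input of " + node, expected, c.getShipStrategy());
    }
}

With it, the loop over bipn.getRootOfStepFunction().getInputs() in Example 11 becomes assertAllInputsShippedAs(bipn.getRootOfStepFunction(), ShipStrategyType.PARTITION_HASH).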

Example 15 with Channel

Use of org.apache.flink.optimizer.plan.Channel in project flink by apache.

The class UnionReplacementTest, method testUnionWithTwoOutputs.

/**
 * Test for FLINK-2662.
 *
 * Checks that a plan with a union with two outputs is correctly translated.
 * The program can be illustrated as follows:
 *
 * Src1 ----------------\
 *                       >-> Union123 -> GroupBy(0) -> Sum -> Output
 * Src2 -\              /
 *        >-> Union23--<
 * Src3 -/              \
 *                       >-> Union234 -> GroupBy(1) -> Sum -> Output
 * Src4 ----------------/
 *
 * The fix for FLINK-2662 translates the union with two outputs (Union-23) into two separate
 * unions (Union-23_1 and Union-23_2) with one output each. Due to this change, the interesting
 * partitioning properties for GroupBy(0) and GroupBy(1) are pushed through Union-23_1 and
 * Union-23_2 and do not interfere with each other (which would be the case if Union-23 were
 * a single operator with two outputs).
 */
@Test
public void testUnionWithTwoOutputs() throws Exception {
    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src3 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src4 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> union23 = src2.union(src3);
    DataSet<Tuple2<Long, Long>> union123 = src1.union(union23);
    DataSet<Tuple2<Long, Long>> union234 = src4.union(union23);
    union123.groupBy(0).sum(1).name("1").output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    union234.groupBy(1).sum(0).name("2").output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);
    SingleInputPlanNode groupRed1 = resolver.getNode("1");
    SingleInputPlanNode groupRed2 = resolver.getNode("2");
    // check partitioning is correct
    assertTrue("Reduce input should be partitioned on 0.", groupRed1.getInput().getGlobalProperties().getPartitioningFields().isExactMatch(new FieldList(0)));
    assertTrue("Reduce input should be partitioned on 1.", groupRed2.getInput().getGlobalProperties().getPartitioningFields().isExactMatch(new FieldList(1)));
    // check group reduce inputs are n-ary unions with three inputs
    assertTrue("Reduce input should be n-ary union with three inputs.", groupRed1.getInput().getSource() instanceof NAryUnionPlanNode && ((NAryUnionPlanNode) groupRed1.getInput().getSource()).getListOfInputs().size() == 3);
    assertTrue("Reduce input should be n-ary union with three inputs.", groupRed2.getInput().getSource() instanceof NAryUnionPlanNode && ((NAryUnionPlanNode) groupRed2.getInput().getSource()).getListOfInputs().size() == 3);
    // check channel from union to group reduce is forwarding
    assertTrue("Channel between union and group reduce should be forwarding", groupRed1.getInput().getShipStrategy().equals(ShipStrategyType.FORWARD));
    assertTrue("Channel between union and group reduce should be forwarding", groupRed2.getInput().getShipStrategy().equals(ShipStrategyType.FORWARD));
    // check that all inputs of unions are hash partitioned
    List<Channel> union123In = ((NAryUnionPlanNode) groupRed1.getInput().getSource()).getListOfInputs();
    for (Channel i : union123In) {
        assertTrue("Union input channel should hash partition on 0", i.getShipStrategy().equals(ShipStrategyType.PARTITION_HASH) && i.getShipStrategyKeys().isExactMatch(new FieldList(0)));
    }
    List<Channel> union234In = ((NAryUnionPlanNode) groupRed2.getInput().getSource()).getListOfInputs();
    for (Channel i : union234In) {
        assertTrue("Union input channel should hash partition on 0", i.getShipStrategy().equals(ShipStrategyType.PARTITION_HASH) && i.getShipStrategyKeys().isExactMatch(new FieldList(1)));
    }
}
Also used : SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Channel(org.apache.flink.optimizer.plan.Channel) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) Test(org.junit.Test)
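
The examples above obtain their OptimizedPlan via compileNoStats (Example 13 uses compileWithStats), helpers inherited from Flink's CompilerTestBase along with DEFAULT_PARALLELISM. A minimal sketch of compiling a plan without data statistics, assuming the org.apache.flink.optimizer.Optimizer API:

import org.apache.flink.api.common.Plan;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.costs.DefaultCostEstimator;
import org.apache.flink.optimizer.plan.OptimizedPlan;

// compile a plan without supplying data statistics, mirroring what CompilerTestBase does
public static OptimizedPlan compileNoStats(Plan plan) {
    Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
    return compiler.compile(plan);
}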

Aggregations

Channel (org.apache.flink.optimizer.plan.Channel): 60
SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode): 41
Test (org.junit.Test): 30
OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 26
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 24
PlanNode (org.apache.flink.optimizer.plan.PlanNode): 24
DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode): 23
SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode): 23
SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 20
NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode): 19
Plan (org.apache.flink.api.common.Plan): 18
BulkIterationPlanNode (org.apache.flink.optimizer.plan.BulkIterationPlanNode): 18
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 16
NamedChannel (org.apache.flink.optimizer.plan.NamedChannel): 16
WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode): 15
CompilerException (org.apache.flink.optimizer.CompilerException): 14
GlobalProperties (org.apache.flink.optimizer.dataproperties.GlobalProperties): 13
RequestedGlobalProperties (org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties): 13
RequestedLocalProperties (org.apache.flink.optimizer.dataproperties.RequestedLocalProperties): 13
FieldList (org.apache.flink.api.common.operators.util.FieldList): 12