Search in sources :

Example 1 with DualInputPlanNode

use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.

the class DataExchangeModeOpenBranchingTest method verifyBranchigPlan.

private void verifyBranchigPlan(ExecutionMode execMode, DataExchangeMode toMap, DataExchangeMode toFilter, DataExchangeMode toFilterSink, DataExchangeMode toJoin1, DataExchangeMode toJoin2, DataExchangeMode toJoinSink, DataExchangeMode toDirectSink) {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setExecutionMode(execMode);
        DataSet<Tuple2<Long, Long>> data = env.generateSequence(1, 100000).map(new MapFunction<Long, Tuple2<Long, Long>>() {

            @Override
            public Tuple2<Long, Long> map(Long value) {
                return new Tuple2<Long, Long>(value, value);
            }
        });
        // output 1
        data.filter(new FilterFunction<Tuple2<Long, Long>>() {

            @Override
            public boolean filter(Tuple2<Long, Long> value) {
                return false;
            }
        }).output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("sink1");
        // output 2 does a join before a join
        data.join(env.fromElements(new Tuple2<Long, Long>(1L, 2L))).where(1).equalTo(0).output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>()).name("sink2");
        // output 3 is direct
        data.output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("sink3");
        OptimizedPlan optPlan = compileNoStats(env.createProgramPlan());
        SinkPlanNode filterSink = findSink(optPlan.getDataSinks(), "sink1");
        SinkPlanNode joinSink = findSink(optPlan.getDataSinks(), "sink2");
        SinkPlanNode directSink = findSink(optPlan.getDataSinks(), "sink3");
        SingleInputPlanNode filterNode = (SingleInputPlanNode) filterSink.getPredecessor();
        SingleInputPlanNode mapNode = (SingleInputPlanNode) filterNode.getPredecessor();
        DualInputPlanNode joinNode = (DualInputPlanNode) joinSink.getPredecessor();
        assertEquals(mapNode, joinNode.getInput1().getSource());
        assertEquals(mapNode, directSink.getPredecessor());
        assertEquals(toFilterSink, filterSink.getInput().getDataExchangeMode());
        assertEquals(toJoinSink, joinSink.getInput().getDataExchangeMode());
        assertEquals(toDirectSink, directSink.getInput().getDataExchangeMode());
        assertEquals(toMap, mapNode.getInput().getDataExchangeMode());
        assertEquals(toFilter, filterNode.getInput().getDataExchangeMode());
        assertEquals(toJoin1, joinNode.getInput1().getDataExchangeMode());
        assertEquals(toJoin2, joinNode.getInput2().getDataExchangeMode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode)

Example 2 with DualInputPlanNode

use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.

the class UnionClosedBranchingTest method testUnionClosedBranchingTest.

@Test
public void testUnionClosedBranchingTest() throws Exception {
    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setExecutionMode(executionMode);
    env.setParallelism(4);
    DataSet<Tuple1<Integer>> src1 = env.fromElements(new Tuple1<>(0), new Tuple1<>(1));
    DataSet<Tuple1<Integer>> src2 = env.fromElements(new Tuple1<>(0), new Tuple1<>(1));
    DataSet<Tuple1<Integer>> union = src1.union(src2);
    DataSet<Tuple2<Integer, Integer>> join = union.join(union).where(0).equalTo(0).projectFirst(0).projectSecond(0);
    join.output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());
    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    SinkPlanNode sinkNode = optimizedPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();
    // Verify that the compiler correctly sets the expected data exchange modes.
    for (Channel channel : joinNode.getInputs()) {
        assertEquals("Unexpected data exchange mode between union and join node.", unionToJoin, channel.getDataExchangeMode());
        assertEquals("Unexpected ship strategy between union and join node.", unionToJoinStrategy, channel.getShipStrategy());
    }
    for (SourcePlanNode src : optimizedPlan.getDataSources()) {
        for (Channel channel : src.getOutgoingChannels()) {
            assertEquals("Unexpected data exchange mode between source and union node.", sourceToUnion, channel.getDataExchangeMode());
            assertEquals("Unexpected ship strategy between source and union node.", sourceToUnionStrategy, channel.getShipStrategy());
        }
    }
    // -----------------------------------------------------------------------------------------
    // Verify generated JobGraph
    // -----------------------------------------------------------------------------------------
    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jobGraph = jgg.compileJobGraph(optimizedPlan);
    List<JobVertex> vertices = jobGraph.getVerticesSortedTopologicallyFromSources();
    // Sanity check for the test setup
    assertEquals("Unexpected number of vertices created.", 4, vertices.size());
    // Verify all sources
    JobVertex[] sources = new JobVertex[] { vertices.get(0), vertices.get(1) };
    for (JobVertex src : sources) {
        // Sanity check
        assertTrue("Unexpected vertex type. Test setup is broken.", src.isInputVertex());
        // The union is not translated to an extra union task, but the join uses a union
        // input gate to read multiple inputs. The source create a single result per consumer.
        assertEquals("Unexpected number of created results.", 2, src.getNumberOfProducedIntermediateDataSets());
        for (IntermediateDataSet dataSet : src.getProducedDataSets()) {
            ResultPartitionType dsType = dataSet.getResultType();
            // Ensure batch exchange unless PIPELINED_FORCE is enabled.
            if (!executionMode.equals(ExecutionMode.PIPELINED_FORCED)) {
                assertTrue("Expected batch exchange, but result type is " + dsType + ".", dsType.isBlocking());
            } else {
                assertFalse("Expected non-batch exchange, but result type is " + dsType + ".", dsType.isBlocking());
            }
        }
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) IntermediateDataSet(org.apache.flink.runtime.jobgraph.IntermediateDataSet) Channel(org.apache.flink.optimizer.plan.Channel) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) Tuple1(org.apache.flink.api.java.tuple.Tuple1) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) Test(org.junit.Test)

Example 3 with DualInputPlanNode

use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.

the class WorksetIterationsJavaApiCompilerTest method testJavaApiWithDeferredSoltionSetUpdateWithNonPreservingJoin.

@Test
public void testJavaApiWithDeferredSoltionSetUpdateWithNonPreservingJoin() {
    try {
        Plan plan = getJavaTestPlan(false, false);
        OptimizedPlan oPlan = compileNoStats(plan);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
        DualInputPlanNode joinWithInvariantNode = resolver.getNode(JOIN_WITH_INVARIANT_NAME);
        DualInputPlanNode joinWithSolutionSetNode = resolver.getNode(JOIN_WITH_SOLUTION_SET);
        SingleInputPlanNode worksetReducer = resolver.getNode(NEXT_WORKSET_REDUCER_NAME);
        // iteration preserves partitioning in reducer, so the first partitioning is out of the loop, 
        // the in-loop partitioning is before the final reducer
        // verify joinWithInvariant
        assertEquals(ShipStrategyType.FORWARD, joinWithInvariantNode.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_HASH, joinWithInvariantNode.getInput2().getShipStrategy());
        assertEquals(new FieldList(1, 2), joinWithInvariantNode.getKeysForInput1());
        assertEquals(new FieldList(1, 2), joinWithInvariantNode.getKeysForInput2());
        // verify joinWithSolutionSet
        assertEquals(ShipStrategyType.PARTITION_HASH, joinWithSolutionSetNode.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, joinWithSolutionSetNode.getInput2().getShipStrategy());
        assertEquals(new FieldList(1, 0), joinWithSolutionSetNode.getKeysForInput1());
        // verify reducer
        assertEquals(ShipStrategyType.PARTITION_HASH, worksetReducer.getInput().getShipStrategy());
        assertEquals(new FieldList(1, 2), worksetReducer.getKeys(0));
        // verify solution delta
        assertEquals(2, joinWithSolutionSetNode.getOutgoingChannels().size());
        assertEquals(ShipStrategyType.PARTITION_HASH, joinWithSolutionSetNode.getOutgoingChannels().get(0).getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_HASH, joinWithSolutionSetNode.getOutgoingChannels().get(1).getShipStrategy());
        new JobGraphGenerator().compileJobGraph(oPlan);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test errored: " + e.getMessage());
    }
}
Also used : DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) Test(org.junit.Test)

Example 4 with DualInputPlanNode

use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.

the class JoinCustomPartitioningTest method testJoinWithPojos.

@Test
public void testJoinWithPojos() {
    try {
        final Partitioner<Integer> partitioner = new TestPartitionerInt();
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Pojo2> input1 = env.fromElements(new Pojo2());
        DataSet<Pojo3> input2 = env.fromElements(new Pojo3());
        input1.join(input2, JoinHint.REPARTITION_HASH_FIRST).where("b").equalTo("a").withPartitioner(partitioner).output(new DiscardingOutputFormat<Tuple2<Pojo2, Pojo3>>());
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        DualInputPlanNode join = (DualInputPlanNode) sink.getInput().getSource();
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, join.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, join.getInput2().getShipStrategy());
        assertEquals(partitioner, join.getInput1().getPartitioner());
        assertEquals(partitioner, join.getInput2().getPartitioner());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 5 with DualInputPlanNode

use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.

the class ConnectedComponentsCoGroupTest method testWorksetConnectedComponents.

@Test
public void testWorksetConnectedComponents() {
    Plan plan = getConnectedComponentsCoGroupPlan();
    plan.setExecutionConfig(new ExecutionConfig());
    OptimizedPlan optPlan = compileNoStats(plan);
    OptimizerPlanNodeResolver or = getOptimizerPlanNodeResolver(optPlan);
    if (PRINT_PLAN) {
        PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
        String json = dumper.getOptimizerPlanAsJSON(optPlan);
        System.out.println(json);
    }
    SourcePlanNode vertexSource = or.getNode(VERTEX_SOURCE);
    SourcePlanNode edgesSource = or.getNode(EDGES_SOURCE);
    SinkPlanNode sink = or.getNode(SINK);
    WorksetIterationPlanNode iter = or.getNode(ITERATION_NAME);
    DualInputPlanNode neighborsJoin = or.getNode(JOIN_NEIGHBORS_MATCH);
    DualInputPlanNode cogroup = or.getNode(MIN_ID_AND_UPDATE);
    // --------------------------------------------------------------------
    // Plan validation:
    //
    // We expect the plan to go with a sort-merge join, because the CoGroup
    // sorts and the join in the successive iteration can re-exploit the sorting.
    // --------------------------------------------------------------------
    // test all drivers
    Assert.assertEquals(DriverStrategy.NONE, sink.getDriverStrategy());
    Assert.assertEquals(DriverStrategy.NONE, vertexSource.getDriverStrategy());
    Assert.assertEquals(DriverStrategy.NONE, edgesSource.getDriverStrategy());
    Assert.assertEquals(DriverStrategy.INNER_MERGE, neighborsJoin.getDriverStrategy());
    Assert.assertEquals(set0, neighborsJoin.getKeysForInput1());
    Assert.assertEquals(set0, neighborsJoin.getKeysForInput2());
    Assert.assertEquals(DriverStrategy.CO_GROUP, cogroup.getDriverStrategy());
    Assert.assertEquals(set0, cogroup.getKeysForInput1());
    Assert.assertEquals(set0, cogroup.getKeysForInput2());
    // test all the shipping strategies
    Assert.assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, iter.getInitialSolutionSetInput().getShipStrategy());
    Assert.assertEquals(set0, iter.getInitialSolutionSetInput().getShipStrategyKeys());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, iter.getInitialWorksetInput().getShipStrategy());
    Assert.assertEquals(set0, iter.getInitialWorksetInput().getShipStrategyKeys());
    // workset
    Assert.assertEquals(ShipStrategyType.FORWARD, neighborsJoin.getInput1().getShipStrategy());
    // edges
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, neighborsJoin.getInput2().getShipStrategy());
    Assert.assertEquals(set0, neighborsJoin.getInput2().getShipStrategyKeys());
    Assert.assertTrue(neighborsJoin.getInput2().getTempMode().isCached());
    // min id
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, cogroup.getInput1().getShipStrategy());
    // solution set
    Assert.assertEquals(ShipStrategyType.FORWARD, cogroup.getInput2().getShipStrategy());
    // test all the local strategies
    Assert.assertEquals(LocalStrategy.NONE, sink.getInput().getLocalStrategy());
    Assert.assertEquals(LocalStrategy.NONE, iter.getInitialSolutionSetInput().getLocalStrategy());
    // the sort for the neighbor join in the first iteration is pushed out of the loop
    Assert.assertEquals(LocalStrategy.SORT, iter.getInitialWorksetInput().getLocalStrategy());
    // workset
    Assert.assertEquals(LocalStrategy.NONE, neighborsJoin.getInput1().getLocalStrategy());
    // edges
    Assert.assertEquals(LocalStrategy.SORT, neighborsJoin.getInput2().getLocalStrategy());
    Assert.assertEquals(LocalStrategy.SORT, cogroup.getInput1().getLocalStrategy());
    // solution set
    Assert.assertEquals(LocalStrategy.NONE, cogroup.getInput2().getLocalStrategy());
    // check the caches
    Assert.assertTrue(TempMode.CACHED == neighborsJoin.getInput2().getTempMode());
    JobGraphGenerator jgg = new JobGraphGenerator();
    jgg.compileJobGraph(optPlan);
}
Also used : DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanJSONDumpGenerator(org.apache.flink.optimizer.plandump.PlanJSONDumpGenerator) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)

Aggregations

DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode)96 Test (org.junit.Test)86 OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)81 Plan (org.apache.flink.api.common.Plan)76 SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)67 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)65 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)36 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)31 SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)27 JobGraphGenerator (org.apache.flink.optimizer.plantranslate.JobGraphGenerator)19 Channel (org.apache.flink.optimizer.plan.Channel)14 WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode)13 FieldList (org.apache.flink.api.common.operators.util.FieldList)12 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)11 DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat)11 PlanNode (org.apache.flink.optimizer.plan.PlanNode)11 Tuple1 (org.apache.flink.api.java.tuple.Tuple1)10 SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode)10 ShipStrategyType (org.apache.flink.runtime.operators.shipping.ShipStrategyType)10 ReplicatingInputFormat (org.apache.flink.api.common.io.ReplicatingInputFormat)8