Search in sources :

Example 36 with DualInputPlanNode

use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.

the class CoGroupSolutionSetFirstTest method testCoGroupSolutionSet.

/**
 * Compiles a delta iteration whose solution set is the first input of a CoGroup and verifies
 * the properties the optimizer chose for the two CoGroup inputs.
 *
 * <p>The first input (the solution set) is expected to be forwarded without any local ordering;
 * the second input is expected to be hash-partitioned and locally ordered on field 0.
 */
@Test
public void testCoGroupSolutionSet() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple1<Integer>> raw = env.readCsvFile(IN_FILE).types(Integer.class);
    DeltaIteration<Tuple1<Integer>, Tuple1<Integer>> iteration = raw.iterateDelta(raw, 1000, 0);
    DataSet<Tuple1<Integer>> test = iteration.getWorkset().map(new SimpleMap());
    DataSet<Tuple1<Integer>> delta = iteration.getSolutionSet().coGroup(test).where(0).equalTo(0).with(new SimpleCGroup());
    DataSet<Tuple1<Integer>> feedback = iteration.getWorkset().map(new SimpleMap());
    DataSet<Tuple1<Integer>> result = iteration.closeWith(delta, feedback);
    result.output(new DiscardingOutputFormat<Tuple1<Integer>>());
    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan;
    try {
        oPlan = compileNoStats(plan);
    } catch (CompilerException e) {
        // Keep the original exception as the cause so the stack trace is not lost.
        throw new AssertionError("Compiling the plan failed: " + e.getMessage(), e);
    }
    // Set by the visitor once the iteration node was inspected; guards against the
    // assertions below being skipped silently (single-element array because the
    // anonymous class can only capture final locals).
    final boolean[] iterationVisited = { false };
    oPlan.accept(new Visitor<PlanNode>() {

        @Override
        public boolean preVisit(PlanNode visitable) {
            if (visitable instanceof WorksetIterationPlanNode) {
                iterationVisited[0] = true;
                PlanNode deltaNode = ((WorksetIterationPlanNode) visitable).getSolutionSetDeltaPlanNode();
                //get the CoGroup
                DualInputPlanNode dpn = (DualInputPlanNode) deltaNode.getInputs().iterator().next().getSource();
                Channel in1 = dpn.getInput1();
                Channel in2 = dpn.getInput2();
                Assert.assertNull("First CoGroup input must not carry a local ordering.", in1.getLocalProperties().getOrdering());
                Assert.assertNotNull("Second CoGroup input must carry a local ordering.", in2.getLocalProperties().getOrdering());
                Assert.assertTrue("Second CoGroup input must be ordered on field 0.", in2.getLocalProperties().getOrdering().getInvolvedIndexes().contains(0));
                Assert.assertEquals("Unexpected ship strategy for first CoGroup input.", ShipStrategyType.FORWARD, in1.getShipStrategy());
                Assert.assertEquals("Unexpected ship strategy for second CoGroup input.", ShipStrategyType.PARTITION_HASH, in2.getShipStrategy());
                // Everything of interest was checked; do not descend further.
                return false;
            }
            return true;
        }

        @Override
        public void postVisit(PlanNode visitable) {
        }
    });
    Assert.assertTrue("The optimized plan contains no workset iteration node; nothing was verified.", iterationVisited[0]);
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) Tuple1(org.apache.flink.api.java.tuple.Tuple1) Test(org.junit.Test)

Example 37 with DualInputPlanNode

use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.

the class HashJoinBuildFirstProperties method instantiate.

/**
 * Creates the plan node for a hybrid hash join that builds its hash table from the first input.
 *
 * <p>When the first input is not on the dynamic path but the second one is, the cached
 * driver variant is selected and the cache flag is removed from the first input's temp mode.
 *
 * @param in1 channel of the first (build side) input
 * @param in2 channel of the second (probe side) input
 * @param node the optimizer node the plan node is created for
 * @return the plan node configured with the chosen driver strategy
 * @throws CompilerException if the cached variant applies but the first input is not cached
 */
@Override
public DualInputPlanNode instantiate(Channel in1, Channel in2, TwoInputNode node) {
    // Start from the plain variant and upgrade to the cached one only where it applies.
    DriverStrategy chosen = DriverStrategy.HYBRIDHASH_BUILD_FIRST;

    if (!in1.isOnDynamicPath() && in2.isOnDynamicPath()) {
        // The build side must already be cached here; that cache is then removed
        // from the channel because the cached driver variant is used instead.
        if (!in1.getTempMode().isCached()) {
            throw new CompilerException("No cache at point where static and dynamic parts meet.");
        }
        in1.setTempMode(in1.getTempMode().makeNonCached());
        chosen = DriverStrategy.HYBRIDHASH_BUILD_FIRST_CACHED;
    }

    final String nodeName = "Join(" + node.getOperator().getName() + ")";
    return new DualInputPlanNode(node, nodeName, in1, in2, chosen, this.keys1, this.keys2);
}
Also used : DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) CompilerException(org.apache.flink.optimizer.CompilerException) DriverStrategy(org.apache.flink.runtime.operators.DriverStrategy)

Example 38 with DualInputPlanNode

use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.

the class HashJoinBuildSecondProperties method instantiate.

/**
 * Creates the plan node for a hybrid hash join that builds its hash table from the second input.
 *
 * <p>When the second input is not on the dynamic path but the first one is, the cached
 * driver variant is selected and the cache flag is removed from the second input's temp mode.
 *
 * @param in1 channel of the first (probe side) input
 * @param in2 channel of the second (build side) input
 * @param node the optimizer node the plan node is created for
 * @return the plan node configured with the chosen driver strategy
 * @throws CompilerException if the cached variant applies but the second input is not cached
 */
@Override
public DualInputPlanNode instantiate(Channel in1, Channel in2, TwoInputNode node) {
    // Start from the plain variant and upgrade to the cached one only where it applies.
    DriverStrategy chosen = DriverStrategy.HYBRIDHASH_BUILD_SECOND;

    if (!in2.isOnDynamicPath() && in1.isOnDynamicPath()) {
        // The build side (second input) must already be cached here; that cache is then
        // removed from the channel because the cached driver variant is used instead.
        if (!in2.getTempMode().isCached()) {
            throw new CompilerException("No cache at point where static and dynamic parts meet.");
        }
        in2.setTempMode(in2.getTempMode().makeNonCached());
        chosen = DriverStrategy.HYBRIDHASH_BUILD_SECOND_CACHED;
    }

    final String nodeName = "Join (" + node.getOperator().getName() + ")";
    return new DualInputPlanNode(node, nodeName, in1, in2, chosen, this.keys1, this.keys2);
}
Also used : DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) CompilerException(org.apache.flink.optimizer.CompilerException) DriverStrategy(org.apache.flink.runtime.operators.DriverStrategy)

Example 39 with DualInputPlanNode

use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.

the class DataExchangeModeOpenBranchingTest method verifyBranchigPlan.

/**
 * Compiles a program in which one mapped data set feeds three sinks (through a filter,
 * through a join, and directly) and compares the data exchange mode of every connection
 * in the optimized plan against the expected values.
 *
 * <p>Any exception raised while building or compiling the program is printed and
 * reported as a test failure.
 *
 * @param execMode execution mode set on the environment before compiling
 * @param toMap expected exchange mode on the map's input
 * @param toFilter expected exchange mode on the filter's input
 * @param toFilterSink expected exchange mode on the input of sink "sink1"
 * @param toJoin1 expected exchange mode on the join's first input
 * @param toJoin2 expected exchange mode on the join's second input
 * @param toJoinSink expected exchange mode on the input of sink "sink2"
 * @param toDirectSink expected exchange mode on the input of sink "sink3"
 */
private void verifyBranchigPlan(
        ExecutionMode execMode,
        DataExchangeMode toMap,
        DataExchangeMode toFilter,
        DataExchangeMode toFilterSink,
        DataExchangeMode toJoin1,
        DataExchangeMode toJoin2,
        DataExchangeMode toJoinSink,
        DataExchangeMode toDirectSink) {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setExecutionMode(execMode);

        // Shared branching point: every number is turned into a (value, value) pair.
        MapFunction<Long, Tuple2<Long, Long>> pairUp = new MapFunction<Long, Tuple2<Long, Long>>() {

            @Override
            public Tuple2<Long, Long> map(Long value) {
                return new Tuple2<Long, Long>(value, value);
            }
        };
        DataSet<Tuple2<Long, Long>> data = env.generateSequence(1, 100000).map(pairUp);

        // Branch 1: a filter that rejects every record, written to "sink1".
        FilterFunction<Tuple2<Long, Long>> rejectAll = new FilterFunction<Tuple2<Long, Long>>() {

            @Override
            public boolean filter(Tuple2<Long, Long> value) {
                return false;
            }
        };
        data.filter(rejectAll)
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>())
                .name("sink1");

        // Branch 2: a join against a single-element data set, written to "sink2".
        DataSet<Tuple2<Long, Long>> singleElement = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
        data.join(singleElement)
                .where(1)
                .equalTo(0)
                .output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>())
                .name("sink2");

        // Branch 3: the mapped data written straight to "sink3".
        data.output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("sink3");

        OptimizedPlan compiled = compileNoStats(env.createProgramPlan());

        // Locate the sinks by name and walk backwards to the operators in front of them.
        SinkPlanNode sinkAfterFilter = findSink(compiled.getDataSinks(), "sink1");
        SinkPlanNode sinkAfterJoin = findSink(compiled.getDataSinks(), "sink2");
        SinkPlanNode sinkAfterMap = findSink(compiled.getDataSinks(), "sink3");

        SingleInputPlanNode filter = (SingleInputPlanNode) sinkAfterFilter.getPredecessor();
        SingleInputPlanNode mapper = (SingleInputPlanNode) filter.getPredecessor();
        DualInputPlanNode join = (DualInputPlanNode) sinkAfterJoin.getPredecessor();

        // The map node must be the common source of the join and of the direct sink.
        assertEquals(mapper, join.getInput1().getSource());
        assertEquals(mapper, sinkAfterMap.getPredecessor());

        // Exchange modes on the sink inputs.
        assertEquals(toFilterSink, sinkAfterFilter.getInput().getDataExchangeMode());
        assertEquals(toJoinSink, sinkAfterJoin.getInput().getDataExchangeMode());
        assertEquals(toDirectSink, sinkAfterMap.getInput().getDataExchangeMode());

        // Exchange modes on the operator inputs.
        assertEquals(toMap, mapper.getInput().getDataExchangeMode());
        assertEquals(toFilter, filter.getInput().getDataExchangeMode());
        assertEquals(toJoin1, join.getInput1().getDataExchangeMode());
        assertEquals(toJoin2, join.getInput2().getDataExchangeMode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode)

Example 40 with DualInputPlanNode

use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.

the class UnionClosedBranchingTest method testUnionClosedBranchingTest.

/**
 * Builds a program in which the union of two sources is joined with itself, then checks
 * the exchange modes and ship strategies in the optimized plan as well as the result
 * types of the source vertices in the generated job graph.
 */
@Test
public void testUnionClosedBranchingTest() throws Exception {
    // ---- Test program: (src1 UNION src2) JOIN (src1 UNION src2) -> discarding sink ----
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setExecutionMode(executionMode);
    env.setParallelism(4);

    DataSet<Tuple1<Integer>> src1 = env.fromElements(new Tuple1<>(0), new Tuple1<>(1));
    DataSet<Tuple1<Integer>> src2 = env.fromElements(new Tuple1<>(0), new Tuple1<>(1));
    DataSet<Tuple1<Integer>> unioned = src1.union(src2);
    DataSet<Tuple2<Integer, Integer>> selfJoin = unioned.join(unioned)
            .where(0)
            .equalTo(0)
            .projectFirst(0)
            .projectSecond(0);
    selfJoin.output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());

    // ---- Optimized plan checks ----
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    SinkPlanNode sink = optimizedPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sink.getPredecessor();

    // Connections leading into the join.
    for (Channel joinInput : joinNode.getInputs()) {
        assertEquals("Unexpected data exchange mode between union and join node.", unionToJoin, joinInput.getDataExchangeMode());
        assertEquals("Unexpected ship strategy between union and join node.", unionToJoinStrategy, joinInput.getShipStrategy());
    }

    // Connections leaving the sources.
    for (SourcePlanNode sourceNode : optimizedPlan.getDataSources()) {
        for (Channel outgoing : sourceNode.getOutgoingChannels()) {
            assertEquals("Unexpected data exchange mode between source and union node.", sourceToUnion, outgoing.getDataExchangeMode());
            assertEquals("Unexpected ship strategy between source and union node.", sourceToUnionStrategy, outgoing.getShipStrategy());
        }
    }

    // ---- Generated JobGraph checks ----
    JobGraph jobGraph = new JobGraphGenerator().compileJobGraph(optimizedPlan);
    List<JobVertex> vertices = jobGraph.getVerticesSortedTopologicallyFromSources();

    // Guards the test setup: the checks below rely on exactly four vertices.
    assertEquals("Unexpected number of vertices created.", 4, vertices.size());

    // The first two vertices of the topological order are inspected as the sources.
    for (int i = 0; i < 2; i++) {
        JobVertex sourceVertex = vertices.get(i);

        assertTrue("Unexpected vertex type. Test setup is broken.", sourceVertex.isInputVertex());

        // No separate union task is created; the join reads several inputs through a
        // union input gate, so each source produces one result per consumer.
        assertEquals("Unexpected number of created results.", 2, sourceVertex.getNumberOfProducedIntermediateDataSets());

        for (IntermediateDataSet produced : sourceVertex.getProducedDataSets()) {
            ResultPartitionType resultType = produced.getResultType();

            // Only PIPELINED_FORCED may yield a non-blocking result; every other mode
            // must exchange the data in batch fashion.
            if (executionMode.equals(ExecutionMode.PIPELINED_FORCED)) {
                assertFalse("Expected non-batch exchange, but result type is " + resultType + ".", resultType.isBlocking());
            } else {
                assertTrue("Expected batch exchange, but result type is " + resultType + ".", resultType.isBlocking());
            }
        }
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) IntermediateDataSet(org.apache.flink.runtime.jobgraph.IntermediateDataSet) Channel(org.apache.flink.optimizer.plan.Channel) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) Tuple1(org.apache.flink.api.java.tuple.Tuple1) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) Test(org.junit.Test)

Aggregations

DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode)96 Test (org.junit.Test)86 OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)81 Plan (org.apache.flink.api.common.Plan)76 SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)67 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)65 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)36 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)31 SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)27 JobGraphGenerator (org.apache.flink.optimizer.plantranslate.JobGraphGenerator)19 Channel (org.apache.flink.optimizer.plan.Channel)14 WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode)13 FieldList (org.apache.flink.api.common.operators.util.FieldList)12 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)11 DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat)11 PlanNode (org.apache.flink.optimizer.plan.PlanNode)11 Tuple1 (org.apache.flink.api.java.tuple.Tuple1)10 SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode)10 ShipStrategyType (org.apache.flink.runtime.operators.shipping.ShipStrategyType)10 ReplicatingInputFormat (org.apache.flink.api.common.io.ReplicatingInputFormat)8