Search in sources :

Example 6 with DualInputPlanNode

use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.

the class MultipleJoinsWithSolutionSetCompilerTest method testMultiSolutionSetJoinPlan.

/**
 * Builds a plan through {@code constructPlan}, attaches two sinks to its result, compiles it
 * without statistics and verifies the strategies picked for the two joins: their driver
 * strategies, the hash-partitioned ship strategy on the non-solution-set side, and that the
 * other side of each join is fed by a {@link SolutionSetPlanNode}. Finally the optimized plan
 * is handed to the {@link JobGraphGenerator}.
 */
@Test
public void testMultiSolutionSetJoinPlan() {
    try {
        final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
        @SuppressWarnings("unchecked") final DataSet<Tuple2<Long, Double>> seed = environment.fromElements(new Tuple2<Long, Double>(1L, 1.0));
        final DataSet<Tuple2<Long, Double>> planResult = constructPlan(seed, 10);

        // two sinks on the same data set, to cover branching after an iteration
        planResult.output(new DiscardingOutputFormat<Tuple2<Long, Double>>());
        planResult.output(new DiscardingOutputFormat<Tuple2<Long, Double>>());

        final Plan program = environment.createProgramPlan();
        final OptimizedPlan optimized = compileNoStats(program);
        final OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimized);

        final DualInputPlanNode firstJoin = resolver.getNode(JOIN_1);
        final DualInputPlanNode secondJoin = resolver.getNode(JOIN_2);

        // driver strategies of both joins
        assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST, firstJoin.getDriverStrategy());
        assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND, secondJoin.getDriverStrategy());

        // the side that is not the solution set is hash partitioned
        assertEquals(ShipStrategyType.PARTITION_HASH, firstJoin.getInput2().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_HASH, secondJoin.getInput1().getShipStrategy());

        // the opposite side is the solution set
        assertEquals(SolutionSetPlanNode.class, firstJoin.getInput1().getSource().getClass());
        assertEquals(SolutionSetPlanNode.class, secondJoin.getInput2().getSource().getClass());

        // job graph generation must go through without an exception
        new JobGraphGenerator().compileJobGraph(optimized);
    } catch (Exception failure) {
        System.err.println(failure.getMessage());
        failure.printStackTrace();
        fail("Test erroneous: " + failure.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) Test(org.junit.Test)

Example 7 with DualInputPlanNode

use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.

the class RelationalQueryCompilerTest method testQueryGeneric.

/**
 * Sets statistics and compiler hints on the given plan, compiles it with statistics and checks
 * that the ship strategy and join strategy found in the optimized plan are among those the
 * caller declared acceptable.
 *
 * @param p the program plan to annotate and compile
 * @param orderSize value passed to {@code setSourceStatistics} for the {@code ORDERS} source
 * @param lineitemSize value passed to {@code setSourceStatistics} for the {@code LINEITEM} source
 * @param orderSelectivity filter factor set on the mapper's compiler hints
 * @param joinSelectivity filter factor set on the joiner's compiler hints
 * @param broadcastOkay whether a broadcast ship strategy is an acceptable outcome
 * @param partitionedOkay whether a repartitioning ship strategy is an acceptable outcome
 * @param hashJoinFirstOkay whether a hash join building on the first input is acceptable
 * @param hashJoinSecondOkay whether a hash join building on the second input is acceptable
 * @param mergeJoinOkay whether a merge join is acceptable
 */
private void testQueryGeneric(Plan p, long orderSize, long lineitemSize, float orderSelectivity, float joinSelectivity, boolean broadcastOkay, boolean partitionedOkay, boolean hashJoinFirstOkay, boolean hashJoinSecondOkay, boolean mergeJoinOkay) {
    try {
        // annotate the program with statistics and hints
        final OperatorResolver contracts = getContractResolver(p);
        final GenericDataSourceBase<?, ?> orders = contracts.getNode(ORDERS);
        final GenericDataSourceBase<?, ?> lineItems = contracts.getNode(LINEITEM);
        final SingleInputOperator<?, ?, ?> mapOperator = contracts.getNode(MAPPER_NAME);
        final DualInputOperator<?, ?, ?, ?> joinOperator = contracts.getNode(JOIN_NAME);
        setSourceStatistics(orders, orderSize, 100f);
        setSourceStatistics(lineItems, lineitemSize, 140f);
        mapOperator.getCompilerHints().setAvgOutputRecordSize(16f);
        mapOperator.getCompilerHints().setFilterFactor(orderSelectivity);
        joinOperator.getCompilerHints().setFilterFactor(joinSelectivity);

        // run the optimizer
        final OptimizedPlan optimized = compileWithStats(p);
        final OptimizerPlanNodeResolver nodes = getOptimizerPlanNodeResolver(optimized);

        // look up the nodes of interest in the optimized plan
        final SinkPlanNode sink = nodes.getNode(SINK);
        final SingleInputPlanNode reducer = nodes.getNode(REDUCE_NAME);
        final SingleInputPlanNode combiner;
        if (reducer.getPredecessor() instanceof SingleInputPlanNode) {
            combiner = (SingleInputPlanNode) reducer.getPredecessor();
        } else {
            combiner = null;
        }
        final DualInputPlanNode join = nodes.getNode(JOIN_NAME);
        final SingleInputPlanNode filteringMapper = nodes.getNode(MAPPER_NAME);
        checkStandardStrategies(filteringMapper, join, combiner, reducer, sink);

        // determine which shipping variant was chosen; the repartition check only runs
        // when the broadcast check did not match
        final boolean broadcastShipping = checkBroadcastShipStrategies(join, reducer, combiner);
        if (!broadcastShipping && !checkRepartitionShipStrategies(join, reducer, combiner)) {
            Assert.fail("Plan has neither correct BC join or partitioned join configuration.");
        }

        // check that the chosen shipping variant is allowed in this specific setting
        if (broadcastShipping) {
            Assert.assertTrue("Broadcast join incorrectly chosen.", broadcastOkay);
        } else {
            Assert.assertTrue("Partitioned join incorrectly chosen.", partitionedOkay);
        }

        // check that the chosen join strategy is allowed in this specific setting
        if (checkHashJoinStrategies(join, reducer, true)) {
            Assert.assertTrue("Hash join (build orders) incorrectly chosen", hashJoinFirstOkay);
        } else if (checkHashJoinStrategies(join, reducer, false)) {
            Assert.assertTrue("Hash join (build lineitem) incorrectly chosen", hashJoinSecondOkay);
        } else {
            // the merge join check depends on the shipping variant found above
            final boolean mergeJoin = broadcastShipping ? checkBroadcastMergeJoin(join, reducer) : checkRepartitionMergeJoin(join, reducer);
            if (mergeJoin) {
                Assert.assertTrue("Merge join incorrectly chosen", mergeJoinOkay);
            } else {
                Assert.fail("Plan has no correct hash join or merge join strategies.");
            }
        }
    } catch (Exception failure) {
        failure.printStackTrace();
        Assert.fail(failure.getMessage());
    }
}
Also used : SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) OperatorResolver(org.apache.flink.optimizer.util.OperatorResolver) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan)

Example 8 with DualInputPlanNode

use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.

the class PregelCompilerTest method testPregelCompilerWithBroadcastVariable.

/**
 * Compiles a vertex-centric iteration that has a broadcast set registered on its
 * configuration and verifies the optimized plan: parallelism and ship strategy of the sink,
 * the iteration node, the flat map behind the solution set delta, the computation co-group
 * with its cached, hash-partitioned second input, and the hash partitioning on the first
 * input of the iteration.
 */
@SuppressWarnings("serial")
@Test
public void testPregelCompilerWithBroadcastVariable() {
    try {
        final String broadcastName = "borat variable";
        final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
        environment.setParallelism(DEFAULT_PARALLELISM);

        // assemble the program under test
        {
            final DataSet<Long> broadcastData = environment.fromElements(1L);
            final DataSet<Vertex<Long, Long>> vertices = environment.fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)).map(new Tuple2ToVertexMap<Long, Long>());
            final DataSet<Edge<Long, NullValue>> edgeSet = environment.fromElements(new Tuple2<>(1L, 2L)).map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

                @Override
                public Edge<Long, NullValue> map(Tuple2<Long, Long> pair) {
                    return new Edge<>(pair.f0, pair.f1, NullValue.getInstance());
                }
            });
            final Graph<Long, Long, NullValue> inputGraph = Graph.fromDataSet(vertices, edgeSet, environment);

            final VertexCentricConfiguration config = new VertexCentricConfiguration();
            config.addBroadcastSet(broadcastName, broadcastData);

            final DataSet<Vertex<Long, Long>> components = inputGraph.runVertexCentricIteration(new CCCompute(), null, 100, config).getVertices();
            components.output(new DiscardingOutputFormat<Vertex<Long, Long>>());
        }

        final Plan program = environment.createProgramPlan("Pregel Connected Components");
        final OptimizedPlan optimized = compileNoStats(program);

        // sink
        final SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
        assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
        assertEquals(DEFAULT_PARALLELISM, sinkNode.getParallelism());

        // iteration
        final WorksetIterationPlanNode iterationNode = (WorksetIterationPlanNode) sinkNode.getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, iterationNode.getParallelism());

        // solution set delta and the flat map feeding it
        final PlanNode deltaNode = iterationNode.getSolutionSetDeltaPlanNode();
        assertTrue(deltaNode instanceof SingleInputPlanNode);
        final SingleInputPlanNode deltaSingle = (SingleInputPlanNode) deltaNode;
        final SingleInputPlanNode flatMapNode = (SingleInputPlanNode) deltaSingle.getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, flatMapNode.getParallelism());
        assertEquals(ShipStrategyType.FORWARD, flatMapNode.getInput().getShipStrategy());

        // computation co-group
        final DualInputPlanNode coGroupNode = (DualInputPlanNode) flatMapNode.getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, coGroupNode.getParallelism());
        assertEquals(ShipStrategyType.FORWARD, coGroupNode.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_HASH, coGroupNode.getInput2().getShipStrategy());
        assertTrue(coGroupNode.getInput2().getTempMode().isCached());
        assertEquals(new FieldList(0), coGroupNode.getInput2().getShipStrategyKeys());

        // the initial partitioning must have been pushed out of the loop
        assertEquals(ShipStrategyType.PARTITION_HASH, iterationNode.getInput1().getShipStrategy());
        assertEquals(new FieldList(0), iterationNode.getInput1().getShipStrategyKeys());
    } catch (Exception failure) {
        System.err.println(failure.getMessage());
        failure.printStackTrace();
        fail(failure.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple2ToVertexMap(org.apache.flink.graph.utils.Tuple2ToVertexMap) DataSet(org.apache.flink.api.java.DataSet) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) MapFunction(org.apache.flink.api.common.functions.MapFunction) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) NullValue(org.apache.flink.types.NullValue) Graph(org.apache.flink.graph.Graph) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Edge(org.apache.flink.graph.Edge) Test(org.junit.Test)

Example 9 with DualInputPlanNode

use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.

the class SpargelCompilerTest method testSpargelCompilerWithBroadcastVariable.

/**
 * Compiles a scatter-gather iteration that has the same broadcast set registered for both the
 * scatter and the gather function, and verifies the optimized plan: parallelism and ship
 * strategy of the sink, the iteration node, the solution set join, the edge join with its
 * cached, hash-partitioned first input, and the hash partitioning on both inputs of the
 * iteration.
 */
@SuppressWarnings("serial")
@Test
public void testSpargelCompilerWithBroadcastVariable() {
    try {
        final String BC_VAR_NAME = "borat variable";
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);
        // compose test program
        {
            DataSet<Long> bcVar = env.fromElements(1L);
            DataSet<Vertex<Long, Long>> initialVertices = env.fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)).map(new Tuple2ToVertexMap<Long, Long>());
            DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L)).map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

                public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) {
                    return new Edge<>(edge.f0, edge.f1, NullValue.getInstance());
                }
            });
            Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);
            ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();
            parameters.addBroadcastSetForScatterFunction(BC_VAR_NAME, bcVar);
            parameters.addBroadcastSetForGatherFunction(BC_VAR_NAME, bcVar);
            // Hand the configuration to the iteration; without it the broadcast sets registered
            // above never reach the plan and the test does not cover the broadcast variable case.
            DataSet<Vertex<Long, Long>> result = graph.runScatterGatherIteration(new ConnectedComponents.CCMessenger<Long, Long>(BasicTypeInfo.LONG_TYPE_INFO), new ConnectedComponents.CCUpdater<Long, Long>(), 100, parameters).getVertices();
            result.output(new DiscardingOutputFormat<Vertex<Long, Long>>());
        }
        Plan p = env.createProgramPlan("Spargel Connected Components");
        OptimizedPlan op = compileNoStats(p);
        // check the sink
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
        assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());
        // check the iteration
        WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());
        // check the solution set join and the delta
        PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
        // this is only true if the update functions preserves the partitioning
        assertTrue(ssDelta instanceof DualInputPlanNode);
        DualInputPlanNode ssJoin = (DualInputPlanNode) ssDelta;
        assertEquals(DEFAULT_PARALLELISM, ssJoin.getParallelism());
        assertEquals(ShipStrategyType.PARTITION_HASH, ssJoin.getInput1().getShipStrategy());
        assertEquals(new FieldList(0), ssJoin.getInput1().getShipStrategyKeys());
        // check the workset set join
        DualInputPlanNode edgeJoin = (DualInputPlanNode) ssJoin.getInput1().getSource();
        assertEquals(DEFAULT_PARALLELISM, edgeJoin.getParallelism());
        assertEquals(ShipStrategyType.PARTITION_HASH, edgeJoin.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, edgeJoin.getInput2().getShipStrategy());
        assertTrue(edgeJoin.getInput1().getTempMode().isCached());
        assertEquals(new FieldList(0), edgeJoin.getInput1().getShipStrategyKeys());
        // check that the initial partitioning is pushed out of the loop
        assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput2().getShipStrategy());
        assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys());
        assertEquals(new FieldList(0), iteration.getInput2().getShipStrategyKeys());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple2ToVertexMap(org.apache.flink.graph.utils.Tuple2ToVertexMap) DataSet(org.apache.flink.api.java.DataSet) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) MapFunction(org.apache.flink.api.common.functions.MapFunction) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) NullValue(org.apache.flink.types.NullValue) Graph(org.apache.flink.graph.Graph) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) ConnectedComponents(org.apache.flink.graph.library.ConnectedComponents) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Edge(org.apache.flink.graph.Edge) Test(org.junit.Test)

Example 10 with DualInputPlanNode

use of org.apache.flink.optimizer.plan.DualInputPlanNode in project flink by apache.

the class GSACompilerTest method testGSACompiler.

/**
 * Compiles a gather-sum-apply iteration and verifies the optimized plan: ship strategy,
 * parallelism and hash partitioning of the sink, the iteration node, the solution set join,
 * and the edge join reached through the sum reducer and gather mapper, including its cached,
 * hash-partitioned second input.
 */
@Test
public void testGSACompiler() {
    try {
        final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
        environment.setParallelism(DEFAULT_PARALLELISM);

        // assemble the program under test
        {
            final DataSet<Edge<Long, NullValue>> edgeSet = environment.fromElements(new Tuple3<>(1L, 2L, NullValue.getInstance())).map(new Tuple3ToEdgeMap<Long, NullValue>());
            final Graph<Long, Long, NullValue> inputGraph = Graph.fromDataSet(edgeSet, new InitVertices(), environment);
            final DataSet<Vertex<Long, Long>> components = inputGraph.runGatherSumApplyIteration(new GatherNeighborIds(), new SelectMinId(), new UpdateComponentId(), 100).getVertices();
            components.output(new DiscardingOutputFormat<Vertex<Long, Long>>());
        }

        final Plan program = environment.createProgramPlan("GSA Connected Components");
        final OptimizedPlan optimized = compileNoStats(program);

        // sink
        final SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
        assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
        assertEquals(DEFAULT_PARALLELISM, sinkNode.getParallelism());
        assertEquals(PartitioningProperty.HASH_PARTITIONED, sinkNode.getGlobalProperties().getPartitioning());

        // iteration
        final WorksetIterationPlanNode iterationNode = (WorksetIterationPlanNode) sinkNode.getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, iterationNode.getParallelism());

        // solution set join and delta
        final PlanNode deltaNode = iterationNode.getSolutionSetDeltaPlanNode();
        // holds only if the update function preserves the partitioning
        assertTrue(deltaNode instanceof DualInputPlanNode);
        final DualInputPlanNode solutionSetJoin = (DualInputPlanNode) deltaNode;
        assertEquals(DEFAULT_PARALLELISM, solutionSetJoin.getParallelism());
        assertEquals(ShipStrategyType.PARTITION_HASH, solutionSetJoin.getInput1().getShipStrategy());
        assertEquals(new FieldList(0), solutionSetJoin.getInput1().getShipStrategyKeys());

        // walk back from the solution set join to the workset/edge join
        final SingleInputPlanNode sumNode = (SingleInputPlanNode) solutionSetJoin.getInput1().getSource();
        final SingleInputPlanNode gatherNode = (SingleInputPlanNode) sumNode.getInput().getSource();
        final DualInputPlanNode worksetEdgeJoin = (DualInputPlanNode) gatherNode.getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, worksetEdgeJoin.getParallelism());
        // first input: the workset
        assertEquals(ShipStrategyType.FORWARD, worksetEdgeJoin.getInput1().getShipStrategy());
        // second input: the edges
        assertEquals(ShipStrategyType.PARTITION_HASH, worksetEdgeJoin.getInput2().getShipStrategy());
        assertTrue(worksetEdgeJoin.getInput2().getTempMode().isCached());
        assertEquals(new FieldList(0), worksetEdgeJoin.getInput2().getShipStrategyKeys());
    } catch (Exception failure) {
        System.err.println(failure.getMessage());
        failure.printStackTrace();
        fail(failure.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) DataSet(org.apache.flink.api.java.DataSet) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) NullValue(org.apache.flink.types.NullValue) Graph(org.apache.flink.graph.Graph) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple3ToEdgeMap(org.apache.flink.graph.utils.Tuple3ToEdgeMap) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Aggregations

DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode) 96; Test (org.junit.Test) 86; OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan) 81; Plan (org.apache.flink.api.common.Plan) 76; SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode) 67; ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment) 65; Tuple3 (org.apache.flink.api.java.tuple.Tuple3) 36; Tuple2 (org.apache.flink.api.java.tuple.Tuple2) 31; SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode) 27; JobGraphGenerator (org.apache.flink.optimizer.plantranslate.JobGraphGenerator) 19; Channel (org.apache.flink.optimizer.plan.Channel) 14; WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode) 13; FieldList (org.apache.flink.api.common.operators.util.FieldList) 12; InvalidProgramException (org.apache.flink.api.common.InvalidProgramException) 11; DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat) 11; PlanNode (org.apache.flink.optimizer.plan.PlanNode) 11; Tuple1 (org.apache.flink.api.java.tuple.Tuple1) 10; SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode) 10; ShipStrategyType (org.apache.flink.runtime.operators.shipping.ShipStrategyType) 10; ReplicatingInputFormat (org.apache.flink.api.common.io.ReplicatingInputFormat) 8