Search in sources :

Example 81 with OptimizedPlan

use of org.apache.flink.optimizer.plan.OptimizedPlan in project flink by apache.

the class DistinctCompilationTest method testDistinctWithCombineHint.

@Test
public void testDistinctWithCombineHint() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class).name("source").setParallelism(6);
        data.distinct().setCombineHint(CombineHint.HASH).name("reducer").output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
        // check wiring
        assertEquals(sourceNode, combineNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());
        // check that both reduce and combiner have the same strategy
        assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.HASHED_PARTIAL_REDUCE, combineNode.getDriverStrategy());
        // check the keys
        assertEquals(new FieldList(0, 1), reduceNode.getKeys(0));
        assertEquals(new FieldList(0, 1), combineNode.getKeys(0));
        assertEquals(new FieldList(0, 1), reduceNode.getInput().getLocalStrategyKeys());
        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(6, combineNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 82 with OptimizedPlan

use of org.apache.flink.optimizer.plan.OptimizedPlan in project flink by apache.

the class BranchingPlansCompilerTest method testBranchingWithMultipleDataSinks.

/**
	 * 
	 * <pre>

	 *              (SINK A)
	 *                  |    (SINK B)    (SINK C)
	 *                CROSS    /          /
	 *               /     \   |  +------+
	 *              /       \  | /
	 *          REDUCE      MATCH2
	 *             |    +---/    \
	 *              \  /          |
	 *               MAP          |
	 *                |           |
	 *             COGROUP      MATCH1
	 *             /     \     /     \
	 *        (SRC A)    (SRC B)    (SRC C)
	 * </pre>
	 */
@Test
public void testBranchingWithMultipleDataSinks() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);
        DataSet<Tuple2<Long, Long>> sourceA = env.generateSequence(1, 10000000).map(new Duplicator<Long>());
        DataSet<Tuple2<Long, Long>> sourceB = env.generateSequence(1, 10000000).map(new Duplicator<Long>());
        DataSet<Tuple2<Long, Long>> sourceC = env.generateSequence(1, 10000000).map(new Duplicator<Long>());
        DataSet<Tuple2<Long, Long>> mapped = sourceA.coGroup(sourceB).where(0).equalTo(1).with(new CoGroupFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {

            @Override
            public void coGroup(Iterable<Tuple2<Long, Long>> first, Iterable<Tuple2<Long, Long>> second, Collector<Tuple2<Long, Long>> out) {
            }
        }).map(new IdentityMapper<Tuple2<Long, Long>>());
        DataSet<Tuple2<Long, Long>> joined = sourceB.join(sourceC).where(0).equalTo(1).with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());
        DataSet<Tuple2<Long, Long>> joined2 = mapped.join(joined).where(1).equalTo(1).with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());
        DataSet<Tuple2<Long, Long>> reduced = mapped.groupBy(1).reduceGroup(new Top1GroupReducer<Tuple2<Long, Long>>());
        reduced.cross(joined2).output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>());
        joined2.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
        joined2.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
        Plan plan = env.createProgramPlan();
        OptimizedPlan oPlan = compileNoStats(plan);
        new JobGraphGenerator().compileJobGraph(oPlan);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DummyCoGroupFunction(org.apache.flink.optimizer.testfunctions.DummyCoGroupFunction) CoGroupFunction(org.apache.flink.api.common.functions.CoGroupFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) Collector(org.apache.flink.util.Collector) Test(org.junit.Test)

Example 83 with OptimizedPlan

use of org.apache.flink.optimizer.plan.OptimizedPlan in project flink by apache.

the class BranchingPlansCompilerTest method testBranchingWithMultipleDataSinks2.

/**
	 * 
	 * <pre>
	 *                (SRC A)  
	 *                   |
	 *                (MAP A)
	 *             /         \   
	 *          (MAP B)      (MAP C)
	 *           /           /     \
	 *        (SINK A)    (SINK B)  (SINK C)
	 * </pre>
	 */
@SuppressWarnings("unchecked")
@Test
public void testBranchingWithMultipleDataSinks2() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);
        DataSet<Long> source = env.generateSequence(1, 10000);
        DataSet<Long> mappedA = source.map(new IdentityMapper<Long>());
        DataSet<Long> mappedB = mappedA.map(new IdentityMapper<Long>());
        DataSet<Long> mappedC = mappedA.map(new IdentityMapper<Long>());
        mappedB.output(new DiscardingOutputFormat<Long>());
        mappedC.output(new DiscardingOutputFormat<Long>());
        mappedC.output(new DiscardingOutputFormat<Long>());
        Plan plan = env.createProgramPlan();
        Set<Operator<?>> sinks = new HashSet<Operator<?>>(plan.getDataSinks());
        OptimizedPlan oPlan = compileNoStats(plan);
        // ---------- check the optimizer plan ----------
        // number of sinks
        assertEquals("Wrong number of data sinks.", 3, oPlan.getDataSinks().size());
        // remove matching sinks to check relation
        for (SinkPlanNode sink : oPlan.getDataSinks()) {
            assertTrue(sinks.remove(sink.getProgramOperator()));
        }
        assertTrue(sinks.isEmpty());
        new JobGraphGenerator().compileJobGraph(oPlan);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : Operator(org.apache.flink.api.common.operators.Operator) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 84 with OptimizedPlan

use of org.apache.flink.optimizer.plan.OptimizedPlan in project flink by apache.

the class BranchingPlansCompilerTest method testCostComputationWithMultipleDataSinks.

@Test
public void testCostComputationWithMultipleDataSinks() {
    final int SINKS = 5;
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);
        DataSet<Long> source = env.generateSequence(1, 10000);
        DataSet<Long> mappedA = source.map(new IdentityMapper<Long>());
        DataSet<Long> mappedC = source.map(new IdentityMapper<Long>());
        for (int sink = 0; sink < SINKS; sink++) {
            mappedA.output(new DiscardingOutputFormat<Long>());
            mappedC.output(new DiscardingOutputFormat<Long>());
        }
        Plan plan = env.createProgramPlan("Plans With Multiple Data Sinks");
        OptimizedPlan oPlan = compileNoStats(plan);
        new JobGraphGenerator().compileJobGraph(oPlan);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) JoinHint(org.apache.flink.api.common.operators.base.JoinOperatorBase.JoinHint) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)

Example 85 with OptimizedPlan

use of org.apache.flink.optimizer.plan.OptimizedPlan in project flink by apache.

the class BranchingPlansCompilerTest method testBranchingUnion.

@Test
public void testBranchingUnion() {
    try {
        // construct the plan
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);
        DataSet<Long> source1 = env.generateSequence(0, 1);
        DataSet<Long> source2 = env.generateSequence(0, 1);
        DataSet<Long> join1 = source1.join(source2).where("*").equalTo("*").with(new IdentityJoiner<Long>()).name("Join 1");
        DataSet<Long> map1 = join1.map(new IdentityMapper<Long>()).name("Map 1");
        DataSet<Long> reduce1 = map1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 1");
        DataSet<Long> reduce2 = join1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 2");
        DataSet<Long> map2 = join1.map(new IdentityMapper<Long>()).name("Map 2");
        DataSet<Long> map3 = map2.map(new IdentityMapper<Long>()).name("Map 3");
        DataSet<Long> join2 = reduce1.union(reduce2).union(map2).union(map3).join(map2, JoinHint.REPARTITION_SORT_MERGE).where("*").equalTo("*").with(new IdentityJoiner<Long>()).name("Join 2");
        join2.output(new DiscardingOutputFormat<Long>());
        Plan plan = env.createProgramPlan();
        OptimizedPlan oPlan = compileNoStats(plan);
        JobGraphGenerator jobGen = new JobGraphGenerator();
        //Compile plan to verify that no error is thrown
        jobGen.compileJobGraph(oPlan);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) IdentityMapper(org.apache.flink.optimizer.testfunctions.IdentityMapper) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) IdentityGroupReducer(org.apache.flink.optimizer.testfunctions.IdentityGroupReducer) IdentityJoiner(org.apache.flink.optimizer.testfunctions.IdentityJoiner) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)

Aggregations

OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)221 Test (org.junit.Test)197 Plan (org.apache.flink.api.common.Plan)192 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)183 SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)146 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)91 SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)83 DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode)82 JobGraphGenerator (org.apache.flink.optimizer.plantranslate.JobGraphGenerator)55 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)54 SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode)48 DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat)33 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)27 FieldList (org.apache.flink.api.common.operators.util.FieldList)27 Channel (org.apache.flink.optimizer.plan.Channel)26 FieldSet (org.apache.flink.api.common.operators.util.FieldSet)25 GlobalProperties (org.apache.flink.optimizer.dataproperties.GlobalProperties)25 LocalProperties (org.apache.flink.optimizer.dataproperties.LocalProperties)25 IdentityMapper (org.apache.flink.optimizer.testfunctions.IdentityMapper)20 WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode)16