Search in sources :

Example 21 with JobGraphGenerator

Use of org.apache.flink.optimizer.plantranslate.JobGraphGenerator in the Apache Flink project.

From class CachedMatchStrategyCompilerTest, method testCorrectChoosing:

/**
 * Joins a big left input with a small right input inside an iteration, where the small
 * input lies on the static path.
 *
 * <p>The expected plan uses HYBRIDHASH_BUILD_SECOND_CACHED for the join (the small side is
 * hashed and cached) with {@code TempMode.NONE} on both join inputs. The optimized plan is
 * then handed to the {@link JobGraphGenerator} so that a translation failure also fails
 * the test.
 */
@Test
public void testCorrectChoosing() {
    try {
        Plan testPlan = getTestPlanRightStatic("");

        // Attach statistics to the two named sources; any other source is left untouched.
        SourceCollectorVisitor collector = new SourceCollectorVisitor();
        testPlan.accept(collector);
        for (GenericDataSourceBase<?, ?> source : collector.getSources()) {
            String sourceName = source.getName();
            if (sourceName.equals("bigFile")) {
                this.setSourceStatistics(source, 10000000, 1000);
            } else if (sourceName.equals("smallFile")) {
                this.setSourceStatistics(source, 100, 100);
            }
        }

        OptimizedPlan optimized = compileNoStats(testPlan);
        OptimizerPlanNodeResolver nodeResolver = getOptimizerPlanNodeResolver(optimized);
        DualInputPlanNode joinNode = nodeResolver.getNode("DummyJoiner");

        // The join must build a cached hash table on its second input.
        DriverStrategy chosenStrategy = joinNode.getDriverStrategy();
        assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND_CACHED, chosenStrategy);

        // Neither join input may carry a temp mode.
        TempMode firstInputTempMode = joinNode.getInput1().getTempMode();
        assertEquals(TempMode.NONE, firstInputTempMode);
        TempMode secondInputTempMode = joinNode.getInput2().getTempMode();
        assertEquals(TempMode.NONE, secondInputTempMode);

        // Translating the plan must not throw.
        JobGraphGenerator generator = new JobGraphGenerator();
        generator.compileJobGraph(optimized);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test errored: " + e.getMessage());
    }
}
Also used : DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)

Example 22 with JobGraphGenerator

Use of org.apache.flink.optimizer.plantranslate.JobGraphGenerator in the Apache Flink project.

From class BranchingPlansCompilerTest, method testBranchingWithMultipleDataSinks:

/**
 * Compiles a branching program with three sinks and translates the optimized plan into a
 * job graph; the test fails if any step throws an exception.
 *
 * <pre>
 *              (SINK A)
 *                  |    (SINK B)    (SINK C)
 *                CROSS    /          /
 *               /     \   |  +------+
 *              /       \  | /
 *          REDUCE      MATCH2
 *             |    +---/    \
 *              \  /          |
 *               MAP          |
 *                |           |
 *             COGROUP      MATCH1
 *             /     \     /     \
 *        (SRC A)    (SRC B)    (SRC C)
 * </pre>
 */
@Test
public void testBranchingWithMultipleDataSinks() {
    try {
        ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
        environment.setParallelism(DEFAULT_PARALLELISM);

        // SRC A, SRC B, SRC C
        DataSet<Tuple2<Long, Long>> srcA =
                environment.generateSequence(1, 10000000).map(new Duplicator<Long>());
        DataSet<Tuple2<Long, Long>> srcB =
                environment.generateSequence(1, 10000000).map(new Duplicator<Long>());
        DataSet<Tuple2<Long, Long>> srcC =
                environment.generateSequence(1, 10000000).map(new Duplicator<Long>());

        // COGROUP (with an empty function body) followed by MAP
        CoGroupFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> noOpCoGroup =
                new CoGroupFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {

                    @Override
                    public void coGroup(Iterable<Tuple2<Long, Long>> first, Iterable<Tuple2<Long, Long>> second, Collector<Tuple2<Long, Long>> out) {
                    }
                };
        DataSet<Tuple2<Long, Long>> coGroupedAndMapped = srcA
                .coGroup(srcB)
                .where(0)
                .equalTo(1)
                .with(noOpCoGroup)
                .map(new IdentityMapper<Tuple2<Long, Long>>());

        // MATCH1 and MATCH2
        DataSet<Tuple2<Long, Long>> match1 = srcB
                .join(srcC)
                .where(0)
                .equalTo(1)
                .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());
        DataSet<Tuple2<Long, Long>> match2 = coGroupedAndMapped
                .join(match1)
                .where(1)
                .equalTo(1)
                .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        // REDUCE
        DataSet<Tuple2<Long, Long>> topPerGroup = coGroupedAndMapped
                .groupBy(1)
                .reduceGroup(new Top1GroupReducer<Tuple2<Long, Long>>());

        // SINK A consumes the CROSS; SINK B and SINK C both consume MATCH2.
        topPerGroup
                .cross(match2)
                .output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>());
        match2.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
        match2.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan programPlan = environment.createProgramPlan();
        OptimizedPlan optimized = compileNoStats(programPlan);

        JobGraphGenerator generator = new JobGraphGenerator();
        generator.compileJobGraph(optimized);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DummyCoGroupFunction(org.apache.flink.optimizer.testfunctions.DummyCoGroupFunction) CoGroupFunction(org.apache.flink.api.common.functions.CoGroupFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) Collector(org.apache.flink.util.Collector) Test(org.junit.Test)

Example 23 with JobGraphGenerator

Use of org.apache.flink.optimizer.plantranslate.JobGraphGenerator in the Apache Flink project.

From class BranchingPlansCompilerTest, method testBranchingWithMultipleDataSinks2:

/**
 * Compiles a program in which one mapper feeds two further mappers and three sinks, then
 * checks that the optimized plan has exactly three sinks and that each of them refers to
 * a distinct sink of the original program.
 *
 * <pre>
 *                (SRC A)
 *                   |
 *                (MAP A)
 *             /         \
 *          (MAP B)      (MAP C)
 *           /           /     \
 *        (SINK A)    (SINK B)  (SINK C)
 * </pre>
 */
@SuppressWarnings("unchecked")
@Test
public void testBranchingWithMultipleDataSinks2() {
    try {
        ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
        environment.setParallelism(DEFAULT_PARALLELISM);

        DataSet<Long> input = environment.generateSequence(1, 10000);
        DataSet<Long> mapA = input.map(new IdentityMapper<Long>());
        DataSet<Long> mapB = mapA.map(new IdentityMapper<Long>());
        DataSet<Long> mapC = mapA.map(new IdentityMapper<Long>());

        // One sink on MAP B, two sinks on MAP C.
        mapB.output(new DiscardingOutputFormat<Long>());
        mapC.output(new DiscardingOutputFormat<Long>());
        mapC.output(new DiscardingOutputFormat<Long>());

        Plan programPlan = environment.createProgramPlan();
        // Program sinks that still have to be matched by a sink of the optimized plan.
        Set<Operator<?>> unmatchedSinks = new HashSet<Operator<?>>(programPlan.getDataSinks());

        OptimizedPlan optimized = compileNoStats(programPlan);

        // ---------- check the optimizer plan ----------
        int optimizedSinkCount = optimized.getDataSinks().size();
        assertEquals("Wrong number of data sinks.", 3, optimizedSinkCount);

        // Each optimized sink must remove exactly one program sink from the set.
        for (SinkPlanNode sinkNode : optimized.getDataSinks()) {
            boolean removed = unmatchedSinks.remove(sinkNode.getProgramOperator());
            assertTrue(removed);
        }
        boolean allSinksMatched = unmatchedSinks.isEmpty();
        assertTrue(allSinksMatched);

        JobGraphGenerator generator = new JobGraphGenerator();
        generator.compileJobGraph(optimized);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : Operator(org.apache.flink.api.common.operators.Operator) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 24 with JobGraphGenerator

Use of org.apache.flink.optimizer.plantranslate.JobGraphGenerator in the Apache Flink project.

From class BranchingPlansCompilerTest, method testCostComputationWithMultipleDataSinks:

/**
 * Builds a program in which two mappers over the same source each feed several sinks,
 * then compiles it and translates the optimized plan into a job graph. The test fails if
 * any of these steps throws an exception.
 */
@Test
public void testCostComputationWithMultipleDataSinks() {
    // Number of sinks attached to each of the two mappers.
    final int sinksPerMapper = 5;
    try {
        ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
        environment.setParallelism(DEFAULT_PARALLELISM);

        DataSet<Long> input = environment.generateSequence(1, 10000);
        DataSet<Long> firstMapper = input.map(new IdentityMapper<Long>());
        DataSet<Long> secondMapper = input.map(new IdentityMapper<Long>());

        // Sinks are attached alternately to the first and the second mapper.
        for (int i = 0; i < sinksPerMapper; i++) {
            firstMapper.output(new DiscardingOutputFormat<Long>());
            secondMapper.output(new DiscardingOutputFormat<Long>());
        }

        Plan programPlan = environment.createProgramPlan("Plans With Multiple Data Sinks");
        OptimizedPlan optimized = compileNoStats(programPlan);

        JobGraphGenerator generator = new JobGraphGenerator();
        generator.compileJobGraph(optimized);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) JoinHint(org.apache.flink.api.common.operators.base.JoinOperatorBase.JoinHint) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)

Example 25 with JobGraphGenerator

Use of org.apache.flink.optimizer.plantranslate.JobGraphGenerator in the Apache Flink project.

From class BranchingPlansCompilerTest, method testBranchingUnion:

/**
 * Builds a program in which the result of one join branches into several map and reduce
 * operators whose outputs are unioned and joined again with one of the branches. The
 * program is compiled and translated into a job graph; the test fails if any step throws
 * an exception.
 */
@Test
public void testBranchingUnion() {
    try {
        // construct the plan
        ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
        environment.setParallelism(DEFAULT_PARALLELISM);

        DataSet<Long> left = environment.generateSequence(0, 1);
        DataSet<Long> right = environment.generateSequence(0, 1);

        DataSet<Long> firstJoin = left
                .join(right)
                .where("*")
                .equalTo("*")
                .with(new IdentityJoiner<Long>())
                .name("Join 1");

        // Branches hanging off the first join.
        DataSet<Long> mapOne = firstJoin.map(new IdentityMapper<Long>()).name("Map 1");
        DataSet<Long> reduceOne = mapOne
                .groupBy("*")
                .reduceGroup(new IdentityGroupReducer<Long>())
                .name("Reduce 1");
        DataSet<Long> reduceTwo = firstJoin
                .groupBy("*")
                .reduceGroup(new IdentityGroupReducer<Long>())
                .name("Reduce 2");
        DataSet<Long> mapTwo = firstJoin.map(new IdentityMapper<Long>()).name("Map 2");
        DataSet<Long> mapThree = mapTwo.map(new IdentityMapper<Long>()).name("Map 3");

        // Union four branches, then join the union with "Map 2" once more.
        DataSet<Long> unioned = reduceOne.union(reduceTwo).union(mapTwo).union(mapThree);
        DataSet<Long> secondJoin = unioned
                .join(mapTwo, JoinHint.REPARTITION_SORT_MERGE)
                .where("*")
                .equalTo("*")
                .with(new IdentityJoiner<Long>())
                .name("Join 2");
        secondJoin.output(new DiscardingOutputFormat<Long>());

        Plan programPlan = environment.createProgramPlan();
        OptimizedPlan optimized = compileNoStats(programPlan);

        // Compile plan to verify that no error is thrown
        JobGraphGenerator generator = new JobGraphGenerator();
        generator.compileJobGraph(optimized);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) IdentityMapper(org.apache.flink.optimizer.testfunctions.IdentityMapper) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) IdentityGroupReducer(org.apache.flink.optimizer.testfunctions.IdentityGroupReducer) IdentityJoiner(org.apache.flink.optimizer.testfunctions.IdentityJoiner) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)

Aggregations

OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)55 JobGraphGenerator (org.apache.flink.optimizer.plantranslate.JobGraphGenerator)55 Test (org.junit.Test)49 Plan (org.apache.flink.api.common.Plan)47 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)35 DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode)19 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)17 SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)17 SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)11 Channel (org.apache.flink.optimizer.plan.Channel)9 WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode)8 BulkIterationPlanNode (org.apache.flink.optimizer.plan.BulkIterationPlanNode)6 NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode)6 IdentityMapper (org.apache.flink.optimizer.testfunctions.IdentityMapper)6 IdentityGroupReducer (org.apache.flink.optimizer.testfunctions.IdentityGroupReducer)5 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)5 DataStatistics (org.apache.flink.optimizer.DataStatistics)4 SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode)4 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)3 FieldList (org.apache.flink.api.common.operators.util.FieldList)3