Example 11 with JobGraphGenerator

Use of org.apache.flink.optimizer.plantranslate.JobGraphGenerator in project flink by apache.

From class IterationsCompilerTest, method testIterationPushingWorkOut.

@Test
public void testIterationPushingWorkOut() throws Exception {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Tuple2<Long, Long>> input1 = env.readCsvFile("/some/file/path").types(Long.class).map(new DuplicateValue());
        DataSet<Tuple2<Long, Long>> input2 = env.readCsvFile("/some/file/path").types(Long.class, Long.class);
        // we do two join operations with input1, which is the partial solution;
        // it is cheaper to push the partitioning out so that the feedback channel
        // and the initial input do the partitioning
        doBulkIteration(input1, input2).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        assertEquals(1, op.getDataSinks().size());
        assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof BulkIterationPlanNode);
        BulkIterationPlanNode bipn = (BulkIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
        // check that work has been pushed out
        for (Channel c : bipn.getPartialSolutionPlanNode().getOutgoingChannels()) {
            assertEquals(ShipStrategyType.FORWARD, c.getShipStrategy());
        }
        // the end of the step function has to produce the necessary properties
        for (Channel c : bipn.getRootOfStepFunction().getInputs()) {
            assertEquals(ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
        }
        assertEquals(ShipStrategyType.PARTITION_HASH, bipn.getInput().getShipStrategy());
        // verify that the optimized plan can be translated into a job graph
        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) Test(org.junit.Test)
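
The doBulkIteration helper used above is defined elsewhere in IterationsCompilerTest and is not shown on this page. The following is a minimal sketch of what such a helper can look like; the 20-superstep cap and the use of DummyFlatJoinFunction as the step function are illustrative assumptions, not the test's actual implementation.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.operators.IterativeDataSet;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.optimizer.testfunctions.DummyFlatJoinFunction;

public static DataSet<Tuple2<Long, Long>> doBulkIteration(
        DataSet<Tuple2<Long, Long>> vertices, DataSet<Tuple2<Long, Long>> edges) {
    // open a bulk iteration over the vertices (the partial solution), capped at 20 supersteps
    IterativeDataSet<Tuple2<Long, Long>> iteration = vertices.iterate(20);
    // the step function joins the partial solution with the edges on the key field;
    // this join is what requires the hash partitioning checked in the test above
    DataSet<Tuple2<Long, Long>> changes = iteration
            .join(edges).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());
    // close the iteration; the step function's result feeds back as the next partial solution
    return iteration.closeWith(changes);
}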

Example 12 with JobGraphGenerator

Use of org.apache.flink.optimizer.plantranslate.JobGraphGenerator in project flink by apache.

From class IterationsCompilerTest, method testTwoIterationsDirectlyChained.

@Test
public void testTwoIterationsDirectlyChained() throws Exception {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Tuple2<Long, Long>> verticesWithInitialId = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
        DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
        DataSet<Tuple2<Long, Long>> bulkResult = doBulkIteration(verticesWithInitialId, edges);
        DataSet<Tuple2<Long, Long>> depResult = doDeltaIteration(bulkResult, edges);
        depResult.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        assertEquals(1, op.getDataSinks().size());
        assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof WorksetIterationPlanNode);
        WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
        BulkIterationPlanNode bipn = (BulkIterationPlanNode) wipn.getInput1().getSource();
        // the hash partitioning has been pushed out of the delta iteration into the bulk iteration
        assertEquals(ShipStrategyType.FORWARD, wipn.getInput1().getShipStrategy());
        // since the work has been pushed out of the bulk iteration, it has to guarantee the hash partitioning
        for (Channel c : bipn.getRootOfStepFunction().getInputs()) {
            assertEquals(ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
        }
        assertEquals(DataExchangeMode.BATCH, wipn.getInput1().getDataExchangeMode());
        assertEquals(DataExchangeMode.BATCH, wipn.getInput2().getDataExchangeMode());
        assertEquals(TempMode.NONE, wipn.getInput1().getTempMode());
        assertEquals(TempMode.NONE, wipn.getInput2().getTempMode());
        // verify that the optimized plan can be translated into a job graph
        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) Test(org.junit.Test)
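
Like doBulkIteration, the doDeltaIteration helper is part of the test class and not shown here. As a rough sketch, a delta iteration helper with this signature could look as follows; the 100-superstep cap, the key field, and the reuse of the delta as the next workset are assumptions for illustration.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.operators.DeltaIteration;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.optimizer.testfunctions.DummyFlatJoinFunction;

public static DataSet<Tuple2<Long, Long>> doDeltaIteration(
        DataSet<Tuple2<Long, Long>> vertices, DataSet<Tuple2<Long, Long>> edges) {
    // the vertices serve as both the initial solution set and the initial workset, keyed on field 0
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
            vertices.iterateDelta(vertices, 100, 0);
    // join the workset with the edges to compute the solution-set delta
    DataSet<Tuple2<Long, Long>> delta = iteration.getWorkset()
            .join(edges).where(0).equalTo(0)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());
    // close the iteration, using the delta both to update the solution set and as the next workset
    return iteration.closeWith(delta, delta);
}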

Example 13 with JobGraphGenerator

Use of org.apache.flink.optimizer.plantranslate.JobGraphGenerator in project flink by apache.

From class IterationsCompilerTest, method testTwoWorksetIterationsDirectlyChained.

@Test
public void testTwoWorksetIterationsDirectlyChained() throws Exception {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Tuple2<Long, Long>> verticesWithInitialId = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
        DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
        DataSet<Tuple2<Long, Long>> firstResult = doDeltaIteration(verticesWithInitialId, edges);
        DataSet<Tuple2<Long, Long>> secondResult = doDeltaIteration(firstResult, edges);
        secondResult.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        assertEquals(1, op.getDataSinks().size());
        assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof WorksetIterationPlanNode);
        WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
        assertEquals(ShipStrategyType.FORWARD, wipn.getInput1().getShipStrategy());
        assertEquals(DataExchangeMode.BATCH, wipn.getInput1().getDataExchangeMode());
        assertEquals(DataExchangeMode.BATCH, wipn.getInput2().getDataExchangeMode());
        assertEquals(TempMode.NONE, wipn.getInput1().getTempMode());
        assertEquals(TempMode.NONE, wipn.getInput2().getTempMode());
        // verify that the optimized plan can be translated into a job graph
        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)
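
Every example ends with new JobGraphGenerator().compileJobGraph(op), which checks that the optimized plan can be translated into a runtime JobGraph without errors. The compileNoStats helper itself is not shown on this page; presumably it runs the optimizer without supplying data statistics, roughly like the following sketch (the exact Optimizer construction is an assumption):

import org.apache.flink.api.common.Plan;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.optimizer.DataStatistics;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.costs.DefaultCostEstimator;
import org.apache.flink.optimizer.plan.OptimizedPlan;
import org.apache.flink.optimizer.plantranslate.JobGraphGenerator;
import org.apache.flink.runtime.jobgraph.JobGraph;

// 'plan' is the Plan obtained from env.createProgramPlan() in the tests above;
// compile it into an OptimizedPlan with empty statistics and default cost estimates
Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), new Configuration());
OptimizedPlan optimizedPlan = optimizer.compile(plan);

// translate the optimized plan into the JobGraph that would be submitted for execution
JobGraph jobGraph = new JobGraphGenerator().compileJobGraph(optimizedPlan);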

Example 14 with JobGraphGenerator

Use of org.apache.flink.optimizer.plantranslate.JobGraphGenerator in project flink by apache.

From class NestedIterationsTest, method testBulkIterationInClosure.

@Test
public void testBulkIterationInClosure() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Long> data1 = env.generateSequence(1, 100);
        DataSet<Long> data2 = env.generateSequence(1, 100);
        IterativeDataSet<Long> firstIteration = data1.iterate(100);
        DataSet<Long> firstResult = firstIteration.closeWith(firstIteration.map(new IdentityMapper<Long>()));
        IterativeDataSet<Long> mainIteration = data2.map(new IdentityMapper<Long>()).iterate(100);
        DataSet<Long> joined = mainIteration.join(firstResult).where(new IdentityKeyExtractor<Long>()).equalTo(new IdentityKeyExtractor<Long>()).with(new DummyFlatJoinFunction<Long>());
        DataSet<Long> mainResult = mainIteration.closeWith(joined);
        mainResult.output(new DiscardingOutputFormat<Long>());
        Plan p = env.createProgramPlan();
        // optimizer should be able to translate this
        OptimizedPlan op = compileNoStats(p);
        // job graph generator should be able to translate this
        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) IdentityKeyExtractor(org.apache.flink.optimizer.testfunctions.IdentityKeyExtractor) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) IdentityMapper(org.apache.flink.optimizer.testfunctions.IdentityMapper) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) Test(org.junit.Test)
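
IdentityMapper, IdentityKeyExtractor, and DummyFlatJoinFunction come from org.apache.flink.optimizer.testfunctions and simply forward their inputs. Plausible implementations are sketched below for readability; the actual classes in the Flink sources may differ in detail.

import org.apache.flink.api.common.functions.FlatJoinFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.util.Collector;

// forwards each element unchanged
public class IdentityMapper<T> implements MapFunction<T, T> {
    @Override
    public T map(T value) {
        return value;
    }
}

// uses the whole record as its own key
public class IdentityKeyExtractor<T> implements KeySelector<T, T> {
    @Override
    public T getKey(T value) {
        return value;
    }
}

// emits the first join partner and discards the second
public class DummyFlatJoinFunction<T> implements FlatJoinFunction<T, T, T> {
    @Override
    public void join(T first, T second, Collector<T> out) {
        out.collect(first);
    }
}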

Example 15 with JobGraphGenerator

Use of org.apache.flink.optimizer.plantranslate.JobGraphGenerator in project flink by apache.

From class ParallelismChangeTest, method checkPropertyHandlingWithTwoInputs.

/**
 * Checks that re-partitioning happens when the inputs of a two-input contract have different parallelisms.
 *
 * Test Plan:
 * <pre>
 * (source) -> reduce -\
 *                      Match -> (sink)
 * (source) -> reduce -/
 * </pre>
 */
@Test
public void checkPropertyHandlingWithTwoInputs() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Long> set1 = env.generateSequence(0, 1).setParallelism(5);
    DataSet<Long> set2 = env.generateSequence(0, 1).setParallelism(7);
    DataSet<Long> reduce1 = set1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).withForwardedFields("*").setParallelism(5);
    DataSet<Long> reduce2 = set2.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).withForwardedFields("*").setParallelism(7);
    reduce1.join(reduce2).where("*").equalTo("*").with(new IdentityJoiner<Long>()).setParallelism(5).output(new DiscardingOutputFormat<Long>()).setParallelism(5);
    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
    JobGraphGenerator jobGen = new JobGraphGenerator();
    // compile the plan to verify that no error is thrown
    jobGen.compileJobGraph(oPlan);
    oPlan.accept(new Visitor<PlanNode>() {

        @Override
        public boolean preVisit(PlanNode visitable) {
            if (visitable instanceof DualInputPlanNode) {
                DualInputPlanNode node = (DualInputPlanNode) visitable;
                Channel c1 = node.getInput1();
                Channel c2 = node.getInput2();
                Assert.assertEquals("Incompatible shipping strategy chosen for match", ShipStrategyType.FORWARD, c1.getShipStrategy());
                Assert.assertEquals("Incompatible shipping strategy chosen for match", ShipStrategyType.PARTITION_HASH, c2.getShipStrategy());
                return false;
            }
            return true;
        }

        @Override
        public void postVisit(PlanNode visitable) {
        // DO NOTHING
        }
    });
}
Also used: ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) IdentityJoiner(org.apache.flink.optimizer.testfunctions.IdentityJoiner) Test(org.junit.Test)
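
IdentityGroupReducer and IdentityJoiner are likewise forwarding test functions from org.apache.flink.optimizer.testfunctions. A minimal sketch of what they can look like (illustrative, not the exact Flink sources):

import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.util.Collector;

// emits every element of the group unchanged
public class IdentityGroupReducer<T> implements GroupReduceFunction<T, T> {
    @Override
    public void reduce(Iterable<T> values, Collector<T> out) {
        for (T value : values) {
            out.collect(value);
        }
    }
}

// returns the first join partner, ignoring the second
public class IdentityJoiner<T> implements JoinFunction<T, T, T> {
    @Override
    public T join(T first, T second) {
        return first;
    }
}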

Aggregations

OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 55 uses
JobGraphGenerator (org.apache.flink.optimizer.plantranslate.JobGraphGenerator): 55 uses
Test (org.junit.Test): 49 uses
Plan (org.apache.flink.api.common.Plan): 47 uses
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 35 uses
DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode): 19 uses
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 17 uses
SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode): 17 uses
SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 11 uses
Channel (org.apache.flink.optimizer.plan.Channel): 9 uses
WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode): 8 uses
BulkIterationPlanNode (org.apache.flink.optimizer.plan.BulkIterationPlanNode): 6 uses
NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode): 6 uses
IdentityMapper (org.apache.flink.optimizer.testfunctions.IdentityMapper): 6 uses
IdentityGroupReducer (org.apache.flink.optimizer.testfunctions.IdentityGroupReducer): 5 uses
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 5 uses
DataStatistics (org.apache.flink.optimizer.DataStatistics): 4 uses
SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode): 4 uses
InvalidProgramException (org.apache.flink.api.common.InvalidProgramException): 3 uses
FieldList (org.apache.flink.api.common.operators.util.FieldList): 3 uses