
Example 31 with JobGraphGenerator

Use of org.apache.flink.optimizer.plantranslate.JobGraphGenerator in project flink by apache.

From the class IterationsCompilerTest, method testSolutionSetDeltaDependsOnBroadcastVariable.

@Test
public void testSolutionSetDeltaDependsOnBroadcastVariable() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, Long>> source = env.generateSequence(1, 1000).map(new DuplicateValueScalar<Long>());
        DataSet<Tuple2<Long, Long>> invariantInput = env.generateSequence(1, 1000).map(new DuplicateValueScalar<Long>());
        // iteration from here
        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = source.iterateDelta(source, 1000, 1);
        DataSet<Tuple2<Long, Long>> result = invariantInput
                .map(new IdentityMapper<Tuple2<Long, Long>>()).withBroadcastSet(iter.getWorkset(), "bc data")
                .join(iter.getSolutionSet()).where(0).equalTo(1)
                .projectFirst(1).projectSecond(1);
        iter.closeWith(result.map(new IdentityMapper<Tuple2<Long, Long>>()), result)
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
        OptimizedPlan p = compileNoStats(env.createProgramPlan());
        // check that the JSON generator accepts this plan
        new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(p);
        // check that the JobGraphGenerator accepts the plan
        new JobGraphGenerator().compileJobGraph(p);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) PlanJSONDumpGenerator(org.apache.flink.optimizer.plandump.PlanJSONDumpGenerator) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)
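
The test above relies on two helper functions, DuplicateValueScalar and IdentityMapper, whose bodies are not shown on this page. Below is a minimal sketch of compatible implementations; the shapes are inferred from how the helpers are used here and may differ slightly from the actual classes in org.apache.flink.optimizer.testfunctions.

// Sketch of the two helper functions referenced above (inferred shapes, not copied from Flink).
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;

public class TestFunctionSketches {

    // maps a scalar value into a Tuple2 holding the value twice
    public static class DuplicateValueScalar<T> implements MapFunction<T, Tuple2<T, T>> {
        @Override
        public Tuple2<T, T> map(T value) {
            return new Tuple2<T, T>(value, value);
        }
    }

    // passes every record through unchanged
    public static class IdentityMapper<T> implements MapFunction<T, T> {
        @Override
        public T map(T value) {
            return value;
        }
    }
}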

Example 32 with JobGraphGenerator

Use of org.apache.flink.optimizer.plantranslate.JobGraphGenerator in project flink by apache.

From the class IterationsCompilerTest, method testIterationNotPushingWorkOut.

@Test
public void testIterationNotPushingWorkOut() throws Exception {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Tuple2<Long, Long>> input1 = env.readCsvFile("/some/file/path").types(Long.class).map(new DuplicateValue());
        DataSet<Tuple2<Long, Long>> input2 = env.readCsvFile("/some/file/path").types(Long.class, Long.class);
        // Use input1 as partial solution. Partial solution is used in a single join operation --> it is cheaper
        // to do the hash partitioning between the partial solution node and the join node
        // instead of pushing the partitioning out
        doSimpleBulkIteration(input1, input2).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        assertEquals(1, op.getDataSinks().size());
        assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof BulkIterationPlanNode);
        BulkIterationPlanNode bipn = (BulkIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
        // check that work has not been pushed out
        for (Channel c : bipn.getPartialSolutionPlanNode().getOutgoingChannels()) {
            assertEquals(ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
        }
        assertEquals(ShipStrategyType.FORWARD, bipn.getInput().getShipStrategy());
        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) Test(org.junit.Test)
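
Every example on this page calls compileNoStats, which the test classes inherit from Flink's CompilerTestBase. Roughly, it runs the optimizer over the program plan without any data statistics. The sketch below shows an equivalent standalone call; it assumes the Optimizer(DataStatistics, CostEstimator, Configuration) constructor and is not the verbatim test-base code.

// Sketch: compile a Plan into an OptimizedPlan without data statistics,
// approximating what CompilerTestBase#compileNoStats provides to these tests.
import org.apache.flink.api.common.Plan;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.optimizer.DataStatistics;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.costs.DefaultCostEstimator;
import org.apache.flink.optimizer.plan.OptimizedPlan;

public final class CompileSketch {

    public static OptimizedPlan compileNoStats(Plan plan) {
        Optimizer optimizer =
                new Optimizer(new DataStatistics(), new DefaultCostEstimator(), new Configuration());
        return optimizer.compile(plan);
    }
}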

Example 33 with JobGraphGenerator

Use of org.apache.flink.optimizer.plantranslate.JobGraphGenerator in project flink by apache.

From the class UnionBetweenDynamicAndStaticPathTest, method testUnionStaticSecond.

@Test
public void testUnionStaticSecond() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Long> input1 = env.generateSequence(1, 10);
        DataSet<Long> input2 = env.generateSequence(1, 10);
        IterativeDataSet<Long> iteration = input1.iterate(10);
        DataSet<Long> iterResult = iteration.closeWith(iteration.union(iteration).union(input2.union(input2)));
        iterResult.output(new DiscardingOutputFormat<Long>());
        iterResult.output(new DiscardingOutputFormat<Long>());
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        assertEquals(2, op.getDataSinks().size());
        BulkIterationPlanNode iterPlan = (BulkIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
        SingleInputPlanNode noopNode = (SingleInputPlanNode) iterPlan.getRootOfStepFunction();
        BinaryUnionPlanNode mixedUnion = (BinaryUnionPlanNode) noopNode.getInput().getSource();
        NAryUnionPlanNode staticUnion = (NAryUnionPlanNode) mixedUnion.getInput1().getSource();
        NAryUnionPlanNode dynamicUnion = (NAryUnionPlanNode) mixedUnion.getInput2().getSource();
        assertTrue(mixedUnion.unionsStaticAndDynamicPath());
        assertFalse(mixedUnion.getInput1().isOnDynamicPath());
        assertTrue(mixedUnion.getInput2().isOnDynamicPath());
        assertTrue(mixedUnion.getInput1().getTempMode().isCached());
        assertEquals(0.5, iterPlan.getRelativeMemoryPerSubTask(), 0.0);
        assertEquals(0.5, mixedUnion.getInput1().getRelativeTempMemory(), 0.0);
        assertEquals(0.0, mixedUnion.getInput2().getRelativeTempMemory(), 0.0);
        for (Channel c : staticUnion.getInputs()) {
            assertFalse(c.isOnDynamicPath());
        }
        for (Channel c : dynamicUnion.getInputs()) {
            assertTrue(c.isOnDynamicPath());
        }
        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) BinaryUnionPlanNode(org.apache.flink.optimizer.plan.BinaryUnionPlanNode) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) Test(org.junit.Test)
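
In these tests compileJobGraph is called only to verify that the translation succeeds; the returned JobGraph is discarded. The sketch below keeps and inspects the result instead. It assumes the JobGraph accessors available in the 1.x runtime (getName, getNumberOfVertices, getVertices); op stands for an OptimizedPlan compiled as in the tests.

// Sketch: translate the optimized plan and print a summary of the resulting job graph.
import org.apache.flink.optimizer.plan.OptimizedPlan;
import org.apache.flink.optimizer.plantranslate.JobGraphGenerator;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobgraph.JobVertex;

public final class JobGraphInspectionSketch {

    public static void inspect(OptimizedPlan op) {
        JobGraph jobGraph = new JobGraphGenerator().compileJobGraph(op);
        System.out.println("job: " + jobGraph.getName()
                + ", vertices: " + jobGraph.getNumberOfVertices());
        for (JobVertex vertex : jobGraph.getVertices()) {
            System.out.println("  " + vertex.getName()
                    + " (parallelism " + vertex.getParallelism() + ")");
        }
    }
}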

Example 34 with JobGraphGenerator

Use of org.apache.flink.optimizer.plantranslate.JobGraphGenerator in project flink by apache.

From the class UnionBetweenDynamicAndStaticPathTest, method testUnionStaticFirst.

@Test
public void testUnionStaticFirst() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Long> input1 = env.generateSequence(1, 10);
        DataSet<Long> input2 = env.generateSequence(1, 10);
        IterativeDataSet<Long> iteration = input1.iterate(10);
        DataSet<Long> result = iteration.closeWith(input2.union(input2).union(iteration.union(iteration)));
        result.output(new DiscardingOutputFormat<Long>());
        result.output(new DiscardingOutputFormat<Long>());
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        assertEquals(2, op.getDataSinks().size());
        BulkIterationPlanNode iterPlan = (BulkIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
        SingleInputPlanNode noopNode = (SingleInputPlanNode) iterPlan.getRootOfStepFunction();
        BinaryUnionPlanNode mixedUnion = (BinaryUnionPlanNode) noopNode.getInput().getSource();
        NAryUnionPlanNode staticUnion = (NAryUnionPlanNode) mixedUnion.getInput1().getSource();
        NAryUnionPlanNode dynamicUnion = (NAryUnionPlanNode) mixedUnion.getInput2().getSource();
        assertTrue(mixedUnion.unionsStaticAndDynamicPath());
        assertFalse(mixedUnion.getInput1().isOnDynamicPath());
        assertTrue(mixedUnion.getInput2().isOnDynamicPath());
        assertTrue(mixedUnion.getInput1().getTempMode().isCached());
        for (Channel c : staticUnion.getInputs()) {
            assertFalse(c.isOnDynamicPath());
        }
        for (Channel c : dynamicUnion.getInputs()) {
            assertTrue(c.isOnDynamicPath());
        }
        assertEquals(0.5, iterPlan.getRelativeMemoryPerSubTask(), 0.0);
        assertEquals(0.5, mixedUnion.getInput1().getRelativeTempMemory(), 0.0);
        assertEquals(0.0, mixedUnion.getInput2().getRelativeTempMemory(), 0.0);
        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) BinaryUnionPlanNode(org.apache.flink.optimizer.plan.BinaryUnionPlanNode) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) Test(org.junit.Test)
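
The two union tests above repeat the same loop over the static and dynamic NAryUnionPlanNode inputs. A hypothetical helper (not part of the Flink tests) could factor out that check; it uses only methods already exercised above.

// Hypothetical helper: asserts that every input channel of the given union node
// is (or is not) on the dynamic path of the iteration.
import static org.junit.Assert.assertEquals;

import org.apache.flink.optimizer.plan.Channel;
import org.apache.flink.optimizer.plan.NAryUnionPlanNode;

public final class UnionPathAssertions {

    public static void assertAllInputsOnDynamicPath(NAryUnionPlanNode union, boolean expectedOnDynamicPath) {
        for (Channel c : union.getInputs()) {
            assertEquals(expectedOnDynamicPath, c.isOnDynamicPath());
        }
    }
}

With this helper, the two loops in each test collapse to assertAllInputsOnDynamicPath(staticUnion, false) and assertAllInputsOnDynamicPath(dynamicUnion, true).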

Example 35 with JobGraphGenerator

Use of org.apache.flink.optimizer.plantranslate.JobGraphGenerator in project flink by apache.

From the class UnionPropertyPropagationTest, method testUnion2.

@Test
public void testUnion2() {
    final int NUM_INPUTS = 4;
    // construct the plan: multiple flat maps, all unioned,
    // and the unioned DataSet is then grouped
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<String> source = env.readTextFile(IN_FILE);
    DataSet<Tuple2<String, Integer>> lastUnion = source.flatMap(new DummyFlatMap());
    for (int i = 1; i < NUM_INPUTS; i++) {
        lastUnion = lastUnion.union(source.flatMap(new DummyFlatMap()));
    }
    DataSet<Tuple2<String, Integer>> result = lastUnion.groupBy(0).aggregate(Aggregations.SUM, 1);
    result.writeAsText(OUT_FILE);
    // return the plan
    Plan plan = env.createProgramPlan("Test union on new java-api");
    OptimizedPlan oPlan = compileNoStats(plan);
    JobGraphGenerator jobGen = new JobGraphGenerator();
    // Compile plan to verify that no error is thrown
    jobGen.compileJobGraph(oPlan);
    oPlan.accept(new Visitor<PlanNode>() {

        @Override
        public boolean preVisit(PlanNode visitable) {
            /*
             * Test the union's output connection: it must feed into the GroupReduce
             * operator and the ship strategy should be FORWARD.
             */
            if (visitable instanceof SingleInputPlanNode && visitable.getProgramOperator() instanceof GroupReduceOperatorBase) {
                final Channel inConn = ((SingleInputPlanNode) visitable).getInput();
                Assert.assertTrue("Union should just forward the Partitioning", inConn.getShipStrategy() == ShipStrategyType.FORWARD);
                Assert.assertTrue("Union Node should be under Group operator", inConn.getSource() instanceof NAryUnionPlanNode);
            }
            /*
             * Test the union's input connections: there must be NUM_INPUTS of them,
             * all coming from FlatMap operators with their own partitioning strategy
             * (hash partitioning in this case).
             */
            if (visitable instanceof NAryUnionPlanNode) {
                int numberInputs = 0;
                for (Iterator<Channel> inputs = visitable.getInputs().iterator(); inputs.hasNext(); numberInputs++) {
                    final Channel inConn = inputs.next();
                    PlanNode inNode = inConn.getSource();
                    Assert.assertTrue("Input of Union should be FlatMapOperators", inNode.getProgramOperator() instanceof FlatMapOperatorBase);
                    Assert.assertTrue("Shipment strategy under union should partition the data", inConn.getShipStrategy() == ShipStrategyType.PARTITION_HASH);
                }
                Assert.assertTrue("NAryUnion should have " + NUM_INPUTS + " inputs", numberInputs == NUM_INPUTS);
                return false;
            }
            return true;
        }

        @Override
        public void postVisit(PlanNode visitable) {
            // DO NOTHING
        }
    });
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FlatMapOperatorBase(org.apache.flink.api.common.operators.base.FlatMapOperatorBase) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) Iterator(java.util.Iterator) Test(org.junit.Test)
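
DummyFlatMap is another test function whose body is not shown; from its use it must be a FlatMapFunction<String, Tuple2<String, Integer>>. Below is a compatible sketch with an assumed body (a simple tokenizer); the real function in the Flink repository may emit different records.

// Sketch of a FlatMapFunction matching DummyFlatMap's signature; the body is an assumption.
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

public class DummyFlatMapSketch implements FlatMapFunction<String, Tuple2<String, Integer>> {

    @Override
    public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
        // emit each whitespace-separated token with a count of 1
        for (String token : value.split(" ")) {
            out.collect(new Tuple2<String, Integer>(token, 1));
        }
    }
}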

Aggregations

OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 55
JobGraphGenerator (org.apache.flink.optimizer.plantranslate.JobGraphGenerator): 55
Test (org.junit.Test): 49
Plan (org.apache.flink.api.common.Plan): 47
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 35
DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode): 19
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 17
SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode): 17
SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 11
Channel (org.apache.flink.optimizer.plan.Channel): 9
WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode): 8
BulkIterationPlanNode (org.apache.flink.optimizer.plan.BulkIterationPlanNode): 6
NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode): 6
IdentityMapper (org.apache.flink.optimizer.testfunctions.IdentityMapper): 6
IdentityGroupReducer (org.apache.flink.optimizer.testfunctions.IdentityGroupReducer): 5
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 5
DataStatistics (org.apache.flink.optimizer.DataStatistics): 4
SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode): 4
InvalidProgramException (org.apache.flink.api.common.InvalidProgramException): 3
FieldList (org.apache.flink.api.common.operators.util.FieldList): 3