Example 6 with OptimizedPlan

Use of org.apache.flink.optimizer.plan.OptimizedPlan in project flink by apache.

From the class JobGraphGeneratorTest, method testResourcesForChainedOperators:

/**
 * Verifies that the resources are merged correctly for chained operators when
 * generating the job graph.
 */
@Test
public void testResourcesForChainedOperators() throws Exception {
    ResourceSpec resource1 = new ResourceSpec(0.1, 100);
    ResourceSpec resource2 = new ResourceSpec(0.2, 200);
    ResourceSpec resource3 = new ResourceSpec(0.3, 300);
    ResourceSpec resource4 = new ResourceSpec(0.4, 400);
    ResourceSpec resource5 = new ResourceSpec(0.5, 500);
    ResourceSpec resource6 = new ResourceSpec(0.6, 600);
    ResourceSpec resource7 = new ResourceSpec(0.7, 700);
    Method opMethod = Operator.class.getDeclaredMethod("setResources", ResourceSpec.class);
    opMethod.setAccessible(true);
    Method sinkMethod = DataSink.class.getDeclaredMethod("setResources", ResourceSpec.class);
    sinkMethod.setAccessible(true);
    MapFunction<Long, Long> mapFunction = new MapFunction<Long, Long>() {

        @Override
        public Long map(Long value) throws Exception {
            return value;
        }
    };
    FilterFunction<Long> filterFunction = new FilterFunction<Long>() {

        @Override
        public boolean filter(Long value) throws Exception {
            return false;
        }
    };
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Long> input = env.fromElements(1L, 2L, 3L);
    opMethod.invoke(input, resource1);
    DataSet<Long> map1 = input.map(mapFunction);
    opMethod.invoke(map1, resource2);
    // CHAIN(Source -> Map -> Filter)
    DataSet<Long> filter1 = map1.filter(filterFunction);
    opMethod.invoke(filter1, resource3);
    IterativeDataSet<Long> startOfIteration = filter1.iterate(10);
    opMethod.invoke(startOfIteration, resource4);
    DataSet<Long> map2 = startOfIteration.map(mapFunction);
    opMethod.invoke(map2, resource5);
    // CHAIN(Map -> Filter)
    DataSet<Long> feedback = map2.filter(filterFunction);
    opMethod.invoke(feedback, resource6);
    DataSink<Long> sink = startOfIteration.closeWith(feedback).output(new DiscardingOutputFormat<Long>());
    sinkMethod.invoke(sink, resource7);
    Plan plan = env.createProgramPlan();
    Optimizer pc = new Optimizer(new Configuration());
    OptimizedPlan op = pc.compile(plan);
    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jobGraph = jgg.compileJobGraph(op);
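    // Topological order of the generated vertices: chained (source -> map -> filter),
    // iteration head, chained (map -> filter) feedback, sink, and iteration sync task.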
    JobVertex sourceMapFilterVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(0);
    JobVertex iterationHeadVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(1);
    JobVertex feedbackVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(2);
    JobVertex sinkVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(3);
    JobVertex iterationSyncVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(4);
    assertTrue(sourceMapFilterVertex.getMinResources().equals(resource1.merge(resource2).merge(resource3)));
    assertTrue(iterationHeadVertex.getPreferredResources().equals(resource4));
    assertTrue(feedbackVertex.getMinResources().equals(resource5.merge(resource6)));
    assertTrue(sinkVertex.getPreferredResources().equals(resource7));
    assertTrue(iterationSyncVertex.getMinResources().equals(resource4));
}
Also used: FilterFunction(org.apache.flink.api.common.functions.FilterFunction) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Configuration(org.apache.flink.configuration.Configuration) Optimizer(org.apache.flink.optimizer.Optimizer) ResourceSpec(org.apache.flink.api.common.operators.ResourceSpec) Method(java.lang.reflect.Method) MapFunction(org.apache.flink.api.common.functions.MapFunction) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) Test(org.junit.Test)
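
The assertions above rely on ResourceSpec.merge combining the resources of all operators in a chain. A minimal sketch of that arithmetic, assuming (as the assertions imply) that merge sums each field of the two-argument ResourceSpec(cpuCores, heapMemoryInMB) used in this test:

ResourceSpec merged = new ResourceSpec(0.1, 100)
        .merge(new ResourceSpec(0.2, 200))
        .merge(new ResourceSpec(0.3, 300));
// Chained operators pool their resources, so the chained source/map/filter
// vertex above is expected to request 0.6 CPU cores and 600 MB of heap.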

Example 7 with OptimizedPlan

Use of org.apache.flink.optimizer.plan.OptimizedPlan in project flink by apache.

From the class TempInIterationsTest, method testTempInIterationTest:

/*
 * Tests whether temp barriers are correctly set within iterations.
 */
@Test
public void testTempInIterationTest() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple2<Long, Long>> input = env.readCsvFile("file:///does/not/exist").types(Long.class, Long.class);
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = input.iterateDelta(input, 1, 0);
    DataSet<Tuple2<Long, Long>> update = iteration.getWorkset().join(iteration.getSolutionSet()).where(0).equalTo(0).with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());
    iteration.closeWith(update, update).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = (new Optimizer(new Configuration())).compile(plan);
    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jg = jgg.compileJobGraph(oPlan);
    boolean solutionSetUpdateChecked = false;
    for (JobVertex v : jg.getVertices()) {
        if (v.getName().equals("SolutionSet Delta")) {
            // check whether the input of the solution set delta is materialized at a temp barrier
            TaskConfig tc = new TaskConfig(v.getConfiguration());
            assertTrue(tc.isInputAsynchronouslyMaterialized(0));
            solutionSetUpdateChecked = true;
        }
    }
    assertTrue(solutionSetUpdateChecked);
}
Also used: ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Configuration(org.apache.flink.configuration.Configuration) Optimizer(org.apache.flink.optimizer.Optimizer) TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Test(org.junit.Test)
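
The join above uses DummyFlatJoinFunction, a Flink test utility whose body is not shown on this page. A hypothetical stand-in with the pass-through behavior the test needs would look like this:

import org.apache.flink.api.common.functions.FlatJoinFunction;
import org.apache.flink.util.Collector;

// Hypothetical stand-in: joins two records of the same type and simply
// forwards the first one, which is enough to wire up the plan under test.
public class DummyFlatJoinFunction<T> implements FlatJoinFunction<T, T, T> {

    @Override
    public void join(T first, T second, Collector<T> out) throws Exception {
        out.collect(first);
    }
}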

Example 8 with OptimizedPlan

Use of org.apache.flink.optimizer.plan.OptimizedPlan in project flink by apache.

From the class DistinctAndGroupingOptimizerTest, method testDistinctPreservesPartitioningOfDistinctFields:

@Test
public void testDistinctPreservesPartitioningOfDistinctFields() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(4);
        @SuppressWarnings("unchecked") DataSet<Tuple2<Long, Long>> data = env.fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L)).map(new IdentityMapper<Tuple2<Long, Long>>()).setParallelism(4);
        data.distinct(0).groupBy(0).sum(1).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode distinctReducer = (SingleInputPlanNode) reducer.getInput().getSource();
        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
        // reducer can be forward, reuses partitioning from distinct
        assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());
        // distinct reducer is partitioned
        assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) IdentityMapper(org.apache.flink.optimizer.testfunctions.IdentityMapper) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)
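
The plan above maps through IdentityMapper from org.apache.flink.optimizer.testfunctions. A minimal stand-in, assuming it simply returns its input unchanged:

import org.apache.flink.api.common.functions.MapFunction;

// Minimal stand-in: forwards each record unchanged, so it only adds a
// mapper node to the plan without altering the data or its partitioning.
public class IdentityMapper<T> implements MapFunction<T, T> {

    @Override
    public T map(T value) throws Exception {
        return value;
    }
}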

Example 9 with OptimizedPlan

Use of org.apache.flink.optimizer.plan.OptimizedPlan in project flink by apache.

From the class GroupReduceCompilationTest, method testGroupedReduceWithFieldPositionKeyCombinable:

@Test
public void testGroupedReduceWithFieldPositionKeyCombinable() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class).name("source").setParallelism(6);
        GroupReduceOperator<Tuple2<String, Double>, Tuple2<String, Double>> reduced = data.groupBy(1).reduceGroup(new CombineReducer()).name("reducer");
        reduced.setCombinable(true);
        reduced.output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
        // check wiring
        assertEquals(sourceNode, combineNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());
        // check that both reduce and combiner have the same strategy
        assertEquals(DriverStrategy.SORTED_GROUP_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.SORTED_GROUP_COMBINE, combineNode.getDriverStrategy());
        // check the keys
        assertEquals(new FieldList(1), reduceNode.getKeys(0));
        assertEquals(new FieldList(1), combineNode.getKeys(0));
        assertEquals(new FieldList(1), combineNode.getKeys(1));
        assertEquals(new FieldList(1), reduceNode.getInput().getLocalStrategyKeys());
        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(6, combineNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used: ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)
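
CombineReducer is defined elsewhere in GroupReduceCompilationTest, so only its use is visible here. Because setCombinable(true) requires a function that can pre-aggregate, a hypothetical stand-in could implement both GroupReduceFunction and GroupCombineFunction, for example summing the Double field per group:

import org.apache.flink.api.common.functions.GroupCombineFunction;
import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// Hypothetical stand-in for CombineReducer: a combinable group reduce
// that sums the Double field of each group.
public class CombineReducer
        implements GroupReduceFunction<Tuple2<String, Double>, Tuple2<String, Double>>,
        GroupCombineFunction<Tuple2<String, Double>, Tuple2<String, Double>> {

    @Override
    public void reduce(Iterable<Tuple2<String, Double>> values, Collector<Tuple2<String, Double>> out) {
        String key = null;
        double sum = 0.0;
        for (Tuple2<String, Double> value : values) {
            key = value.f0;
            sum += value.f1;
        }
        out.collect(new Tuple2<String, Double>(key, sum));
    }

    @Override
    public void combine(Iterable<Tuple2<String, Double>> values, Collector<Tuple2<String, Double>> out) {
        // Pre-aggregation reuses the same summing logic as the final reduce.
        reduce(values, out);
    }
}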

Example 10 with OptimizedPlan

Use of org.apache.flink.optimizer.plan.OptimizedPlan in project flink by apache.

From the class GroupReduceCompilationTest, method testGroupedReduceWithSelectorFunctionKeyCombinable:

@Test
public void testGroupedReduceWithSelectorFunctionKeyCombinable() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class).name("source").setParallelism(6);
        GroupReduceOperator<Tuple2<String, Double>, Tuple2<String, Double>> reduced = data.groupBy(new KeySelector<Tuple2<String, Double>, String>() {

            public String getKey(Tuple2<String, Double> value) {
                return value.f0;
            }
        }).reduceGroup(new CombineReducer()).name("reducer");
        reduced.setCombinable(true);
        reduced.output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
        // get the key extractor and projector
        SingleInputPlanNode keyExtractor = (SingleInputPlanNode) combineNode.getInput().getSource();
        SingleInputPlanNode keyProjector = (SingleInputPlanNode) sinkNode.getInput().getSource();
        // check wiring
        assertEquals(sourceNode, keyExtractor.getInput().getSource());
        assertEquals(keyProjector, sinkNode.getInput().getSource());
        // check that both reduce and combiner have the same strategy
        assertEquals(DriverStrategy.SORTED_GROUP_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.SORTED_GROUP_COMBINE, combineNode.getDriverStrategy());
        // check the keys
        assertEquals(new FieldList(0), reduceNode.getKeys(0));
        assertEquals(new FieldList(0), combineNode.getKeys(0));
        assertEquals(new FieldList(0), combineNode.getKeys(1));
        assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());
        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(6, keyExtractor.getParallelism());
        assertEquals(6, combineNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, keyProjector.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used: ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)
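
Both GroupReduceCompilationTest examples call compileNoStats(...) and getOptimizerPlanNodeResolver(...), helpers these tests inherit from Flink's CompilerTestBase. A rough sketch of what compileNoStats amounts to, assuming it simply compiles the plan without data statistics so the optimizer falls back to its default cost estimates:

import org.apache.flink.api.common.Plan;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.costs.DefaultCostEstimator;
import org.apache.flink.optimizer.plan.OptimizedPlan;

// Sketch (assumption): no DataStatistics are passed, so the optimizer
// cannot use size estimates and relies on its default cost model.
public static OptimizedPlan compileNoStats(Plan p) {
    return new Optimizer(null, new DefaultCostEstimator(), new Configuration()).compile(p);
}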

Aggregations

Type (fully qualified name): usage count

OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 221
Test (org.junit.Test): 197
Plan (org.apache.flink.api.common.Plan): 192
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 183
SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 146
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 91
SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode): 83
DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode): 82
JobGraphGenerator (org.apache.flink.optimizer.plantranslate.JobGraphGenerator): 55
Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 54
SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode): 48
DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat): 33
InvalidProgramException (org.apache.flink.api.common.InvalidProgramException): 27
FieldList (org.apache.flink.api.common.operators.util.FieldList): 27
Channel (org.apache.flink.optimizer.plan.Channel): 26
FieldSet (org.apache.flink.api.common.operators.util.FieldSet): 25
GlobalProperties (org.apache.flink.optimizer.dataproperties.GlobalProperties): 25
LocalProperties (org.apache.flink.optimizer.dataproperties.LocalProperties): 25
IdentityMapper (org.apache.flink.optimizer.testfunctions.IdentityMapper): 20
WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode): 16