Example 1 with SourcePlanNode

Use of org.apache.flink.optimizer.plan.SourcePlanNode in project flink by apache.

From class PartitionOperatorTest, method testRangePartitionOperatorPreservesFields2.

@Test
public void testRangePartitionOperatorPreservesFields2() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, Long>> data =
                env.fromCollection(Collections.singleton(new Tuple2<>(0L, 0L)));
        PartitionOperator<Tuple2<Long, Long>> rangePartitioned = data.partitionByRange(1);

        // Consumer 1: group on the partitioning key and reduce.
        rangePartitioned.groupBy(1)
                .reduceGroup(new IdentityGroupReducerCombinable<Tuple2<Long, Long>>())
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        // Consumer 2: an independent aggregation that reads the source directly.
        data.groupBy(0).aggregate(Aggregations.SUM, 1)
                .map(new MapFunction<Tuple2<Long, Long>, Long>() {

                    @Override
                    public Long map(Tuple2<Long, Long> value) throws Exception {
                        return value.f1;
                    }
                })
                .output(new DiscardingOutputFormat<Long>());

        // Consumer 3: a filter over the same range-partitioned data set.
        rangePartitioned.filter(new FilterFunction<Tuple2<Long, Long>>() {

                    @Override
                    public boolean filter(Tuple2<Long, Long> value) throws Exception {
                        return value.f0 % 2 == 0;
                    }
                })
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        // Walk backwards from the first sink:
        // group reduce <- range partition node <- partition-ID remover.
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode partitionNode = (SingleInputPlanNode) reducer.getInput().getSource();
        SingleInputPlanNode partitionIDRemover = (SingleInputPlanNode) partitionNode.getInput().getSource();

        // The custom (range) partitioning happens at the partition-ID remover's
        // input; everything downstream of it is a local forward.
        assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, partitionNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, partitionIDRemover.getInput().getShipStrategy());

        // The single source feeds three consumers, all via FORWARD channels;
        // the last channel is a BATCH exchange, because range partitioning first
        // samples the data to compute the partition boundaries.
        SourcePlanNode sourcePlanNode = op.getDataSources().iterator().next();
        List<Channel> sourceOutgoingChannels = sourcePlanNode.getOutgoingChannels();
        assertEquals(3, sourceOutgoingChannels.size());
        assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(0).getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(1).getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(2).getShipStrategy());
        assertEquals(DataExchangeMode.PIPELINED, sourceOutgoingChannels.get(0).getDataExchangeMode());
        assertEquals(DataExchangeMode.PIPELINED, sourceOutgoingChannels.get(1).getDataExchangeMode());
        assertEquals(DataExchangeMode.BATCH, sourceOutgoingChannels.get(2).getDataExchangeMode());

        // The range partition node forwards to both of its consumers in a pipelined fashion.
        List<Channel> partitionOutputChannels = partitionNode.getOutgoingChannels();
        assertEquals(2, partitionOutputChannels.size());
        assertEquals(ShipStrategyType.FORWARD, partitionOutputChannels.get(0).getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, partitionOutputChannels.get(1).getShipStrategy());
        assertEquals(DataExchangeMode.PIPELINED, partitionOutputChannels.get(0).getDataExchangeMode());
        assertEquals(DataExchangeMode.PIPELINED, partitionOutputChannels.get(1).getDataExchangeMode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FilterFunction(org.apache.flink.api.common.functions.FilterFunction) Channel(org.apache.flink.optimizer.plan.Channel) MapFunction(org.apache.flink.api.common.functions.MapFunction) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) IdentityGroupReducerCombinable(org.apache.flink.optimizer.testfunctions.IdentityGroupReducerCombinable) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) Test(org.junit.Test)
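
The IdentityGroupReducerCombinable used above is a helper from Flink's optimizer test functions that simply forwards every record of a group. A minimal sketch of such a combinable identity group reducer follows; the exact supertypes of the real class may differ, so treat this as an illustration rather than the verbatim Flink source.

import org.apache.flink.api.common.functions.GroupCombineFunction;
import org.apache.flink.api.common.functions.RichGroupReduceFunction;
import org.apache.flink.util.Collector;

// Sketch of a combinable identity group reducer, modeled after
// org.apache.flink.optimizer.testfunctions.IdentityGroupReducerCombinable.
public class IdentityGroupReducerCombinable<T>
        extends RichGroupReduceFunction<T, T>
        implements GroupCombineFunction<T, T> {

    @Override
    public void reduce(Iterable<T> values, Collector<T> out) {
        // Identity: forward every record of the group unchanged.
        for (T value : values) {
            out.collect(value);
        }
    }

    @Override
    public void combine(Iterable<T> values, Collector<T> out) {
        // The combine step is the same identity pass; implementing it is
        // what lets the optimizer treat the function as combinable.
        reduce(values, out);
    }
}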

Example 2 with SourcePlanNode

Use of org.apache.flink.optimizer.plan.SourcePlanNode in project flink by apache.

From class ReduceCompilationTest, method testGroupedReduceWithSelectorFunctionKey.

@Test
public void testGroupedReduceWithSelectorFunctionKey() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read")
                .types(String.class, Double.class)
                .name("source").setParallelism(6);
        data.groupBy(new KeySelector<Tuple2<String, Double>, String>() {

            @Override
            public String getKey(Tuple2<String, Double> value) {
                return value.f0;
            }
        }).reduce(new RichReduceFunction<Tuple2<String, Double>>() {

            @Override
            public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2) {
                // Never executed: the test only compiles the plan, it does not run it.
                return null;
            }
        }).name("reducer")
          .output(new DiscardingOutputFormat<Tuple2<String, Double>>())
          .name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
        // get the key extractors and projectors
        SingleInputPlanNode keyExtractor = (SingleInputPlanNode) combineNode.getInput().getSource();
        SingleInputPlanNode keyProjector = (SingleInputPlanNode) sinkNode.getInput().getSource();
        // check wiring
        assertEquals(sourceNode, keyExtractor.getInput().getSource());
        assertEquals(keyProjector, sinkNode.getInput().getSource());
        // check the strategies
        assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.SORTED_PARTIAL_REDUCE, combineNode.getDriverStrategy());
        // check the keys
        assertEquals(new FieldList(0), reduceNode.getKeys(0));
        assertEquals(new FieldList(0), combineNode.getKeys(0));
        assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());
        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(6, keyExtractor.getParallelism());
        assertEquals(6, combineNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, keyProjector.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) RichReduceFunction(org.apache.flink.api.common.functions.RichReduceFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)
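
Because the grouping key comes from a KeySelector rather than a field position, the optimizer inserts a key-extraction mapper in front of the combiner and a key-projection mapper in front of the sink, which is why every key assertion above checks field 0. A simplified, purely illustrative sketch of what those two planted mappers do conceptually (this is not Flink's internal operator code; MapFunction and Tuple2 are the classes already imported above):

// Illustrative only: the planted extractor conceptually prepends the selector
// key, so downstream grouping and sorting can work on field 0.
MapFunction<Tuple2<String, Double>, Tuple2<String, Tuple2<String, Double>>> extractKey =
        new MapFunction<Tuple2<String, Double>, Tuple2<String, Tuple2<String, Double>>>() {
            @Override
            public Tuple2<String, Tuple2<String, Double>> map(Tuple2<String, Double> value) {
                return new Tuple2<>(value.f0, value);
            }
        };

// ... and the planted projector strips the key again before the sink.
MapFunction<Tuple2<String, Tuple2<String, Double>>, Tuple2<String, Double>> projectKey =
        new MapFunction<Tuple2<String, Tuple2<String, Double>>, Tuple2<String, Double>>() {
            @Override
            public Tuple2<String, Double> map(Tuple2<String, Tuple2<String, Double>> value) {
                return value.f1;
            }
        };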

Example 3 with SourcePlanNode

Use of org.apache.flink.optimizer.plan.SourcePlanNode in project flink by apache.

From class ReduceCompilationTest, method testAllReduceWithCombiner.

@Test
public void testAllReduceWithCombiner() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Long> data = env.generateSequence(1, 8000000).name("source");
        data.reduce(new RichReduceFunction<Long>() {

            @Override
            public Long reduce(Long value1, Long value2) {
                return value1 + value2;
            }
        }).name("reducer")
          .output(new DiscardingOutputFormat<Long>())
          .name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
        // check wiring
        assertEquals(sourceNode, combineNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());
        // check that both reduce and combiner have the same strategy
        assertEquals(DriverStrategy.ALL_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.ALL_REDUCE, combineNode.getDriverStrategy());
        // check parallelism
        assertEquals(8, sourceNode.getParallelism());
        assertEquals(8, combineNode.getParallelism());
        assertEquals(1, reduceNode.getParallelism());
        assertEquals(1, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used : SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) RichReduceFunction(org.apache.flink.api.common.functions.RichReduceFunction) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) Test(org.junit.Test)
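
The parallelism assertions capture the point of the combiner: the sum over 1..8,000,000 is pre-aggregated by 8 parallel combine instances, and the single reducer only adds the 8 partial results. A standalone plain-Java sketch of that two-phase reduction (the modulo slot assignment is hypothetical, chosen only to mirror the shape of the plan):

// Phase 1 ("combine", parallelism 8): each instance sums its share locally.
long[] partials = new long[8];
for (long i = 1; i <= 8_000_000L; i++) {
    partials[(int) (i % 8)] += i;
}

// Phase 2 ("reduce", parallelism 1): a single task adds the partial sums.
long total = 0L;
for (long partial : partials) {
    total += partial;
}

// total == 8_000_000L * 8_000_001L / 2 == 32_000_004_000_000L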

Example 4 with SourcePlanNode

Use of org.apache.flink.optimizer.plan.SourcePlanNode in project flink by apache.

From class UnionClosedBranchingTest, method testUnionClosedBranchingTest.

@Test
public void testUnionClosedBranchingTest() throws Exception {
    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setExecutionMode(executionMode);
    env.setParallelism(4);
    DataSet<Tuple1<Integer>> src1 = env.fromElements(new Tuple1<>(0), new Tuple1<>(1));
    DataSet<Tuple1<Integer>> src2 = env.fromElements(new Tuple1<>(0), new Tuple1<>(1));
    DataSet<Tuple1<Integer>> union = src1.union(src2);
    DataSet<Tuple2<Integer, Integer>> join = union.join(union).where(0).equalTo(0).projectFirst(0).projectSecond(0);
    join.output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());
    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    SinkPlanNode sinkNode = optimizedPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();
    // Verify that the compiler correctly sets the expected data exchange modes.
    for (Channel channel : joinNode.getInputs()) {
        assertEquals("Unexpected data exchange mode between union and join node.", unionToJoin, channel.getDataExchangeMode());
        assertEquals("Unexpected ship strategy between union and join node.", unionToJoinStrategy, channel.getShipStrategy());
    }
    for (SourcePlanNode src : optimizedPlan.getDataSources()) {
        for (Channel channel : src.getOutgoingChannels()) {
            assertEquals("Unexpected data exchange mode between source and union node.", sourceToUnion, channel.getDataExchangeMode());
            assertEquals("Unexpected ship strategy between source and union node.", sourceToUnionStrategy, channel.getShipStrategy());
        }
    }
    // -----------------------------------------------------------------------------------------
    // Verify generated JobGraph
    // -----------------------------------------------------------------------------------------
    JobGraphGenerator jgg = new JobGraphGenerator();
    JobGraph jobGraph = jgg.compileJobGraph(optimizedPlan);
    List<JobVertex> vertices = jobGraph.getVerticesSortedTopologicallyFromSources();
    // Sanity check for the test setup
    assertEquals("Unexpected number of vertices created.", 4, vertices.size());
    // Verify all sources
    JobVertex[] sources = new JobVertex[] { vertices.get(0), vertices.get(1) };
    for (JobVertex src : sources) {
        // Sanity check
        assertTrue("Unexpected vertex type. Test setup is broken.", src.isInputVertex());
        // The union is not translated to an extra union task, but the join uses a union
        // input gate to read multiple inputs. Each source creates a single result per consumer.
        assertEquals("Unexpected number of created results.", 2, src.getNumberOfProducedIntermediateDataSets());
        for (IntermediateDataSet dataSet : src.getProducedDataSets()) {
            ResultPartitionType dsType = dataSet.getResultType();
            // Ensure a batch exchange unless PIPELINED_FORCED is enabled.
            if (!executionMode.equals(ExecutionMode.PIPELINED_FORCED)) {
                assertTrue("Expected batch exchange, but result type is " + dsType + ".", dsType.isBlocking());
            } else {
                assertFalse("Expected non-batch exchange, but result type is " + dsType + ".", dsType.isBlocking());
            }
        }
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) IntermediateDataSet(org.apache.flink.runtime.jobgraph.IntermediateDataSet) Channel(org.apache.flink.optimizer.plan.Channel) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) Tuple1(org.apache.flink.api.java.tuple.Tuple1) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) Test(org.junit.Test)
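
The fields executionMode, sourceToUnion, sourceToUnionStrategy, unionToJoin, and unionToJoinStrategy are not declared in the snippet: UnionClosedBranchingTest is a parameterized test that receives them per run. Below is a plausible sketch of that scaffolding; the single parameter row and the package locations of ExecutionMode, DataExchangeMode, and ShipStrategyType are assumptions for illustration, not the verbatim values from the Flink source.

import java.util.Arrays;
import java.util.Collection;

import org.apache.flink.api.common.ExecutionMode;
import org.apache.flink.runtime.io.network.DataExchangeMode;
import org.apache.flink.runtime.operators.shipping.ShipStrategyType;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

// Sketch of the parameterized scaffolding that supplies the fields used in the
// test body above. The parameter row below is an illustrative assumption.
@RunWith(Parameterized.class)
public class UnionClosedBranchingTest extends CompilerTestBase {

    @Parameterized.Parameters
    public static Collection<Object[]> parameters() {
        return Arrays.asList(new Object[][] {
                // executionMode, sourceToUnion, sourceToUnionStrategy,
                // unionToJoin, unionToJoinStrategy (example values only)
                { ExecutionMode.PIPELINED, DataExchangeMode.BATCH,
                        ShipStrategyType.PARTITION_HASH, DataExchangeMode.PIPELINED,
                        ShipStrategyType.FORWARD },
        });
    }

    private final ExecutionMode executionMode;
    private final DataExchangeMode sourceToUnion;
    private final ShipStrategyType sourceToUnionStrategy;
    private final DataExchangeMode unionToJoin;
    private final ShipStrategyType unionToJoinStrategy;

    public UnionClosedBranchingTest(
            ExecutionMode executionMode,
            DataExchangeMode sourceToUnion,
            ShipStrategyType sourceToUnionStrategy,
            DataExchangeMode unionToJoin,
            ShipStrategyType unionToJoinStrategy) {
        this.executionMode = executionMode;
        this.sourceToUnion = sourceToUnion;
        this.sourceToUnionStrategy = sourceToUnionStrategy;
        this.unionToJoin = unionToJoin;
        this.unionToJoinStrategy = unionToJoinStrategy;
    }

    // ... testUnionClosedBranchingTest() as shown above ...
}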

Example 5 with SourcePlanNode

Use of org.apache.flink.optimizer.plan.SourcePlanNode in project flink by apache.

From class GroupReduceCompilationTest, method testGroupedReduceWithSelectorFunctionKeyNoncombinable.

@Test
public void testGroupedReduceWithSelectorFunctionKeyNoncombinable() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read")
                .types(String.class, Double.class)
                .name("source").setParallelism(6);
        data.groupBy(new KeySelector<Tuple2<String, Double>, String>() {

            @Override
            public String getKey(Tuple2<String, Double> value) {
                return value.f0;
            }
        }).reduceGroup(new RichGroupReduceFunction<Tuple2<String, Double>, Tuple2<String, Double>>() {

            @Override
            public void reduce(Iterable<Tuple2<String, Double>> values, Collector<Tuple2<String, Double>> out) {
                // Intentionally empty: the test only compiles the plan, it does not run it.
            }
        }).name("reducer")
          .output(new DiscardingOutputFormat<Tuple2<String, Double>>())
          .name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // get the key extractors and projectors
        SingleInputPlanNode keyExtractor = (SingleInputPlanNode) reduceNode.getInput().getSource();
        SingleInputPlanNode keyProjector = (SingleInputPlanNode) sinkNode.getInput().getSource();
        // check wiring
        assertEquals(sourceNode, keyExtractor.getInput().getSource());
        assertEquals(keyProjector, sinkNode.getInput().getSource());
        // check the strategy: with a non-combinable function there is no combiner,
        // so the reducer must be a plain sorted group reduce
        assertEquals(DriverStrategy.SORTED_GROUP_REDUCE, reduceNode.getDriverStrategy());
        // check the keys
        assertEquals(new FieldList(0), reduceNode.getKeys(0));
        assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());
        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(6, keyExtractor.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, keyProjector.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used : RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Collector(org.apache.flink.util.Collector) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)
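
All five examples follow the same skeleton: build a DataSet program, compile it without statistics via the compileNoStats helper from Flink's CompilerTestBase, and then inspect the SourcePlanNode instances of the resulting OptimizedPlan. A condensed sketch of that pattern, assembled from the calls shown above (the test name is hypothetical, and the class is assumed to extend CompilerTestBase):

@Test
public void sketchSourcePlanNodeInspection() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.generateSequence(1, 100).name("source")
            .output(new DiscardingOutputFormat<Long>()).name("sink");

    // Compile without statistics, as in every example above.
    OptimizedPlan op = compileNoStats(env.createProgramPlan());

    // Each data source appears as a SourcePlanNode whose outgoing channels
    // expose the chosen ship strategy and data exchange mode.
    for (SourcePlanNode source : op.getDataSources()) {
        for (Channel channel : source.getOutgoingChannels()) {
            assertNotNull(channel.getShipStrategy());
            assertNotNull(channel.getDataExchangeMode());
        }
    }
}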

Aggregations

SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode): 61 uses
Test (org.junit.Test): 55 uses
SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 51 uses
OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 48 uses
Plan (org.apache.flink.api.common.Plan): 45 uses
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 45 uses
SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode): 33 uses
GlobalProperties (org.apache.flink.optimizer.dataproperties.GlobalProperties): 31 uses
LocalProperties (org.apache.flink.optimizer.dataproperties.LocalProperties): 31 uses
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 30 uses
FieldSet (org.apache.flink.api.common.operators.util.FieldSet): 27 uses
FieldList (org.apache.flink.api.common.operators.util.FieldList): 18 uses
DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat): 18 uses
Channel (org.apache.flink.optimizer.plan.Channel): 17 uses
Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 11 uses
DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode): 11 uses
WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode): 8 uses
RequestedGlobalProperties (org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties): 7 uses
RequestedLocalProperties (org.apache.flink.optimizer.dataproperties.RequestedLocalProperties): 7 uses
FeedbackPropertiesMeetRequirementsReport (org.apache.flink.optimizer.plan.PlanNode.FeedbackPropertiesMeetRequirementsReport): 7 uses