Search in sources :

Example 36 with DiscardingOutputFormat

use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.

the class GroupReduceCompilationTest method testGroupedReduceWithFieldPositionKeyNonCombinable.

/**
 * Compiles a program consisting of a CSV source (parallelism 6), a group-reduce keyed on
 * tuple field 1 with an empty reduce function, and a discarding sink, under a default
 * parallelism of 8, and verifies the resulting optimized plan.
 *
 * <p>The assertions check that the reducer is fed directly by the source (no intermediate
 * node), that it uses {@code SORTED_GROUP_REDUCE}, that field 1 is used both as the reduce
 * key and as the local strategy key of the reducer's input, and that the parallelism is
 * 6 / 8 / 8 for source / reducer / sink.
 *
 * @throws Exception any unexpected exception is propagated to the test runner so that it is
 *     reported with its full stack trace and cause instead of being flattened into a
 *     failure message
 */
@Test
public void testGroupedReduceWithFieldPositionKeyNonCombinable() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(8);
    // the file is never opened; the program is only compiled, not executed
    DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class).name("source").setParallelism(6);
    data.groupBy(1).reduceGroup(new RichGroupReduceFunction<Tuple2<String, Double>, Tuple2<String, Double>>() {

        public void reduce(Iterable<Tuple2<String, Double>> values, Collector<Tuple2<String, Double>> out) {
        }
    }).name("reducer").output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
    Plan p = env.createProgramPlan();
    OptimizedPlan op = compileNoStats(p);
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
    // get the original nodes
    SourcePlanNode sourceNode = resolver.getNode("source");
    SingleInputPlanNode reduceNode = resolver.getNode("reducer");
    SinkPlanNode sinkNode = resolver.getNode("sink");
    // check wiring: the reducer reads directly from the source, i.e. no combiner in between
    assertEquals(sourceNode, reduceNode.getInput().getSource());
    assertEquals(reduceNode, sinkNode.getInput().getSource());
    // check that the reducer has the expected driver strategy
    assertEquals(DriverStrategy.SORTED_GROUP_REDUCE, reduceNode.getDriverStrategy());
    // check the keys
    assertEquals(new FieldList(1), reduceNode.getKeys(0));
    assertEquals(new FieldList(1), reduceNode.getInput().getLocalStrategyKeys());
    // check parallelism
    assertEquals(6, sourceNode.getParallelism());
    assertEquals(8, reduceNode.getParallelism());
    assertEquals(8, sinkNode.getParallelism());
}
Also used : RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Collector(org.apache.flink.util.Collector) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 37 with DiscardingOutputFormat

use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.

the class GroupReduceCompilationTest method testAllReduceWithCombiner.

/**
 * Compiles a program consisting of a generated sequence source, a non-grouped (all)
 * group-reduce marked as combinable, and a discarding sink, under a default parallelism
 * of 8, and verifies the resulting optimized plan.
 *
 * <p>The assertions check that a combiner node sits between the source and the reducer,
 * that the reducer uses {@code ALL_GROUP_REDUCE} while the combiner uses
 * {@code ALL_GROUP_REDUCE_COMBINE}, and that source and combiner run with parallelism 8
 * while reducer and sink run with parallelism 1.
 *
 * @throws Exception any unexpected exception is propagated to the test runner so that it is
 *     reported with its full stack trace and cause instead of being flattened into a
 *     failure message
 */
@Test
public void testAllReduceWithCombiner() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(8);
    DataSet<Long> data = env.generateSequence(1, 8000000).name("source");
    GroupReduceOperator<Long, Long> reduced = data.reduceGroup(new CombineReducer2()).name("reducer");
    reduced.setCombinable(true);
    reduced.output(new DiscardingOutputFormat<Long>()).name("sink");
    Plan p = env.createProgramPlan();
    OptimizedPlan op = compileNoStats(p);
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
    // get the original nodes
    SourcePlanNode sourceNode = resolver.getNode("source");
    SingleInputPlanNode reduceNode = resolver.getNode("reducer");
    SinkPlanNode sinkNode = resolver.getNode("sink");
    // get the combiner: it is the node feeding the reducer
    SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
    // check wiring
    assertEquals(sourceNode, combineNode.getInput().getSource());
    assertEquals(reduceNode, sinkNode.getInput().getSource());
    // check the driver strategies of the reducer and of its combiner
    assertEquals(DriverStrategy.ALL_GROUP_REDUCE, reduceNode.getDriverStrategy());
    assertEquals(DriverStrategy.ALL_GROUP_REDUCE_COMBINE, combineNode.getDriverStrategy());
    // check parallelism
    assertEquals(8, sourceNode.getParallelism());
    assertEquals(8, combineNode.getParallelism());
    assertEquals(1, reduceNode.getParallelism());
    assertEquals(1, sinkNode.getParallelism());
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 38 with DiscardingOutputFormat

use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.

the class GroupReduceCompilationTest method testAllGroupReduceNoCombiner.

/**
 * Compiles a program consisting of a {@code fromElements} source, a non-grouped (all)
 * group-reduce with an empty reduce function, and a discarding sink, under a default
 * parallelism of 8, and verifies the resulting optimized plan.
 *
 * <p>The assertions check that the reducer is fed directly by the source (no combiner
 * node), that it uses {@code ALL_GROUP_REDUCE}, and that source, reducer and sink all
 * run with parallelism 1.
 *
 * @throws Exception any unexpected exception is propagated to the test runner so that it is
 *     reported with its full stack trace and cause instead of being flattened into a
 *     failure message
 */
@Test
public void testAllGroupReduceNoCombiner() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(8);
    DataSet<Double> data = env.fromElements(0.2, 0.3, 0.4, 0.5).name("source");
    data.reduceGroup(new RichGroupReduceFunction<Double, Double>() {

        public void reduce(Iterable<Double> values, Collector<Double> out) {
        }
    }).name("reducer").output(new DiscardingOutputFormat<Double>()).name("sink");
    Plan p = env.createProgramPlan();
    OptimizedPlan op = compileNoStats(p);
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
    // the all-reduce has no combiner, when the parallelism of the input is one
    SourcePlanNode sourceNode = resolver.getNode("source");
    SingleInputPlanNode reduceNode = resolver.getNode("reducer");
    SinkPlanNode sinkNode = resolver.getNode("sink");
    // check wiring
    assertEquals(sourceNode, reduceNode.getInput().getSource());
    assertEquals(reduceNode, sinkNode.getInput().getSource());
    // check that reduce has the right strategy
    assertEquals(DriverStrategy.ALL_GROUP_REDUCE, reduceNode.getDriverStrategy());
    // check parallelism
    assertEquals(1, sourceNode.getParallelism());
    assertEquals(1, reduceNode.getParallelism());
    assertEquals(1, sinkNode.getParallelism());
}
Also used : RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Collector(org.apache.flink.util.Collector) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 39 with DiscardingOutputFormat

use of org.apache.flink.api.java.io.DiscardingOutputFormat in project flink by apache.

the class WordCountCompilerTest method checkWordCount.

/**
 * Builds a word-count style program (text source, map to (line, 1) pairs, group by field 0,
 * sum field 1, discarding sink), compiles it, and verifies the ship strategies, local
 * strategy, keys, sort order and combiner of the optimized plan.
 *
 * @param estimates if {@code true}, source statistics are attached to the "Input Lines"
 *     source and the plan is compiled with statistics; otherwise it is compiled without
 */
private void checkWordCount(boolean estimates) {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    // assemble the data flow: source -> mapper -> grouped sum -> sink
    DataSet<String> inputLines = env.readTextFile(IN_FILE).name("Input Lines");
    MapFunction<String, Tuple2<String, Integer>> tokenizer = new MapFunction<String, Tuple2<String, Integer>>() {

        private static final long serialVersionUID = -3952739820618875030L;

        @Override
        public Tuple2<String, Integer> map(String line) throws Exception {
            return new Tuple2<>(line, 1);
        }
    };
    DataSet<Tuple2<String, Integer>> tokenized = inputLines.map(tokenizer).name("Tokenize Lines");
    tokenized.groupBy(0).sum(1).name("Count Words").output(new DiscardingOutputFormat<Tuple2<String, Integer>>()).name("Word Counts");

    // create the program plan and run it through the optimizer
    Plan program = env.createProgramPlan();
    program.setExecutionConfig(new ExecutionConfig());
    final OptimizedPlan optimized;
    if (!estimates) {
        optimized = compileNoStats(program);
    } else {
        GenericDataSourceBase<?, ?> inputSource = getContractResolver(program).getNode("Input Lines");
        setSourceStatistics(inputSource, 1024 * 1024 * 1024 * 1024L, 24f);
        optimized = compileWithStats(program);
    }

    // look up the optimizer nodes by the names given above
    OptimizerPlanNodeResolver nodes = getOptimizerPlanNodeResolver(optimized);
    SinkPlanNode sinkNode = nodes.getNode("Word Counts");
    SingleInputPlanNode reduceNode = nodes.getNode("Count Words");
    SingleInputPlanNode mapNode = nodes.getNode("Tokenize Lines");

    // ship strategies along the pipeline
    Assert.assertEquals(ShipStrategyType.FORWARD, mapNode.getInput().getShipStrategy());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, reduceNode.getInput().getShipStrategy());
    Assert.assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());

    // local strategy, keys and sort order on the reducer's input channel
    Channel reducerInput = reduceNode.getInput();
    Assert.assertEquals(LocalStrategy.COMBININGSORT, reducerInput.getLocalStrategy());
    FieldList keyFields = new FieldList(0);
    Assert.assertEquals(keyFields, reducerInput.getShipStrategyKeys());
    Assert.assertEquals(keyFields, reducerInput.getLocalStrategyKeys());
    Assert.assertTrue(Arrays.equals(reducerInput.getLocalStrategySortOrder(), reduceNode.getSortOrders(0)));

    // the reducer's predecessor is expected to be the combiner
    SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getPredecessor();
    Assert.assertEquals(DriverStrategy.SORTED_GROUP_COMBINE, combineNode.getDriverStrategy());
    Assert.assertEquals(keyFields, combineNode.getKeys(0));
    Assert.assertEquals(ShipStrategyType.FORWARD, combineNode.getInput().getShipStrategy());
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Channel(org.apache.flink.optimizer.plan.Channel) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode)

Aggregations

ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 39; DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat): 39; Test (org.junit.Test): 35; OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 33; Plan (org.apache.flink.api.common.Plan): 28; Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 28; SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode): 27; SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 27; FieldList (org.apache.flink.api.common.operators.util.FieldList): 21; SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode): 17; DataSet (org.apache.flink.api.java.DataSet): 11; DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode): 11; Graph (org.apache.flink.graph.Graph): 10; Channel (org.apache.flink.optimizer.plan.Channel): 8; PlanNode (org.apache.flink.optimizer.plan.PlanNode): 7; NullValue (org.apache.flink.types.NullValue): 7; MapFunction (org.apache.flink.api.common.functions.MapFunction): 6; WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode): 6; LongSumAggregator (org.apache.flink.api.common.aggregators.LongSumAggregator): 5; Edge (org.apache.flink.graph.Edge): 5