Search in sources :

Example 6 with RichGroupReduceFunction

Use of org.apache.flink.api.common.functions.RichGroupReduceFunction in project flink by apache.

From the class GroupReduceCompilationTest, method testAllGroupReduceNoCombiner.

/**
 * Compiles a small program consisting of a source, a non-grouped {@code reduceGroup} and a
 * sink, and checks the resulting optimized plan: the reducer must be fed directly by the
 * source (no intermediate node such as a combiner), it must use the
 * {@link DriverStrategy#ALL_GROUP_REDUCE} strategy, and all three nodes must run with
 * parallelism 1.
 */
@Test
public void testAllGroupReduceNoCombiner() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Double> data = env.fromElements(0.2, 0.3, 0.4, 0.5).name("source");
        data.reduceGroup(new RichGroupReduceFunction<Double, Double>() {

            @Override
            public void reduce(Iterable<Double> values, Collector<Double> out) {
                // Intentionally empty: only the shape of the compiled plan is under test.
            }
        }).name("reducer").output(new DiscardingOutputFormat<Double>()).name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // The all-reduce has no combiner when the parallelism of its input is one.
        // NOTE(review): the environment default is 8, yet parallelism 1 is asserted below;
        // presumably fromElements(...) yields a non-parallel source - confirm.
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // check wiring: source -> reducer -> sink, with nothing in between
        assertEquals(sourceNode, reduceNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());
        // check that reduce has the right strategy
        assertEquals(DriverStrategy.ALL_GROUP_REDUCE, reduceNode.getDriverStrategy());
        // check parallelism
        assertEquals(1, sourceNode.getParallelism());
        assertEquals(1, reduceNode.getParallelism());
        assertEquals(1, sinkNode.getParallelism());
    } catch (Exception e) {
        // Chain the original exception so the test report carries the full stack trace,
        // instead of printing it to stderr and failing with the message text only.
        throw new AssertionError(e.getClass().getSimpleName() + " in test: " + e.getMessage(), e);
    }
}
Also used: RichGroupReduceFunction (org.apache.flink.api.common.functions.RichGroupReduceFunction), ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), Plan (org.apache.flink.api.common.Plan), OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan), DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat), SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode), Collector (org.apache.flink.util.Collector), SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode), SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode), Test (org.junit.Test)

Example 7 with RichGroupReduceFunction

Use of org.apache.flink.api.common.functions.RichGroupReduceFunction in project flink by apache.

From the class GroupReduceOperatorTest, method testGroupReduceCollectionWithRuntimeContext.

/**
 * Runs a rich group-reduce function on an in-memory collection through
 * {@code GroupReduceOperatorBase#executeOnCollections}, once with object reuse disabled and
 * once with it enabled, and checks that:
 * <ul>
 *   <li>both runs produce the per-key sums {@code ("foo", 4)} and {@code ("bar", 6)};</li>
 *   <li>{@code open} and {@code close} were invoked on the function;</li>
 *   <li>the runtime context visible in {@code open} reports subtask index 0, one parallel
 *       subtask and the configured task name.</li>
 * </ul>
 */
@Test
public void testGroupReduceCollectionWithRuntimeContext() {
    try {
        final String taskName = "Test Task";
        // Flags flipped by the lifecycle callbacks so their invocation can be asserted below.
        final AtomicBoolean opened = new AtomicBoolean();
        final AtomicBoolean closed = new AtomicBoolean();
        final RichGroupReduceFunction<Tuple2<String, Integer>, Tuple2<String, Integer>> reducer = new RichGroupReduceFunction<Tuple2<String, Integer>, Tuple2<String, Integer>>() {

            @Override
            public void reduce(Iterable<Tuple2<String, Integer>> values, Collector<Tuple2<String, Integer>> out) throws Exception {
                // Sum the second field of the group; the first element is reused as the
                // output record (its f1 is overwritten with the total).
                Iterator<Tuple2<String, Integer>> input = values.iterator();
                Tuple2<String, Integer> result = input.next();
                int sum = result.f1;
                while (input.hasNext()) {
                    Tuple2<String, Integer> next = input.next();
                    sum += next.f1;
                }
                result.f1 = sum;
                out.collect(result);
            }

            @Override
            public void open(Configuration parameters) throws Exception {
                opened.set(true);
                RuntimeContext ctx = getRuntimeContext();
                assertEquals(0, ctx.getIndexOfThisSubtask());
                assertEquals(1, ctx.getNumberOfParallelSubtasks());
                assertEquals(taskName, ctx.getTaskName());
            }

            @Override
            public void close() throws Exception {
                closed.set(true);
            }
        };
        GroupReduceOperatorBase<Tuple2<String, Integer>, Tuple2<String, Integer>, GroupReduceFunction<Tuple2<String, Integer>, Tuple2<String, Integer>>> op = new GroupReduceOperatorBase<>(reducer, new UnaryOperatorInformation<>(STRING_INT_TUPLE, STRING_INT_TUPLE), new int[] { 0 }, "TestReducer");
        List<Tuple2<String, Integer>> input = new ArrayList<>(asList(new Tuple2<>("foo", 1), new Tuple2<>("foo", 3), new Tuple2<>("bar", 2), new Tuple2<>("bar", 4)));
        final TaskInfo taskInfo = new TaskInfo(taskName, 1, 0, 1, 0);
        ExecutionConfig executionConfig = new ExecutionConfig();
        // First run: object reuse disabled.
        executionConfig.disableObjectReuse();
        List<Tuple2<String, Integer>> resultMutableSafe = op.executeOnCollections(input, new RuntimeUDFContext(taskInfo, null, executionConfig, new HashMap<>(), new HashMap<>(), UnregisteredMetricsGroup.createOperatorMetricGroup()), executionConfig);
        // Second run on the same input: object reuse enabled.
        executionConfig.enableObjectReuse();
        List<Tuple2<String, Integer>> resultRegular = op.executeOnCollections(input, new RuntimeUDFContext(taskInfo, null, executionConfig, new HashMap<>(), new HashMap<>(), UnregisteredMetricsGroup.createOperatorMetricGroup()), executionConfig);
        // Compare as sets so the order in which groups are emitted does not matter.
        Set<Tuple2<String, Integer>> resultSetMutableSafe = new HashSet<>(resultMutableSafe);
        Set<Tuple2<String, Integer>> resultSetRegular = new HashSet<>(resultRegular);
        Set<Tuple2<String, Integer>> expectedResult = new HashSet<>(asList(new Tuple2<>("foo", 4), new Tuple2<>("bar", 6)));
        assertEquals(expectedResult, resultSetMutableSafe);
        assertEquals(expectedResult, resultSetRegular);
        assertTrue(opened.get());
        assertTrue(closed.get());
    } catch (Exception e) {
        // Chain the original exception and name its type: e.getMessage() alone may be null
        // (e.g. for a NullPointerException), which would leave the failure without any detail.
        throw new AssertionError(e.getClass().getSimpleName() + " in test: " + e.getMessage(), e);
    }
}
Also used: RichGroupReduceFunction (org.apache.flink.api.common.functions.RichGroupReduceFunction), Configuration (org.apache.flink.configuration.Configuration), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), ExecutionConfig (org.apache.flink.api.common.ExecutionConfig), TaskInfo (org.apache.flink.api.common.TaskInfo), Collector (org.apache.flink.util.Collector), RuntimeUDFContext (org.apache.flink.api.common.functions.util.RuntimeUDFContext), HashSet (java.util.HashSet), GroupReduceFunction (org.apache.flink.api.common.functions.GroupReduceFunction), TypeHint (org.apache.flink.api.common.typeinfo.TypeHint), AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext), Test (org.junit.Test)

Aggregations

RichGroupReduceFunction (org.apache.flink.api.common.functions.RichGroupReduceFunction): 7, GroupReduceFunction (org.apache.flink.api.common.functions.GroupReduceFunction): 4, Collector (org.apache.flink.util.Collector): 4, Test (org.junit.Test): 4, Plan (org.apache.flink.api.common.Plan): 3, UnaryOperatorInformation (org.apache.flink.api.common.operators.UnaryOperatorInformation): 3, GroupReduceOperatorBase (org.apache.flink.api.common.operators.base.GroupReduceOperatorBase): 3, ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 3, DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat): 3, Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 3, OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 3, SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode): 3, SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 3, SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode): 3, Keys (org.apache.flink.api.common.operators.Keys): 2, SingleInputSemanticProperties (org.apache.flink.api.common.operators.SingleInputSemanticProperties): 2, FieldList (org.apache.flink.api.common.operators.util.FieldList): 2, AggregationFunction (org.apache.flink.api.java.aggregation.AggregationFunction): 2, ArrayList (java.util.ArrayList): 1, HashMap (java.util.HashMap): 1