Search in sources :

Example 1 with RichReduceFunction

use of org.apache.flink.api.common.functions.RichReduceFunction in project flink by apache.

the class ReduceCompilationTest method testAllReduceWithCombiner.

@Test
public void testAllReduceWithCombiner() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Long> data = env.generateSequence(1, 8000000).name("source");
        data.reduce(new RichReduceFunction<Long>() {

            @Override
            public Long reduce(Long value1, Long value2) {
                return value1 + value2;
            }
        }).name("reducer").output(new DiscardingOutputFormat<Long>()).name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
        // check wiring
        assertEquals(sourceNode, combineNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());
        // check that both reduce and combiner have the same strategy
        assertEquals(DriverStrategy.ALL_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.ALL_REDUCE, combineNode.getDriverStrategy());
        // check parallelism
        assertEquals(8, sourceNode.getParallelism());
        assertEquals(8, combineNode.getParallelism());
        assertEquals(1, reduceNode.getParallelism());
        assertEquals(1, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used : SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) RichReduceFunction(org.apache.flink.api.common.functions.RichReduceFunction) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)

Example 2 with RichReduceFunction

use of org.apache.flink.api.common.functions.RichReduceFunction in project flink by apache.

the class ReduceCompilationTest method testGroupedReduceWithSelectorFunctionKey.

@Test
public void testGroupedReduceWithSelectorFunctionKey() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class).name("source").setParallelism(6);
        data.groupBy(new KeySelector<Tuple2<String, Double>, String>() {

            public String getKey(Tuple2<String, Double> value) {
                return value.f0;
            }
        }).reduce(new RichReduceFunction<Tuple2<String, Double>>() {

            @Override
            public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2) {
                return null;
            }
        }).name("reducer").output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
        // get the key extractors and projectors
        SingleInputPlanNode keyExtractor = (SingleInputPlanNode) combineNode.getInput().getSource();
        SingleInputPlanNode keyProjector = (SingleInputPlanNode) sinkNode.getInput().getSource();
        // check wiring
        assertEquals(sourceNode, keyExtractor.getInput().getSource());
        assertEquals(keyProjector, sinkNode.getInput().getSource());
        // check the strategies
        assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.SORTED_PARTIAL_REDUCE, combineNode.getDriverStrategy());
        // check the keys
        assertEquals(new FieldList(0), reduceNode.getKeys(0));
        assertEquals(new FieldList(0), combineNode.getKeys(0));
        assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());
        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(6, keyExtractor.getParallelism());
        assertEquals(6, combineNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, keyProjector.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) RichReduceFunction(org.apache.flink.api.common.functions.RichReduceFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 3 with RichReduceFunction

use of org.apache.flink.api.common.functions.RichReduceFunction in project flink by apache.

the class ReduceTranslationTests method translateNonGroupedReduce.

@Test
public void translateNonGroupedReduce() {
    try {
        final int parallelism = 8;
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);
        DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);
        initialData.reduce(new RichReduceFunction<Tuple3<Double, StringValue, LongValue>>() {

            public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) {
                return value1;
            }
        }).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());
        Plan p = env.createProgramPlan();
        GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
        ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();
        // check types
        assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
        assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());
        // check keys
        assertTrue(reducer.getKeyColumns(0) == null || reducer.getKeyColumns(0).length == 0);
        // parallelism was not configured on the operator
        assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == ExecutionConfig.PARALLELISM_DEFAULT);
        assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test caused an error: " + e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) ReduceOperatorBase(org.apache.flink.api.common.operators.base.ReduceOperatorBase) Plan(org.apache.flink.api.common.Plan) RichReduceFunction(org.apache.flink.api.common.functions.RichReduceFunction) Tuple3(org.apache.flink.api.java.tuple.Tuple3) LongValue(org.apache.flink.types.LongValue) StringValue(org.apache.flink.types.StringValue) Test(org.junit.Test)

Example 4 with RichReduceFunction

use of org.apache.flink.api.common.functions.RichReduceFunction in project flink by apache.

the class ReduceOperatorTest method testReduceCollectionWithRuntimeContext.

@Test
public void testReduceCollectionWithRuntimeContext() {
    try {
        final String taskName = "Test Task";
        final AtomicBoolean opened = new AtomicBoolean();
        final AtomicBoolean closed = new AtomicBoolean();
        final ReduceFunction<Tuple2<String, Integer>> reducer = new RichReduceFunction<Tuple2<String, Integer>>() {

            @Override
            public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) throws Exception {
                return new Tuple2<String, Integer>(value1.f0, value1.f1 + value2.f1);
            }

            @Override
            public void open(Configuration parameters) throws Exception {
                opened.set(true);
                RuntimeContext ctx = getRuntimeContext();
                assertEquals(0, ctx.getIndexOfThisSubtask());
                assertEquals(1, ctx.getNumberOfParallelSubtasks());
                assertEquals(taskName, ctx.getTaskName());
            }

            @Override
            public void close() throws Exception {
                closed.set(true);
            }
        };
        ReduceOperatorBase<Tuple2<String, Integer>, ReduceFunction<Tuple2<String, Integer>>> op = new ReduceOperatorBase<Tuple2<String, Integer>, ReduceFunction<Tuple2<String, Integer>>>(reducer, new UnaryOperatorInformation<Tuple2<String, Integer>, Tuple2<String, Integer>>(TypeInfoParser.<Tuple2<String, Integer>>parse("Tuple2<String, Integer>"), TypeInfoParser.<Tuple2<String, Integer>>parse("Tuple2<String, " + "Integer>")), new int[] { 0 }, "TestReducer");
        List<Tuple2<String, Integer>> input = new ArrayList<Tuple2<String, Integer>>(asList(new Tuple2<String, Integer>("foo", 1), new Tuple2<String, Integer>("foo", 3), new Tuple2<String, Integer>("bar", 2), new Tuple2<String, Integer>("bar", 4)));
        final TaskInfo taskInfo = new TaskInfo(taskName, 1, 0, 1, 0);
        ExecutionConfig executionConfig = new ExecutionConfig();
        executionConfig.disableObjectReuse();
        List<Tuple2<String, Integer>> resultMutableSafe = op.executeOnCollections(input, new RuntimeUDFContext(taskInfo, null, executionConfig, new HashMap<String, Future<Path>>(), new HashMap<String, Accumulator<?, ?>>(), new UnregisteredMetricsGroup()), executionConfig);
        executionConfig.enableObjectReuse();
        List<Tuple2<String, Integer>> resultRegular = op.executeOnCollections(input, new RuntimeUDFContext(taskInfo, null, executionConfig, new HashMap<String, Future<Path>>(), new HashMap<String, Accumulator<?, ?>>(), new UnregisteredMetricsGroup()), executionConfig);
        Set<Tuple2<String, Integer>> resultSetMutableSafe = new HashSet<Tuple2<String, Integer>>(resultMutableSafe);
        Set<Tuple2<String, Integer>> resultSetRegular = new HashSet<Tuple2<String, Integer>>(resultRegular);
        Set<Tuple2<String, Integer>> expectedResult = new HashSet<Tuple2<String, Integer>>(asList(new Tuple2<String, Integer>("foo", 4), new Tuple2<String, Integer>("bar", 6)));
        assertEquals(expectedResult, resultSetMutableSafe);
        assertEquals(expectedResult, resultSetRegular);
        assertTrue(opened.get());
        assertTrue(closed.get());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : Path(org.apache.flink.core.fs.Path) UnregisteredMetricsGroup(org.apache.flink.metrics.groups.UnregisteredMetricsGroup) Configuration(org.apache.flink.configuration.Configuration) HashMap(java.util.HashMap) ReduceFunction(org.apache.flink.api.common.functions.ReduceFunction) RichReduceFunction(org.apache.flink.api.common.functions.RichReduceFunction) ArrayList(java.util.ArrayList) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TaskInfo(org.apache.flink.api.common.TaskInfo) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) RichReduceFunction(org.apache.flink.api.common.functions.RichReduceFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RuntimeUDFContext(org.apache.flink.api.common.functions.util.RuntimeUDFContext) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 5 with RichReduceFunction

use of org.apache.flink.api.common.functions.RichReduceFunction in project flink by apache.

the class ReduceTranslationTests method translateGroupedReduceNoMapper.

@Test
public void translateGroupedReduceNoMapper() {
    try {
        final int parallelism = 8;
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);
        DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);
        initialData.groupBy(2).reduce(new RichReduceFunction<Tuple3<Double, StringValue, LongValue>>() {

            public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) {
                return value1;
            }
        }).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());
        Plan p = env.createProgramPlan();
        GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
        ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();
        // check types
        assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
        assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());
        // parallelism was not configured on the operator
        assertTrue(reducer.getParallelism() == parallelism || reducer.getParallelism() == ExecutionConfig.PARALLELISM_DEFAULT);
        // check keys
        assertArrayEquals(new int[] { 2 }, reducer.getKeyColumns(0));
        assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test caused an error: " + e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) ReduceOperatorBase(org.apache.flink.api.common.operators.base.ReduceOperatorBase) Plan(org.apache.flink.api.common.Plan) RichReduceFunction(org.apache.flink.api.common.functions.RichReduceFunction) Tuple3(org.apache.flink.api.java.tuple.Tuple3) LongValue(org.apache.flink.types.LongValue) StringValue(org.apache.flink.types.StringValue) Test(org.junit.Test)

Aggregations

RichReduceFunction (org.apache.flink.api.common.functions.RichReduceFunction)7 Test (org.junit.Test)7 Plan (org.apache.flink.api.common.Plan)6 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)6 DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat)4 OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)4 SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)4 SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)4 SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode)4 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)3 ReduceOperatorBase (org.apache.flink.api.common.operators.base.ReduceOperatorBase)2 FieldList (org.apache.flink.api.common.operators.util.FieldList)2 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)2 LongValue (org.apache.flink.types.LongValue)2 StringValue (org.apache.flink.types.StringValue)2 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)1