Search in sources :

Example 11 with GroupReduceFunction

use of org.apache.flink.api.common.functions.GroupReduceFunction in project flink by apache.

Method translateToDataFlow of class GroupReduceOperator.

// --------------------------------------------------------------------------------------------
//  Translation
// --------------------------------------------------------------------------------------------
/**
 * Translates this group-reduce into the corresponding common-API operator.
 *
 * <p>If the operator is combinable and the user function is a {@code CombineFunction}, the
 * function is first replaced by a wrapper exposing it as a group-combine function. After that,
 * three cases are distinguished:
 * <ul>
 *   <li>no grouping: a {@code GroupReduceOperatorBase} without key fields, parallelism fixed to 1;</li>
 *   <li>grouping on {@code SelectorFunctionKeys}: a plan-unwrapping operator, sorted or unsorted
 *       depending on whether the grouping is a {@code SortedGrouping};</li>
 *   <li>grouping on {@code ExpressionKeys}: a {@code GroupReduceOperatorBase} on the logical key
 *       positions, with a group order attached for a {@code SortedGrouping}.</li>
 * </ul>
 *
 * @param input the operator that feeds this group-reduce
 * @return the translated operator
 * @throws UnsupportedOperationException if the grouping keys are of neither supported kind
 */
@Override
@SuppressWarnings("unchecked")
protected GroupReduceOperatorBase<?, OUT, ?> translateToDataFlow(Operator<IN> input) {
    // fall back to a generated name when the user did not set one
    final String operatorName;
    if (getName() != null) {
        operatorName = getName();
    } else {
        operatorName = "GroupReduce at " + defaultName;
    }

    // a combinable CombineFunction is exposed as a GroupCombineFunction through a wrapper
    if (combinable && function instanceof CombineFunction<?, ?>) {
        if (function instanceof RichGroupReduceFunction<?, ?>) {
            this.function = new RichCombineToGroupCombineWrapper((RichGroupReduceFunction<?, ?>) function);
        } else {
            this.function = new CombineToGroupCombineWrapper((CombineFunction<?, ?>) function);
        }
    }

    // case 1: reduce over the whole input, no grouping
    if (grouper == null) {
        UnaryOperatorInformation<IN, OUT> allReduceInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
        GroupReduceOperatorBase<IN, OUT, GroupReduceFunction<IN, OUT>> allReduce = new GroupReduceOperatorBase<>(function, allReduceInfo, new int[0], operatorName);
        allReduce.setCombinable(combinable);
        allReduce.setInput(input);
        // a non-grouped reduce can only run with parallelism 1
        allReduce.setParallelism(1);
        return allReduce;
    }

    // case 2: grouping defined through key selector functions
    if (grouper.getKeys() instanceof SelectorFunctionKeys) {
        SelectorFunctionKeys<IN, ?> groupingKeys = (SelectorFunctionKeys<IN, ?>) grouper.getKeys();

        if (!(grouper instanceof SortedGrouping)) {
            PlanUnwrappingReduceGroupOperator<IN, OUT, ?> unsortedReducer = translateSelectorFunctionReducer(groupingKeys, function, getResultType(), operatorName, input, isCombinable());
            unsortedReducer.setParallelism(this.getParallelism());
            unsortedReducer.setCustomPartitioner(grouper.getCustomPartitioner());
            return unsortedReducer;
        }

        SortedGrouping<IN> sortedGrouper = (SortedGrouping<IN>) grouper;
        SelectorFunctionKeys<IN, ?> sortingKeys = sortedGrouper.getSortSelectionFunctionKey();
        Ordering withinGroupOrder = sortedGrouper.getGroupOrdering();
        PlanUnwrappingSortedReduceGroupOperator<IN, OUT, ?, ?> sortedReducer = translateSelectorFunctionSortedReducer(groupingKeys, sortingKeys, withinGroupOrder, function, getResultType(), operatorName, input, isCombinable());
        sortedReducer.setParallelism(this.getParallelism());
        sortedReducer.setCustomPartitioner(grouper.getCustomPartitioner());
        return sortedReducer;
    }

    // anything other than expression keys is not supported from here on
    if (!(grouper.getKeys() instanceof ExpressionKeys)) {
        throw new UnsupportedOperationException("Unrecognized key type.");
    }

    // case 3: grouping defined through expression keys
    int[] keyPositions = grouper.getKeys().computeLogicalKeyPositions();
    UnaryOperatorInformation<IN, OUT> keyedInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
    GroupReduceOperatorBase<IN, OUT, GroupReduceFunction<IN, OUT>> keyedReducer = new GroupReduceOperatorBase<>(function, keyedInfo, keyPositions, operatorName);
    keyedReducer.setCombinable(combinable);
    keyedReducer.setInput(input);
    keyedReducer.setParallelism(getParallelism());
    keyedReducer.setCustomPartitioner(grouper.getCustomPartitioner());

    // attach the group order when the grouping is sorted
    if (grouper instanceof SortedGrouping) {
        SortedGrouping<IN> sortedGrouping = (SortedGrouping<IN>) grouper;
        int[] sortPositions = sortedGrouping.getGroupSortKeyPositions();
        Order[] sortDirections = sortedGrouping.getGroupSortOrders();
        Ordering groupOrdering = new Ordering();
        for (int idx = 0; idx < sortPositions.length; idx++) {
            groupOrdering.appendOrdering(sortPositions[idx], null, sortDirections[idx]);
        }
        keyedReducer.setGroupOrder(groupOrdering);
    }
    return keyedReducer;
}
Also used : RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) SelectorFunctionKeys(org.apache.flink.api.common.operators.Keys.SelectorFunctionKeys) RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) ExpressionKeys(org.apache.flink.api.common.operators.Keys.ExpressionKeys) RichCombineToGroupCombineWrapper(org.apache.flink.api.java.operators.translation.RichCombineToGroupCombineWrapper) CombineToGroupCombineWrapper(org.apache.flink.api.java.operators.translation.CombineToGroupCombineWrapper) UnaryOperatorInformation(org.apache.flink.api.common.operators.UnaryOperatorInformation) RichCombineToGroupCombineWrapper(org.apache.flink.api.java.operators.translation.RichCombineToGroupCombineWrapper) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) Ordering(org.apache.flink.api.common.operators.Ordering)

Example 12 with GroupReduceFunction

use of org.apache.flink.api.common.functions.GroupReduceFunction in project flink by apache.

Method testAllReduceDriverMutable of class GroupReduceDriverTest.

/**
 * Runs a {@code GroupReduceDriver} with the {@code SORTED_GROUP_REDUCE} strategy over the
 * mutable reduce test data, using {@code ConcatSumMutableReducer} as the user function, and
 * compares the gathered output with the expected grouped result.
 *
 * <p>Any exception is printed and turned into a test failure carrying the exception message.
 */
@Test
public void testAllReduceDriverMutable() {
    try {
        TestTaskContext<GroupReduceFunction<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>>, Tuple2<StringValue, IntValue>> taskContext = new TestTaskContext<>();

        // input records and the type information derived from the first record
        List<Tuple2<StringValue, IntValue>> records = DriverTestData.createReduceMutableData();
        TupleTypeInfo<Tuple2<StringValue, IntValue>> tupleType = (TupleTypeInfo<Tuple2<StringValue, IntValue>>) TypeExtractor.getForObject(records.get(0));

        MutableObjectIterator<Tuple2<StringValue, IntValue>> inputIterator = new RegularToMutableObjectIterator<>(records.iterator(), tupleType.createSerializer(new ExecutionConfig()));
        // compares on field 0
        TypeComparator<Tuple2<StringValue, IntValue>> keyComparator = tupleType.createComparator(new int[] { 0 }, new boolean[] { true }, 0, new ExecutionConfig());
        GatheringCollector<Tuple2<StringValue, IntValue>> collected = new GatheringCollector<>(tupleType.createSerializer(new ExecutionConfig()));

        // wire the driver's task context
        taskContext.setDriverStrategy(DriverStrategy.SORTED_GROUP_REDUCE);
        taskContext.setInput1(inputIterator, tupleType.createSerializer(new ExecutionConfig()));
        taskContext.setComparator1(keyComparator);
        taskContext.setCollector(collected);
        taskContext.setUdf(new ConcatSumMutableReducer());

        GroupReduceDriver<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>> reduceDriver = new GroupReduceDriver<>();
        reduceDriver.setup(taskContext);
        reduceDriver.prepare();
        reduceDriver.run();

        Object[] actualTuples = collected.getList().toArray();
        Object[] expectedTuples = DriverTestData.createReduceMutableDataGroupedResult().toArray();
        DriverTestData.compareTupleArrays(expectedTuples, actualTuples);
    } catch (Exception ex) {
        System.err.println(ex.getMessage());
        ex.printStackTrace();
        Assert.fail(ex.getMessage());
    }
}
Also used : GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) GroupReduceDriver(org.apache.flink.runtime.operators.GroupReduceDriver) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StringValue(org.apache.flink.types.StringValue) IntValue(org.apache.flink.types.IntValue) RegularToMutableObjectIterator(org.apache.flink.runtime.util.RegularToMutableObjectIterator) Test(org.junit.Test)

Example 13 with GroupReduceFunction

use of org.apache.flink.api.common.functions.GroupReduceFunction in project flink by apache.

Method testGroupReduceSelectorKeysWithSemProps of class GroupReduceITCase.

/**
 * Tests that semantic properties are correctly adapted when using selector keys.
 *
 * <p>The 5-tuple data set is grouped by a key selector and passed through an identity group
 * reduce annotated with forwarded field "0"; the result is grouped on field 0 and reduced to
 * (field 0, sum of field 1) pairs, which are compared with the expected tuples.
 */
@Test
public void testGroupReduceSelectorKeysWithSemProps() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);
    DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);

    // grouping key computed from four of the five fields
    final KeySelector<Tuple5<Integer, Long, Integer, String, Long>, Long> computedKey = new KeySelector<Tuple5<Integer, Long, Integer, String, Long>, Long>() {

        @Override
        public Long getKey(Tuple5<Integer, Long, Integer, String, Long> v) throws Exception {
            return (v.f0 * v.f1) - (v.f2 * v.f4);
        }
    };

    // emits every record of a group unchanged
    final GroupReduceFunction<Tuple5<Integer, Long, Integer, String, Long>, Tuple5<Integer, Long, Integer, String, Long>> forwardAll = new GroupReduceFunction<Tuple5<Integer, Long, Integer, String, Long>, Tuple5<Integer, Long, Integer, String, Long>>() {

        @Override
        public void reduce(Iterable<Tuple5<Integer, Long, Integer, String, Long>> values, Collector<Tuple5<Integer, Long, Integer, String, Long>> out) throws Exception {
            for (Tuple5<Integer, Long, Integer, String, Long> record : values) {
                out.collect(record);
            }
        }
    };

    // emits one (last seen f0, sum of f1) pair per group
    final GroupReduceFunction<Tuple5<Integer, Long, Integer, String, Long>, Tuple2<Integer, Long>> sumSecondField = new GroupReduceFunction<Tuple5<Integer, Long, Integer, String, Long>, Tuple2<Integer, Long>>() {

        @Override
        public void reduce(Iterable<Tuple5<Integer, Long, Integer, String, Long>> values, Collector<Tuple2<Integer, Long>> out) throws Exception {
            int groupKey = 0;
            long total = 0;
            for (Tuple5<Integer, Long, Integer, String, Long> record : values) {
                groupKey = record.f0;
                total += record.f1;
            }
            out.collect(new Tuple2<>(groupKey, total));
        }
    };

    DataSet<Tuple2<Integer, Long>> reduceDs = ds
            .groupBy(computedKey)
            .reduceGroup(forwardAll)
            .withForwardedFields("0")
            .groupBy(0)
            .reduceGroup(sumSecondField);

    List<Tuple2<Integer, Long>> result = reduceDs.collect();
    String expected = "1,1\n"
            + "2,5\n"
            + "3,15\n"
            + "4,34\n"
            + "5,65\n";
    compareResultAsTuples(result, expected);
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) Tuple5(org.apache.flink.api.java.tuple.Tuple5) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Collector(org.apache.flink.util.Collector) Test(org.junit.Test)

Aggregations

GroupReduceFunction (org.apache.flink.api.common.functions.GroupReduceFunction) 13, RichGroupReduceFunction (org.apache.flink.api.common.functions.RichGroupReduceFunction) 11, Tuple2 (org.apache.flink.api.java.tuple.Tuple2) 10, Test (org.junit.Test) 8, ExecutionConfig (org.apache.flink.api.common.ExecutionConfig) 7, TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo) 5, GroupReduceDriver (org.apache.flink.runtime.operators.GroupReduceDriver) 5, Collector (org.apache.flink.util.Collector) 5, RegularToMutableObjectIterator (org.apache.flink.runtime.util.RegularToMutableObjectIterator) 4, HashSet (java.util.HashSet) 3, UnaryOperatorInformation (org.apache.flink.api.common.operators.UnaryOperatorInformation) 3, GroupReduceOperatorBase (org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) 3, ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment) 3, IntValue (org.apache.flink.types.IntValue) 3, StringValue (org.apache.flink.types.StringValue) 3, ArrayList (java.util.ArrayList) 2, Keys (org.apache.flink.api.common.operators.Keys) 2, SingleInputSemanticProperties (org.apache.flink.api.common.operators.SingleInputSemanticProperties) 2, AggregationFunction (org.apache.flink.api.java.aggregation.AggregationFunction) 2, HashMap (java.util.HashMap) 1