Search in sources :

Example 1 with GroupReduceFunction

use of org.apache.flink.api.common.functions.GroupReduceFunction in project flink by apache.

the class ScalaAggregateOperator method translateToDataFlow.

/**
 * Translates this aggregate operator into a {@link GroupReduceOperatorBase} that runs an
 * {@code AggregatingUdf} over the configured aggregation functions and fields.
 *
 * <p>Without a grouping the operator is created with an empty key array. With a grouping on
 * {@code Keys.ExpressionKeys} the logical key positions are used as keys, key fields that are
 * not aggregated are registered as forwarded fields, and the grouping's custom partitioner is
 * applied.
 *
 * @param input the operator that provides the input of the aggregation
 * @return the configured group-reduce operator (combinable, with input and parallelism set)
 * @throws IllegalStateException if no aggregation function is registered or the number of
 *         aggregation functions differs from the number of fields
 * @throws UnsupportedOperationException if the grouping uses {@code Keys.SelectorFunctionKeys}
 *         or any other key type than {@code Keys.ExpressionKeys}
 */
@SuppressWarnings("unchecked")
@Override
protected org.apache.flink.api.common.operators.base.GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>> translateToDataFlow(Operator<IN> input) {
    // sanity check: there must be exactly one field per aggregation function
    if (this.aggregationFunctions.isEmpty() || this.aggregationFunctions.size() != this.fields.size()) {
        throw new IllegalStateException("Aggregation requires at least one aggregation function and exactly one field per function, but found " + this.aggregationFunctions.size() + " function(s) and " + this.fields.size() + " field(s).");
    }
    // construct the aggregation function
    AggregationFunction<Object>[] aggFunctions = new AggregationFunction[this.aggregationFunctions.size()];
    // named differently from the member list to avoid shadowing this.fields
    int[] aggFields = new int[this.fields.size()];
    StringBuilder genName = new StringBuilder();
    for (int i = 0; i < aggFields.length; i++) {
        aggFunctions[i] = (AggregationFunction<Object>) this.aggregationFunctions.get(i);
        aggFields[i] = this.fields.get(i);
        genName.append(aggFunctions[i].toString()).append('(').append(aggFields[i]).append(')').append(',');
    }
    // drop the trailing comma; safe because at least one function was appended
    genName.setLength(genName.length() - 1);
    @SuppressWarnings("rawtypes") RichGroupReduceFunction<IN, IN> function = new AggregatingUdf(getInputType(), aggFunctions, aggFields);
    // an explicitly set name wins over the generated "agg(field),..." name
    String name = getName() != null ? getName() : genName.toString();
    // distinguish between grouped reduce and non-grouped reduce; unsupported key
    // types are rejected before any operator is built
    final boolean grouped = this.grouping != null;
    final int[] logicalKeyPositions;
    if (!grouped) {
        // non grouped aggregation
        logicalKeyPositions = new int[0];
    } else if (this.grouping.getKeys() instanceof Keys.ExpressionKeys) {
        // grouped aggregation
        logicalKeyPositions = this.grouping.getKeys().computeLogicalKeyPositions();
    } else if (this.grouping.getKeys() instanceof Keys.SelectorFunctionKeys) {
        throw new UnsupportedOperationException("Aggregate does not support grouping with KeySelector functions, yet.");
    } else {
        throw new UnsupportedOperationException("Unrecognized key type.");
    }
    UnaryOperatorInformation<IN, IN> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
    GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>> po = new GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>>(function, operatorInfo, logicalKeyPositions, name);
    po.setCombinable(true);
    // set input
    po.setInput(input);
    // set parallelism
    po.setParallelism(this.getParallelism());
    if (grouped) {
        // key fields that are not aggregated are registered as forwarded fields
        SingleInputSemanticProperties props = new SingleInputSemanticProperties();
        for (int keyField : logicalKeyPositions) {
            boolean keyFieldUsedInAgg = false;
            for (int aggField : aggFields) {
                if (keyField == aggField) {
                    keyFieldUsedInAgg = true;
                    break;
                }
            }
            if (!keyFieldUsedInAgg) {
                props.addForwardedField(keyField, keyField);
            }
        }
        po.setSemanticProperties(props);
        po.setCustomPartitioner(grouping.getCustomPartitioner());
    }
    return po;
}
Also used : RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) AggregationFunction(org.apache.flink.api.java.aggregation.AggregationFunction) UnaryOperatorInformation(org.apache.flink.api.common.operators.UnaryOperatorInformation) Keys(org.apache.flink.api.common.operators.Keys) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) SingleInputSemanticProperties(org.apache.flink.api.common.operators.SingleInputSemanticProperties)

Example 2 with GroupReduceFunction

use of org.apache.flink.api.common.functions.GroupReduceFunction in project flink by apache.

the class GroupReduceDriverTest method testAllReduceDriverAccumulatingImmutable.

/**
 * Runs the sorted group-reduce driver with {@code ConcatSumMutableAccumulatingReducer} on the
 * mutable reduce data with mutable object mode switched off, and compares the collected output
 * against the grouped reference result.
 */
@Test
public void testAllReduceDriverAccumulatingImmutable() {
    try {
        TestTaskContext<GroupReduceFunction<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>>, Tuple2<StringValue, IntValue>> taskContext = new TestTaskContext<GroupReduceFunction<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>>, Tuple2<StringValue, IntValue>>();

        // input records, their type information, and the derived iterator / comparator / collector
        List<Tuple2<StringValue, IntValue>> records = DriverTestData.createReduceMutableData();
        TupleTypeInfo<Tuple2<StringValue, IntValue>> tupleType = (TupleTypeInfo<Tuple2<StringValue, IntValue>>) TypeExtractor.getForObject(records.get(0));
        MutableObjectIterator<Tuple2<StringValue, IntValue>> source = new RegularToMutableObjectIterator<Tuple2<StringValue, IntValue>>(records.iterator(), tupleType.createSerializer(new ExecutionConfig()));
        TypeComparator<Tuple2<StringValue, IntValue>> keyComparator = tupleType.createComparator(new int[] { 0 }, new boolean[] { true }, 0, new ExecutionConfig());
        GatheringCollector<Tuple2<StringValue, IntValue>> sink = new GatheringCollector<Tuple2<StringValue, IntValue>>(tupleType.createSerializer(new ExecutionConfig()));

        // wire up the task context
        taskContext.setDriverStrategy(DriverStrategy.SORTED_GROUP_REDUCE);
        taskContext.setMutableObjectMode(false);
        taskContext.setUdf(new ConcatSumMutableAccumulatingReducer());
        taskContext.setInput1(source, tupleType.createSerializer(new ExecutionConfig()));
        taskContext.setComparator1(keyComparator);
        taskContext.setCollector(sink);

        // execute the driver life cycle
        GroupReduceDriver<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>> reduceDriver = new GroupReduceDriver<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>>();
        reduceDriver.setup(taskContext);
        reduceDriver.prepare();
        reduceDriver.run();

        // verify the collected output against the reference result
        Object[] actual = sink.getList().toArray();
        Object[] reference = DriverTestData.createReduceMutableDataGroupedResult().toArray();
        DriverTestData.compareTupleArrays(reference, actual);
    } catch (Exception failure) {
        System.err.println(failure.getMessage());
        failure.printStackTrace();
        Assert.fail(failure.getMessage());
    }
}
Also used : GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) GroupReduceDriver(org.apache.flink.runtime.operators.GroupReduceDriver) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StringValue(org.apache.flink.types.StringValue) IntValue(org.apache.flink.types.IntValue) RegularToMutableObjectIterator(org.apache.flink.runtime.util.RegularToMutableObjectIterator) Test(org.junit.Test)

Example 3 with GroupReduceFunction

use of org.apache.flink.api.common.functions.GroupReduceFunction in project flink by apache.

the class GroupReduceDriverTest method testAllReduceDriverImmutableEmpty.

/**
 * Runs the sorted group-reduce driver on an empty input iterator and checks that nothing was
 * collected. No UDF is registered on the context in this test.
 */
@Test
public void testAllReduceDriverImmutableEmpty() {
    try {
        TestTaskContext<GroupReduceFunction<Tuple2<String, Integer>, Tuple2<String, Integer>>, Tuple2<String, Integer>> taskContext = new TestTaskContext<GroupReduceFunction<Tuple2<String, Integer>, Tuple2<String, Integer>>, Tuple2<String, Integer>>();

        // the sample data is only used to derive the tuple type information
        List<Tuple2<String, Integer>> sample = DriverTestData.createReduceImmutableData();
        TupleTypeInfo<Tuple2<String, Integer>> tupleType = (TupleTypeInfo<Tuple2<String, Integer>>) TypeExtractor.getForObject(sample.get(0));
        MutableObjectIterator<Tuple2<String, Integer>> emptySource = EmptyMutableObjectIterator.get();
        TypeComparator<Tuple2<String, Integer>> keyComparator = tupleType.createComparator(new int[] { 0 }, new boolean[] { true }, 0, new ExecutionConfig());
        GatheringCollector<Tuple2<String, Integer>> sink = new GatheringCollector<Tuple2<String, Integer>>(tupleType.createSerializer(new ExecutionConfig()));

        // wire up the task context
        taskContext.setDriverStrategy(DriverStrategy.SORTED_GROUP_REDUCE);
        taskContext.setInput1(emptySource, tupleType.createSerializer(new ExecutionConfig()));
        taskContext.setComparator1(keyComparator);
        taskContext.setCollector(sink);

        // execute the driver life cycle
        GroupReduceDriver<Tuple2<String, Integer>, Tuple2<String, Integer>> reduceDriver = new GroupReduceDriver<Tuple2<String, Integer>, Tuple2<String, Integer>>();
        reduceDriver.setup(taskContext);
        reduceDriver.prepare();
        reduceDriver.run();

        // an empty input must not produce any output
        Assert.assertTrue(sink.getList().isEmpty());
    } catch (Exception failure) {
        System.err.println(failure.getMessage());
        failure.printStackTrace();
        Assert.fail(failure.getMessage());
    }
}
Also used : GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) GroupReduceDriver(org.apache.flink.runtime.operators.GroupReduceDriver) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Test(org.junit.Test)

Example 4 with GroupReduceFunction

use of org.apache.flink.api.common.functions.GroupReduceFunction in project flink by apache.

the class GroupReduceDriverTest method testAllReduceDriverImmutable.

/**
 * Runs the sorted group-reduce driver with {@code ConcatSumReducer} on the immutable reduce
 * data and compares the collected output against the grouped reference result.
 */
@Test
public void testAllReduceDriverImmutable() {
    try {
        TestTaskContext<GroupReduceFunction<Tuple2<String, Integer>, Tuple2<String, Integer>>, Tuple2<String, Integer>> taskContext = new TestTaskContext<GroupReduceFunction<Tuple2<String, Integer>, Tuple2<String, Integer>>, Tuple2<String, Integer>>();

        // input records, their type information, and the derived iterator / comparator / collector
        List<Tuple2<String, Integer>> records = DriverTestData.createReduceImmutableData();
        TupleTypeInfo<Tuple2<String, Integer>> tupleType = (TupleTypeInfo<Tuple2<String, Integer>>) TypeExtractor.getForObject(records.get(0));
        MutableObjectIterator<Tuple2<String, Integer>> source = new RegularToMutableObjectIterator<Tuple2<String, Integer>>(records.iterator(), tupleType.createSerializer(new ExecutionConfig()));
        TypeComparator<Tuple2<String, Integer>> keyComparator = tupleType.createComparator(new int[] { 0 }, new boolean[] { true }, 0, new ExecutionConfig());
        GatheringCollector<Tuple2<String, Integer>> sink = new GatheringCollector<Tuple2<String, Integer>>(tupleType.createSerializer(new ExecutionConfig()));

        // wire up the task context
        taskContext.setDriverStrategy(DriverStrategy.SORTED_GROUP_REDUCE);
        taskContext.setUdf(new ConcatSumReducer());
        taskContext.setInput1(source, tupleType.createSerializer(new ExecutionConfig()));
        taskContext.setComparator1(keyComparator);
        taskContext.setCollector(sink);

        // execute the driver life cycle
        GroupReduceDriver<Tuple2<String, Integer>, Tuple2<String, Integer>> reduceDriver = new GroupReduceDriver<Tuple2<String, Integer>, Tuple2<String, Integer>>();
        reduceDriver.setup(taskContext);
        reduceDriver.prepare();
        reduceDriver.run();

        // verify the collected output against the reference result
        Object[] actual = sink.getList().toArray();
        Object[] reference = DriverTestData.createReduceImmutableDataGroupedResult().toArray();
        DriverTestData.compareTupleArrays(reference, actual);
    } catch (Exception failure) {
        System.err.println(failure.getMessage());
        failure.printStackTrace();
        Assert.fail(failure.getMessage());
    }
}
Also used : GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) GroupReduceDriver(org.apache.flink.runtime.operators.GroupReduceDriver) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RegularToMutableObjectIterator(org.apache.flink.runtime.util.RegularToMutableObjectIterator) Test(org.junit.Test)

Example 5 with GroupReduceFunction

use of org.apache.flink.api.common.functions.GroupReduceFunction in project flink by apache.

the class GroupReduceDriverTest method testAllReduceDriverIncorrectlyAccumulatingMutable.

/**
 * Runs the sorted group-reduce driver with {@code ConcatSumMutableAccumulatingReducer} on the
 * mutable reduce data, without setting the mutable object mode on the context, and expects the
 * collected output to differ from the grouped reference result.
 *
 * <p>The mismatch is recorded in a flag and checked outside the inner try/catch. Calling
 * {@code Assert.fail} inside a try block that catches {@link AssertionError} would swallow the
 * failure, and the test would pass regardless of the outcome.
 */
@Test
public void testAllReduceDriverIncorrectlyAccumulatingMutable() {
    try {
        TestTaskContext<GroupReduceFunction<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>>, Tuple2<StringValue, IntValue>> context = new TestTaskContext<GroupReduceFunction<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>>, Tuple2<StringValue, IntValue>>();
        List<Tuple2<StringValue, IntValue>> data = DriverTestData.createReduceMutableData();
        TupleTypeInfo<Tuple2<StringValue, IntValue>> typeInfo = (TupleTypeInfo<Tuple2<StringValue, IntValue>>) TypeExtractor.getForObject(data.get(0));
        MutableObjectIterator<Tuple2<StringValue, IntValue>> input = new RegularToMutableObjectIterator<Tuple2<StringValue, IntValue>>(data.iterator(), typeInfo.createSerializer(new ExecutionConfig()));
        TypeComparator<Tuple2<StringValue, IntValue>> comparator = typeInfo.createComparator(new int[] { 0 }, new boolean[] { true }, 0, new ExecutionConfig());
        GatheringCollector<Tuple2<StringValue, IntValue>> result = new GatheringCollector<Tuple2<StringValue, IntValue>>(typeInfo.createSerializer(new ExecutionConfig()));
        context.setDriverStrategy(DriverStrategy.SORTED_GROUP_REDUCE);
        context.setInput1(input, typeInfo.createSerializer(new ExecutionConfig()));
        context.setComparator1(comparator);
        context.setCollector(result);
        context.setUdf(new ConcatSumMutableAccumulatingReducer());
        GroupReduceDriver<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>> driver = new GroupReduceDriver<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>>();
        driver.setup(context);
        driver.prepare();
        driver.run();
        Object[] res = result.getList().toArray();
        Object[] expected = DriverTestData.createReduceMutableDataGroupedResult().toArray();
        // Only the comparison may raise the expected AssertionError; the verdict is
        // issued afterwards so that it cannot be caught by the same handler.
        boolean mismatchDetected = false;
        try {
            DriverTestData.compareTupleArrays(expected, res);
        } catch (AssertionError e) {
            // expected
            mismatchDetected = true;
        }
        // NOTE(review): this check was previously swallowed by the catch above. If it
        // now fails, the context presumably needs mutable object mode enabled for the
        // reducer to produce incorrect values - confirm against TestTaskContext.
        if (!mismatchDetected) {
            Assert.fail("Accumulationg mutable objects is expected to result in incorrect values.");
        }
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Also used : GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) GroupReduceDriver(org.apache.flink.runtime.operators.GroupReduceDriver) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StringValue(org.apache.flink.types.StringValue) IntValue(org.apache.flink.types.IntValue) RegularToMutableObjectIterator(org.apache.flink.runtime.util.RegularToMutableObjectIterator) Test(org.junit.Test)

Aggregations

GroupReduceFunction (org.apache.flink.api.common.functions.GroupReduceFunction)13 RichGroupReduceFunction (org.apache.flink.api.common.functions.RichGroupReduceFunction)11 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)10 Test (org.junit.Test)8 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)7 TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo)5 GroupReduceDriver (org.apache.flink.runtime.operators.GroupReduceDriver)5 Collector (org.apache.flink.util.Collector)5 RegularToMutableObjectIterator (org.apache.flink.runtime.util.RegularToMutableObjectIterator)4 HashSet (java.util.HashSet)3 UnaryOperatorInformation (org.apache.flink.api.common.operators.UnaryOperatorInformation)3 GroupReduceOperatorBase (org.apache.flink.api.common.operators.base.GroupReduceOperatorBase)3 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)3 IntValue (org.apache.flink.types.IntValue)3 StringValue (org.apache.flink.types.StringValue)3 ArrayList (java.util.ArrayList)2 Keys (org.apache.flink.api.common.operators.Keys)2 SingleInputSemanticProperties (org.apache.flink.api.common.operators.SingleInputSemanticProperties)2 AggregationFunction (org.apache.flink.api.java.aggregation.AggregationFunction)2 HashMap (java.util.HashMap)1