use of org.apache.flink.api.common.functions.GroupReduceFunction in project flink by apache.
the class GroupReduceOperator method translateToDataFlow.
// --------------------------------------------------------------------------------------------
// Translation
// --------------------------------------------------------------------------------------------
@Override
@SuppressWarnings("unchecked")
protected GroupReduceOperatorBase<?, OUT, ?> translateToDataFlow(Operator<IN> input) {
String name = getName() != null ? getName() : "GroupReduce at " + defaultName;
// wrap CombineFunction in GroupCombineFunction if combinable
if (combinable && function instanceof CombineFunction<?, ?>) {
this.function = function instanceof RichGroupReduceFunction<?, ?> ? new RichCombineToGroupCombineWrapper((RichGroupReduceFunction<?, ?>) function) : new CombineToGroupCombineWrapper((CombineFunction<?, ?>) function);
}
// distinguish between grouped reduce and non-grouped reduce
if (grouper == null) {
// non grouped reduce
UnaryOperatorInformation<IN, OUT> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
GroupReduceOperatorBase<IN, OUT, GroupReduceFunction<IN, OUT>> po = new GroupReduceOperatorBase<>(function, operatorInfo, new int[0], name);
po.setCombinable(combinable);
po.setInput(input);
// the parallelism for a non grouped reduce can only be 1
po.setParallelism(1);
return po;
}
if (grouper.getKeys() instanceof SelectorFunctionKeys) {
@SuppressWarnings("unchecked") SelectorFunctionKeys<IN, ?> selectorKeys = (SelectorFunctionKeys<IN, ?>) grouper.getKeys();
if (grouper instanceof SortedGrouping) {
SortedGrouping<IN> sortedGrouping = (SortedGrouping<IN>) grouper;
SelectorFunctionKeys<IN, ?> sortKeys = sortedGrouping.getSortSelectionFunctionKey();
Ordering groupOrder = sortedGrouping.getGroupOrdering();
PlanUnwrappingSortedReduceGroupOperator<IN, OUT, ?, ?> po = translateSelectorFunctionSortedReducer(selectorKeys, sortKeys, groupOrder, function, getResultType(), name, input, isCombinable());
po.setParallelism(this.getParallelism());
po.setCustomPartitioner(grouper.getCustomPartitioner());
return po;
} else {
PlanUnwrappingReduceGroupOperator<IN, OUT, ?> po = translateSelectorFunctionReducer(selectorKeys, function, getResultType(), name, input, isCombinable());
po.setParallelism(this.getParallelism());
po.setCustomPartitioner(grouper.getCustomPartitioner());
return po;
}
} else if (grouper.getKeys() instanceof ExpressionKeys) {
int[] logicalKeyPositions = grouper.getKeys().computeLogicalKeyPositions();
UnaryOperatorInformation<IN, OUT> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
GroupReduceOperatorBase<IN, OUT, GroupReduceFunction<IN, OUT>> po = new GroupReduceOperatorBase<>(function, operatorInfo, logicalKeyPositions, name);
po.setCombinable(combinable);
po.setInput(input);
po.setParallelism(getParallelism());
po.setCustomPartitioner(grouper.getCustomPartitioner());
// set group order
if (grouper instanceof SortedGrouping) {
SortedGrouping<IN> sortedGrouper = (SortedGrouping<IN>) grouper;
int[] sortKeyPositions = sortedGrouper.getGroupSortKeyPositions();
Order[] sortOrders = sortedGrouper.getGroupSortOrders();
Ordering o = new Ordering();
for (int i = 0; i < sortKeyPositions.length; i++) {
o.appendOrdering(sortKeyPositions[i], null, sortOrders[i]);
}
po.setGroupOrder(o);
}
return po;
} else {
throw new UnsupportedOperationException("Unrecognized key type.");
}
}
use of org.apache.flink.api.common.functions.GroupReduceFunction in project flink by apache.
the class GroupReduceDriverTest method testAllReduceDriverMutable.
@Test
public void testAllReduceDriverMutable() {
try {
TestTaskContext<GroupReduceFunction<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>>, Tuple2<StringValue, IntValue>> context = new TestTaskContext<GroupReduceFunction<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>>, Tuple2<StringValue, IntValue>>();
List<Tuple2<StringValue, IntValue>> data = DriverTestData.createReduceMutableData();
TupleTypeInfo<Tuple2<StringValue, IntValue>> typeInfo = (TupleTypeInfo<Tuple2<StringValue, IntValue>>) TypeExtractor.getForObject(data.get(0));
MutableObjectIterator<Tuple2<StringValue, IntValue>> input = new RegularToMutableObjectIterator<Tuple2<StringValue, IntValue>>(data.iterator(), typeInfo.createSerializer(new ExecutionConfig()));
TypeComparator<Tuple2<StringValue, IntValue>> comparator = typeInfo.createComparator(new int[] { 0 }, new boolean[] { true }, 0, new ExecutionConfig());
GatheringCollector<Tuple2<StringValue, IntValue>> result = new GatheringCollector<Tuple2<StringValue, IntValue>>(typeInfo.createSerializer(new ExecutionConfig()));
context.setDriverStrategy(DriverStrategy.SORTED_GROUP_REDUCE);
context.setInput1(input, typeInfo.createSerializer(new ExecutionConfig()));
context.setComparator1(comparator);
context.setCollector(result);
context.setUdf(new ConcatSumMutableReducer());
GroupReduceDriver<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>> driver = new GroupReduceDriver<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>>();
driver.setup(context);
driver.prepare();
driver.run();
Object[] res = result.getList().toArray();
Object[] expected = DriverTestData.createReduceMutableDataGroupedResult().toArray();
DriverTestData.compareTupleArrays(expected, res);
} catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
use of org.apache.flink.api.common.functions.GroupReduceFunction in project flink by apache.
the class GroupReduceITCase method testGroupReduceSelectorKeysWithSemProps.
@Test
public void testGroupReduceSelectorKeysWithSemProps() throws Exception {
/*
* Test that semantic properties are correctly adapted when using Selector Keys
*/
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(4);
DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
DataSet<Tuple2<Integer, Long>> reduceDs = ds.groupBy(new KeySelector<Tuple5<Integer, Long, Integer, String, Long>, Long>() {
@Override
public Long getKey(Tuple5<Integer, Long, Integer, String, Long> v) throws Exception {
return (v.f0 * v.f1) - (v.f2 * v.f4);
}
}).reduceGroup(new GroupReduceFunction<Tuple5<Integer, Long, Integer, String, Long>, Tuple5<Integer, Long, Integer, String, Long>>() {
@Override
public void reduce(Iterable<Tuple5<Integer, Long, Integer, String, Long>> values, Collector<Tuple5<Integer, Long, Integer, String, Long>> out) throws Exception {
for (Tuple5<Integer, Long, Integer, String, Long> v : values) {
out.collect(v);
}
}
}).withForwardedFields("0").groupBy(0).reduceGroup(new GroupReduceFunction<Tuple5<Integer, Long, Integer, String, Long>, Tuple2<Integer, Long>>() {
@Override
public void reduce(Iterable<Tuple5<Integer, Long, Integer, String, Long>> values, Collector<Tuple2<Integer, Long>> out) throws Exception {
int k = 0;
long s = 0;
for (Tuple5<Integer, Long, Integer, String, Long> v : values) {
k = v.f0;
s += v.f1;
}
out.collect(new Tuple2<>(k, s));
}
});
List<Tuple2<Integer, Long>> result = reduceDs.collect();
String expected = "1,1\n" + "2,5\n" + "3,15\n" + "4,34\n" + "5,65\n";
compareResultAsTuples(result, expected);
}
Aggregations