use of org.apache.flink.api.common.functions.GroupReduceFunction in project flink by apache.
the class ScalaAggregateOperator method translateToDataFlow.
/**
 * Translates this aggregate operator into a {@code GroupReduceOperatorBase} of the common API.
 *
 * <p>The configured aggregation functions and field positions are bundled into a single
 * {@code AggregatingUdf}. If no explicit operator name was set, a name of the form
 * {@code agg1(field1),agg2(field2),...} is generated from the aggregation functions and fields.
 * The resulting operator is marked combinable and inherits this operator's parallelism.
 *
 * <p>For grouped aggregations (expression keys only), every key field that is not itself an
 * aggregated field is declared as a forwarded field, and the grouping's custom partitioner is
 * passed on to the operator.
 *
 * @param input the operator that produces the input of the aggregation
 * @return the group-reduce operator that performs the aggregation
 * @throws IllegalStateException if no aggregation was defined, or if the number of aggregation
 *         functions differs from the number of aggregated fields
 * @throws UnsupportedOperationException if the grouping uses key selector functions or an
 *         unrecognized key type
 */
@SuppressWarnings("unchecked")
@Override
protected org.apache.flink.api.common.operators.base.GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>> translateToDataFlow(Operator<IN> input) {
    // sanity check: there must be at least one aggregation, and exactly one field per aggregation
    final int numAggregations = this.aggregationFunctions.size();
    final int numFields = this.fields.size();
    if (numAggregations == 0 || numAggregations != numFields) {
        throw new IllegalStateException("Aggregate requires at least one aggregation function and exactly one field per function, but found "
                + numAggregations + " aggregation function(s) and " + numFields + " field(s).");
    }

    // construct the aggregation function and, alongside, the generated operator name
    AggregationFunction<Object>[] aggFunctions = new AggregationFunction[numAggregations];
    int[] fieldPositions = new int[numFields];
    StringBuilder genName = new StringBuilder();
    for (int i = 0; i < fieldPositions.length; i++) {
        aggFunctions[i] = (AggregationFunction<Object>) this.aggregationFunctions.get(i);
        fieldPositions[i] = this.fields.get(i);
        genName.append(aggFunctions[i].toString()).append('(').append(fieldPositions[i]).append(')').append(',');
    }
    // drop the trailing comma; safe because the sanity check guarantees at least one entry
    genName.setLength(genName.length() - 1);

    @SuppressWarnings("rawtypes") RichGroupReduceFunction<IN, IN> function = new AggregatingUdf(getInputType(), aggFunctions, fieldPositions);
    String name = getName() != null ? getName() : genName.toString();

    // distinguish between grouped reduce and non-grouped reduce: resolve the key positions first,
    // rejecting unsupported key types before any operator is created
    final int[] logicalKeyPositions;
    if (this.grouping == null) {
        // non grouped aggregation: no key fields
        logicalKeyPositions = new int[0];
    } else if (this.grouping.getKeys() instanceof Keys.ExpressionKeys) {
        // grouped aggregation
        logicalKeyPositions = this.grouping.getKeys().computeLogicalKeyPositions();
    } else if (this.grouping.getKeys() instanceof Keys.SelectorFunctionKeys) {
        throw new UnsupportedOperationException("Aggregate does not support grouping with KeySelector functions, yet.");
    } else {
        throw new UnsupportedOperationException("Unrecognized key type.");
    }

    // the operator setup shared by the grouped and the non-grouped case
    UnaryOperatorInformation<IN, IN> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
    GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>> po = new GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>>(function, operatorInfo, logicalKeyPositions, name);
    po.setCombinable(true);
    // set input
    po.setInput(input);
    // set parallelism
    po.setParallelism(this.getParallelism());

    if (this.grouping == null) {
        return po;
    }

    // grouped case only: key fields that are not aggregated are forwarded unchanged
    SingleInputSemanticProperties props = new SingleInputSemanticProperties();
    for (int keyField : logicalKeyPositions) {
        boolean keyFieldUsedInAgg = false;
        for (int aggField : fieldPositions) {
            if (keyField == aggField) {
                keyFieldUsedInAgg = true;
                break;
            }
        }
        if (!keyFieldUsedInAgg) {
            props.addForwardedField(keyField, keyField);
        }
    }
    po.setSemanticProperties(props);
    po.setCustomPartitioner(grouping.getCustomPartitioner());
    return po;
}
use of org.apache.flink.api.common.functions.GroupReduceFunction in project flink by apache.
the class GroupReduceDriverTest method testAllReduceDriverAccumulatingImmutable.
/**
 * Runs a {@code GroupReduceDriver} with the {@code SORTED_GROUP_REDUCE} strategy over the
 * mutable reduce test data, using {@code ConcatSumMutableAccumulatingReducer} with mutable
 * object mode switched off, and compares the collected output against
 * {@code DriverTestData.createReduceMutableDataGroupedResult()}.
 */
@Test
public void testAllReduceDriverAccumulatingImmutable() {
    try {
        TestTaskContext<GroupReduceFunction<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>>, Tuple2<StringValue, IntValue>> ctx = new TestTaskContext<>();

        List<Tuple2<StringValue, IntValue>> records = DriverTestData.createReduceMutableData();
        // the type information is derived from the first record
        TupleTypeInfo<Tuple2<StringValue, IntValue>> tupleType = (TupleTypeInfo<Tuple2<StringValue, IntValue>>) TypeExtractor.getForObject(records.get(0));

        MutableObjectIterator<Tuple2<StringValue, IntValue>> source = new RegularToMutableObjectIterator<>(records.iterator(), tupleType.createSerializer(new ExecutionConfig()));

        // group on tuple field 0, ascending
        int[] keyFields = { 0 };
        boolean[] sortAscending = { true };
        TypeComparator<Tuple2<StringValue, IntValue>> keyComparator = tupleType.createComparator(keyFields, sortAscending, 0, new ExecutionConfig());

        GatheringCollector<Tuple2<StringValue, IntValue>> collector = new GatheringCollector<>(tupleType.createSerializer(new ExecutionConfig()));

        // wire up the task context
        ctx.setDriverStrategy(DriverStrategy.SORTED_GROUP_REDUCE);
        ctx.setInput1(source, tupleType.createSerializer(new ExecutionConfig()));
        ctx.setComparator1(keyComparator);
        ctx.setCollector(collector);
        ctx.setUdf(new ConcatSumMutableAccumulatingReducer());
        ctx.setMutableObjectMode(false);

        // execute the driver life cycle
        GroupReduceDriver<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>> reduceDriver = new GroupReduceDriver<>();
        reduceDriver.setup(ctx);
        reduceDriver.prepare();
        reduceDriver.run();

        // verify the collected output
        Object[] actual = collector.getList().toArray();
        Object[] wanted = DriverTestData.createReduceMutableDataGroupedResult().toArray();
        DriverTestData.compareTupleArrays(wanted, actual);
    } catch (Exception ex) {
        System.err.println(ex.getMessage());
        ex.printStackTrace();
        Assert.fail(ex.getMessage());
    }
}
use of org.apache.flink.api.common.functions.GroupReduceFunction in project flink by apache.
the class GroupReduceDriverTest method testAllReduceDriverImmutableEmpty.
/**
 * Runs a {@code GroupReduceDriver} with the {@code SORTED_GROUP_REDUCE} strategy on an empty
 * input and checks that nothing is emitted to the collector. No UDF is set on the context;
 * the immutable test data is only used to derive the tuple type information.
 */
@Test
public void testAllReduceDriverImmutableEmpty() {
    try {
        TestTaskContext<GroupReduceFunction<Tuple2<String, Integer>, Tuple2<String, Integer>>, Tuple2<String, Integer>> ctx = new TestTaskContext<>();

        // the records are only needed to extract the type information from the first element
        List<Tuple2<String, Integer>> records = DriverTestData.createReduceImmutableData();
        TupleTypeInfo<Tuple2<String, Integer>> tupleType = (TupleTypeInfo<Tuple2<String, Integer>>) TypeExtractor.getForObject(records.get(0));

        MutableObjectIterator<Tuple2<String, Integer>> emptySource = EmptyMutableObjectIterator.get();

        // group on tuple field 0, ascending
        int[] keyFields = { 0 };
        boolean[] sortAscending = { true };
        TypeComparator<Tuple2<String, Integer>> keyComparator = tupleType.createComparator(keyFields, sortAscending, 0, new ExecutionConfig());

        // wire up the task context
        ctx.setDriverStrategy(DriverStrategy.SORTED_GROUP_REDUCE);
        GatheringCollector<Tuple2<String, Integer>> collector = new GatheringCollector<>(tupleType.createSerializer(new ExecutionConfig()));
        ctx.setInput1(emptySource, tupleType.createSerializer(new ExecutionConfig()));
        ctx.setComparator1(keyComparator);
        ctx.setCollector(collector);

        // execute the driver life cycle
        GroupReduceDriver<Tuple2<String, Integer>, Tuple2<String, Integer>> reduceDriver = new GroupReduceDriver<>();
        reduceDriver.setup(ctx);
        reduceDriver.prepare();
        reduceDriver.run();

        // an empty input must produce an empty output
        Assert.assertTrue(collector.getList().isEmpty());
    } catch (Exception ex) {
        System.err.println(ex.getMessage());
        ex.printStackTrace();
        Assert.fail(ex.getMessage());
    }
}
use of org.apache.flink.api.common.functions.GroupReduceFunction in project flink by apache.
the class GroupReduceDriverTest method testAllReduceDriverImmutable.
/**
 * Runs a {@code GroupReduceDriver} with the {@code SORTED_GROUP_REDUCE} strategy over the
 * immutable reduce test data, using {@code ConcatSumReducer}, and compares the collected
 * output against {@code DriverTestData.createReduceImmutableDataGroupedResult()}.
 */
@Test
public void testAllReduceDriverImmutable() {
    try {
        TestTaskContext<GroupReduceFunction<Tuple2<String, Integer>, Tuple2<String, Integer>>, Tuple2<String, Integer>> ctx = new TestTaskContext<>();

        List<Tuple2<String, Integer>> records = DriverTestData.createReduceImmutableData();
        // the type information is derived from the first record
        TupleTypeInfo<Tuple2<String, Integer>> tupleType = (TupleTypeInfo<Tuple2<String, Integer>>) TypeExtractor.getForObject(records.get(0));

        MutableObjectIterator<Tuple2<String, Integer>> source = new RegularToMutableObjectIterator<>(records.iterator(), tupleType.createSerializer(new ExecutionConfig()));

        // group on tuple field 0, ascending
        int[] keyFields = { 0 };
        boolean[] sortAscending = { true };
        TypeComparator<Tuple2<String, Integer>> keyComparator = tupleType.createComparator(keyFields, sortAscending, 0, new ExecutionConfig());

        GatheringCollector<Tuple2<String, Integer>> collector = new GatheringCollector<>(tupleType.createSerializer(new ExecutionConfig()));

        // wire up the task context
        ctx.setDriverStrategy(DriverStrategy.SORTED_GROUP_REDUCE);
        ctx.setInput1(source, tupleType.createSerializer(new ExecutionConfig()));
        ctx.setCollector(collector);
        ctx.setComparator1(keyComparator);
        ctx.setUdf(new ConcatSumReducer());

        // execute the driver life cycle
        GroupReduceDriver<Tuple2<String, Integer>, Tuple2<String, Integer>> reduceDriver = new GroupReduceDriver<>();
        reduceDriver.setup(ctx);
        reduceDriver.prepare();
        reduceDriver.run();

        // verify the collected output
        Object[] actual = collector.getList().toArray();
        Object[] wanted = DriverTestData.createReduceImmutableDataGroupedResult().toArray();
        DriverTestData.compareTupleArrays(wanted, actual);
    } catch (Exception ex) {
        System.err.println(ex.getMessage());
        ex.printStackTrace();
        Assert.fail(ex.getMessage());
    }
}
use of org.apache.flink.api.common.functions.GroupReduceFunction in project flink by apache.
the class GroupReduceDriverTest method testAllReduceDriverIncorrectlyAccumulatingMutable.
/**
 * Runs a {@code GroupReduceDriver} with the {@code SORTED_GROUP_REDUCE} strategy over the
 * mutable reduce test data, using {@code ConcatSumMutableAccumulatingReducer} without
 * changing the context's mutable object mode, and verifies that the output does NOT match
 * {@code DriverTestData.createReduceMutableDataGroupedResult()}.
 *
 * <p>The test passes only if the comparison raises an {@link AssertionError}; a matching
 * result is reported as a test failure.
 */
@Test
public void testAllReduceDriverIncorrectlyAccumulatingMutable() {
    try {
        TestTaskContext<GroupReduceFunction<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>>, Tuple2<StringValue, IntValue>> context = new TestTaskContext<GroupReduceFunction<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>>, Tuple2<StringValue, IntValue>>();
        List<Tuple2<StringValue, IntValue>> data = DriverTestData.createReduceMutableData();
        // the type information is derived from the first record
        TupleTypeInfo<Tuple2<StringValue, IntValue>> typeInfo = (TupleTypeInfo<Tuple2<StringValue, IntValue>>) TypeExtractor.getForObject(data.get(0));
        MutableObjectIterator<Tuple2<StringValue, IntValue>> input = new RegularToMutableObjectIterator<Tuple2<StringValue, IntValue>>(data.iterator(), typeInfo.createSerializer(new ExecutionConfig()));
        // group on tuple field 0, ascending
        TypeComparator<Tuple2<StringValue, IntValue>> comparator = typeInfo.createComparator(new int[] { 0 }, new boolean[] { true }, 0, new ExecutionConfig());
        GatheringCollector<Tuple2<StringValue, IntValue>> result = new GatheringCollector<Tuple2<StringValue, IntValue>>(typeInfo.createSerializer(new ExecutionConfig()));

        context.setDriverStrategy(DriverStrategy.SORTED_GROUP_REDUCE);
        context.setInput1(input, typeInfo.createSerializer(new ExecutionConfig()));
        context.setComparator1(comparator);
        context.setCollector(result);
        context.setUdf(new ConcatSumMutableAccumulatingReducer());

        GroupReduceDriver<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>> driver = new GroupReduceDriver<Tuple2<StringValue, IntValue>, Tuple2<StringValue, IntValue>>();
        driver.setup(context);
        driver.prepare();
        driver.run();

        Object[] res = result.getList().toArray();
        Object[] expected = DriverTestData.createReduceMutableDataGroupedResult().toArray();

        // The comparison is expected to fail. The outcome is recorded in a flag and asserted
        // AFTER the try/catch: calling Assert.fail(...) inside the try block would throw an
        // AssertionError that the catch below swallows, so the test could never fail.
        boolean comparisonFailed = false;
        try {
            DriverTestData.compareTupleArrays(expected, res);
        } catch (AssertionError e) {
            // expected
            comparisonFailed = true;
        }
        Assert.assertTrue("Accumulating mutable objects is expected to result in incorrect values.", comparisonFailed);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Aggregations