use of org.apache.flink.api.common.functions.GroupCombineFunction in project flink by apache.
the class BatchTask method initInputLocalStrategy.
/**
 * Sets up the local strategy for the input with the given index.
 *
 * <p>If the configuration holds no strategy, or the strategy is {@code NONE}, the input is
 * wired directly to the corresponding input iterator. For {@code SORT} and
 * {@code COMBININGSORT} a sorter is created and registered in {@code localStrategies}; the
 * entry in {@code inputs} is set to {@code null} so it can be fetched lazily from the
 * strategy later.
 *
 * @param inputNum index of the input to initialize
 * @throws IllegalStateException if a local strategy is already registered for the input, or
 *     if a combining sort is requested for an input other than 0, without a stub type, or
 *     with a stub that is not a {@code GroupCombineFunction}
 * @throws RuntimeException if instantiating the combiner stub fails
 * @throws Exception if the configured local strategy is not recognized
 */
private void initInputLocalStrategy(int inputNum) throws Exception {
    // a strategy must not be initialized twice for the same input
    if (this.localStrategies[inputNum] != null) {
        throw new IllegalStateException();
    }

    final LocalStrategy localStrategy = this.config.getInputLocalStrategy(inputNum);

    // nothing configured: the input is consumed exactly as it arrives
    if (localStrategy == null) {
        this.inputs[inputNum] = this.inputIterators[inputNum];
        return;
    }

    switch (localStrategy) {
        case NONE: {
            // pass-through, same as having no strategy at all
            this.inputs[inputNum] = this.inputIterators[inputNum];
            break;
        }
        case SORT: {
            @SuppressWarnings({ "rawtypes", "unchecked" })
            UnilateralSortMerger<?> sorter = new UnilateralSortMerger(
                    getMemoryManager(),
                    getIOManager(),
                    this.inputIterators[inputNum],
                    this,
                    this.inputSerializers[inputNum],
                    getLocalStrategyComparator(inputNum),
                    this.config.getRelativeMemoryInput(inputNum),
                    this.config.getFilehandlesInput(inputNum),
                    this.config.getSpillingThresholdInput(inputNum),
                    this.config.getUseLargeRecordHandler(),
                    this.getExecutionConfig().isObjectReuseEnabled());
            // null marks the input as "to be fetched lazily from the local strategy"
            this.inputs[inputNum] = null;
            this.localStrategies[inputNum] = sorter;
            break;
        }
        case COMBININGSORT: {
            // nested configurations for the local strategies would be the proper solution here
            if (inputNum != 0) {
                throw new IllegalStateException("Performing combining sort outside a (group)reduce task!");
            }

            // The combiner gets its own stub instance: reusing the driver's stub would let the
            // sort and the subsequent (group)reduce share it across threads.
            final Class<S> stubClass = this.driver.getStubType();
            if (stubClass == null) {
                throw new IllegalStateException("Performing combining sort outside a reduce task!");
            }

            final S combinerStub;
            try {
                combinerStub = initStub(stubClass);
            } catch (Exception e) {
                final String detail = e.getMessage();
                final String suffix = (detail == null) ? "." : ": " + detail;
                throw new RuntimeException("Initializing the user code and the configuration failed" + suffix, e);
            }

            if (!(combinerStub instanceof GroupCombineFunction)) {
                throw new IllegalStateException("Performing combining sort outside a reduce task!");
            }

            @SuppressWarnings({ "rawtypes", "unchecked" })
            CombiningUnilateralSortMerger<?> combiningSorter = new CombiningUnilateralSortMerger(
                    (GroupCombineFunction) combinerStub,
                    getMemoryManager(),
                    getIOManager(),
                    this.inputIterators[inputNum],
                    this,
                    this.inputSerializers[inputNum],
                    getLocalStrategyComparator(inputNum),
                    this.config.getRelativeMemoryInput(inputNum),
                    this.config.getFilehandlesInput(inputNum),
                    this.config.getSpillingThresholdInput(inputNum),
                    this.getTaskConfig().getUseLargeRecordHandler(),
                    this.getExecutionConfig().isObjectReuseEnabled());
            combiningSorter.setUdfConfiguration(this.config.getStubParameters());

            // null marks the input as "to be fetched lazily from the local strategy"
            this.inputs[inputNum] = null;
            this.localStrategies[inputNum] = combiningSorter;
            break;
        }
        default:
            throw new Exception("Unrecognized local strategy provided: " + localStrategy.name());
    }
}
use of org.apache.flink.api.common.functions.GroupCombineFunction in project flink by apache.
the class GroupCombineITCase method testCheckPartitionShuffleGroupBy.
/**
 * Checks that no shuffle is being executed: the data is hash-partitioned on field 0 and then
 * combined per group on field 1, and the combined output must differ from the fully grouped
 * result {@code (1,1) ... (6,6)}.
 *
 * <p>The test is skipped in {@code COLLECTION} execution mode.
 *
 * <p>NOTE(review): the expectation that the output differs presumably relies on the test
 * environment running with a parallelism greater than 1 -- confirm against the test base.
 */
@Test
public void testCheckPartitionShuffleGroupBy() throws Exception {
    org.junit.Assume.assumeTrue(mode != TestExecutionMode.COLLECTION);
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // data
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
    // partition and group data
    UnsortedGrouping<Tuple3<Integer, Long, String>> partitionedDS = ds.partitionByHash(0).groupBy(1);
    List<Tuple2<Long, Integer>> result = partitionedDS.combineGroup(new GroupCombineFunction<Tuple3<Integer, Long, String>, Tuple2<Long, Integer>>() {
        @Override
        public void combine(Iterable<Tuple3<Integer, Long, String>> values, Collector<Tuple2<Long, Integer>> out) throws Exception {
            // emit (group key, number of records seen for that key in this combine call)
            int count = 0;
            long key = 0;
            for (Tuple3<Integer, Long, String> value : values) {
                key = value.f1;
                count++;
            }
            out.collect(new Tuple2<>(key, count));
        }
    }).collect();
    // Result a complete grouping would produce: six separate entries. The original fused
    // "(5,5)" and "(4,4)" into one string through a missing comma, so the comparison below
    // could never succeed and the assertion was vacuous.
    String[] localExpected = new String[] { "(6,6)", "(5,5)", "(4,4)", "(3,3)", "(2,2)", "(1,1)" };
    String[] resultAsStringArray = new String[result.size()];
    for (int i = 0; i < resultAsStringArray.length; ++i) {
        resultAsStringArray[i] = result.get(i).toString();
    }
    // sort both sides so the comparison is independent of element order
    Arrays.sort(localExpected);
    Arrays.sort(resultAsStringArray);
    Assert.assertEquals("The two arrays were identical.", false, Arrays.equals(localExpected, resultAsStringArray));
}
use of org.apache.flink.api.common.functions.GroupCombineFunction in project flink by apache.
the class GroupCombineITCase method testCheckPartitionShuffleDOP1.
/**
 * Checks that a parallelism of 1 results in the same data as a shuffle would: the data set is
 * hash-partitioned on field 0, grouped on field 1 and combined into (key, count) pairs, which
 * are then compared against the fully grouped result.
 */
@Test
public void testCheckPartitionShuffleDOP1() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    // input data
    DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);

    // partition on field 0, then group on field 1
    UnsortedGrouping<Tuple3<Integer, Long, String>> grouped = input.partitionByHash(0).groupBy(1);

    // counts the records of each group and emits (key, count)
    GroupCombineFunction<Tuple3<Integer, Long, String>, Tuple2<Long, Integer>> groupCounter =
            new GroupCombineFunction<Tuple3<Integer, Long, String>, Tuple2<Long, Integer>>() {
                @Override
                public void combine(Iterable<Tuple3<Integer, Long, String>> values, Collector<Tuple2<Long, Integer>> out) throws Exception {
                    long groupKey = 0;
                    int seen = 0;
                    for (Tuple3<Integer, Long, String> record : values) {
                        groupKey = record.f1;
                        seen++;
                    }
                    out.collect(new Tuple2<>(groupKey, seen));
                }
            };

    List<Tuple2<Long, Integer>> result = grouped.combineGroup(groupCounter).collect();

    String expected = "6,6\n5,5\n4,4\n3,3\n2,2\n1,1\n";
    compareResultAsTuples(result, expected);
}
use of org.apache.flink.api.common.functions.GroupCombineFunction in project flink by apache.
the class GroupReduceOperator method checkCombinability.
/**
 * Decides whether the user function may also be used as a combiner.
 *
 * <p>The function must implement {@code GroupCombineFunction} or {@code CombineFunction}.
 * When it does, the generic interface declarations are inspected as a best-effort check that
 * the combine function has the shape {@code <IN, IN>} for a reduce function
 * {@code <IN, OUT>}. If the type arguments cannot be determined, the combiner is enabled
 * anyway and a warning is logged, since the combine function might then fail at runtime.
 *
 * @return {@code true} if the combiner should be enabled, {@code false} otherwise
 */
private boolean checkCombinability() {
    final boolean implementsCombiner =
            function instanceof GroupCombineFunction || function instanceof CombineFunction;
    if (!implementsCombiner) {
        return false;
    }

    Type[] reduceArgs = null;
    Type[] combineArgs = null;

    // collect the type arguments of the directly declared reduce / combine interfaces
    for (Type declared : function.getClass().getGenericInterfaces()) {
        if (!(declared instanceof ParameterizedType)) {
            continue;
        }
        final ParameterizedType parameterized = (ParameterizedType) declared;
        final Type raw = parameterized.getRawType();
        if (raw.equals(GroupReduceFunction.class)) {
            reduceArgs = parameterized.getActualTypeArguments();
        } else if (raw.equals(GroupCombineFunction.class) || raw.equals(CombineFunction.class)) {
            combineArgs = parameterized.getActualTypeArguments();
        }
    }

    final boolean reduceKnown = reduceArgs != null && reduceArgs.length == 2;
    final boolean combineKnown = combineArgs != null && combineArgs.length == 2;

    if (!reduceKnown) {
        LOG.warn("Cannot check generic types of GroupReduceFunction. " + "Enabling combiner but combine function might fail at runtime.");
        return true;
    }
    if (!combineKnown) {
        LOG.warn("Cannot check generic types of GroupCombineFunction. " + "Enabling combiner but combine function might fail at runtime.");
        return true;
    }

    // both combine type arguments must equal the reduce input type
    final Type reduceInput = reduceArgs[0];
    if (reduceInput.equals(combineArgs[0]) && reduceInput.equals(combineArgs[1])) {
        return true;
    }
    LOG.warn("GroupCombineFunction cannot be used as combiner for GroupReduceFunction. " + "Generic types are incompatible.");
    return false;
}
use of org.apache.flink.api.common.functions.GroupCombineFunction in project flink by apache.
the class GroupCombineOperator method translateToDataFlow.
// --------------------------------------------------------------------------------------------
// Translation
// --------------------------------------------------------------------------------------------
/**
 * Translates this operator into its data flow representation.
 *
 * <p>Three cases are handled: no grouping (parallelism fixed to 1), grouping on key selector
 * functions (plain or sorted), and grouping on expression keys (optionally with a group
 * order). Any other key type is rejected.
 *
 * @param input the input operator of the translated operator
 * @return the translated group combine operator
 * @throws UnsupportedOperationException if the grouping uses an unrecognized key type
 */
@Override
protected GroupCombineOperatorBase<?, OUT, ?> translateToDataFlow(Operator<IN> input) {
    String name = getName();
    if (name == null) {
        name = "GroupCombine at " + defaultName;
    }

    // case 1: no grouping at all
    if (grouper == null) {
        UnaryOperatorInformation<IN, OUT> info = new UnaryOperatorInformation<>(getInputType(), getResultType());
        GroupCombineOperatorBase<IN, OUT, GroupCombineFunction<IN, OUT>> ungrouped = new GroupCombineOperatorBase<>(function, info, new int[0], name);
        ungrouped.setInput(input);
        // without a grouping the operator can only run with a parallelism of 1
        ungrouped.setParallelism(1);
        return ungrouped;
    }

    // case 2: grouping defined through key selector functions
    if (grouper.getKeys() instanceof SelectorFunctionKeys) {
        @SuppressWarnings("unchecked") SelectorFunctionKeys<IN, ?> groupKeys = (SelectorFunctionKeys<IN, ?>) grouper.getKeys();

        if (!(grouper instanceof SortedGrouping)) {
            PlanUnwrappingGroupCombineOperator<IN, OUT, ?> unsorted = translateSelectorFunctionReducer(groupKeys, function, getResultType(), name, input);
            unsorted.setParallelism(this.getParallelism());
            return unsorted;
        }

        SortedGrouping<IN> sorted = (SortedGrouping<IN>) grouper;
        SelectorFunctionKeys<IN, ?> sortKeys = sorted.getSortSelectionFunctionKey();
        Ordering groupOrder = sorted.getGroupOrdering();
        PlanUnwrappingSortedGroupCombineOperator<IN, OUT, ?, ?> sortedOp = translateSelectorFunctionSortedReducer(groupKeys, sortKeys, groupOrder, function, getResultType(), name, input);
        sortedOp.setParallelism(this.getParallelism());
        return sortedOp;
    }

    // anything that is neither selector-function nor expression keys is unsupported
    if (!(grouper.getKeys() instanceof Keys.ExpressionKeys)) {
        throw new UnsupportedOperationException("Unrecognized key type.");
    }

    // case 3: grouping defined through expression keys
    int[] keyPositions = grouper.getKeys().computeLogicalKeyPositions();
    UnaryOperatorInformation<IN, OUT> info = new UnaryOperatorInformation<>(getInputType(), getResultType());
    GroupCombineOperatorBase<IN, OUT, GroupCombineFunction<IN, OUT>> keyed = new GroupCombineOperatorBase<>(function, info, keyPositions, name);
    keyed.setInput(input);
    keyed.setParallelism(getParallelism());

    // carry over the in-group sort order, if one was requested
    if (grouper instanceof SortedGrouping) {
        SortedGrouping<IN> sorted = (SortedGrouping<IN>) grouper;
        int[] sortPositions = sorted.getGroupSortKeyPositions();
        Order[] sortDirections = sorted.getGroupSortOrders();
        Ordering ordering = new Ordering();
        for (int idx = 0; idx < sortPositions.length; idx++) {
            ordering.appendOrdering(sortPositions[idx], null, sortDirections[idx]);
        }
        keyed.setGroupOrder(ordering);
    }
    return keyed;
}
Aggregations