Example 1 with GroupCombineFunction

use of org.apache.flink.api.common.functions.GroupCombineFunction in project flink by apache.

the class BatchTask method initInputLocalStrategy.

private void initInputLocalStrategy(int inputNum) throws Exception {
    // check if there is already a strategy
    if (this.localStrategies[inputNum] != null) {
        throw new IllegalStateException();
    }
    // now set up the local strategy
    final LocalStrategy localStrategy = this.config.getInputLocalStrategy(inputNum);
    if (localStrategy != null) {
        switch(localStrategy) {
            case NONE:
                // the input is as it is
                this.inputs[inputNum] = this.inputIterators[inputNum];
                break;
            case SORT:
                @SuppressWarnings({ "rawtypes", "unchecked" })
                UnilateralSortMerger<?> sorter = new UnilateralSortMerger(
                        getMemoryManager(), getIOManager(), this.inputIterators[inputNum], this,
                        this.inputSerializers[inputNum], getLocalStrategyComparator(inputNum),
                        this.config.getRelativeMemoryInput(inputNum), this.config.getFilehandlesInput(inputNum),
                        this.config.getSpillingThresholdInput(inputNum), this.config.getUseLargeRecordHandler(),
                        this.getExecutionConfig().isObjectReuseEnabled());
                // set the input to null such that it will be lazily fetched from the input strategy
                this.inputs[inputNum] = null;
                this.localStrategies[inputNum] = sorter;
                break;
            case COMBININGSORT:
                // we should have nested configurations for the local strategies to solve that
                if (inputNum != 0) {
                    throw new IllegalStateException("Performing combining sort outside a (group)reduce task!");
                }
                // instantiate ourselves a combiner. we should not use the stub, because the sort and the
                // subsequent (group)reduce would otherwise share it multi-threaded
                final Class<S> userCodeFunctionType = this.driver.getStubType();
                if (userCodeFunctionType == null) {
                    throw new IllegalStateException("Performing combining sort outside a reduce task!");
                }
                final S localStub;
                try {
                    localStub = initStub(userCodeFunctionType);
                } catch (Exception e) {
                    throw new RuntimeException("Initializing the user code and the configuration failed" + (e.getMessage() == null ? "." : ": " + e.getMessage()), e);
                }
                if (!(localStub instanceof GroupCombineFunction)) {
                    throw new IllegalStateException("Performing combining sort outside a reduce task!");
                }
                @SuppressWarnings({ "rawtypes", "unchecked" })
                CombiningUnilateralSortMerger<?> cSorter = new CombiningUnilateralSortMerger(
                        (GroupCombineFunction) localStub, getMemoryManager(), getIOManager(),
                        this.inputIterators[inputNum], this, this.inputSerializers[inputNum],
                        getLocalStrategyComparator(inputNum), this.config.getRelativeMemoryInput(inputNum),
                        this.config.getFilehandlesInput(inputNum), this.config.getSpillingThresholdInput(inputNum),
                        this.getTaskConfig().getUseLargeRecordHandler(),
                        this.getExecutionConfig().isObjectReuseEnabled());
                cSorter.setUdfConfiguration(this.config.getStubParameters());
                // set the input to null such that it will be lazily fetched from the input strategy
                this.inputs[inputNum] = null;
                this.localStrategies[inputNum] = cSorter;
                break;
            default:
                throw new Exception("Unrecognized local strategy provided: " + localStrategy.name());
        }
    } else {
        // no local strategy in the config
        this.inputs[inputNum] = this.inputIterators[inputNum];
    }
}
Also used : CombiningUnilateralSortMerger(org.apache.flink.runtime.operators.sort.CombiningUnilateralSortMerger) LocalStrategy(org.apache.flink.runtime.operators.util.LocalStrategy) GroupCombineFunction(org.apache.flink.api.common.functions.GroupCombineFunction) UnilateralSortMerger(org.apache.flink.runtime.operators.sort.UnilateralSortMerger) ExceptionInChainedStubException(org.apache.flink.runtime.operators.chaining.ExceptionInChainedStubException) CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException) IOException(java.io.IOException)
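
For reference, the user stub that the COMBININGSORT branch casts to GroupCombineFunction is simply a class implementing that interface. The sketch below is illustrative only (the class name and the summing logic are assumptions, not taken from the Flink sources above); it shows the combine(Iterable, Collector) contract that the combining sorter relies on.

import org.apache.flink.api.common.functions.GroupCombineFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// Illustrative combiner: pre-aggregates (word, count) pairs of one group so that
// less data has to be sorted, spilled, and shipped to the final reduce.
public class WordCountCombiner
        implements GroupCombineFunction<Tuple2<String, Integer>, Tuple2<String, Integer>> {

    @Override
    public void combine(Iterable<Tuple2<String, Integer>> values,
                        Collector<Tuple2<String, Integer>> out) throws Exception {
        String word = null;
        int sum = 0;
        for (Tuple2<String, Integer> value : values) {
            word = value.f0;
            sum += value.f1;
        }
        if (word != null) {
            out.collect(new Tuple2<>(word, sum));
        }
    }
}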

Example 2 with GroupCombineFunction

use of org.apache.flink.api.common.functions.GroupCombineFunction in project flink by apache.

the class GroupCombineITCase method testCheckPartitionShuffleGroupBy.

@Test
// check if no shuffle is being executed
public void testCheckPartitionShuffleGroupBy() throws Exception {
    org.junit.Assume.assumeTrue(mode != TestExecutionMode.COLLECTION);
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // data
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
    // partition and group data
    UnsortedGrouping<Tuple3<Integer, Long, String>> partitionedDS = ds.partitionByHash(0).groupBy(1);
    List<Tuple2<Long, Integer>> result = partitionedDS.combineGroup(new GroupCombineFunction<Tuple3<Integer, Long, String>, Tuple2<Long, Integer>>() {

        @Override
        public void combine(Iterable<Tuple3<Integer, Long, String>> values, Collector<Tuple2<Long, Integer>> out) throws Exception {
            int count = 0;
            long key = 0;
            for (Tuple3<Integer, Long, String> value : values) {
                key = value.f1;
                count++;
            }
            out.collect(new Tuple2<>(key, count));
        }
    }).collect();
    String[] localExpected = new String[] { "(6,6)", "(5,5)" + "(4,4)", "(3,3)", "(2,2)", "(1,1)" };
    String[] resultAsStringArray = new String[result.size()];
    for (int i = 0; i < resultAsStringArray.length; ++i) {
        resultAsStringArray[i] = result.get(i).toString();
    }
    Arrays.sort(resultAsStringArray);
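    // the combine runs per hash partition without a shuffle, so the per-key counts
    // are split across partitions and are expected to differ from the fully grouped counts above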
    Assert.assertEquals("The two arrays were identical.", false, Arrays.equals(localExpected, resultAsStringArray));
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) GroupCombineFunction(org.apache.flink.api.common.functions.GroupCombineFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Collector(org.apache.flink.util.Collector) Test(org.junit.Test)

Example 3 with GroupCombineFunction

use of org.apache.flink.api.common.functions.GroupCombineFunction in project flink by apache.

the class GroupCombineITCase method testCheckPartitionShuffleDOP1.

@Test
// check if parallelism of 1 produces the same result as a shuffle
public void testCheckPartitionShuffleDOP1() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    // data
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
    // partition and group data
    UnsortedGrouping<Tuple3<Integer, Long, String>> partitionedDS = ds.partitionByHash(0).groupBy(1);
    List<Tuple2<Long, Integer>> result = partitionedDS.combineGroup(new GroupCombineFunction<Tuple3<Integer, Long, String>, Tuple2<Long, Integer>>() {

        @Override
        public void combine(Iterable<Tuple3<Integer, Long, String>> values, Collector<Tuple2<Long, Integer>> out) throws Exception {
            int count = 0;
            long key = 0;
            for (Tuple3<Integer, Long, String> value : values) {
                key = value.f1;
                count++;
            }
            out.collect(new Tuple2<>(key, count));
        }
    }).collect();
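    // with parallelism 1 all records are in a single partition, so the combine sees
    // complete groups and its output matches the fully grouped counts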
    String expected = "6,6\n" + "5,5\n" + "4,4\n" + "3,3\n" + "2,2\n" + "1,1\n";
    compareResultAsTuples(result, expected);
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) GroupCombineFunction(org.apache.flink.api.common.functions.GroupCombineFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Collector(org.apache.flink.util.Collector) Test(org.junit.Test)

Example 4 with GroupCombineFunction

use of org.apache.flink.api.common.functions.GroupCombineFunction in project flink by apache.

the class GroupReduceOperator method checkCombinability.

private boolean checkCombinability() {
    if (function instanceof GroupCombineFunction || function instanceof CombineFunction) {
        // check if the generic types of GroupCombineFunction and GroupReduceFunction match, i.e.,
        //   GroupCombineFunction<IN, IN> and GroupReduceFunction<IN, OUT>.
        // This is a best effort check. If the check cannot be done, we might fail at runtime.
        Type[] reduceTypes = null;
        Type[] combineTypes = null;
        Type[] genInterfaces = function.getClass().getGenericInterfaces();
        for (Type genInterface : genInterfaces) {
            if (genInterface instanceof ParameterizedType) {
                // get parameters of GroupReduceFunction
                if (((ParameterizedType) genInterface).getRawType().equals(GroupReduceFunction.class)) {
                    reduceTypes = ((ParameterizedType) genInterface).getActualTypeArguments();
                // get parameters of GroupCombineFunction
                } else if (((ParameterizedType) genInterface).getRawType().equals(GroupCombineFunction.class)
                        || ((ParameterizedType) genInterface).getRawType().equals(CombineFunction.class)) {
                    combineTypes = ((ParameterizedType) genInterface).getActualTypeArguments();
                }
            }
        }
        if (reduceTypes != null && reduceTypes.length == 2 && combineTypes != null && combineTypes.length == 2) {
            if (reduceTypes[0].equals(combineTypes[0]) && reduceTypes[0].equals(combineTypes[1])) {
                return true;
            } else {
                LOG.warn("GroupCombineFunction cannot be used as combiner for GroupReduceFunction. " + "Generic types are incompatible.");
                return false;
            }
        } else if (reduceTypes == null || reduceTypes.length != 2) {
            LOG.warn("Cannot check generic types of GroupReduceFunction. " + "Enabling combiner but combine function might fail at runtime.");
            return true;
        } else {
            LOG.warn("Cannot check generic types of GroupCombineFunction. " + "Enabling combiner but combine function might fail at runtime.");
            return true;
        }
    }
    return false;
}
Also used : CombineFunction(org.apache.flink.api.common.functions.CombineFunction) GroupCombineFunction(org.apache.flink.api.common.functions.GroupCombineFunction) ParameterizedType(java.lang.reflect.ParameterizedType) Type(java.lang.reflect.Type)
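
To make the generic-type constraint concrete: the check accepts a function whose combine step maps IN to IN while its reduce step maps IN to OUT. A hypothetical user function that would pass checkCombinability is sketched below; the class name and the aggregation logic are illustrative assumptions, not part of the Flink code above.

import org.apache.flink.api.common.functions.GroupCombineFunction;
import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// reduceTypes  = [Tuple2<String, Integer>, String]
// combineTypes = [Tuple2<String, Integer>, Tuple2<String, Integer>]
// Both combine type arguments equal reduceTypes[0], so checkCombinability() returns true.
public class CombinableSummer
        implements GroupReduceFunction<Tuple2<String, Integer>, String>,
                   GroupCombineFunction<Tuple2<String, Integer>, Tuple2<String, Integer>> {

    @Override
    public void combine(Iterable<Tuple2<String, Integer>> values,
                        Collector<Tuple2<String, Integer>> out) throws Exception {
        // Pre-aggregate partial sums; input and output types must match for the combiner.
        String key = null;
        int sum = 0;
        for (Tuple2<String, Integer> v : values) {
            key = v.f0;
            sum += v.f1;
        }
        out.collect(new Tuple2<>(key, sum));
    }

    @Override
    public void reduce(Iterable<Tuple2<String, Integer>> values, Collector<String> out) throws Exception {
        // The final aggregation may change the type (here Tuple2 -> String).
        String key = null;
        int sum = 0;
        for (Tuple2<String, Integer> v : values) {
            key = v.f0;
            sum += v.f1;
        }
        out.collect(key + "=" + sum);
    }
}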

Example 5 with GroupCombineFunction

use of org.apache.flink.api.common.functions.GroupCombineFunction in project flink by apache.

the class GroupCombineOperator method translateToDataFlow.

// --------------------------------------------------------------------------------------------
//  Translation
// --------------------------------------------------------------------------------------------
@Override
protected GroupCombineOperatorBase<?, OUT, ?> translateToDataFlow(Operator<IN> input) {
    String name = getName() != null ? getName() : "GroupCombine at " + defaultName;
    // distinguish between grouped reduce and non-grouped reduce
    if (grouper == null) {
        // non grouped reduce
        UnaryOperatorInformation<IN, OUT> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
        GroupCombineOperatorBase<IN, OUT, GroupCombineFunction<IN, OUT>> po = new GroupCombineOperatorBase<>(function, operatorInfo, new int[0], name);
        po.setInput(input);
        // the parallelism for a non grouped reduce can only be 1
        po.setParallelism(1);
        return po;
    }
    if (grouper.getKeys() instanceof SelectorFunctionKeys) {
        @SuppressWarnings("unchecked") SelectorFunctionKeys<IN, ?> selectorKeys = (SelectorFunctionKeys<IN, ?>) grouper.getKeys();
        if (grouper instanceof SortedGrouping) {
            SortedGrouping<IN> sortedGrouping = (SortedGrouping<IN>) grouper;
            SelectorFunctionKeys<IN, ?> sortKeys = sortedGrouping.getSortSelectionFunctionKey();
            Ordering groupOrder = sortedGrouping.getGroupOrdering();
            PlanUnwrappingSortedGroupCombineOperator<IN, OUT, ?, ?> po = translateSelectorFunctionSortedReducer(selectorKeys, sortKeys, groupOrder, function, getResultType(), name, input);
            po.setParallelism(this.getParallelism());
            return po;
        } else {
            PlanUnwrappingGroupCombineOperator<IN, OUT, ?> po = translateSelectorFunctionReducer(selectorKeys, function, getResultType(), name, input);
            po.setParallelism(this.getParallelism());
            return po;
        }
    } else if (grouper.getKeys() instanceof Keys.ExpressionKeys) {
        int[] logicalKeyPositions = grouper.getKeys().computeLogicalKeyPositions();
        UnaryOperatorInformation<IN, OUT> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
        GroupCombineOperatorBase<IN, OUT, GroupCombineFunction<IN, OUT>> po = new GroupCombineOperatorBase<>(function, operatorInfo, logicalKeyPositions, name);
        po.setInput(input);
        po.setParallelism(getParallelism());
        // set group order
        if (grouper instanceof SortedGrouping) {
            SortedGrouping<IN> sortedGrouper = (SortedGrouping<IN>) grouper;
            int[] sortKeyPositions = sortedGrouper.getGroupSortKeyPositions();
            Order[] sortOrders = sortedGrouper.getGroupSortOrders();
            Ordering o = new Ordering();
            for (int i = 0; i < sortKeyPositions.length; i++) {
                o.appendOrdering(sortKeyPositions[i], null, sortOrders[i]);
            }
            po.setGroupOrder(o);
        }
        return po;
    } else {
        throw new UnsupportedOperationException("Unrecognized key type.");
    }
}
Also used : SelectorFunctionKeys(org.apache.flink.api.common.operators.Keys.SelectorFunctionKeys) GroupCombineFunction(org.apache.flink.api.common.functions.GroupCombineFunction) GroupCombineOperatorBase(org.apache.flink.api.common.operators.base.GroupCombineOperatorBase) UnaryOperatorInformation(org.apache.flink.api.common.operators.UnaryOperatorInformation) Keys(org.apache.flink.api.common.operators.Keys) Ordering(org.apache.flink.api.common.operators.Ordering)
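
From the API side, the branches above correspond to calling combineGroup either on an ungrouped DataSet (translated with an empty key array and parallelism forced to 1) or on a grouping (key positions and parallelism are forwarded). A rough sketch of both call sites follows; the input data and the Summer combiner are illustrative assumptions, not taken from the Flink code above.

import org.apache.flink.api.common.functions.GroupCombineFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

public class CombineGroupSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<String, Integer>> ds = env.fromElements(
                new Tuple2<>("a", 1), new Tuple2<>("b", 2), new Tuple2<>("a", 3));

        // Grouped combine: key positions reach translateToDataFlow, parallelism is kept.
        DataSet<Tuple2<String, Integer>> perKey = ds.groupBy(0).combineGroup(new Summer());

        // Ungrouped combine: translated with an empty key array and parallelism 1.
        DataSet<Tuple2<String, Integer>> global = ds.combineGroup(new Summer());

        perKey.print();
        global.print();
    }

    // Pre-aggregation over one (possibly partial) group of records.
    private static class Summer
            implements GroupCombineFunction<Tuple2<String, Integer>, Tuple2<String, Integer>> {

        @Override
        public void combine(Iterable<Tuple2<String, Integer>> values,
                            Collector<Tuple2<String, Integer>> out) throws Exception {
            String key = null;
            int sum = 0;
            for (Tuple2<String, Integer> v : values) {
                key = v.f0;
                sum += v.f1;
            }
            out.collect(new Tuple2<>(key, sum));
        }
    }
}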

Aggregations

GroupCombineFunction (org.apache.flink.api.common.functions.GroupCombineFunction): 6
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 2
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 2
Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 2
Collector (org.apache.flink.util.Collector): 2
Test (org.junit.Test): 2
IOException (java.io.IOException): 1
ParameterizedType (java.lang.reflect.ParameterizedType): 1
Type (java.lang.reflect.Type): 1
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 1
CombineFunction (org.apache.flink.api.common.functions.CombineFunction): 1
Keys (org.apache.flink.api.common.operators.Keys): 1
SelectorFunctionKeys (org.apache.flink.api.common.operators.Keys.SelectorFunctionKeys): 1
Ordering (org.apache.flink.api.common.operators.Ordering): 1
UnaryOperatorInformation (org.apache.flink.api.common.operators.UnaryOperatorInformation): 1
GroupCombineOperatorBase (org.apache.flink.api.common.operators.base.GroupCombineOperatorBase): 1
CancelTaskException (org.apache.flink.runtime.execution.CancelTaskException): 1
ExceptionInChainedStubException (org.apache.flink.runtime.operators.chaining.ExceptionInChainedStubException): 1
CombiningUnilateralSortMerger (org.apache.flink.runtime.operators.sort.CombiningUnilateralSortMerger): 1
UnilateralSortMerger (org.apache.flink.runtime.operators.sort.UnilateralSortMerger): 1