Search in sources:

Example 1 with UnaryOperatorInformation

use of org.apache.flink.api.common.operators.UnaryOperatorInformation in project flink by apache.

the class AggregateOperator method translateToDataFlow.

/**
 * Translates this aggregate operator into a {@link GroupReduceOperatorBase}.
 *
 * <p>All configured aggregation functions and their field positions are handed to a single
 * {@code AggregatingUdf}. When no explicit name is set, the operator name is generated as
 * {@code "<agg>(<field>),<agg>(<field>) at <location>"}.
 *
 * <ul>
 *   <li>No grouping: a combinable group-reduce with an empty key-position array.
 *   <li>Grouping on {@code Keys.ExpressionKeys}: a combinable group-reduce on the logical key
 *       positions, using the grouping's custom partitioner. Key fields that are not also
 *       aggregated are registered as forwarded fields.
 * </ul>
 *
 * @param input the operator that provides this operator's input
 * @return the group-reduce operator that performs the aggregation
 * @throws IllegalStateException if there are no aggregation functions, or their number differs
 *     from the number of fields
 * @throws UnsupportedOperationException if the grouping uses {@code Keys.SelectorFunctionKeys}
 *     or any other key type
 */
@SuppressWarnings("unchecked")
@Override
@Internal
protected org.apache.flink.api.common.operators.base.GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>> translateToDataFlow(Operator<IN> input) {
    // sanity check
    if (this.aggregationFunctions.isEmpty() || this.aggregationFunctions.size() != this.fields.size()) {
        throw new IllegalStateException();
    }
    // construct the aggregation function
    AggregationFunction<Object>[] aggFunctions = new AggregationFunction[this.aggregationFunctions.size()];
    int[] fields = new int[this.fields.size()];
    StringBuilder genName = new StringBuilder();
    for (int i = 0; i < fields.length; i++) {
        aggFunctions[i] = (AggregationFunction<Object>) this.aggregationFunctions.get(i);
        fields[i] = this.fields.get(i);
        genName.append(aggFunctions[i].toString()).append('(').append(fields[i]).append(')').append(',');
    }
    // Drop the trailing ',' left by the loop BEFORE appending the location suffix; trimming
    // afterwards would cut the last character of the location name and keep the comma.
    genName.setLength(genName.length() - 1);
    genName.append(" at ").append(aggregateLocationName);
    @SuppressWarnings("rawtypes") RichGroupReduceFunction<IN, IN> function = new AggregatingUdf(aggFunctions, fields);
    String name = getName() != null ? getName() : genName.toString();
    // distinguish between grouped reduce and non-grouped reduce
    if (this.grouping == null) {
        // non grouped aggregation
        UnaryOperatorInformation<IN, IN> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
        GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>> po = new GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>>(function, operatorInfo, new int[0], name);
        po.setCombinable(true);
        // set input
        po.setInput(input);
        // set parallelism
        po.setParallelism(this.getParallelism());
        return po;
    }
    if (this.grouping.getKeys() instanceof Keys.ExpressionKeys) {
        // grouped aggregation
        int[] logicalKeyPositions = this.grouping.getKeys().computeLogicalKeyPositions();
        UnaryOperatorInformation<IN, IN> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
        GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>> po = new GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>>(function, operatorInfo, logicalKeyPositions, name);
        po.setCombinable(true);
        po.setInput(input);
        po.setParallelism(this.getParallelism());
        po.setCustomPartitioner(grouping.getCustomPartitioner());
        // A key field is declared as forwarded only if no aggregation is applied to it.
        SingleInputSemanticProperties props = new SingleInputSemanticProperties();
        for (int keyField : logicalKeyPositions) {
            boolean keyFieldUsedInAgg = false;
            for (int aggField : fields) {
                if (keyField == aggField) {
                    keyFieldUsedInAgg = true;
                    break;
                }
            }
            if (!keyFieldUsedInAgg) {
                props.addForwardedField(keyField, keyField);
            }
        }
        po.setSemanticProperties(props);
        return po;
    } else if (this.grouping.getKeys() instanceof Keys.SelectorFunctionKeys) {
        throw new UnsupportedOperationException("Aggregate does not support grouping with KeySelector functions, yet.");
    } else {
        throw new UnsupportedOperationException("Unrecognized key type.");
    }
}
Also used : GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) AggregationFunction(org.apache.flink.api.java.aggregation.AggregationFunction) UnaryOperatorInformation(org.apache.flink.api.common.operators.UnaryOperatorInformation) Keys(org.apache.flink.api.common.operators.Keys) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) SingleInputSemanticProperties(org.apache.flink.api.common.operators.SingleInputSemanticProperties) Internal(org.apache.flink.annotation.Internal)

Example 2 with UnaryOperatorInformation

use of org.apache.flink.api.common.operators.UnaryOperatorInformation in project flink by apache.

the class KeyFunctions method appendKeyExtractor.

/**
 * Places a "Key Extractor" map operator on top of {@code input}, yielding an operator of
 * {@code Tuple2<K, T>} records.
 *
 * <p>If {@code input} is a {@code Union}, the extractor is applied recursively to both union
 * inputs and the two results are united again under the original union's name. Otherwise a
 * single {@code MapOperatorBase} driven by a {@code KeyExtractingMapper} is created, with the
 * same parallelism as {@code input}.
 *
 * @param input the operator whose output is to be keyed
 * @param key the selector-function key that supplies the key extractor and the type information
 * @return an operator producing {@code Tuple2<K, T>} records
 */
@SuppressWarnings("unchecked")
public static <T, K> org.apache.flink.api.common.operators.Operator<Tuple2<K, T>> appendKeyExtractor(org.apache.flink.api.common.operators.Operator<T> input, SelectorFunctionKeys<T, K> key) {
    if (!(input instanceof Union)) {
        // plain input: wrap it in one key-extracting mapper
        final TypeInformation<T> sourceType = key.getInputType();
        final TypeInformation<Tuple2<K, T>> keyedType = createTypeWithKey(key);
        final KeyExtractingMapper<T, K> keyMapper = new KeyExtractingMapper(key.getKeyExtractor());
        final MapOperatorBase<T, Tuple2<K, T>, MapFunction<T, Tuple2<K, T>>> keyedMap = new MapOperatorBase<T, Tuple2<K, T>, MapFunction<T, Tuple2<K, T>>>(keyMapper, new UnaryOperatorInformation(sourceType, keyedType), "Key Extractor");
        keyedMap.setInput(input);
        keyedMap.setParallelism(input.getParallelism());
        return keyedMap;
    }

    // union input: push the key extraction down into both branches, then re-unite them
    final org.apache.flink.api.common.operators.Operator<T> left = ((Union) input).getFirstInput();
    final org.apache.flink.api.common.operators.Operator<T> right = ((Union) input).getSecondInput();
    final org.apache.flink.api.common.operators.Operator<Tuple2<K, T>> keyedLeft = appendKeyExtractor(left, key);
    final org.apache.flink.api.common.operators.Operator<Tuple2<K, T>> keyedRight = appendKeyExtractor(right, key);
    return new Union(keyedLeft, keyedRight, input.getName());
}
Also used : MapFunction(org.apache.flink.api.common.functions.MapFunction) Union(org.apache.flink.api.common.operators.Union) KeyExtractingMapper(org.apache.flink.api.java.operators.translation.KeyExtractingMapper) TwoKeyExtractingMapper(org.apache.flink.api.java.operators.translation.TwoKeyExtractingMapper) MapOperatorBase(org.apache.flink.api.common.operators.base.MapOperatorBase) UnaryOperatorInformation(org.apache.flink.api.common.operators.UnaryOperatorInformation) Tuple2(org.apache.flink.api.java.tuple.Tuple2)

Example 3 with UnaryOperatorInformation

use of org.apache.flink.api.common.operators.UnaryOperatorInformation in project flink by apache.

the class ScalaAggregateOperator method translateToDataFlow.

/**
 * Builds the {@link GroupReduceOperatorBase} that executes this aggregation.
 *
 * <p>The aggregation functions and their target fields are wrapped in one
 * {@code AggregatingUdf}. If no name was set, a name of the form
 * {@code "<agg>(<field>),<agg>(<field>)"} is generated.
 *
 * @param input the operator that provides this operator's input
 * @return a combinable group-reduce operator; ungrouped when no grouping is set, otherwise
 *     grouped on the logical positions of the expression keys
 * @throws IllegalStateException if there are no aggregation functions, or their number differs
 *     from the number of fields
 * @throws UnsupportedOperationException if the grouping keys are not {@code Keys.ExpressionKeys}
 */
@SuppressWarnings("unchecked")
@Override
protected org.apache.flink.api.common.operators.base.GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>> translateToDataFlow(Operator<IN> input) {
    // there must be at least one aggregation and exactly one field per aggregation
    if (this.aggregationFunctions.isEmpty() || this.aggregationFunctions.size() != this.fields.size()) {
        throw new IllegalStateException();
    }

    // copy functions and field positions into arrays, assembling the default name on the way
    final AggregationFunction<Object>[] udfParts = new AggregationFunction[this.aggregationFunctions.size()];
    final int[] aggPositions = new int[this.fields.size()];
    final StringBuilder defaultName = new StringBuilder();
    for (int idx = 0; idx < aggPositions.length; idx++) {
        udfParts[idx] = (AggregationFunction<Object>) this.aggregationFunctions.get(idx);
        aggPositions[idx] = this.fields.get(idx);
        defaultName.append(udfParts[idx].toString());
        defaultName.append('(').append(aggPositions[idx]).append(')');
        defaultName.append(',');
    }
    // remove the comma that follows the last entry
    defaultName.setLength(defaultName.length() - 1);

    @SuppressWarnings("rawtypes") RichGroupReduceFunction<IN, IN> udf = new AggregatingUdf(getInputType(), udfParts, aggPositions);

    final String name;
    if (getName() != null) {
        name = getName();
    } else {
        name = defaultName.toString();
    }

    // case 1: aggregation over the whole input, no grouping keys
    if (this.grouping == null) {
        UnaryOperatorInformation<IN, IN> info = new UnaryOperatorInformation<>(getInputType(), getResultType());
        GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>> ungrouped = new GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>>(udf, info, new int[0], name);
        ungrouped.setCombinable(true);
        ungrouped.setInput(input);
        ungrouped.setParallelism(this.getParallelism());
        return ungrouped;
    }

    // case 2: any key type other than expression keys is rejected
    if (!(this.grouping.getKeys() instanceof Keys.ExpressionKeys)) {
        if (this.grouping.getKeys() instanceof Keys.SelectorFunctionKeys) {
            throw new UnsupportedOperationException("Aggregate does not support grouping with KeySelector functions, yet.");
        }
        throw new UnsupportedOperationException("Unrecognized key type.");
    }

    // case 3: aggregation grouped on expression keys
    final int[] keyPositions = this.grouping.getKeys().computeLogicalKeyPositions();
    UnaryOperatorInformation<IN, IN> info = new UnaryOperatorInformation<>(getInputType(), getResultType());
    GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>> grouped = new GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>>(udf, info, keyPositions, name);
    grouped.setCombinable(true);
    grouped.setInput(input);
    grouped.setParallelism(this.getParallelism());

    // key fields that are not themselves aggregated are declared as forwarded
    final SingleInputSemanticProperties forwarded = new SingleInputSemanticProperties();
    nextKey:
    for (int keyPos : keyPositions) {
        for (int aggPos : aggPositions) {
            if (aggPos == keyPos) {
                continue nextKey;
            }
        }
        forwarded.addForwardedField(keyPos, keyPos);
    }
    grouped.setSemanticProperties(forwarded);
    grouped.setCustomPartitioner(grouping.getCustomPartitioner());
    return grouped;
}
Also used : RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) AggregationFunction(org.apache.flink.api.java.aggregation.AggregationFunction) UnaryOperatorInformation(org.apache.flink.api.common.operators.UnaryOperatorInformation) Keys(org.apache.flink.api.common.operators.Keys) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) SingleInputSemanticProperties(org.apache.flink.api.common.operators.SingleInputSemanticProperties)

Example 4 with UnaryOperatorInformation

use of org.apache.flink.api.common.operators.UnaryOperatorInformation in project flink by apache.

the class GroupCombineOperator method translateToDataFlow.

// --------------------------------------------------------------------------------------------
// Translation
// --------------------------------------------------------------------------------------------
/**
 * Builds the group-combine operator that executes this operator.
 *
 * <ul>
 *   <li>No grouping: a {@code GroupCombineOperatorBase} with an empty key-position array and
 *       parallelism 1.
 *   <li>{@code SelectorFunctionKeys}: delegates to
 *       {@code translateSelectorFunctionSortedReducer} for a {@code SortedGrouping}, otherwise
 *       to {@code translateSelectorFunctionReducer}.
 *   <li>{@code Keys.ExpressionKeys}: a {@code GroupCombineOperatorBase} on the logical key
 *       positions; for a {@code SortedGrouping} the group order is set from its sort key
 *       positions and sort orders.
 * </ul>
 *
 * @param input the operator that provides this operator's input
 * @return the translated group-combine operator
 * @throws UnsupportedOperationException if the grouping uses any other key type
 */
@Override
protected GroupCombineOperatorBase<?, OUT, ?> translateToDataFlow(Operator<IN> input) {
    final String name;
    if (getName() != null) {
        name = getName();
    } else {
        name = "GroupCombine at " + defaultName;
    }

    // ungrouped combine
    if (grouper == null) {
        UnaryOperatorInformation<IN, OUT> info = new UnaryOperatorInformation<>(getInputType(), getResultType());
        GroupCombineOperatorBase<IN, OUT, GroupCombineFunction<IN, OUT>> ungrouped = new GroupCombineOperatorBase<>(function, info, new int[0], name);
        ungrouped.setInput(input);
        // the parallelism for a non grouped reduce can only be 1
        ungrouped.setParallelism(1);
        return ungrouped;
    }

    // grouping defined through key-selector functions
    if (grouper.getKeys() instanceof SelectorFunctionKeys) {
        @SuppressWarnings("unchecked") SelectorFunctionKeys<IN, ?> groupKeys = (SelectorFunctionKeys<IN, ?>) grouper.getKeys();
        if (!(grouper instanceof SortedGrouping)) {
            PlanUnwrappingGroupCombineOperator<IN, OUT, ?> unsorted = translateSelectorFunctionReducer(groupKeys, function, getResultType(), name, input);
            unsorted.setParallelism(this.getParallelism());
            return unsorted;
        }
        SortedGrouping<IN> sorted = (SortedGrouping<IN>) grouper;
        SelectorFunctionKeys<IN, ?> orderKeys = sorted.getSortSelectionFunctionKey();
        Ordering ordering = sorted.getGroupOrdering();
        PlanUnwrappingSortedGroupCombineOperator<IN, OUT, ?, ?> sortedOp = translateSelectorFunctionSortedReducer(groupKeys, orderKeys, ordering, function, getResultType(), name, input);
        sortedOp.setParallelism(this.getParallelism());
        return sortedOp;
    }

    // only expression keys remain as a supported key type
    if (!(grouper.getKeys() instanceof Keys.ExpressionKeys)) {
        throw new UnsupportedOperationException("Unrecognized key type.");
    }

    final int[] keyPositions = grouper.getKeys().computeLogicalKeyPositions();
    UnaryOperatorInformation<IN, OUT> info = new UnaryOperatorInformation<>(getInputType(), getResultType());
    GroupCombineOperatorBase<IN, OUT, GroupCombineFunction<IN, OUT>> grouped = new GroupCombineOperatorBase<>(function, info, keyPositions, name);
    grouped.setInput(input);
    grouped.setParallelism(getParallelism());

    // a sorted grouping additionally fixes the order of records within each group
    if (grouper instanceof SortedGrouping) {
        SortedGrouping<IN> sorted = (SortedGrouping<IN>) grouper;
        int[] orderPositions = sorted.getGroupSortKeyPositions();
        Order[] directions = sorted.getGroupSortOrders();
        Ordering groupOrder = new Ordering();
        int k = 0;
        while (k < orderPositions.length) {
            groupOrder.appendOrdering(orderPositions[k], null, directions[k]);
            k++;
        }
        grouped.setGroupOrder(groupOrder);
    }
    return grouped;
}
Also used : SelectorFunctionKeys(org.apache.flink.api.common.operators.Keys.SelectorFunctionKeys) GroupCombineFunction(org.apache.flink.api.common.functions.GroupCombineFunction) GroupCombineOperatorBase(org.apache.flink.api.common.operators.base.GroupCombineOperatorBase) UnaryOperatorInformation(org.apache.flink.api.common.operators.UnaryOperatorInformation) SelectorFunctionKeys(org.apache.flink.api.common.operators.Keys.SelectorFunctionKeys) Keys(org.apache.flink.api.common.operators.Keys) Ordering(org.apache.flink.api.common.operators.Ordering)

Example 5 with UnaryOperatorInformation

use of org.apache.flink.api.common.operators.UnaryOperatorInformation in project flink by apache.

the class DistinctOperator method translateToDataFlow.

/**
 * Translates this distinct operator into a reduce-based operator driven by a
 * {@code DistinctFunction}.
 *
 * <ul>
 *   <li>{@code Keys.ExpressionKeys}: a {@code ReduceOperatorBase} on the logical key positions
 *       with the configured combine hint. If {@code getType()} is a tuple type, every key
 *       field is registered as a forwarded field.
 *   <li>{@code SelectorFunctionKeys}: delegates to {@code translateSelectorFunctionDistinct}.
 * </ul>
 *
 * @param input the operator that provides this operator's input
 * @return the translated single-input operator
 * @throws UnsupportedOperationException if the keys are of any other key type
 */
@Override
protected org.apache.flink.api.common.operators.SingleInputOperator<?, T, ?> translateToDataFlow(Operator<T> input) {
    final ReduceFunction<T> function = new DistinctFunction<>();
    String name = getName() != null ? getName() : "Distinct at " + distinctLocationName;
    if (keys instanceof Keys.ExpressionKeys) {
        int[] logicalKeyPositions = keys.computeLogicalKeyPositions();
        UnaryOperatorInformation<T, T> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
        ReduceOperatorBase<T, ReduceFunction<T>> po = new ReduceOperatorBase<>(function, operatorInfo, logicalKeyPositions, name);
        po.setCombineHint(hint);
        po.setInput(input);
        po.setParallelism(getParallelism());
        // Forwarded fields are declared only for tuple types.
        if (getType().isTupleType()) {
            SingleInputSemanticProperties sProps = new SingleInputSemanticProperties();
            // Reuse the key positions computed above instead of recomputing them.
            for (int field : logicalKeyPositions) {
                sProps.addForwardedField(field, field);
            }
            po.setSemanticProperties(sProps);
        }
        return po;
    } else if (keys instanceof SelectorFunctionKeys) {
        @SuppressWarnings("unchecked") SelectorFunctionKeys<T, ?> selectorKeys = (SelectorFunctionKeys<T, ?>) keys;
        return translateSelectorFunctionDistinct(selectorKeys, function, getResultType(), name, input, parallelism, hint);
    } else {
        throw new UnsupportedOperationException("Unrecognized key type.");
    }
}
Also used : SelectorFunctionKeys(org.apache.flink.api.common.operators.Keys.SelectorFunctionKeys) ReduceOperatorBase(org.apache.flink.api.common.operators.base.ReduceOperatorBase) ReduceFunction(org.apache.flink.api.common.functions.ReduceFunction) CombineHint(org.apache.flink.api.common.operators.base.ReduceOperatorBase.CombineHint) UnaryOperatorInformation(org.apache.flink.api.common.operators.UnaryOperatorInformation) SingleInputSemanticProperties(org.apache.flink.api.common.operators.SingleInputSemanticProperties)

Aggregations

UnaryOperatorInformation (org.apache.flink.api.common.operators.UnaryOperatorInformation)10 Keys (org.apache.flink.api.common.operators.Keys)5 Ordering (org.apache.flink.api.common.operators.Ordering)5 GroupReduceOperatorBase (org.apache.flink.api.common.operators.base.GroupReduceOperatorBase)4 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)4 ArrayList (java.util.ArrayList)3 GroupReduceFunction (org.apache.flink.api.common.functions.GroupReduceFunction)3 RichGroupReduceFunction (org.apache.flink.api.common.functions.RichGroupReduceFunction)3 SelectorFunctionKeys (org.apache.flink.api.common.operators.Keys.SelectorFunctionKeys)3 SingleInputSemanticProperties (org.apache.flink.api.common.operators.SingleInputSemanticProperties)3 ReduceFunction (org.apache.flink.api.common.functions.ReduceFunction)2 MapOperatorBase (org.apache.flink.api.common.operators.base.MapOperatorBase)2 SortPartitionOperatorBase (org.apache.flink.api.common.operators.base.SortPartitionOperatorBase)2 AggregationFunction (org.apache.flink.api.java.aggregation.AggregationFunction)2 Serializable (java.io.Serializable)1 Arrays.asList (java.util.Arrays.asList)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 List (java.util.List)1 Set (java.util.Set)1