Search in sources :

Example 6 with UnaryOperatorInformation

use of org.apache.flink.api.common.operators.UnaryOperatorInformation in project flink by apache.

the class DistinctOperator method translateToDataFlow.

@Override
protected org.apache.flink.api.common.operators.SingleInputOperator<?, T, ?> translateToDataFlow(Operator<T> input) {
    final ReduceFunction<T> function = new DistinctFunction<>();
    String name = getName() != null ? getName() : "Distinct at " + distinctLocationName;
    if (keys instanceof Keys.ExpressionKeys) {
        int[] logicalKeyPositions = keys.computeLogicalKeyPositions();
        UnaryOperatorInformation<T, T> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
        ReduceOperatorBase<T, ReduceFunction<T>> po = new ReduceOperatorBase<>(function, operatorInfo, logicalKeyPositions, name);
        po.setCombineHint(hint);
        po.setInput(input);
        po.setParallelism(getParallelism());
        // make sure that distinct preserves the partitioning for the fields on which they operate
        if (getType().isTupleType()) {
            SingleInputSemanticProperties sProps = new SingleInputSemanticProperties();
            for (int field : keys.computeLogicalKeyPositions()) {
                sProps.addForwardedField(field, field);
            }
            po.setSemanticProperties(sProps);
        }
        return po;
    } else if (keys instanceof SelectorFunctionKeys) {
        @SuppressWarnings("unchecked") SelectorFunctionKeys<T, ?> selectorKeys = (SelectorFunctionKeys<T, ?>) keys;
        org.apache.flink.api.common.operators.SingleInputOperator<?, T, ?> po = translateSelectorFunctionDistinct(selectorKeys, function, getResultType(), name, input, parallelism, hint);
        return po;
    } else {
        throw new UnsupportedOperationException("Unrecognized key type.");
    }
}
Also used : SelectorFunctionKeys(org.apache.flink.api.common.operators.Keys.SelectorFunctionKeys) ReduceOperatorBase(org.apache.flink.api.common.operators.base.ReduceOperatorBase) ReduceFunction(org.apache.flink.api.common.functions.ReduceFunction) CombineHint(org.apache.flink.api.common.operators.base.ReduceOperatorBase.CombineHint) UnaryOperatorInformation(org.apache.flink.api.common.operators.UnaryOperatorInformation) SingleInputSemanticProperties(org.apache.flink.api.common.operators.SingleInputSemanticProperties)

Example 7 with UnaryOperatorInformation

use of org.apache.flink.api.common.operators.UnaryOperatorInformation in project flink by apache.

the class GroupCombineOperator method translateToDataFlow.

// --------------------------------------------------------------------------------------------
//  Translation
// --------------------------------------------------------------------------------------------
@Override
protected GroupCombineOperatorBase<?, OUT, ?> translateToDataFlow(Operator<IN> input) {
    String name = getName() != null ? getName() : "GroupCombine at " + defaultName;
    // distinguish between grouped reduce and non-grouped reduce
    if (grouper == null) {
        // non grouped reduce
        UnaryOperatorInformation<IN, OUT> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
        GroupCombineOperatorBase<IN, OUT, GroupCombineFunction<IN, OUT>> po = new GroupCombineOperatorBase<>(function, operatorInfo, new int[0], name);
        po.setInput(input);
        // the parallelism for a non grouped reduce can only be 1
        po.setParallelism(1);
        return po;
    }
    if (grouper.getKeys() instanceof SelectorFunctionKeys) {
        @SuppressWarnings("unchecked") SelectorFunctionKeys<IN, ?> selectorKeys = (SelectorFunctionKeys<IN, ?>) grouper.getKeys();
        if (grouper instanceof SortedGrouping) {
            SortedGrouping<IN> sortedGrouping = (SortedGrouping<IN>) grouper;
            SelectorFunctionKeys<IN, ?> sortKeys = sortedGrouping.getSortSelectionFunctionKey();
            Ordering groupOrder = sortedGrouping.getGroupOrdering();
            PlanUnwrappingSortedGroupCombineOperator<IN, OUT, ?, ?> po = translateSelectorFunctionSortedReducer(selectorKeys, sortKeys, groupOrder, function, getResultType(), name, input);
            po.setParallelism(this.getParallelism());
            return po;
        } else {
            PlanUnwrappingGroupCombineOperator<IN, OUT, ?> po = translateSelectorFunctionReducer(selectorKeys, function, getResultType(), name, input);
            po.setParallelism(this.getParallelism());
            return po;
        }
    } else if (grouper.getKeys() instanceof Keys.ExpressionKeys) {
        int[] logicalKeyPositions = grouper.getKeys().computeLogicalKeyPositions();
        UnaryOperatorInformation<IN, OUT> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
        GroupCombineOperatorBase<IN, OUT, GroupCombineFunction<IN, OUT>> po = new GroupCombineOperatorBase<>(function, operatorInfo, logicalKeyPositions, name);
        po.setInput(input);
        po.setParallelism(getParallelism());
        // set group order
        if (grouper instanceof SortedGrouping) {
            SortedGrouping<IN> sortedGrouper = (SortedGrouping<IN>) grouper;
            int[] sortKeyPositions = sortedGrouper.getGroupSortKeyPositions();
            Order[] sortOrders = sortedGrouper.getGroupSortOrders();
            Ordering o = new Ordering();
            for (int i = 0; i < sortKeyPositions.length; i++) {
                o.appendOrdering(sortKeyPositions[i], null, sortOrders[i]);
            }
            po.setGroupOrder(o);
        }
        return po;
    } else {
        throw new UnsupportedOperationException("Unrecognized key type.");
    }
}
Also used : SelectorFunctionKeys(org.apache.flink.api.common.operators.Keys.SelectorFunctionKeys) GroupCombineFunction(org.apache.flink.api.common.functions.GroupCombineFunction) GroupCombineOperatorBase(org.apache.flink.api.common.operators.base.GroupCombineOperatorBase) UnaryOperatorInformation(org.apache.flink.api.common.operators.UnaryOperatorInformation) SelectorFunctionKeys(org.apache.flink.api.common.operators.Keys.SelectorFunctionKeys) Keys(org.apache.flink.api.common.operators.Keys) Ordering(org.apache.flink.api.common.operators.Ordering)

Example 8 with UnaryOperatorInformation

use of org.apache.flink.api.common.operators.UnaryOperatorInformation in project flink by apache.

the class GroupReduceOperator method translateToDataFlow.

// --------------------------------------------------------------------------------------------
//  Translation
// --------------------------------------------------------------------------------------------
@Override
@SuppressWarnings("unchecked")
protected GroupReduceOperatorBase<?, OUT, ?> translateToDataFlow(Operator<IN> input) {
    String name = getName() != null ? getName() : "GroupReduce at " + defaultName;
    // wrap CombineFunction in GroupCombineFunction if combinable
    if (combinable && function instanceof CombineFunction<?, ?>) {
        this.function = function instanceof RichGroupReduceFunction<?, ?> ? new RichCombineToGroupCombineWrapper((RichGroupReduceFunction<?, ?>) function) : new CombineToGroupCombineWrapper((CombineFunction<?, ?>) function);
    }
    // distinguish between grouped reduce and non-grouped reduce
    if (grouper == null) {
        // non grouped reduce
        UnaryOperatorInformation<IN, OUT> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
        GroupReduceOperatorBase<IN, OUT, GroupReduceFunction<IN, OUT>> po = new GroupReduceOperatorBase<>(function, operatorInfo, new int[0], name);
        po.setCombinable(combinable);
        po.setInput(input);
        // the parallelism for a non grouped reduce can only be 1
        po.setParallelism(1);
        return po;
    }
    if (grouper.getKeys() instanceof SelectorFunctionKeys) {
        @SuppressWarnings("unchecked") SelectorFunctionKeys<IN, ?> selectorKeys = (SelectorFunctionKeys<IN, ?>) grouper.getKeys();
        if (grouper instanceof SortedGrouping) {
            SortedGrouping<IN> sortedGrouping = (SortedGrouping<IN>) grouper;
            SelectorFunctionKeys<IN, ?> sortKeys = sortedGrouping.getSortSelectionFunctionKey();
            Ordering groupOrder = sortedGrouping.getGroupOrdering();
            PlanUnwrappingSortedReduceGroupOperator<IN, OUT, ?, ?> po = translateSelectorFunctionSortedReducer(selectorKeys, sortKeys, groupOrder, function, getResultType(), name, input, isCombinable());
            po.setParallelism(this.getParallelism());
            po.setCustomPartitioner(grouper.getCustomPartitioner());
            return po;
        } else {
            PlanUnwrappingReduceGroupOperator<IN, OUT, ?> po = translateSelectorFunctionReducer(selectorKeys, function, getResultType(), name, input, isCombinable());
            po.setParallelism(this.getParallelism());
            po.setCustomPartitioner(grouper.getCustomPartitioner());
            return po;
        }
    } else if (grouper.getKeys() instanceof ExpressionKeys) {
        int[] logicalKeyPositions = grouper.getKeys().computeLogicalKeyPositions();
        UnaryOperatorInformation<IN, OUT> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
        GroupReduceOperatorBase<IN, OUT, GroupReduceFunction<IN, OUT>> po = new GroupReduceOperatorBase<>(function, operatorInfo, logicalKeyPositions, name);
        po.setCombinable(combinable);
        po.setInput(input);
        po.setParallelism(getParallelism());
        po.setCustomPartitioner(grouper.getCustomPartitioner());
        // set group order
        if (grouper instanceof SortedGrouping) {
            SortedGrouping<IN> sortedGrouper = (SortedGrouping<IN>) grouper;
            int[] sortKeyPositions = sortedGrouper.getGroupSortKeyPositions();
            Order[] sortOrders = sortedGrouper.getGroupSortOrders();
            Ordering o = new Ordering();
            for (int i = 0; i < sortKeyPositions.length; i++) {
                o.appendOrdering(sortKeyPositions[i], null, sortOrders[i]);
            }
            po.setGroupOrder(o);
        }
        return po;
    } else {
        throw new UnsupportedOperationException("Unrecognized key type.");
    }
}
Also used : RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) SelectorFunctionKeys(org.apache.flink.api.common.operators.Keys.SelectorFunctionKeys) RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) ExpressionKeys(org.apache.flink.api.common.operators.Keys.ExpressionKeys) RichCombineToGroupCombineWrapper(org.apache.flink.api.java.operators.translation.RichCombineToGroupCombineWrapper) CombineToGroupCombineWrapper(org.apache.flink.api.java.operators.translation.CombineToGroupCombineWrapper) UnaryOperatorInformation(org.apache.flink.api.common.operators.UnaryOperatorInformation) RichCombineToGroupCombineWrapper(org.apache.flink.api.java.operators.translation.RichCombineToGroupCombineWrapper) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) Ordering(org.apache.flink.api.common.operators.Ordering)

Example 9 with UnaryOperatorInformation

use of org.apache.flink.api.common.operators.UnaryOperatorInformation in project flink by apache.

the class RangePartitionRewriter method rewriteRangePartitionChannel.

private List<Channel> rewriteRangePartitionChannel(Channel channel) {
    final List<Channel> sourceNewOutputChannels = new ArrayList<>();
    final PlanNode sourceNode = channel.getSource();
    final PlanNode targetNode = channel.getTarget();
    final int sourceParallelism = sourceNode.getParallelism();
    final int targetParallelism = targetNode.getParallelism();
    final Costs defaultZeroCosts = new Costs(0, 0, 0);
    final TypeComparatorFactory<?> comparator = Utils.getShipComparator(channel, this.plan.getOriginalPlan().getExecutionConfig());
    // 1. Fixed size sample in each partitions.
    final int sampleSize = SAMPLES_PER_PARTITION * targetParallelism;
    final SampleInPartition sampleInPartition = new SampleInPartition(false, sampleSize, SEED);
    final TypeInformation<?> sourceOutputType = sourceNode.getOptimizerNode().getOperator().getOperatorInfo().getOutputType();
    final TypeInformation<IntermediateSampleData> isdTypeInformation = TypeExtractor.getForClass(IntermediateSampleData.class);
    final UnaryOperatorInformation sipOperatorInformation = new UnaryOperatorInformation(sourceOutputType, isdTypeInformation);
    final MapPartitionOperatorBase sipOperatorBase = new MapPartitionOperatorBase(sampleInPartition, sipOperatorInformation, SIP_NAME);
    final MapPartitionNode sipNode = new MapPartitionNode(sipOperatorBase);
    final Channel sipChannel = new Channel(sourceNode, TempMode.NONE);
    sipChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
    final SingleInputPlanNode sipPlanNode = new SingleInputPlanNode(sipNode, SIP_NAME, sipChannel, DriverStrategy.MAP_PARTITION);
    sipNode.setParallelism(sourceParallelism);
    sipPlanNode.setParallelism(sourceParallelism);
    sipPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
    sipPlanNode.setCosts(defaultZeroCosts);
    sipChannel.setTarget(sipPlanNode);
    this.plan.getAllNodes().add(sipPlanNode);
    sourceNewOutputChannels.add(sipChannel);
    // 2. Fixed size sample in a single coordinator.
    final SampleInCoordinator sampleInCoordinator = new SampleInCoordinator(false, sampleSize, SEED);
    final UnaryOperatorInformation sicOperatorInformation = new UnaryOperatorInformation(isdTypeInformation, sourceOutputType);
    final GroupReduceOperatorBase sicOperatorBase = new GroupReduceOperatorBase(sampleInCoordinator, sicOperatorInformation, SIC_NAME);
    final GroupReduceNode sicNode = new GroupReduceNode(sicOperatorBase);
    final Channel sicChannel = new Channel(sipPlanNode, TempMode.NONE);
    sicChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
    final SingleInputPlanNode sicPlanNode = new SingleInputPlanNode(sicNode, SIC_NAME, sicChannel, DriverStrategy.ALL_GROUP_REDUCE);
    sicNode.setParallelism(1);
    sicPlanNode.setParallelism(1);
    sicPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
    sicPlanNode.setCosts(defaultZeroCosts);
    sicChannel.setTarget(sicPlanNode);
    sipPlanNode.addOutgoingChannel(sicChannel);
    this.plan.getAllNodes().add(sicPlanNode);
    // 3. Use sampled data to build range boundaries.
    final RangeBoundaryBuilder rangeBoundaryBuilder = new RangeBoundaryBuilder(comparator, targetParallelism);
    final TypeInformation<CommonRangeBoundaries> rbTypeInformation = TypeExtractor.getForClass(CommonRangeBoundaries.class);
    final UnaryOperatorInformation rbOperatorInformation = new UnaryOperatorInformation(sourceOutputType, rbTypeInformation);
    final MapPartitionOperatorBase rbOperatorBase = new MapPartitionOperatorBase(rangeBoundaryBuilder, rbOperatorInformation, RB_NAME);
    final MapPartitionNode rbNode = new MapPartitionNode(rbOperatorBase);
    final Channel rbChannel = new Channel(sicPlanNode, TempMode.NONE);
    rbChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
    final SingleInputPlanNode rbPlanNode = new SingleInputPlanNode(rbNode, RB_NAME, rbChannel, DriverStrategy.MAP_PARTITION);
    rbNode.setParallelism(1);
    rbPlanNode.setParallelism(1);
    rbPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
    rbPlanNode.setCosts(defaultZeroCosts);
    rbChannel.setTarget(rbPlanNode);
    sicPlanNode.addOutgoingChannel(rbChannel);
    this.plan.getAllNodes().add(rbPlanNode);
    // 4. Take range boundaries as broadcast input and take the tuple of partition id and record as output.
    final AssignRangeIndex assignRangeIndex = new AssignRangeIndex(comparator);
    final TypeInformation<Tuple2> ariOutputTypeInformation = new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, sourceOutputType);
    final UnaryOperatorInformation ariOperatorInformation = new UnaryOperatorInformation(sourceOutputType, ariOutputTypeInformation);
    final MapPartitionOperatorBase ariOperatorBase = new MapPartitionOperatorBase(assignRangeIndex, ariOperatorInformation, ARI_NAME);
    final MapPartitionNode ariNode = new MapPartitionNode(ariOperatorBase);
    final Channel ariChannel = new Channel(sourceNode, TempMode.NONE);
    // To avoid deadlock, set the DataExchangeMode of channel between source node and this to Batch.
    ariChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.BATCH);
    final SingleInputPlanNode ariPlanNode = new SingleInputPlanNode(ariNode, ARI_NAME, ariChannel, DriverStrategy.MAP_PARTITION);
    ariNode.setParallelism(sourceParallelism);
    ariPlanNode.setParallelism(sourceParallelism);
    ariPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
    ariPlanNode.setCosts(defaultZeroCosts);
    ariChannel.setTarget(ariPlanNode);
    this.plan.getAllNodes().add(ariPlanNode);
    sourceNewOutputChannels.add(ariChannel);
    final NamedChannel broadcastChannel = new NamedChannel("RangeBoundaries", rbPlanNode);
    broadcastChannel.setShipStrategy(ShipStrategyType.BROADCAST, DataExchangeMode.PIPELINED);
    broadcastChannel.setTarget(ariPlanNode);
    List<NamedChannel> broadcastChannels = new ArrayList<>(1);
    broadcastChannels.add(broadcastChannel);
    ariPlanNode.setBroadcastInputs(broadcastChannels);
    // 5. Remove the partition id.
    final Channel partChannel = new Channel(ariPlanNode, TempMode.NONE);
    final FieldList keys = new FieldList(0);
    partChannel.setShipStrategy(ShipStrategyType.PARTITION_CUSTOM, keys, idPartitioner, DataExchangeMode.PIPELINED);
    ariPlanNode.addOutgoingChannel(partChannel);
    final RemoveRangeIndex partitionIDRemoveWrapper = new RemoveRangeIndex();
    final UnaryOperatorInformation prOperatorInformation = new UnaryOperatorInformation(ariOutputTypeInformation, sourceOutputType);
    final MapOperatorBase prOperatorBase = new MapOperatorBase(partitionIDRemoveWrapper, prOperatorInformation, PR_NAME);
    final MapNode prRemoverNode = new MapNode(prOperatorBase);
    final SingleInputPlanNode prPlanNode = new SingleInputPlanNode(prRemoverNode, PR_NAME, partChannel, DriverStrategy.MAP);
    partChannel.setTarget(prPlanNode);
    prRemoverNode.setParallelism(targetParallelism);
    prPlanNode.setParallelism(targetParallelism);
    GlobalProperties globalProperties = new GlobalProperties();
    globalProperties.setRangePartitioned(new Ordering(0, null, Order.ASCENDING));
    prPlanNode.initProperties(globalProperties, new LocalProperties());
    prPlanNode.setCosts(defaultZeroCosts);
    this.plan.getAllNodes().add(prPlanNode);
    // 6. Connect to target node.
    channel.setSource(prPlanNode);
    channel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
    prPlanNode.addOutgoingChannel(channel);
    return sourceNewOutputChannels;
}
Also used : SampleInPartition(org.apache.flink.api.java.functions.SampleInPartition) Costs(org.apache.flink.optimizer.costs.Costs) GroupReduceNode(org.apache.flink.optimizer.dag.GroupReduceNode) ArrayList(java.util.ArrayList) SampleInCoordinator(org.apache.flink.api.java.functions.SampleInCoordinator) MapNode(org.apache.flink.optimizer.dag.MapNode) RangeBoundaryBuilder(org.apache.flink.runtime.operators.udf.RangeBoundaryBuilder) FieldList(org.apache.flink.api.common.operators.util.FieldList) MapOperatorBase(org.apache.flink.api.common.operators.base.MapOperatorBase) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) UnaryOperatorInformation(org.apache.flink.api.common.operators.UnaryOperatorInformation) GlobalProperties(org.apache.flink.optimizer.dataproperties.GlobalProperties) RemoveRangeIndex(org.apache.flink.runtime.operators.udf.RemoveRangeIndex) Ordering(org.apache.flink.api.common.operators.Ordering) MapPartitionNode(org.apache.flink.optimizer.dag.MapPartitionNode) MapPartitionOperatorBase(org.apache.flink.api.common.operators.base.MapPartitionOperatorBase) AssignRangeIndex(org.apache.flink.runtime.operators.udf.AssignRangeIndex) Channel(org.apache.flink.optimizer.plan.Channel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) IntermediateSampleData(org.apache.flink.api.java.sampling.IntermediateSampleData) Tuple2(org.apache.flink.api.java.tuple.Tuple2) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) LocalProperties(org.apache.flink.optimizer.dataproperties.LocalProperties) CommonRangeBoundaries(org.apache.flink.api.common.distributions.CommonRangeBoundaries)

Aggregations

UnaryOperatorInformation (org.apache.flink.api.common.operators.UnaryOperatorInformation)9 Keys (org.apache.flink.api.common.operators.Keys)5 Ordering (org.apache.flink.api.common.operators.Ordering)5 GroupReduceOperatorBase (org.apache.flink.api.common.operators.base.GroupReduceOperatorBase)4 GroupReduceFunction (org.apache.flink.api.common.functions.GroupReduceFunction)3 RichGroupReduceFunction (org.apache.flink.api.common.functions.RichGroupReduceFunction)3 SelectorFunctionKeys (org.apache.flink.api.common.operators.Keys.SelectorFunctionKeys)3 SingleInputSemanticProperties (org.apache.flink.api.common.operators.SingleInputSemanticProperties)3 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)3 ArrayList (java.util.ArrayList)2 MapOperatorBase (org.apache.flink.api.common.operators.base.MapOperatorBase)2 SortPartitionOperatorBase (org.apache.flink.api.common.operators.base.SortPartitionOperatorBase)2 AggregationFunction (org.apache.flink.api.java.aggregation.AggregationFunction)2 Internal (org.apache.flink.annotation.Internal)1 CommonRangeBoundaries (org.apache.flink.api.common.distributions.CommonRangeBoundaries)1 GroupCombineFunction (org.apache.flink.api.common.functions.GroupCombineFunction)1 MapFunction (org.apache.flink.api.common.functions.MapFunction)1 ReduceFunction (org.apache.flink.api.common.functions.ReduceFunction)1 ExpressionKeys (org.apache.flink.api.common.operators.Keys.ExpressionKeys)1 Order (org.apache.flink.api.common.operators.Order)1