
Example 26 with Ordering

Use of org.apache.flink.api.common.operators.Ordering in project flink by apache.

From class GroupReduceNode, method initPossibleProperties.

private List<OperatorDescriptorSingle> initPossibleProperties(Partitioner<?> customPartitioner) {
    // see if an internal hint dictates the strategy to use
    final Configuration conf = getOperator().getParameters();
    final String localStrategy = conf.getString(Optimizer.HINT_LOCAL_STRATEGY, null);
    final boolean useCombiner;
    if (localStrategy != null) {
        if (Optimizer.HINT_LOCAL_STRATEGY_SORT.equals(localStrategy)) {
            useCombiner = false;
        } else if (Optimizer.HINT_LOCAL_STRATEGY_COMBINING_SORT.equals(localStrategy)) {
            if (!isCombineable()) {
                Optimizer.LOG.warn("Strategy hint for GroupReduce '" + getOperator().getName() + "' requires combinable reduce, but user function is not marked combinable.");
            }
            useCombiner = true;
        } else {
            throw new CompilerException("Invalid local strategy hint for GroupReduce contract: " + localStrategy);
        }
    } else {
        useCombiner = isCombineable();
    }
    // check if we can work with a grouping (simple reducer), or if we need ordering because of a group order
    Ordering groupOrder = null;
    if (getOperator() instanceof GroupReduceOperatorBase) {
        groupOrder = getOperator().getGroupOrder();
        if (groupOrder != null && groupOrder.getNumberOfFields() == 0) {
            groupOrder = null;
        }
    }
    OperatorDescriptorSingle props =
            useCombiner
                    ? (this.keys == null
                            ? new AllGroupWithPartialPreGroupProperties()
                            : new GroupReduceWithCombineProperties(this.keys, groupOrder, customPartitioner))
                    : (this.keys == null
                            ? new AllGroupReduceProperties()
                            : new GroupReduceProperties(this.keys, groupOrder, customPartitioner));
    return Collections.singletonList(props);
}
Also used: OperatorDescriptorSingle(org.apache.flink.optimizer.operators.OperatorDescriptorSingle), AllGroupWithPartialPreGroupProperties(org.apache.flink.optimizer.operators.AllGroupWithPartialPreGroupProperties), Configuration(org.apache.flink.configuration.Configuration), GroupReduceWithCombineProperties(org.apache.flink.optimizer.operators.GroupReduceWithCombineProperties), AllGroupReduceProperties(org.apache.flink.optimizer.operators.AllGroupReduceProperties), Ordering(org.apache.flink.api.common.operators.Ordering), GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase), CompilerException(org.apache.flink.optimizer.CompilerException), GroupReduceProperties(org.apache.flink.optimizer.operators.GroupReduceProperties)
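For reference, the group order consulted above is an Ordering attached to the GroupReduceOperatorBase. A minimal sketch of how such an order is built and why the zero-field check matters; the field indexes are hypothetical, and reduceOperator stands in for some GroupReduceOperatorBase instance:

    // Within each group, sort by field 1 ascending, then by field 2 descending.
    Ordering groupOrder = new Ordering();
    groupOrder.appendOrdering(1, null, Order.ASCENDING);
    groupOrder.appendOrdering(2, null, Order.DESCENDING);

    // getNumberOfFields() is 2 here; an Ordering with zero fields carries no
    // constraint, which is why initPossibleProperties resets such an order to null.
    reduceOperator.setGroupOrder(groupOrder);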

Example 27 with Ordering

Use of org.apache.flink.api.common.operators.Ordering in project flink by apache.

From class RangePartitionRewriter, method rewriteRangePartitionChannel.

private List<Channel> rewriteRangePartitionChannel(Channel channel) {
    final List<Channel> sourceNewOutputChannels = new ArrayList<>();
    final PlanNode sourceNode = channel.getSource();
    final PlanNode targetNode = channel.getTarget();
    final int sourceParallelism = sourceNode.getParallelism();
    final int targetParallelism = targetNode.getParallelism();
    final Costs defaultZeroCosts = new Costs(0, 0, 0);
    final TypeComparatorFactory<?> comparator = Utils.getShipComparator(channel, this.plan.getOriginalPlan().getExecutionConfig());
    // 1. Fixed-size sample in each partition.
    final int sampleSize = SAMPLES_PER_PARTITION * targetParallelism;
    final SampleInPartition sampleInPartition = new SampleInPartition(false, sampleSize, SEED);
    final TypeInformation<?> sourceOutputType = sourceNode.getOptimizerNode().getOperator().getOperatorInfo().getOutputType();
    final TypeInformation<IntermediateSampleData> isdTypeInformation = TypeExtractor.getForClass(IntermediateSampleData.class);
    final UnaryOperatorInformation sipOperatorInformation = new UnaryOperatorInformation(sourceOutputType, isdTypeInformation);
    final MapPartitionOperatorBase sipOperatorBase = new MapPartitionOperatorBase(sampleInPartition, sipOperatorInformation, SIP_NAME);
    final MapPartitionNode sipNode = new MapPartitionNode(sipOperatorBase);
    final Channel sipChannel = new Channel(sourceNode, TempMode.NONE);
    sipChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
    final SingleInputPlanNode sipPlanNode = new SingleInputPlanNode(sipNode, SIP_NAME, sipChannel, DriverStrategy.MAP_PARTITION);
    sipNode.setParallelism(sourceParallelism);
    sipPlanNode.setParallelism(sourceParallelism);
    sipPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
    sipPlanNode.setCosts(defaultZeroCosts);
    sipChannel.setTarget(sipPlanNode);
    this.plan.getAllNodes().add(sipPlanNode);
    sourceNewOutputChannels.add(sipChannel);
    // 2. Fixed-size sample in a single coordinator.
    final SampleInCoordinator sampleInCoordinator = new SampleInCoordinator(false, sampleSize, SEED);
    final UnaryOperatorInformation sicOperatorInformation = new UnaryOperatorInformation(isdTypeInformation, sourceOutputType);
    final GroupReduceOperatorBase sicOperatorBase = new GroupReduceOperatorBase(sampleInCoordinator, sicOperatorInformation, SIC_NAME);
    final GroupReduceNode sicNode = new GroupReduceNode(sicOperatorBase);
    final Channel sicChannel = new Channel(sipPlanNode, TempMode.NONE);
    sicChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
    final SingleInputPlanNode sicPlanNode = new SingleInputPlanNode(sicNode, SIC_NAME, sicChannel, DriverStrategy.ALL_GROUP_REDUCE);
    sicNode.setParallelism(1);
    sicPlanNode.setParallelism(1);
    sicPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
    sicPlanNode.setCosts(defaultZeroCosts);
    sicChannel.setTarget(sicPlanNode);
    sipPlanNode.addOutgoingChannel(sicChannel);
    this.plan.getAllNodes().add(sicPlanNode);
    // 3. Use sampled data to build range boundaries.
    final RangeBoundaryBuilder rangeBoundaryBuilder = new RangeBoundaryBuilder(comparator, targetParallelism);
    final TypeInformation<CommonRangeBoundaries> rbTypeInformation = TypeExtractor.getForClass(CommonRangeBoundaries.class);
    final UnaryOperatorInformation rbOperatorInformation = new UnaryOperatorInformation(sourceOutputType, rbTypeInformation);
    final MapPartitionOperatorBase rbOperatorBase = new MapPartitionOperatorBase(rangeBoundaryBuilder, rbOperatorInformation, RB_NAME);
    final MapPartitionNode rbNode = new MapPartitionNode(rbOperatorBase);
    final Channel rbChannel = new Channel(sicPlanNode, TempMode.NONE);
    rbChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
    final SingleInputPlanNode rbPlanNode = new SingleInputPlanNode(rbNode, RB_NAME, rbChannel, DriverStrategy.MAP_PARTITION);
    rbNode.setParallelism(1);
    rbPlanNode.setParallelism(1);
    rbPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
    rbPlanNode.setCosts(defaultZeroCosts);
    rbChannel.setTarget(rbPlanNode);
    sicPlanNode.addOutgoingChannel(rbChannel);
    this.plan.getAllNodes().add(rbPlanNode);
    // 4. Take the range boundaries as broadcast input and emit (partition id, record) tuples.
    final AssignRangeIndex assignRangeIndex = new AssignRangeIndex(comparator);
    final TypeInformation<Tuple2> ariOutputTypeInformation = new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, sourceOutputType);
    final UnaryOperatorInformation ariOperatorInformation = new UnaryOperatorInformation(sourceOutputType, ariOutputTypeInformation);
    final MapPartitionOperatorBase ariOperatorBase = new MapPartitionOperatorBase(assignRangeIndex, ariOperatorInformation, ARI_NAME);
    final MapPartitionNode ariNode = new MapPartitionNode(ariOperatorBase);
    final Channel ariChannel = new Channel(sourceNode, TempMode.NONE);
    // To avoid deadlock, set the DataExchangeMode of the channel between the source node and this node to BATCH.
    ariChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.BATCH);
    final SingleInputPlanNode ariPlanNode = new SingleInputPlanNode(ariNode, ARI_NAME, ariChannel, DriverStrategy.MAP_PARTITION);
    ariNode.setParallelism(sourceParallelism);
    ariPlanNode.setParallelism(sourceParallelism);
    ariPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
    ariPlanNode.setCosts(defaultZeroCosts);
    ariChannel.setTarget(ariPlanNode);
    this.plan.getAllNodes().add(ariPlanNode);
    sourceNewOutputChannels.add(ariChannel);
    final NamedChannel broadcastChannel = new NamedChannel("RangeBoundaries", rbPlanNode);
    broadcastChannel.setShipStrategy(ShipStrategyType.BROADCAST, DataExchangeMode.PIPELINED);
    broadcastChannel.setTarget(ariPlanNode);
    List<NamedChannel> broadcastChannels = new ArrayList<>(1);
    broadcastChannels.add(broadcastChannel);
    ariPlanNode.setBroadcastInputs(broadcastChannels);
    // 5. Remove the partition id.
    final Channel partChannel = new Channel(ariPlanNode, TempMode.NONE);
    final FieldList keys = new FieldList(0);
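    // idPartitioner (a static IdPartitioner in this rewriter) returns the key itself,
    // so PARTITION_CUSTOM on field 0 sends each record to the partition index that
    // AssignRangeIndex attached in step 4.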
    partChannel.setShipStrategy(ShipStrategyType.PARTITION_CUSTOM, keys, idPartitioner, DataExchangeMode.PIPELINED);
    ariPlanNode.addOutgoingChannel(partChannel);
    final RemoveRangeIndex partitionIDRemoveWrapper = new RemoveRangeIndex();
    final UnaryOperatorInformation prOperatorInformation = new UnaryOperatorInformation(ariOutputTypeInformation, sourceOutputType);
    final MapOperatorBase prOperatorBase = new MapOperatorBase(partitionIDRemoveWrapper, prOperatorInformation, PR_NAME);
    final MapNode prRemoverNode = new MapNode(prOperatorBase);
    final SingleInputPlanNode prPlanNode = new SingleInputPlanNode(prRemoverNode, PR_NAME, partChannel, DriverStrategy.MAP);
    partChannel.setTarget(prPlanNode);
    prRemoverNode.setParallelism(targetParallelism);
    prPlanNode.setParallelism(targetParallelism);
    GlobalProperties globalProperties = new GlobalProperties();
    globalProperties.setRangePartitioned(new Ordering(0, null, Order.ASCENDING));
    prPlanNode.initProperties(globalProperties, new LocalProperties());
    prPlanNode.setCosts(defaultZeroCosts);
    this.plan.getAllNodes().add(prPlanNode);
    // 6. Connect to target node.
    channel.setSource(prPlanNode);
    channel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
    prPlanNode.addOutgoingChannel(channel);
    return sourceNewOutputChannels;
}
Also used: SampleInPartition(org.apache.flink.api.java.functions.SampleInPartition), Costs(org.apache.flink.optimizer.costs.Costs), GroupReduceNode(org.apache.flink.optimizer.dag.GroupReduceNode), ArrayList(java.util.ArrayList), SampleInCoordinator(org.apache.flink.api.java.functions.SampleInCoordinator), MapNode(org.apache.flink.optimizer.dag.MapNode), RangeBoundaryBuilder(org.apache.flink.runtime.operators.udf.RangeBoundaryBuilder), FieldList(org.apache.flink.api.common.operators.util.FieldList), MapOperatorBase(org.apache.flink.api.common.operators.base.MapOperatorBase), IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode), PlanNode(org.apache.flink.optimizer.plan.PlanNode), SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode), UnaryOperatorInformation(org.apache.flink.api.common.operators.UnaryOperatorInformation), GlobalProperties(org.apache.flink.optimizer.dataproperties.GlobalProperties), RemoveRangeIndex(org.apache.flink.runtime.operators.udf.RemoveRangeIndex), Ordering(org.apache.flink.api.common.operators.Ordering), MapPartitionNode(org.apache.flink.optimizer.dag.MapPartitionNode), MapPartitionOperatorBase(org.apache.flink.api.common.operators.base.MapPartitionOperatorBase), AssignRangeIndex(org.apache.flink.runtime.operators.udf.AssignRangeIndex), Channel(org.apache.flink.optimizer.plan.Channel), NamedChannel(org.apache.flink.optimizer.plan.NamedChannel), TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo), IntermediateSampleData(org.apache.flink.api.java.sampling.IntermediateSampleData), Tuple2(org.apache.flink.api.java.tuple.Tuple2), GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase), LocalProperties(org.apache.flink.optimizer.dataproperties.LocalProperties), CommonRangeBoundaries(org.apache.flink.api.common.distributions.CommonRangeBoundaries)
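The entire pipeline above is the expansion of a single range-partition request. A minimal DataSet job that would trigger this rewrite, as a sketch with made-up data:

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple2<Integer, String>> data =
            env.fromElements(Tuple2.of(3, "c"), Tuple2.of(1, "a"), Tuple2.of(2, "b"));

    // partitionByRange(0) is the user-facing request; during optimization the
    // channel it creates is replaced by the sample -> boundaries -> assign-index
    // -> strip-index chain built in rewriteRangePartitionChannel.
    data.partitionByRange(0)
        .sortPartition(0, Order.ASCENDING)
        .print();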

Example 28 with Ordering

Use of org.apache.flink.api.common.operators.Ordering in project flink by apache.

From class LocalProperties, method filterBySemanticProperties.

/**
 * Filters these LocalProperties by the fields that are forwarded to the output
 * as described by the SemanticProperties.
 *
 * @param props The semantic properties holding information about forwarded fields.
 * @param input The index of the input.
 * @return The filtered LocalProperties.
 */
public LocalProperties filterBySemanticProperties(SemanticProperties props, int input) {
    if (props == null) {
        throw new NullPointerException("SemanticProperties may not be null.");
    }
    LocalProperties returnProps = new LocalProperties();
    // check if sorting is preserved
    if (this.ordering != null) {
        Ordering newOrdering = new Ordering();
        for (int i = 0; i < this.ordering.getInvolvedIndexes().size(); i++) {
            int sourceField = this.ordering.getInvolvedIndexes().get(i);
            FieldSet targetField = props.getForwardingTargetFields(input, sourceField);
            if (targetField == null || targetField.size() == 0) {
                if (i == 0) {
                    // order fully destroyed
                    newOrdering = null;
                    break;
                } else {
                    // order partially preserved
                    break;
                }
            } else {
                // Use any of the target fields for now; field equivalence sets would be a better solution in the future.
                if (targetField.size() > 1) {
                    LOG.warn("Found that a field is forwarded to more than one target field in " + "semantic forwarded field information. Will only use the field with the lowest index.");
                }
                newOrdering.appendOrdering(targetField.toArray()[0], this.ordering.getType(i), this.ordering.getOrder(i));
            }
        }
        returnProps.ordering = newOrdering;
        if (newOrdering != null) {
            returnProps.groupedFields = newOrdering.getInvolvedIndexes();
        } else {
            returnProps.groupedFields = null;
        }
    } else if (this.groupedFields != null) {
        // check if grouping is preserved
        FieldList newGroupedFields = new FieldList();
        for (Integer sourceField : this.groupedFields) {
            FieldSet targetField = props.getForwardingTargetFields(input, sourceField);
            if (targetField == null || targetField.size() == 0) {
                newGroupedFields = null;
                break;
            } else {
                // Use any of the target fields for now; field equivalence sets would be a better solution in the future.
                if (targetField.size() > 1) {
                    LOG.warn("Found that a field is forwarded to more than one target field in " + "semantic forwarded field information. Will only use the field with the lowest index.");
                }
                newGroupedFields = newGroupedFields.addField(targetField.toArray()[0]);
            }
        }
        returnProps.groupedFields = newGroupedFields;
    }
    if (this.uniqueFields != null) {
        Set<FieldSet> newUniqueFields = new HashSet<FieldSet>();
        for (FieldSet fields : this.uniqueFields) {
            FieldSet newFields = new FieldSet();
            for (Integer sourceField : fields) {
                FieldSet targetField = props.getForwardingTargetFields(input, sourceField);
                if (targetField == null || targetField.size() == 0) {
                    newFields = null;
                    break;
                } else {
                    // Use any of the target fields for now; field equivalence sets would be a better solution in the future.
                    if (targetField.size() > 1) {
                        LOG.warn("Found that a field is forwarded to more than one target field in " + "semantic forwarded field information. Will only use the field with the lowest index.");
                    }
                    newFields = newFields.addField(targetField.toArray()[0]);
                }
            }
            if (newFields != null) {
                newUniqueFields.add(newFields);
            }
        }
        if (!newUniqueFields.isEmpty()) {
            returnProps.uniqueFields = newUniqueFields;
        } else {
            returnProps.uniqueFields = null;
        }
    }
    return returnProps;
}
Also used: FieldSet(org.apache.flink.api.common.operators.util.FieldSet), Ordering(org.apache.flink.api.common.operators.Ordering), FieldList(org.apache.flink.api.common.operators.util.FieldList), HashSet(java.util.HashSet)
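A compact sketch of this filter at work, assuming the LocalProperties.forOrdering factory and the SingleInputSemanticProperties class from the surrounding examples; the field indexes are invented:

    // The input is sorted on field 0, and the operator forwards input field 0
    // to output field 2.
    LocalProperties lProps = LocalProperties.forOrdering(new Ordering(0, null, Order.ASCENDING));
    SingleInputSemanticProperties sProps = new SingleInputSemanticProperties();
    sProps.addForwardedField(0, 2);

    // The surviving ordering is on output field 2; a leading field with no
    // forwarding target would have destroyed the order entirely, as handled above.
    LocalProperties filtered = lProps.filterBySemanticProperties(sProps, 0);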

Example 29 with Ordering

Use of org.apache.flink.api.common.operators.Ordering in project flink by apache.

From class RequestedGlobalProperties, method filterBySemanticProperties.

/**
 * Filters these properties by what can be preserved by the given SemanticProperties
 * when propagated down to the given input.
 *
 * @param props The SemanticProperties which define which fields are preserved.
 * @param input The index of the operator's input.
 * @return The filtered RequestedGlobalProperties.
 */
public RequestedGlobalProperties filterBySemanticProperties(SemanticProperties props, int input) {
    // no semantic properties available. All global properties are filtered.
    if (props == null) {
        throw new NullPointerException("SemanticProperties may not be null.");
    }
    RequestedGlobalProperties rgProp = new RequestedGlobalProperties();
    switch(this.partitioning) {
        case FULL_REPLICATION:
        case FORCED_REBALANCED:
        case CUSTOM_PARTITIONING:
        case RANDOM_PARTITIONED:
        case ANY_DISTRIBUTION:
            // make sure that certain properties are not pushed down
            return null;
        case HASH_PARTITIONED:
        case ANY_PARTITIONING:
            FieldSet newFields;
            if (this.partitioningFields instanceof FieldList) {
                newFields = new FieldList();
            } else {
                newFields = new FieldSet();
            }
            for (Integer targetField : this.partitioningFields) {
                int sourceField = props.getForwardingSourceField(input, targetField);
                if (sourceField >= 0) {
                    newFields = newFields.addField(sourceField);
                } else {
                    // partial partitionings are not preserved to avoid skewed partitioning
                    return null;
                }
            }
            rgProp.partitioning = this.partitioning;
            rgProp.partitioningFields = newFields;
            return rgProp;
        case RANGE_PARTITIONED:
            // range partitioning
            Ordering newOrdering = new Ordering();
            for (int i = 0; i < this.ordering.getInvolvedIndexes().size(); i++) {
                int value = this.ordering.getInvolvedIndexes().get(i);
                int sourceField = props.getForwardingSourceField(input, value);
                if (sourceField >= 0) {
                    newOrdering.appendOrdering(sourceField, this.ordering.getType(i), this.ordering.getOrder(i));
                } else {
                    return null;
                }
            }
            rgProp.partitioning = this.partitioning;
            rgProp.ordering = newOrdering;
            rgProp.dataDistribution = this.dataDistribution;
            return rgProp;
        default:
            throw new RuntimeException("Unknown partitioning type encountered.");
    }
}
Also used: FieldSet(org.apache.flink.api.common.operators.util.FieldSet), Ordering(org.apache.flink.api.common.operators.Ordering), FieldList(org.apache.flink.api.common.operators.util.FieldList)
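The range-partitioned branch is the one that exercises Ordering. A small sketch, assuming the setRangePartitioned(Ordering) setter on RequestedGlobalProperties and invented field indexes:

    // Downstream requests range partitioning on field 2 of this operator's output.
    RequestedGlobalProperties requested = new RequestedGlobalProperties();
    requested.setRangePartitioned(new Ordering(2, null, Order.ASCENDING));

    // The operator forwards input field 5 to output field 2, so the request is
    // translated to range partitioning on input field 5; without that forwarding
    // entry, filterBySemanticProperties would return null.
    SingleInputSemanticProperties sProps = new SingleInputSemanticProperties();
    sProps.addForwardedField(5, 2);
    RequestedGlobalProperties pushedDown = requested.filterBySemanticProperties(sProps, 0);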

Example 30 with Ordering

Use of org.apache.flink.api.common.operators.Ordering in project flink by apache.

From class RequestedLocalProperties, method filterBySemanticProperties.

/**
 * Filters these properties by what can be preserved by the given SemanticProperties
 * when propagated down to the given input.
 *
 * @param props The SemanticProperties which define which fields are preserved.
 * @param input The index of the operator's input.
 * @return The filtered RequestedLocalProperties.
 */
public RequestedLocalProperties filterBySemanticProperties(SemanticProperties props, int input) {
    // no semantic properties, all local properties are filtered
    if (props == null) {
        throw new NullPointerException("SemanticProperties may not be null.");
    }
    if (this.ordering != null) {
        Ordering newOrdering = new Ordering();
        for (int i = 0; i < this.ordering.getInvolvedIndexes().size(); i++) {
            int targetField = this.ordering.getInvolvedIndexes().get(i);
            int sourceField = props.getForwardingSourceField(input, targetField);
            if (sourceField >= 0) {
                newOrdering.appendOrdering(sourceField, this.ordering.getType(i), this.ordering.getOrder(i));
            } else {
                return null;
            }
        }
        return new RequestedLocalProperties(newOrdering);
    } else if (this.groupedFields != null) {
        FieldSet newGrouping = new FieldSet();
        // check, whether the local key grouping is preserved
        for (Integer targetField : this.groupedFields) {
            int sourceField = props.getForwardingSourceField(input, targetField);
            if (sourceField >= 0) {
                newGrouping = newGrouping.addField(sourceField);
            } else {
                return null;
            }
        }
        return new RequestedLocalProperties(newGrouping);
    } else {
        return null;
    }
}
Also used: FieldSet(org.apache.flink.api.common.operators.util.FieldSet), Ordering(org.apache.flink.api.common.operators.Ordering)
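And the same pattern for requested local properties, again with invented field indexes:

    // A sort on output field 1 is requested; the operator forwards input field 3 to it.
    RequestedLocalProperties requested =
            new RequestedLocalProperties(new Ordering(1, null, Order.ASCENDING));
    SingleInputSemanticProperties sProps = new SingleInputSemanticProperties();
    sProps.addForwardedField(3, 1);

    // Pushed down, this becomes a request to sort input field 3; with no matching
    // forwarding information the method returns null and nothing is pushed down.
    RequestedLocalProperties pushedDown = requested.filterBySemanticProperties(sProps, 0);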

Aggregations

Ordering (org.apache.flink.api.common.operators.Ordering): 52
Test (org.junit.Test): 28
FieldList (org.apache.flink.api.common.operators.util.FieldList): 24
SingleInputSemanticProperties (org.apache.flink.api.common.operators.SingleInputSemanticProperties): 15
FieldSet (org.apache.flink.api.common.operators.util.FieldSet): 9
GlobalProperties (org.apache.flink.optimizer.dataproperties.GlobalProperties): 9
RequestedGlobalProperties (org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties): 9
LocalProperties (org.apache.flink.optimizer.dataproperties.LocalProperties): 6
Channel (org.apache.flink.optimizer.plan.Channel): 6
SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode): 6
UnaryOperatorInformation (org.apache.flink.api.common.operators.UnaryOperatorInformation): 5
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 5
RequestedLocalProperties (org.apache.flink.optimizer.dataproperties.RequestedLocalProperties): 5
Partitioner (org.apache.flink.api.common.functions.Partitioner): 4
Keys (org.apache.flink.api.common.operators.Keys): 4
SelectorFunctionKeys (org.apache.flink.api.common.operators.Keys.SelectorFunctionKeys): 4
CompilerException (org.apache.flink.optimizer.CompilerException): 4
FeedbackPropertiesMeetRequirementsReport (org.apache.flink.optimizer.plan.PlanNode.FeedbackPropertiesMeetRequirementsReport): 4
SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode): 4
InvalidProgramException (org.apache.flink.api.common.InvalidProgramException): 3