Search in sources :

Example 1 with ShipStrategyType

use of org.apache.flink.runtime.operators.shipping.ShipStrategyType in project flink by apache.

the class RangePartitionRewriter method postVisit.

/**
 * Post-visit hook: replaces every range-partitioned input channel of {@code node} that has no
 * data distribution attached with the channels produced by
 * {@code rewriteRangePartitionChannel}.
 *
 * <p>Iteration nodes additionally have their step function traversed by this visitor, once per
 * iteration node.
 *
 * @param node the plan node whose inputs are inspected
 * @throws InvalidProgramException if such a channel belongs to a node on a dynamic path
 */
@Override
public void postVisit(PlanNode node) {
    // Descend into the step function of an iteration node the first time it is seen.
    if (node instanceof IterationPlanNode) {
        final IterationPlanNode iteration = (IterationPlanNode) node;
        final boolean alreadyVisited = visitedIterationNodes.contains(iteration);
        if (!alreadyVisited) {
            visitedIterationNodes.add(iteration);
            iteration.acceptForStepFunction(this);
        }
    }

    for (Channel input : node.getInputs()) {
        // Only range-partitioned channels are of interest; everything else stays untouched.
        if (input.getShipStrategy() != ShipStrategyType.PARTITION_RANGE) {
            continue;
        }
        // A channel that already carries a data distribution needs no rewriting.
        if (input.getDataDistribution() != null) {
            continue;
        }
        if (node.isOnDynamicPath()) {
            throw new InvalidProgramException("Range Partitioning not supported within iterations if users do not supply the data distribution.");
        }

        // Swap the original channel for the rewritten ones in the source's outgoing channels.
        final PlanNode source = input.getSource();
        final List<Channel> replacements = rewriteRangePartitionChannel(input);
        source.getOutgoingChannels().remove(input);
        source.getOutgoingChannels().addAll(replacements);
    }
}
Also used : IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) Channel(org.apache.flink.optimizer.plan.Channel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) ShipStrategyType(org.apache.flink.runtime.operators.shipping.ShipStrategyType)

Example 2 with ShipStrategyType

use of org.apache.flink.runtime.operators.shipping.ShipStrategyType in project flink by apache.

the class TwoInputNode method setInput.

/**
 * Connects this two-input node to the optimizer nodes of its two predecessor operators.
 *
 * <p>Shipping strategy hints are read from the operator parameters first: a hint for both
 * inputs is applied to both connections, and a hint for the first or second input overrides it
 * for that input only. A connection whose resolved hint is {@code null} keeps whatever ship
 * strategy {@code DagConnection} starts with.
 *
 * @param contractToNode lookup from operator to the optimizer node created for it
 * @param defaultExecutionMode execution mode handed to the created connections
 * @throws CompilerException if a hint does not name a known shipping strategy, or if the
 *     operator has no first or no second input set
 */
@Override
public void setInput(Map<Operator<?>, OptimizerNode> contractToNode, ExecutionMode defaultExecutionMode) {
    final Configuration conf = getOperator().getParameters();

    // see if there is a hint that dictates which shipping strategy to use for BOTH inputs
    final ShipStrategyType preSetBoth = parseShipStrategyHint(conf.getString(Optimizer.HINT_SHIP_STRATEGY, null), null, "Unknown hint for shipping strategy: ");
    // see if there is a hint that dictates which shipping strategy to use for the FIRST input
    final ShipStrategyType preSet1 = parseShipStrategyHint(conf.getString(Optimizer.HINT_SHIP_STRATEGY_FIRST_INPUT, null), preSetBoth, "Unknown hint for shipping strategy of input one: ");
    // see if there is a hint that dictates which shipping strategy to use for the SECOND input
    final ShipStrategyType preSet2 = parseShipStrategyHint(conf.getString(Optimizer.HINT_SHIP_STRATEGY_SECOND_INPUT, null), preSetBoth, "Unknown hint for shipping strategy of input two: ");

    // get the predecessors
    final DualInputOperator<?, ?, ?, ?> contr = getOperator();
    final Operator<?> leftPred = contr.getFirstInput();
    final Operator<?> rightPred = contr.getSecondInput();

    if (leftPred == null) {
        throw new CompilerException("Error: Node for '" + getOperator().getName() + "' has no input set for first input.");
    }
    final OptimizerNode pred1 = contractToNode.get(leftPred);
    final DagConnection conn1 = new DagConnection(pred1, this, defaultExecutionMode);
    if (preSet1 != null) {
        conn1.setShipStrategy(preSet1);
    }
    // create the connection and add it
    this.input1 = conn1;
    pred1.addOutgoingConnection(conn1);

    // The second input is only checked after the first connection has been registered,
    // exactly as before the refactoring.
    if (rightPred == null) {
        throw new CompilerException("Error: Node for '" + getOperator().getName() + "' has no input set for second input.");
    }
    final OptimizerNode pred2 = contractToNode.get(rightPred);
    final DagConnection conn2 = new DagConnection(pred2, this, defaultExecutionMode);
    if (preSet2 != null) {
        conn2.setShipStrategy(preSet2);
    }
    // create the connection and add it
    this.input2 = conn2;
    pred2.addOutgoingConnection(conn2);
}

/**
 * Translates a shipping strategy hint into the corresponding {@link ShipStrategyType}.
 *
 * @param hint the hint value read from the operator parameters, or {@code null} if none is set
 * @param fallback the value to return when {@code hint} is {@code null}
 * @param errorPrefix text placed in front of the offending hint in the exception message
 * @return the strategy named by the hint, or {@code fallback} when no hint is set
 * @throws CompilerException if the hint is set but matches none of the known hint constants
 */
private static ShipStrategyType parseShipStrategyHint(String hint, ShipStrategyType fallback, String errorPrefix) {
    if (hint == null) {
        return fallback;
    }
    if (Optimizer.HINT_SHIP_STRATEGY_FORWARD.equals(hint)) {
        return ShipStrategyType.FORWARD;
    }
    if (Optimizer.HINT_SHIP_STRATEGY_BROADCAST.equals(hint)) {
        return ShipStrategyType.BROADCAST;
    }
    if (Optimizer.HINT_SHIP_STRATEGY_REPARTITION_HASH.equals(hint)) {
        return ShipStrategyType.PARTITION_HASH;
    }
    if (Optimizer.HINT_SHIP_STRATEGY_REPARTITION_RANGE.equals(hint)) {
        return ShipStrategyType.PARTITION_RANGE;
    }
    // Unlike the hints above, the plain repartition hint is matched case-insensitively.
    if (hint.equalsIgnoreCase(Optimizer.HINT_SHIP_STRATEGY_REPARTITION)) {
        return ShipStrategyType.PARTITION_RANDOM;
    }
    throw new CompilerException(errorPrefix + hint);
}
Also used : Configuration(org.apache.flink.configuration.Configuration) CompilerException(org.apache.flink.optimizer.CompilerException) ShipStrategyType(org.apache.flink.runtime.operators.shipping.ShipStrategyType)

Example 3 with ShipStrategyType

use of org.apache.flink.runtime.operators.shipping.ShipStrategyType in project flink by apache.

the class GlobalProperties method parameterizeChannel.

/**
 * Configures the given channel with the ship strategy that corresponds to the partitioning
 * held by these properties, and sets this object's data distribution on the channel.
 *
 * @param channel the channel to configure
 * @param globalDopChange whether the parallelism changes across the channel; only consulted
 *     for random partitioning, where it selects PARTITION_RANDOM over FORWARD
 * @param exchangeMode execution mode passed to {@code DataExchangeMode.select}
 * @param breakPipeline pipeline-breaking flag passed to {@code DataExchangeMode.select}
 * @throws CompilerException if the partitioning has no case in the switch below
 */
public void parameterizeChannel(Channel channel, boolean globalDopChange, ExecutionMode exchangeMode, boolean breakPipeline) {
    ShipStrategyType strategy;
    // Keys, sort directions and partitioner stay null unless the partitioning needs them.
    FieldList keys = null;
    boolean[] directions = null;
    Partitioner<?> customPart = null;

    switch (this.partitioning) {
        case RANDOM_PARTITIONED:
            if (globalDopChange) {
                strategy = ShipStrategyType.PARTITION_RANDOM;
            } else {
                strategy = ShipStrategyType.FORWARD;
            }
            break;
        case FORCED_REBALANCED:
            strategy = ShipStrategyType.PARTITION_RANDOM;
            break;
        case FULL_REPLICATION:
            strategy = ShipStrategyType.BROADCAST;
            break;
        case ANY_PARTITIONING:
        case HASH_PARTITIONED:
            strategy = ShipStrategyType.PARTITION_HASH;
            keys = Utils.createOrderedFromSet(this.partitioningFields);
            break;
        case RANGE_PARTITIONED:
            strategy = ShipStrategyType.PARTITION_RANGE;
            keys = this.ordering.getInvolvedIndexes();
            directions = this.ordering.getFieldSortDirections();
            break;
        case CUSTOM_PARTITIONING:
            strategy = ShipStrategyType.PARTITION_CUSTOM;
            keys = this.partitioningFields;
            customPart = this.customPartitioner;
            break;
        default:
            throw new CompilerException("Unsupported partitioning strategy");
    }

    channel.setDataDistribution(this.distribution);
    channel.setShipStrategy(strategy, keys, directions, customPart, DataExchangeMode.select(exchangeMode, strategy, breakPipeline));
}
Also used : DataExchangeMode(org.apache.flink.runtime.io.network.DataExchangeMode) CompilerException(org.apache.flink.optimizer.CompilerException) ShipStrategyType(org.apache.flink.runtime.operators.shipping.ShipStrategyType) FieldList(org.apache.flink.api.common.operators.util.FieldList)

Example 4 with ShipStrategyType

use of org.apache.flink.runtime.operators.shipping.ShipStrategyType in project flink by apache.

the class RequestedGlobalProperties method parameterizeChannel.

/**
 * Sets up the ship strategy of a channel so that the data it delivers has the global
 * properties requested by this object.
 *
 * <p>Three outcomes are possible: a plain FORWARD / PARTITION_RANDOM exchange when nothing
 * specific is requested, a FORWARD exchange when the source already meets the request and the
 * parallelism does not change, or the exchange that establishes the requested partitioning.
 *
 * @param channel The channel to parametrize.
 * @param globalDopChange Flag indicating whether the parallelism changes between sender and
 *     receiver.
 * @param exchangeMode The mode of data exchange (pipelined, always batch, batch only on
 *     shuffle, ...)
 * @param breakPipeline Indicates whether this data exchange should break pipelines (unless
 *     pipelines are forced).
 * @throws CompilerException if a fully replicated input would have to be converted into
 *     another global property, or if the requested partitioning has no case in the switch
 *     below
 */
public void parameterizeChannel(Channel channel, boolean globalDopChange, ExecutionMode exchangeMode, boolean breakPipeline) {
    // Safety check: a fully replicated input may only stay replicated or be left unconstrained.
    if (channel.getSource().getGlobalProperties().isFullyReplicated() && this.partitioning != PartitioningProperty.FULL_REPLICATION && this.partitioning != PartitioningProperty.ANY_DISTRIBUTION) {
        throw new CompilerException("Fully replicated input must be preserved " + "and may not be converted into another global property.");
    }

    // Nothing specific requested: forward if the parallelism stays the same,
    // randomly repartition otherwise.
    if (isTrivial() || this.partitioning == PartitioningProperty.ANY_DISTRIBUTION) {
        final ShipStrategyType relaxed;
        if (globalDopChange) {
            relaxed = ShipStrategyType.PARTITION_RANDOM;
        } else {
            relaxed = ShipStrategyType.FORWARD;
        }
        channel.setShipStrategy(relaxed, DataExchangeMode.select(exchangeMode, relaxed, breakPipeline));
        return;
    }

    // Without a parallelism change, a source that already meets the request is just forwarded.
    final GlobalProperties sourceProps = channel.getSource().getGlobalProperties();
    if (!globalDopChange && isMetBy(sourceProps)) {
        channel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.select(exchangeMode, ShipStrategyType.FORWARD, breakPipeline));
        return;
    }

    // Reaching this point means the requested partitioning has to be (re-)established.
    ShipStrategyType strategy;
    FieldList keys = null;
    boolean[] directions = null;
    Partitioner<?> customPart = null;

    switch (this.partitioning) {
        case FULL_REPLICATION:
            strategy = ShipStrategyType.BROADCAST;
            break;
        case FORCED_REBALANCED:
            strategy = ShipStrategyType.PARTITION_FORCED_REBALANCE;
            break;
        case ANY_PARTITIONING:
        case HASH_PARTITIONED:
            strategy = ShipStrategyType.PARTITION_HASH;
            keys = Utils.createOrderedFromSet(this.partitioningFields);
            break;
        case RANGE_PARTITIONED:
            strategy = ShipStrategyType.PARTITION_RANGE;
            keys = this.ordering.getInvolvedIndexes();
            directions = this.ordering.getFieldSortDirections();
            // Hand a known data distribution to the channel; otherwise leave it unset.
            if (this.dataDistribution != null) {
                channel.setDataDistribution(this.dataDistribution);
            }
            break;
        case CUSTOM_PARTITIONING:
            strategy = ShipStrategyType.PARTITION_CUSTOM;
            keys = Utils.createOrderedFromSet(this.partitioningFields);
            customPart = this.customPartitioner;
            break;
        default:
            throw new CompilerException("Invalid partitioning to create through a data exchange: " + this.partitioning.name());
    }

    channel.setShipStrategy(strategy, keys, directions, customPart, DataExchangeMode.select(exchangeMode, strategy, breakPipeline));
}
Also used : DataExchangeMode(org.apache.flink.runtime.io.network.DataExchangeMode) CompilerException(org.apache.flink.optimizer.CompilerException) ShipStrategyType(org.apache.flink.runtime.operators.shipping.ShipStrategyType) FieldList(org.apache.flink.api.common.operators.util.FieldList)

Example 5 with ShipStrategyType

use of org.apache.flink.runtime.operators.shipping.ShipStrategyType in project flink by apache.

the class SingleInputNode method getAlternativePlans.

/**
 * Builds the candidate plan nodes for this single-input node and caches the result.
 *
 * <p>A previously cached list is returned as is. Otherwise, for every alternative plan of the
 * predecessor a channel is created: with the ship strategy already fixed on the input
 * connection if there is one, or else with one channel per interesting global property,
 * parameterized by that property. Each channel whose global properties meet one of the
 * properties accepted by this node's operator descriptors is passed to
 * {@code addLocalCandidates}, together with the broadcast input channels. The collected
 * candidates are then costed, passed through {@code prunePlanAlternatives}, and cached.
 *
 * @param estimator cost estimator handed to the predecessors and used to cost each candidate
 * @return the list of candidate plans, which is also stored in {@code cachedPlans}
 * @throws CompilerException if not a single candidate plan could be created
 */
@Override
public List<PlanNode> getAlternativePlans(CostEstimator estimator) {
    // check if we have a cached version
    if (this.cachedPlans != null) {
        return this.cachedPlans;
    }
    // remembers whether a child was dropped below, to pick the more precise error message
    boolean childrenSkippedDueToReplicatedInput = false;
    // calculate alternative sub-plans for predecessor
    final List<? extends PlanNode> subPlans = getPredecessorNode().getAlternativePlans(estimator);
    final Set<RequestedGlobalProperties> intGlobal = this.inConn.getInterestingProperties().getGlobalProperties();
    // calculate alternative sub-plans for broadcast inputs
    final List<Set<? extends NamedChannel>> broadcastPlanChannels = new ArrayList<Set<? extends NamedChannel>>();
    List<DagConnection> broadcastConnections = getBroadcastConnections();
    List<String> broadcastConnectionNames = getBroadcastConnectionNames();
    // connections and names are read by the same index i
    for (int i = 0; i < broadcastConnections.size(); i++) {
        DagConnection broadcastConnection = broadcastConnections.get(i);
        String broadcastConnectionName = broadcastConnectionNames.get(i);
        List<PlanNode> broadcastPlanCandidates = broadcastConnection.getSource().getAlternativePlans(estimator);
        // wrap the plan candidates in named channels
        HashSet<NamedChannel> broadcastChannels = new HashSet<NamedChannel>(broadcastPlanCandidates.size());
        for (PlanNode plan : broadcastPlanCandidates) {
            NamedChannel c = new NamedChannel(broadcastConnectionName, plan);
            DataExchangeMode exMode = DataExchangeMode.select(broadcastConnection.getDataExchangeMode(), ShipStrategyType.BROADCAST, broadcastConnection.isBreakingPipeline());
            c.setShipStrategy(ShipStrategyType.BROADCAST, exMode);
            broadcastChannels.add(c);
        }
        broadcastPlanChannels.add(broadcastChannels);
    }
    // collect the global properties accepted by any operator descriptor, without duplicates
    final RequestedGlobalProperties[] allValidGlobals;
    {
        Set<RequestedGlobalProperties> pairs = new HashSet<RequestedGlobalProperties>();
        for (OperatorDescriptorSingle ods : getPossibleProperties()) {
            pairs.addAll(ods.getPossibleGlobalProperties());
        }
        allValidGlobals = pairs.toArray(new RequestedGlobalProperties[pairs.size()]);
    }
    final ArrayList<PlanNode> outputPlans = new ArrayList<PlanNode>();
    final ExecutionMode executionMode = this.inConn.getDataExchangeMode();
    final int parallelism = getParallelism();
    final int inParallelism = getPredecessorNode().getParallelism();
    final boolean parallelismChange = inParallelism != parallelism;
    final boolean breaksPipeline = this.inConn.isBreakingPipeline();
    // create all candidates
    for (PlanNode child : subPlans) {
        if (child.getGlobalProperties().isFullyReplicated()) {
            // a fully replicated input is only usable if the parallelism is not changed
            if (parallelismChange) {
                // can not continue with this child
                childrenSkippedDueToReplicatedInput = true;
                continue;
            } else {
                // NOTE(review): this sets the strategy on the shared input connection, so it
                // presumably also applies to the children handled afterwards - confirm intent
                this.inConn.setShipStrategy(ShipStrategyType.FORWARD);
            }
        }
        if (this.inConn.getShipStrategy() == null) {
            // pick the strategy ourselves
            for (RequestedGlobalProperties igps : intGlobal) {
                final Channel c = new Channel(child, this.inConn.getMaterializationMode());
                igps.parameterizeChannel(c, parallelismChange, executionMode, breaksPipeline);
                // if the parallelism changed and the chosen ship strategy is not a network
                // strategy, reset the channel's global properties
                if (parallelismChange && !c.getShipStrategy().isNetworkStrategy()) {
                    c.getGlobalProperties().reset();
                }
                // check whether the channel meets any of the accepted requested properties;
                // only the first match is used
                for (RequestedGlobalProperties rgps : allValidGlobals) {
                    if (rgps.isMetBy(c.getGlobalProperties())) {
                        c.setRequiredGlobalProps(rgps);
                        addLocalCandidates(c, broadcastPlanChannels, igps, outputPlans, estimator);
                        break;
                    }
                }
            }
        } else {
            // hint fixed the strategy
            final Channel c = new Channel(child, this.inConn.getMaterializationMode());
            final ShipStrategyType shipStrategy = this.inConn.getShipStrategy();
            final DataExchangeMode exMode = DataExchangeMode.select(executionMode, shipStrategy, breaksPipeline);
            // pass the key fields along with the strategy when this node has keys
            if (this.keys != null) {
                c.setShipStrategy(shipStrategy, this.keys.toFieldList(), exMode);
            } else {
                c.setShipStrategy(shipStrategy, exMode);
            }
            if (parallelismChange) {
                c.adjustGlobalPropertiesForFullParallelismChange();
            }
            // check whether we meet any of the accepted properties
            for (RequestedGlobalProperties rgps : allValidGlobals) {
                if (rgps.isMetBy(c.getGlobalProperties())) {
                    addLocalCandidates(c, broadcastPlanChannels, rgps, outputPlans, estimator);
                    break;
                }
            }
        }
    }
    // no candidate at all is an error; the message depends on whether children were skipped
    if (outputPlans.isEmpty()) {
        if (childrenSkippedDueToReplicatedInput) {
            throw new CompilerException("No plan meeting the requirements could be created @ " + this + ". Most likely reason: Invalid use of replicated input.");
        } else {
            throw new CompilerException("No plan meeting the requirements could be created @ " + this + ". Most likely reason: Too restrictive plan hints.");
        }
    }
    // cost and prune the plans
    for (PlanNode node : outputPlans) {
        estimator.costOperator(node);
    }
    prunePlanAlternatives(outputPlans);
    outputPlans.trimToSize();
    this.cachedPlans = outputPlans;
    return outputPlans;
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) FieldSet(org.apache.flink.api.common.operators.util.FieldSet) ArrayList(java.util.ArrayList) ShipStrategyType(org.apache.flink.runtime.operators.shipping.ShipStrategyType) OperatorDescriptorSingle(org.apache.flink.optimizer.operators.OperatorDescriptorSingle) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) DataExchangeMode(org.apache.flink.runtime.io.network.DataExchangeMode) CompilerException(org.apache.flink.optimizer.CompilerException) HashSet(java.util.HashSet) RequestedGlobalProperties(org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties) Channel(org.apache.flink.optimizer.plan.Channel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) ExecutionMode(org.apache.flink.api.common.ExecutionMode) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel)

Aggregations

ShipStrategyType (org.apache.flink.runtime.operators.shipping.ShipStrategyType): 23, Plan (org.apache.flink.api.common.Plan): 13, OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 13, Test (org.junit.Test): 13, SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 12, ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 11, DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode): 11, ReplicatingInputFormat (org.apache.flink.api.common.io.ReplicatingInputFormat): 8, Tuple1 (org.apache.flink.api.java.tuple.Tuple1): 8, Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 8, FileInputSplit (org.apache.flink.core.fs.FileInputSplit): 8, Path (org.apache.flink.core.fs.Path): 8, CompilerException (org.apache.flink.optimizer.CompilerException): 8, SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode): 8, Channel (org.apache.flink.optimizer.plan.Channel): 5, PlanNode (org.apache.flink.optimizer.plan.PlanNode): 5, DataExchangeMode (org.apache.flink.runtime.io.network.DataExchangeMode): 5, ArrayList (java.util.ArrayList): 4, NamedChannel (org.apache.flink.optimizer.plan.NamedChannel): 4, Set (java.util.Set): 3