Search in sources :

Example 1 with ExecutionMode

use of org.apache.flink.api.common.ExecutionMode in project flink by apache.

In the class SingleInputNode, method getAlternativePlans:

/**
 * Enumerates the candidate plans of this single-input node. For every candidate plan of the
 * predecessor a channel is built (with a ship strategy that is either already fixed on the input
 * connection or chosen per interesting global property), matched against the global properties
 * accepted by the node's operator descriptors, and expanded via {@code addLocalCandidates}.
 * The resulting candidates are costed, pruned and cached on the node.
 *
 * @param estimator The cost estimator passed down to the inputs and used to cost the candidates.
 * @return The pruned list of candidate plans; the same list is returned on subsequent calls.
 * @throws CompilerException Thrown, if not a single candidate plan could be created.
 */
@Override
public List<PlanNode> getAlternativePlans(CostEstimator estimator) {
    // a previous enumeration already produced the result
    if (this.cachedPlans != null) {
        return this.cachedPlans;
    }

    // candidates of the regular input, plus the global properties found interesting on that input
    final List<? extends PlanNode> inputCandidates = getPredecessorNode().getAlternativePlans(estimator);
    final Set<RequestedGlobalProperties> interestingGlobals = this.inConn.getInterestingProperties().getGlobalProperties();

    // candidates of every broadcast input, each wrapped into a named channel shipped via BROADCAST
    final List<Set<? extends NamedChannel>> broadcastPlanChannels = new ArrayList<Set<? extends NamedChannel>>();
    final List<DagConnection> bcConnections = getBroadcastConnections();
    final List<String> bcNames = getBroadcastConnectionNames();
    for (int idx = 0; idx < bcConnections.size(); idx++) {
        final DagConnection bcConnection = bcConnections.get(idx);
        final String bcName = bcNames.get(idx);
        final List<PlanNode> bcCandidates = bcConnection.getSource().getAlternativePlans(estimator);

        final HashSet<NamedChannel> wrapped = new HashSet<NamedChannel>(bcCandidates.size());
        for (PlanNode bcCandidate : bcCandidates) {
            final NamedChannel named = new NamedChannel(bcName, bcCandidate);
            named.setShipStrategy(
                    ShipStrategyType.BROADCAST,
                    DataExchangeMode.select(bcConnection.getDataExchangeMode(), ShipStrategyType.BROADCAST, bcConnection.isBreakingPipeline()));
            wrapped.add(named);
        }
        broadcastPlanChannels.add(wrapped);
    }

    // union of the global properties that any of the operator descriptors accepts
    final Set<RequestedGlobalProperties> acceptedSet = new HashSet<RequestedGlobalProperties>();
    for (OperatorDescriptorSingle descriptor : getPossibleProperties()) {
        acceptedSet.addAll(descriptor.getPossibleGlobalProperties());
    }
    final RequestedGlobalProperties[] acceptedGlobals = acceptedSet.toArray(new RequestedGlobalProperties[acceptedSet.size()]);

    final ArrayList<PlanNode> candidates = new ArrayList<PlanNode>();
    final ExecutionMode exchangeDefault = this.inConn.getDataExchangeMode();
    final int ownParallelism = getParallelism();
    final int inputParallelism = getPredecessorNode().getParallelism();
    final boolean parallelismDiffers = inputParallelism != ownParallelism;
    final boolean pipelineBroken = this.inConn.isBreakingPipeline();

    // remembers whether a fully replicated child had to be dropped (only used for the error message)
    boolean replicatedChildSkipped = false;

    for (PlanNode inputPlan : inputCandidates) {
        if (inputPlan.getGlobalProperties().isFullyReplicated()) {
            if (parallelismDiffers) {
                // a fully replicated input cannot be used when the parallelism changes
                replicatedChildSkipped = true;
                continue;
            }
            // fully replicated input with unchanged parallelism is pinned to FORWARD
            this.inConn.setShipStrategy(ShipStrategyType.FORWARD);
        }

        if (this.inConn.getShipStrategy() != null) {
            // the connection already carries a ship strategy, so use exactly that one
            final Channel channel = new Channel(inputPlan, this.inConn.getMaterializationMode());
            final ShipStrategyType fixedStrategy = this.inConn.getShipStrategy();
            final DataExchangeMode exchange = DataExchangeMode.select(exchangeDefault, fixedStrategy, pipelineBroken);
            if (this.keys == null) {
                channel.setShipStrategy(fixedStrategy, exchange);
            } else {
                channel.setShipStrategy(fixedStrategy, this.keys.toFieldList(), exchange);
            }
            if (parallelismDiffers) {
                channel.adjustGlobalPropertiesForFullParallelismChange();
            }

            // take the first accepted property set that the channel satisfies
            for (RequestedGlobalProperties accepted : acceptedGlobals) {
                if (accepted.isMetBy(channel.getGlobalProperties())) {
                    addLocalCandidates(channel, broadcastPlanChannels, accepted, candidates, estimator);
                    break;
                }
            }
            continue;
        }

        // no strategy is fixed: try one channel per interesting global property
        for (RequestedGlobalProperties interest : interestingGlobals) {
            final Channel channel = new Channel(inputPlan, this.inConn.getMaterializationMode());
            interest.parameterizeChannel(channel, parallelismDiffers, exchangeDefault, pipelineBroken);

            // with a changed parallelism and a non-network ship strategy, the channel's
            // global properties are reset
            if (parallelismDiffers && !channel.getShipStrategy().isNetworkStrategy()) {
                channel.getGlobalProperties().reset();
            }

            // take the first accepted property set that the channel satisfies
            for (RequestedGlobalProperties accepted : acceptedGlobals) {
                if (accepted.isMetBy(channel.getGlobalProperties())) {
                    channel.setRequiredGlobalProps(accepted);
                    addLocalCandidates(channel, broadcastPlanChannels, interest, candidates, estimator);
                    break;
                }
            }
        }
    }

    if (candidates.isEmpty()) {
        final String problem = replicatedChildSkipped
                ? "No plan meeting the requirements could be created @ " + this + ". Most likely reason: Invalid use of replicated input."
                : "No plan meeting the requirements could be created @ " + this + ". Most likely reason: Too restrictive plan hints.";
        throw new CompilerException(problem);
    }

    // attach costs to every candidate, then prune and cache the survivors
    for (PlanNode candidate : candidates) {
        estimator.costOperator(candidate);
    }
    prunePlanAlternatives(candidates);
    candidates.trimToSize();
    this.cachedPlans = candidates;
    return candidates;
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) FieldSet(org.apache.flink.api.common.operators.util.FieldSet) ArrayList(java.util.ArrayList) ShipStrategyType(org.apache.flink.runtime.operators.shipping.ShipStrategyType) OperatorDescriptorSingle(org.apache.flink.optimizer.operators.OperatorDescriptorSingle) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) DataExchangeMode(org.apache.flink.runtime.io.network.DataExchangeMode) CompilerException(org.apache.flink.optimizer.CompilerException) HashSet(java.util.HashSet) RequestedGlobalProperties(org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties) Channel(org.apache.flink.optimizer.plan.Channel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) ExecutionMode(org.apache.flink.api.common.ExecutionMode) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel)

Example 2 with ExecutionMode

use of org.apache.flink.api.common.ExecutionMode in project flink by apache.

In the class Optimizer, method compile:

/**
 * Translates the given program to an OptimizedPlan. The optimized plan describes for each operator
 * which strategy to use (such as hash join versus sort-merge join), what data exchange method to use
 * (local pipe forward, shuffle, broadcast), what exchange mode to use (pipelined, batch),
 * where to cache intermediate results, etc.
 *
 * <p>The optimization happens in multiple phases:
 * <ol>
 *     <li>Create the optimizer DAG implementation of the program: <tt>OptimizerNode</tt>
 *         representations of the PACTs, assign parallelism and compute size estimates.</li>
 *     <li>Compute interesting properties and auxiliary structures.</li>
 *     <li>Enumerate plan alternatives. This cannot be done in the same step as the interesting
 *         property computation (as opposed to the Database approaches), because we support plans
 *         that are not trees.</li>
 * </ol>
 *
 * @param program The program to be translated.
 * @param postPasser The function to be used for post passing the optimizer's plan and setting the
 *                   data type specific serialization routines.
 * @return The optimized plan.
 *
 * @throws NullPointerException
 *         Thrown, if the program or the post passer is null.
 * @throws CompilerException
 *         Thrown, if the plan is invalid or the optimizer encountered an inconsistent
 *         situation during the compilation process.
 */
private OptimizedPlan compile(Plan program, OptimizerPostPass postPasser) throws CompilerException {
    if (program == null) {
        throw new NullPointerException("The program to compile must not be null.");
    }
    if (postPasser == null) {
        throw new NullPointerException("The optimizer post pass must not be null.");
    }
    LOG.debug("Beginning compilation of program '{}'", program.getJobName());

    // a parallelism configured on the program (> 0) takes precedence over the optimizer's default
    final ExecutionMode defaultDataExchangeMode = program.getExecutionConfig().getExecutionMode();
    final int defaultParallelism = program.getDefaultParallelism() > 0 ? program.getDefaultParallelism() : this.defaultParallelism;
    // log the default settings
    LOG.debug("Using a default parallelism of {}", defaultParallelism);
    LOG.debug("Using default data exchange mode {}", defaultDataExchangeMode);

    // the first step in the compilation is to create the optimizer plan representation
    // this step does the following:
    // 1) It creates an optimizer plan node for each operator
    // 2) It connects them via channels
    // 3) It looks for hints about local strategies and channel types and
    // sets the types and strategies accordingly
    // 4) It makes estimates about the data volume of the data sources and
    // propagates those estimates through the plan
    GraphCreatingVisitor graphCreator = new GraphCreatingVisitor(defaultParallelism, defaultDataExchangeMode);
    program.accept(graphCreator);

    // if we have a plan with multiple data sinks, add logical optimizer nodes that have two data-sinks as children
    // each until we have only a single root node. This allows to transparently deal with the nodes with
    // multiple outputs. With a single sink, that sink is the root itself.
    final Iterator<DataSinkNode> sinks = graphCreator.getSinks().iterator();
    if (!sinks.hasNext()) {
        throw new CompilerException("Bug: The optimizer plan representation has no sinks.");
    }
    OptimizerNode rootNode = sinks.next();
    while (sinks.hasNext()) {
        rootNode = new SinkJoiner(rootNode, sinks.next());
    }

    // traverse the DAG with the IdAndEstimatesVisitor, which is handed the optimizer's statistics
    rootNode.accept(new IdAndEstimatesVisitor(this.statistics));

    // We are dealing with operator DAGs, rather than operator trees.
    // That requires us to deviate at some points from the classical DB optimizer algorithms.
    // This step builds auxiliary structures to help track branches and joins in the DAG
    BranchesVisitor branchingVisitor = new BranchesVisitor();
    rootNode.accept(branchingVisitor);

    // Propagate the interesting properties top-down through the graph
    InterestingPropertyVisitor propsVisitor = new InterestingPropertyVisitor(this.costEstimator);
    rootNode.accept(propsVisitor);

    // perform a sanity check: the root may not have any unclosed branches
    if (rootNode.getOpenBranches() != null && rootNode.getOpenBranches().size() > 0) {
        throw new CompilerException("Bug: Logic for branching plans (non-tree plans) has an error, and does not " + "track the re-joining of branches correctly.");
    }

    // the final step is now to generate the actual plan alternatives
    List<PlanNode> bestPlan = rootNode.getAlternativePlans(this.costEstimator);
    if (bestPlan.size() != 1) {
        // covers both "no plan at all" and "several plans left after pruning"
        throw new CompilerException("Error in compiler: expected exactly one best plan, but " + bestPlan.size() + " were created!");
    }

    // check if the best plan's root is a data sink (single sink plan)
    // if so, directly take it. if it is a sink joiner node, get its contained sinks
    PlanNode bestPlanRoot = bestPlan.get(0);
    List<SinkPlanNode> bestPlanSinks = new ArrayList<SinkPlanNode>(4);
    if (bestPlanRoot instanceof SinkPlanNode) {
        bestPlanSinks.add((SinkPlanNode) bestPlanRoot);
    } else if (bestPlanRoot instanceof SinkJoinerPlanNode) {
        ((SinkJoinerPlanNode) bestPlanRoot).getDataSinks(bestPlanSinks);
    }

    // finalize the plan, then run the rewriting traversals over it
    OptimizedPlan plan = new PlanFinalizer().createFinalPlan(bestPlanSinks, program.getJobName(), program);
    plan.accept(new BinaryUnionReplacer());
    plan.accept(new RangePartitionRewriter(plan));

    // post pass the plan. this is the phase where the serialization and comparator code is set
    postPasser.postPass(plan);
    return plan;
}
Also used : SinkJoinerPlanNode(org.apache.flink.optimizer.plan.SinkJoinerPlanNode) ArrayList(java.util.ArrayList) PlanFinalizer(org.apache.flink.optimizer.traversals.PlanFinalizer) ExecutionMode(org.apache.flink.api.common.ExecutionMode) BinaryUnionReplacer(org.apache.flink.optimizer.traversals.BinaryUnionReplacer) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SinkJoinerPlanNode(org.apache.flink.optimizer.plan.SinkJoinerPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) OptimizerNode(org.apache.flink.optimizer.dag.OptimizerNode) RangePartitionRewriter(org.apache.flink.optimizer.traversals.RangePartitionRewriter) BranchesVisitor(org.apache.flink.optimizer.traversals.BranchesVisitor) Iterator(java.util.Iterator) InterestingPropertyVisitor(org.apache.flink.optimizer.traversals.InterestingPropertyVisitor) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) IdAndEstimatesVisitor(org.apache.flink.optimizer.traversals.IdAndEstimatesVisitor) SinkJoiner(org.apache.flink.optimizer.dag.SinkJoiner) GraphCreatingVisitor(org.apache.flink.optimizer.traversals.GraphCreatingVisitor)

Example 3 with ExecutionMode

use of org.apache.flink.api.common.ExecutionMode in project flink by apache.

In the class BinaryUnionNode, method getAlternativePlans:

/**
 * Enumerates the candidate plans of this binary union. Every pair of candidate plans of the two
 * predecessors is combined, once per entry of {@code this.channelProps}, into two input channels
 * that are handed to {@code instantiate}. The resulting candidates are costed, pruned and cached
 * on the node.
 *
 * @param estimator The cost estimator passed down to the inputs and used to cost the candidates.
 * @return The pruned list of candidate plans; the same list is returned on subsequent calls.
 * @throws CompilerException Thrown, if the union has more than one successor, if broadcast
 *         connections are attached to it, if the two channels cannot be reconciled, or if not a
 *         single candidate plan could be created.
 */
@Override
public List<PlanNode> getAlternativePlans(CostEstimator estimator) {
    // check that union has only a single successor
    // (note: this check runs on every call, even when a cached result exists)
    if (this.getOutgoingConnections().size() > 1) {
        throw new CompilerException("BinaryUnionNode has more than one successor.");
    }
    // remembers whether a fully replicated child had to be dropped (only used for the error message)
    boolean childrenSkippedDueToReplicatedInput = false;
    // check if we have a cached version
    if (this.cachedPlans != null) {
        return this.cachedPlans;
    }
    // step down to all producer nodes and calculate alternative plans
    final List<? extends PlanNode> subPlans1 = getFirstPredecessorNode().getAlternativePlans(estimator);
    final List<? extends PlanNode> subPlans2 = getSecondPredecessorNode().getAlternativePlans(estimator);
    // broadcast connections are rejected on a union
    List<DagConnection> broadcastConnections = getBroadcastConnections();
    if (broadcastConnections != null && broadcastConnections.size() > 0) {
        throw new CompilerException("Found BroadcastVariables on a Union operation");
    }
    final ArrayList<PlanNode> outputPlans = new ArrayList<PlanNode>();
    // since broadcast connections were rejected above, an empty list is passed to instantiate(...)
    final List<Set<? extends NamedChannel>> broadcastPlanChannels = Collections.emptyList();
    final BinaryUnionOpDescriptor operator = new BinaryUnionOpDescriptor();
    // passed to instantiate(...) as the requested local properties of both inputs
    final RequestedLocalProperties noLocalProps = new RequestedLocalProperties();
    final ExecutionMode input1Mode = this.input1.getDataExchangeMode();
    final ExecutionMode input2Mode = this.input2.getDataExchangeMode();
    final int parallelism = getParallelism();
    final int inParallelism1 = getFirstPredecessorNode().getParallelism();
    final int inParallelism2 = getSecondPredecessorNode().getParallelism();
    // true if the respective input runs with a different parallelism than this node
    final boolean dopChange1 = parallelism != inParallelism1;
    final boolean dopChange2 = parallelism != inParallelism2;
    final boolean input1breakPipeline = this.input1.isBreakingPipeline();
    final boolean input2breakPipeline = this.input2.isBreakingPipeline();
    // create all candidates
    for (PlanNode child1 : subPlans1) {
        if (child1.getGlobalProperties().isFullyReplicated()) {
            // fully replicated input is always locally forwarded if parallelism is not changed
            if (dopChange1) {
                // can not continue with this child
                childrenSkippedDueToReplicatedInput = true;
                continue;
            } else {
                // note: this sets the strategy on the connection itself, so it stays set for
                // the remaining iterations
                this.input1.setShipStrategy(ShipStrategyType.FORWARD);
            }
        }
        for (PlanNode child2 : subPlans2) {
            if (child2.getGlobalProperties().isFullyReplicated()) {
                // fully replicated input is always locally forwarded if parallelism is not changed
                if (dopChange2) {
                    // can not continue with this child
                    childrenSkippedDueToReplicatedInput = true;
                    continue;
                } else {
                    // note: this sets the strategy on the connection itself, so it stays set for
                    // the remaining iterations
                    this.input2.setShipStrategy(ShipStrategyType.FORWARD);
                }
            }
            // skip pairs of children that are not branch compatible
            if (!areBranchCompatible(child1, child2)) {
                continue;
            }
            for (RequestedGlobalProperties igps : this.channelProps) {
                // create a candidate channel for the first input. mark it cached, if the connection says so
                Channel c1 = new Channel(child1, this.input1.getMaterializationMode());
                if (this.input1.getShipStrategy() == null) {
                    // free to choose the ship strategy
                    igps.parameterizeChannel(c1, dopChange1, input1Mode, input1breakPipeline);
                    // if the parallelism changes and the chosen ship strategy is not a network
                    // strategy, reset the channel's global properties
                    if (dopChange1 && !c1.getShipStrategy().isNetworkStrategy()) {
                        c1.getGlobalProperties().reset();
                    }
                } else {
                    // ship strategy fixed by compiler hint
                    ShipStrategyType shipStrategy = this.input1.getShipStrategy();
                    DataExchangeMode exMode = DataExchangeMode.select(input1Mode, shipStrategy, input1breakPipeline);
                    if (this.keys1 != null) {
                        c1.setShipStrategy(this.input1.getShipStrategy(), this.keys1.toFieldList(), exMode);
                    } else {
                        c1.setShipStrategy(this.input1.getShipStrategy(), exMode);
                    }
                    if (dopChange1) {
                        c1.adjustGlobalPropertiesForFullParallelismChange();
                    }
                }
                // create a candidate channel for the second input. mark it cached, if the connection says so
                Channel c2 = new Channel(child2, this.input2.getMaterializationMode());
                if (this.input2.getShipStrategy() == null) {
                    // free to choose the ship strategy
                    igps.parameterizeChannel(c2, dopChange2, input2Mode, input2breakPipeline);
                    // if the parallelism changes and the chosen ship strategy is not a network
                    // strategy, reset the channel's global properties
                    if (dopChange2 && !c2.getShipStrategy().isNetworkStrategy()) {
                        c2.getGlobalProperties().reset();
                    }
                } else {
                    // ship strategy fixed by compiler hint
                    ShipStrategyType shipStrategy = this.input2.getShipStrategy();
                    DataExchangeMode exMode = DataExchangeMode.select(input2Mode, shipStrategy, input2breakPipeline);
                    if (this.keys2 != null) {
                        c2.setShipStrategy(this.input2.getShipStrategy(), this.keys2.toFieldList(), exMode);
                    } else {
                        c2.setShipStrategy(this.input2.getShipStrategy(), exMode);
                    }
                    if (dopChange2) {
                        c2.adjustGlobalPropertiesForFullParallelismChange();
                    }
                }
                // get the global properties and clear unique fields (not preserved anyways during the union)
                GlobalProperties p1 = c1.getGlobalProperties();
                GlobalProperties p2 = c2.getGlobalProperties();
                p1.clearUniqueFieldCombinations();
                p2.clearUniqueFieldCombinations();
                // if the requested properties are non-trivial and the two channels ended up with
                // different global properties, one channel is cloned and re-parameterized from the
                // other channel's properties (presumably so that both inputs agree - confirm)
                if (!igps.isTrivial() && !(p1.equals(p2))) {
                    if (c1.getShipStrategy() == ShipStrategyType.FORWARD && c2.getShipStrategy() != ShipStrategyType.FORWARD) {
                        // adjust c2 to c1
                        c2 = c2.clone();
                        p1.parameterizeChannel(c2, dopChange2, input2Mode, input2breakPipeline);
                    } else if (c2.getShipStrategy() == ShipStrategyType.FORWARD && c1.getShipStrategy() != ShipStrategyType.FORWARD) {
                        // adjust c1 to c2
                        c1 = c1.clone();
                        p2.parameterizeChannel(c1, dopChange1, input1Mode, input1breakPipeline);
                    } else if (c1.getShipStrategy() == ShipStrategyType.FORWARD && c2.getShipStrategy() == ShipStrategyType.FORWARD) {
                        // both channels are FORWARD: decide by estimated output size; the flag is
                        // also true when either estimate is not positive
                        // NOTE(review): despite its name, a true 'adjustC1' re-parameterizes c2
                        // (from p1), not c1 - confirm that this is the intended direction
                        boolean adjustC1 = c1.getEstimatedOutputSize() <= 0 || c2.getEstimatedOutputSize() <= 0 || c1.getEstimatedOutputSize() <= c2.getEstimatedOutputSize();
                        if (adjustC1) {
                            c2 = c2.clone();
                            p1.parameterizeChannel(c2, dopChange2, input2Mode, input2breakPipeline);
                        } else {
                            c1 = c1.clone();
                            p2.parameterizeChannel(c1, dopChange1, input1Mode, input1breakPipeline);
                        }
                    } else {
                        // excluded by the check that the required strategies must match
                        throw new CompilerException("Bug in Plan Enumeration for Union Node.");
                    }
                }
                // the same requested global properties (igps) are passed for both inputs
                instantiate(operator, c1, c2, broadcastPlanChannels, outputPlans, estimator, igps, igps, noLocalProps, noLocalProps);
            }
        }
    }
    if (outputPlans.isEmpty()) {
        if (childrenSkippedDueToReplicatedInput) {
            throw new CompilerException("No plan meeting the requirements could be created @ " + this + ". Most likely reason: Invalid use of replicated input.");
        } else {
            throw new CompilerException("No plan meeting the requirements could be created @ " + this + ". Most likely reason: Too restrictive plan hints.");
        }
    }
    // cost and prune the plans
    for (PlanNode node : outputPlans) {
        estimator.costOperator(node);
    }
    prunePlanAlternatives(outputPlans);
    outputPlans.trimToSize();
    // cache the result for subsequent calls
    this.cachedPlans = outputPlans;
    return outputPlans;
}
Also used : RequestedGlobalProperties(org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties) Set(java.util.Set) FieldSet(org.apache.flink.api.common.operators.util.FieldSet) Channel(org.apache.flink.optimizer.plan.Channel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) ArrayList(java.util.ArrayList) ExecutionMode(org.apache.flink.api.common.ExecutionMode) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) ShipStrategyType(org.apache.flink.runtime.operators.shipping.ShipStrategyType) RequestedLocalProperties(org.apache.flink.optimizer.dataproperties.RequestedLocalProperties) PlanNode(org.apache.flink.optimizer.plan.PlanNode) RequestedGlobalProperties(org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties) GlobalProperties(org.apache.flink.optimizer.dataproperties.GlobalProperties) DataExchangeMode(org.apache.flink.runtime.io.network.DataExchangeMode) CompilerException(org.apache.flink.optimizer.CompilerException) BinaryUnionOpDescriptor(org.apache.flink.optimizer.operators.BinaryUnionOpDescriptor)

Example 4 with ExecutionMode

use of org.apache.flink.api.common.ExecutionMode in project flink by apache.

In the class DataSinkNode, method getAlternativePlans:

// --------------------------------------------------------------------------------------------
//                                   Recursive Optimization
// --------------------------------------------------------------------------------------------
/**
 * Enumerates the candidate plans of this data sink. For every candidate plan of the predecessor
 * and every combination of interesting global and local properties of the input, a channel is
 * parameterized accordingly and wrapped into a {@code SinkPlanNode}. The candidates are costed,
 * pruned and cached on the node.
 *
 * @param estimator The cost estimator passed down to the input and used to cost the candidates.
 * @return The pruned list of candidate plans; the same list is returned on subsequent calls.
 */
@Override
public List<PlanNode> getAlternativePlans(CostEstimator estimator) {
    // a previous enumeration already produced the result
    if (this.cachedPlans != null) {
        return this.cachedPlans;
    }

    // candidates of the input
    final List<? extends PlanNode> inputCandidates = getPredecessorNode().getAlternativePlans(estimator);
    final List<PlanNode> sinkCandidates = new ArrayList<PlanNode>();

    final int ownParallelism = getParallelism();
    final int inputParallelism = getPredecessorNode().getParallelism();
    final ExecutionMode exchangeDefault = this.input.getDataExchangeMode();
    final boolean parallelismDiffers = ownParallelism != inputParallelism;
    final boolean pipelineBroken = this.input.isBreakingPipeline();
    final InterestingProperties interesting = this.input.getInterestingProperties();

    for (PlanNode inputPlan : inputCandidates) {
        for (RequestedGlobalProperties globalRequest : interesting.getGlobalProperties()) {
            for (RequestedLocalProperties localRequest : interesting.getLocalProperties()) {
                final Channel channel = new Channel(inputPlan);
                globalRequest.parameterizeChannel(channel, parallelismDiffers, exchangeDefault, pipelineBroken);
                localRequest.parameterizeChannel(channel);
                channel.setRequiredLocalProps(localRequest);
                channel.setRequiredGlobalProps(globalRequest);

                // the created properties are not verified here: the only interesting
                // properties of a sink are the ones it requires itself
                final String sinkName = "DataSink (" + this.getOperator().getName() + ")";
                sinkCandidates.add(new SinkPlanNode(this, sinkName, channel));
            }
        }
    }

    // attach costs to every candidate, then prune and cache the survivors
    for (int i = 0; i < sinkCandidates.size(); i++) {
        estimator.costOperator(sinkCandidates.get(i));
    }
    prunePlanAlternatives(sinkCandidates);
    this.cachedPlans = sinkCandidates;
    return sinkCandidates;
}
Also used : RequestedGlobalProperties(org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties) RequestedLocalProperties(org.apache.flink.optimizer.dataproperties.RequestedLocalProperties) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Channel(org.apache.flink.optimizer.plan.Channel) ArrayList(java.util.ArrayList) InterestingProperties(org.apache.flink.optimizer.dataproperties.InterestingProperties) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) ExecutionMode(org.apache.flink.api.common.ExecutionMode)

Example 5 with ExecutionMode

use of org.apache.flink.api.common.ExecutionMode in project flink by apache.

In the class TwoInputNode, method getAlternativePlans:

/**
 * Enumerates the candidate plans for this two-input node, costs and prunes them, and caches
 * the result in {@code cachedPlans}.
 *
 * <p>Outline of what the method does:
 * <ol>
 *   <li>Returns {@code cachedPlans} right away if it has already been set.</li>
 *   <li>Asks both predecessor nodes and every broadcast source for their alternative plans;
 *       broadcast candidates are wrapped in {@link NamedChannel}s shipped via
 *       {@code ShipStrategyType.BROADCAST}.</li>
 *   <li>For every branch-compatible pair of child plans and every pair of interesting global
 *       properties of the two inputs, builds one {@link Channel} per input. The first
 *       {@code GlobalPropertiesPair} that both channels meet and that some operator descriptor
 *       reports as compatible is handed to {@code addLocalCandidates}, which receives
 *       {@code outputPlans} to fill.</li>
 *   <li>Costs every collected candidate with {@code estimator.costOperator}, calls
 *       {@code prunePlanAlternatives}, trims the list and caches it.</li>
 * </ol>
 *
 * <p>Side effect: when a child plan reports fully replicated global properties and the
 * parallelism does not change, {@code setShipStrategy(ShipStrategyType.FORWARD)} is called on
 * the corresponding input connection ({@code input1} / {@code input2}). The connection object
 * itself is modified, so the later {@code getShipStrategy()} checks in this method presumably
 * see that value as well (assuming a plain setter/getter pair - confirm on the connection class).
 *
 * @param estimator passed to the predecessors' and broadcast sources'
 *        {@code getAlternativePlans}, to {@code addLocalCandidates}, and used to cost each
 *        candidate
 * @return the list of candidates left after {@code prunePlanAlternatives}; the same list
 *         instance is stored in {@code cachedPlans}
 * @throws CompilerException if no candidate could be created; the message distinguishes the
 *         case where children were skipped because of replicated input from the general case
 */
@Override
public List<PlanNode> getAlternativePlans(CostEstimator estimator) {
    // check if we have a cached version
    if (this.cachedPlans != null) {
        return this.cachedPlans;
    }
    // remembers whether any child was skipped below; only used to pick the error message
    boolean childrenSkippedDueToReplicatedInput = false;
    // step down to all producer nodes and calculate alternative plans
    final List<? extends PlanNode> subPlans1 = getFirstPredecessorNode().getAlternativePlans(estimator);
    final List<? extends PlanNode> subPlans2 = getSecondPredecessorNode().getAlternativePlans(estimator);
    // fetch the interesting global properties attached to each of the two input connections
    final Set<RequestedGlobalProperties> intGlobal1 = this.input1.getInterestingProperties().getGlobalProperties();
    final Set<RequestedGlobalProperties> intGlobal2 = this.input2.getInterestingProperties().getGlobalProperties();
    // calculate alternative sub-plans for broadcast inputs
    final List<Set<? extends NamedChannel>> broadcastPlanChannels = new ArrayList<Set<? extends NamedChannel>>();
    List<DagConnection> broadcastConnections = getBroadcastConnections();
    List<String> broadcastConnectionNames = getBroadcastConnectionNames();
    // the two lists are read in lockstep: the name at index i is used for the connection at index i
    for (int i = 0; i < broadcastConnections.size(); i++) {
        DagConnection broadcastConnection = broadcastConnections.get(i);
        String broadcastConnectionName = broadcastConnectionNames.get(i);
        List<PlanNode> broadcastPlanCandidates = broadcastConnection.getSource().getAlternativePlans(estimator);
        // wrap the plan candidates in named channels
        HashSet<NamedChannel> broadcastChannels = new HashSet<NamedChannel>(broadcastPlanCandidates.size());
        for (PlanNode plan : broadcastPlanCandidates) {
            final NamedChannel c = new NamedChannel(broadcastConnectionName, plan);
            // the exchange mode is selected from the connection's mode, the BROADCAST strategy
            // and the connection's pipeline-breaking flag
            DataExchangeMode exMode = DataExchangeMode.select(broadcastConnection.getDataExchangeMode(), ShipStrategyType.BROADCAST, broadcastConnection.isBreakingPipeline());
            c.setShipStrategy(ShipStrategyType.BROADCAST, exMode);
            broadcastChannels.add(c);
        }
        broadcastPlanChannels.add(broadcastChannels);
    }
    final GlobalPropertiesPair[] allGlobalPairs;
    final LocalPropertiesPair[] allLocalPairs;
    // gather the possible global and local property pairs of all operator descriptors into
    // sets, then freeze them into arrays for the enumeration below
    {
        Set<GlobalPropertiesPair> pairsGlob = new HashSet<GlobalPropertiesPair>();
        Set<LocalPropertiesPair> pairsLoc = new HashSet<LocalPropertiesPair>();
        for (OperatorDescriptorDual ods : getProperties()) {
            pairsGlob.addAll(ods.getPossibleGlobalProperties());
            pairsLoc.addAll(ods.getPossibleLocalProperties());
        }
        allGlobalPairs = pairsGlob.toArray(new GlobalPropertiesPair[pairsGlob.size()]);
        allLocalPairs = pairsLoc.toArray(new LocalPropertiesPair[pairsLoc.size()]);
    }
    final ArrayList<PlanNode> outputPlans = new ArrayList<PlanNode>();
    final ExecutionMode input1Mode = this.input1.getDataExchangeMode();
    final ExecutionMode input2Mode = this.input2.getDataExchangeMode();
    final int parallelism = getParallelism();
    final int inParallelism1 = getFirstPredecessorNode().getParallelism();
    final int inParallelism2 = getSecondPredecessorNode().getParallelism();
    // "dopChange" = this node's parallelism differs from that of the respective predecessor
    final boolean dopChange1 = parallelism != inParallelism1;
    final boolean dopChange2 = parallelism != inParallelism2;
    final boolean input1breaksPipeline = this.input1.isBreakingPipeline();
    final boolean input2breaksPipeline = this.input2.isBreakingPipeline();
    // create all candidates
    for (PlanNode child1 : subPlans1) {
        if (child1.getGlobalProperties().isFullyReplicated()) {
            // fully replicated input is always locally forwarded if parallelism is not changed
            if (dopChange1) {
                // can not continue with this child
                childrenSkippedDueToReplicatedInput = true;
                continue;
            } else {
                // NOTE: this modifies the connection itself, not just the current candidate
                this.input1.setShipStrategy(ShipStrategyType.FORWARD);
            }
        }
        for (PlanNode child2 : subPlans2) {
            if (child2.getGlobalProperties().isFullyReplicated()) {
                // fully replicated input is always locally forwarded if parallelism is not changed
                if (dopChange2) {
                    // can not continue with this child
                    childrenSkippedDueToReplicatedInput = true;
                    continue;
                } else {
                    // NOTE: this modifies the connection itself, not just the current candidate
                    this.input2.setShipStrategy(ShipStrategyType.FORWARD);
                }
            }
            // check that the two children go together; pairs rejected by areBranchCompatible
            // are skipped (presumably they must build on the same candidate at the joined
            // branch plan - confirm against areBranchCompatible)
            if (!areBranchCompatible(child1, child2)) {
                continue;
            }
            for (RequestedGlobalProperties igps1 : intGlobal1) {
                // create a candidate channel for the first input, passing along the
                // connection's materialization mode
                final Channel c1 = new Channel(child1, this.input1.getMaterializationMode());
                if (this.input1.getShipStrategy() == null) {
                    // free to choose the ship strategy
                    igps1.parameterizeChannel(c1, dopChange1, input1Mode, input1breaksPipeline);
                    // if the parallelism changed, reset the channel's global properties, unless
                    // the chosen ship strategy is a network strategy
                    if (dopChange1 && !c1.getShipStrategy().isNetworkStrategy()) {
                        c1.getGlobalProperties().reset();
                    }
                } else {
                    // ship strategy fixed by compiler hint
                    ShipStrategyType shipType = this.input1.getShipStrategy();
                    DataExchangeMode exMode = DataExchangeMode.select(input1Mode, shipType, input1breaksPipeline);
                    // pass the key fields along with the strategy when keys are set for this input
                    if (this.keys1 != null) {
                        c1.setShipStrategy(shipType, this.keys1.toFieldList(), exMode);
                    } else {
                        c1.setShipStrategy(shipType, exMode);
                    }
                    if (dopChange1) {
                        c1.adjustGlobalPropertiesForFullParallelismChange();
                    }
                }
                for (RequestedGlobalProperties igps2 : intGlobal2) {
                    // create a candidate channel for the second input, passing along the
                    // connection's materialization mode
                    final Channel c2 = new Channel(child2, this.input2.getMaterializationMode());
                    if (this.input2.getShipStrategy() == null) {
                        // free to choose the ship strategy
                        igps2.parameterizeChannel(c2, dopChange2, input2Mode, input2breaksPipeline);
                        // if the parallelism changed, reset the channel's global properties, unless
                        // the chosen ship strategy is a network strategy
                        if (dopChange2 && !c2.getShipStrategy().isNetworkStrategy()) {
                            c2.getGlobalProperties().reset();
                        }
                    } else {
                        // ship strategy fixed by compiler hint
                        ShipStrategyType shipType = this.input2.getShipStrategy();
                        DataExchangeMode exMode = DataExchangeMode.select(input2Mode, shipType, input2breaksPipeline);
                        // pass the key fields along with the strategy when keys are set for this input
                        if (this.keys2 != null) {
                            c2.setShipStrategy(shipType, this.keys2.toFieldList(), exMode);
                        } else {
                            c2.setShipStrategy(shipType, exMode);
                        }
                        if (dopChange2) {
                            c2.adjustGlobalPropertiesForFullParallelismChange();
                        }
                    }
                    // look for the first global property pair that both channels meet and that
                    // at least one operator descriptor reports as compatible; "break outer"
                    // makes sure at most one pair is used per (c1, c2) combination
                    outer: for (GlobalPropertiesPair gpp : allGlobalPairs) {
                        if (gpp.getProperties1().isMetBy(c1.getGlobalProperties()) && gpp.getProperties2().isMetBy(c2.getGlobalProperties())) {
                            for (OperatorDescriptorDual desc : getProperties()) {
                                if (desc.areCompatible(gpp.getProperties1(), gpp.getProperties2(), c1.getGlobalProperties(), c2.getGlobalProperties())) {
                                    // c1 is shared by all iterations of the igps2 loop, so the
                                    // required properties are set on a clone; c2 is created
                                    // anew in every iteration and is used directly
                                    Channel c1Clone = c1.clone();
                                    c1Clone.setRequiredGlobalProps(gpp.getProperties1());
                                    c2.setRequiredGlobalProps(gpp.getProperties2());
                                    // we form a valid combination, so create the local candidates
                                    // for this
                                    addLocalCandidates(c1Clone, c2, broadcastPlanChannels, igps1, igps2, outputPlans, allLocalPairs, estimator);
                                    break outer;
                                }
                            }
                        }
                    }
                    // break the loop over input2's interesting global properties if the ship
                    // strategy is fixed: the fixed-strategy branch above configures c2 without
                    // consulting igps2, so we can stop after the first
                    if (this.input2.getShipStrategy() != null) {
                        break;
                    }
                }
                // break the loop over input1's interesting global properties if the ship
                // strategy is fixed: the fixed-strategy branch above configures c1 without
                // consulting igps1, so we can stop after the first
                if (this.input1.getShipStrategy() != null) {
                    break;
                }
            }
        }
    }
    // no candidate at all is an error; the flag set above selects the more specific message
    if (outputPlans.isEmpty()) {
        if (childrenSkippedDueToReplicatedInput) {
            throw new CompilerException("No plan meeting the requirements could be created @ " + this + ". Most likely reason: Invalid use of replicated input.");
        } else {
            throw new CompilerException("No plan meeting the requirements could be created @ " + this + ". Most likely reason: Too restrictive plan hints.");
        }
    }
    // cost and prune the plans
    for (PlanNode node : outputPlans) {
        estimator.costOperator(node);
    }
    prunePlanAlternatives(outputPlans);
    // release unused capacity before the list is kept around as the cached result
    outputPlans.trimToSize();
    this.cachedPlans = outputPlans;
    return outputPlans;
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) ArrayList(java.util.ArrayList) GlobalPropertiesPair(org.apache.flink.optimizer.operators.OperatorDescriptorDual.GlobalPropertiesPair) OperatorDescriptorDual(org.apache.flink.optimizer.operators.OperatorDescriptorDual) LocalPropertiesPair(org.apache.flink.optimizer.operators.OperatorDescriptorDual.LocalPropertiesPair) ShipStrategyType(org.apache.flink.runtime.operators.shipping.ShipStrategyType) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) DataExchangeMode(org.apache.flink.runtime.io.network.DataExchangeMode) CompilerException(org.apache.flink.optimizer.CompilerException) HashSet(java.util.HashSet) RequestedGlobalProperties(org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties) Channel(org.apache.flink.optimizer.plan.Channel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) ExecutionMode(org.apache.flink.api.common.ExecutionMode) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel)

Aggregations

ArrayList (java.util.ArrayList)5 ExecutionMode (org.apache.flink.api.common.ExecutionMode)5 PlanNode (org.apache.flink.optimizer.plan.PlanNode)5 RequestedGlobalProperties (org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties)4 Channel (org.apache.flink.optimizer.plan.Channel)4 Set (java.util.Set)3 CompilerException (org.apache.flink.optimizer.CompilerException)3 NamedChannel (org.apache.flink.optimizer.plan.NamedChannel)3 DataExchangeMode (org.apache.flink.runtime.io.network.DataExchangeMode)3 ShipStrategyType (org.apache.flink.runtime.operators.shipping.ShipStrategyType)3 HashSet (java.util.HashSet)2 FieldSet (org.apache.flink.api.common.operators.util.FieldSet)2 RequestedLocalProperties (org.apache.flink.optimizer.dataproperties.RequestedLocalProperties)2 SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)2 Iterator (java.util.Iterator)1 OptimizerNode (org.apache.flink.optimizer.dag.OptimizerNode)1 SinkJoiner (org.apache.flink.optimizer.dag.SinkJoiner)1 GlobalProperties (org.apache.flink.optimizer.dataproperties.GlobalProperties)1 InterestingProperties (org.apache.flink.optimizer.dataproperties.InterestingProperties)1 BinaryUnionOpDescriptor (org.apache.flink.optimizer.operators.BinaryUnionOpDescriptor)1