
Example 1 with InterestingPropertyVisitor

Use of org.apache.flink.optimizer.traversals.InterestingPropertyVisitor in project flink by apache.

In the class Optimizer, method compile.

/**
 * Translates the given program to an OptimizedPlan. The optimized plan describes for each operator
 * which strategy to use (such as hash join versus sort-merge join), what data exchange method to use
 * (local pipe forward, shuffle, broadcast), what exchange mode to use (pipelined, batch),
 * where to cache intermediate results, etc.
 *
 * The optimization happens in multiple phases:
 * <ol>
 *     <li>Create the optimizer DAG representation of the program: <tt>OptimizerNode</tt>
 *         representations of the operators (PACTs), assign parallelism, and compute size estimates.</li>
 *     <li>Compute interesting properties and auxiliary structures.</li>
 *     <li>Enumerate plan alternatives. This cannot be done in the same step as the interesting property
 *         computation (as opposed to the database approaches), because we support plans that are not trees.</li>
 * </ol>
 *
 * @param program The program to be translated.
 * @param postPasser The function to be used for post passing the optimizer's plan and setting the
 *                   data type specific serialization routines.
 * @return The optimized plan.
 *
 * @throws CompilerException Thrown if the plan is invalid or the optimizer encountered an inconsistent
 *                           situation during the compilation process.
 */
private OptimizedPlan compile(Plan program, OptimizerPostPass postPasser) throws CompilerException {
    if (program == null || postPasser == null) {
        throw new NullPointerException();
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Beginning compilation of program '" + program.getJobName() + '\'');
    }
    final ExecutionMode defaultDataExchangeMode = program.getExecutionConfig().getExecutionMode();
    final int defaultParallelism = program.getDefaultParallelism() > 0 ? program.getDefaultParallelism() : this.defaultParallelism;
    // log the default settings
    LOG.debug("Using a default parallelism of {}", defaultParallelism);
    LOG.debug("Using default data exchange mode {}", defaultDataExchangeMode);
    // the first step in the compilation is to create the optimizer plan representation
    // this step does the following:
    // 1) It creates an optimizer plan node for each operator
    // 2) It connects them via channels
    // 3) It looks for hints about local strategies and channel types and
    // sets the types and strategies accordingly
    // 4) It makes estimates about the data volume of the data sources and
    // propagates those estimates through the plan
    GraphCreatingVisitor graphCreator = new GraphCreatingVisitor(defaultParallelism, defaultDataExchangeMode);
    program.accept(graphCreator);
    // If the plan has multiple data sinks, join them pairwise with logical optimizer nodes
    // until only a single root node remains. This allows us to transparently deal with plans
    // that have multiple outputs.
    OptimizerNode rootNode;
    if (graphCreator.getSinks().size() == 1) {
        rootNode = graphCreator.getSinks().get(0);
    } else if (graphCreator.getSinks().size() > 1) {
        Iterator<DataSinkNode> iter = graphCreator.getSinks().iterator();
        rootNode = iter.next();
        while (iter.hasNext()) {
            rootNode = new SinkJoiner(rootNode, iter.next());
        }
    } else {
        throw new CompilerException("Bug: The optimizer plan representation has no sinks.");
    }
    // now that we have all nodes created and recorded which ones consume memory, tell the nodes their minimal
    // guaranteed memory, for further cost estimations. We assume an equal distribution of memory among consumer tasks
    rootNode.accept(new IdAndEstimatesVisitor(this.statistics));
    // We are dealing with operator DAGs, rather than operator trees.
    // That requires us to deviate at some points from the classical DB optimizer algorithms.
    // This step builds auxiliary structures to help track branches and joins in the DAG
    BranchesVisitor branchingVisitor = new BranchesVisitor();
    rootNode.accept(branchingVisitor);
    // Propagate the interesting properties top-down through the graph
    InterestingPropertyVisitor propsVisitor = new InterestingPropertyVisitor(this.costEstimator);
    rootNode.accept(propsVisitor);
    // perform a sanity check: the root may not have any unclosed branches
    if (rootNode.getOpenBranches() != null && rootNode.getOpenBranches().size() > 0) {
        throw new CompilerException("Bug: Logic for branching plans (non-tree plans) has an error, and does not " + "track the re-joining of branches correctly.");
    }
    // the final step is now to generate the actual plan alternatives
    List<PlanNode> bestPlan = rootNode.getAlternativePlans(this.costEstimator);
    if (bestPlan.size() != 1) {
        throw new CompilerException("Error in compiler: more than one best plan was created!");
    }
    // check if the best plan's root is a data sink (single sink plan)
    // if so, directly take it. if it is a sink joiner node, get its contained sinks
    PlanNode bestPlanRoot = bestPlan.get(0);
    List<SinkPlanNode> bestPlanSinks = new ArrayList<SinkPlanNode>(4);
    if (bestPlanRoot instanceof SinkPlanNode) {
        bestPlanSinks.add((SinkPlanNode) bestPlanRoot);
    } else if (bestPlanRoot instanceof SinkJoinerPlanNode) {
        ((SinkJoinerPlanNode) bestPlanRoot).getDataSinks(bestPlanSinks);
    }
    // finalize the plan
    OptimizedPlan plan = new PlanFinalizer().createFinalPlan(bestPlanSinks, program.getJobName(), program);
    plan.accept(new BinaryUnionReplacer());
    plan.accept(new RangePartitionRewriter(plan));
    // post pass the plan. this is the phase where the serialization and comparator code is set
    postPasser.postPass(plan);
    return plan;
}
Also used : SinkJoinerPlanNode(org.apache.flink.optimizer.plan.SinkJoinerPlanNode) ArrayList(java.util.ArrayList) PlanFinalizer(org.apache.flink.optimizer.traversals.PlanFinalizer) ExecutionMode(org.apache.flink.api.common.ExecutionMode) BinaryUnionReplacer(org.apache.flink.optimizer.traversals.BinaryUnionReplacer) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) OptimizerNode(org.apache.flink.optimizer.dag.OptimizerNode) RangePartitionRewriter(org.apache.flink.optimizer.traversals.RangePartitionRewriter) BranchesVisitor(org.apache.flink.optimizer.traversals.BranchesVisitor) Iterator(java.util.Iterator) InterestingPropertyVisitor(org.apache.flink.optimizer.traversals.InterestingPropertyVisitor) IdAndEstimatesVisitor(org.apache.flink.optimizer.traversals.IdAndEstimatesVisitor) SinkJoiner(org.apache.flink.optimizer.dag.SinkJoiner) GraphCreatingVisitor(org.apache.flink.optimizer.traversals.GraphCreatingVisitor)
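
The private compile method above is normally reached through the public Optimizer#compile(Plan) overload, which selects a post pass for the plan and then delegates. The following is a minimal sketch, assuming the legacy DataSet API and the three-argument Optimizer(DataStatistics, CostEstimator, Configuration) constructor from the flink-optimizer module; the class name OptimizerCompileSketch and the job name are illustrative only.

import org.apache.flink.api.common.Plan;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.optimizer.DataStatistics;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.costs.DefaultCostEstimator;
import org.apache.flink.optimizer.plan.OptimizedPlan;

public class OptimizerCompileSketch {

    public static void main(String[] args) throws Exception {
        // Build a small DataSet program: source -> map -> sink.
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
        env.generateSequence(1, 1000)
                .map(new MapFunction<Long, Long>() {
                    @Override
                    public Long map(Long value) {
                        return value * 2;
                    }
                })
                .output(new DiscardingOutputFormat<Long>());

        // The public compile(Plan) overload looks up a post pass for the plan and then
        // delegates to the private compile(Plan, OptimizerPostPass) method shown above.
        Plan plan = env.createProgramPlan("compile-sketch");
        Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), new Configuration());
        OptimizedPlan optimized = optimizer.compile(plan);
        System.out.println("Compiled plan with " + optimized.getDataSinks().size() + " sink(s)");
    }
}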

Example 2 with InterestingPropertyVisitor

Use of org.apache.flink.optimizer.traversals.InterestingPropertyVisitor in project flink by apache.

In the class BulkIterationNode, method computeInterestingPropertiesForInputs.

@Override
public void computeInterestingPropertiesForInputs(CostEstimator estimator) {
    final InterestingProperties intProps = getInterestingProperties().clone();
    if (this.terminationCriterion != null) {
        // First propagate through the termination criterion. Since it has no successors,
        // it has no interesting properties.
        this.terminationCriterionRootConnection.setInterestingProperties(new InterestingProperties());
        this.terminationCriterion.accept(new InterestingPropertyVisitor(estimator));
    }
    // We need to make two interesting-property passes, because the root of the step function
    // also needs the interesting properties as generated by the partial solution.
    // Give our own interesting properties (as generated by the iteration's successors) to the
    // step function and make the first pass.
    this.rootConnection.setInterestingProperties(intProps);
    this.nextPartialSolution.accept(new InterestingPropertyVisitor(estimator));
    // take the interesting properties of the partial solution and add them to the root interesting properties
    InterestingProperties partialSolutionIntProps = this.partialSolution.getInterestingProperties();
    intProps.getGlobalProperties().addAll(partialSolutionIntProps.getGlobalProperties());
    intProps.getLocalProperties().addAll(partialSolutionIntProps.getLocalProperties());
    // clear all interesting properties to prepare the second traversal
    // this clears only the path down from the next partial solution. The paths down
    // from the termination criterion (before they meet the paths down from the next partial solution)
    // remain unaffected by this step
    this.rootConnection.clearInterestingProperties();
    this.nextPartialSolution.accept(InterestingPropertiesClearer.INSTANCE);
    // 2nd pass
    this.rootConnection.setInterestingProperties(intProps);
    this.nextPartialSolution.accept(new InterestingPropertyVisitor(estimator));
    // now add the interesting properties of the partial solution to the input
    final InterestingProperties inProps = this.partialSolution.getInterestingProperties().clone();
    inProps.addGlobalProperties(new RequestedGlobalProperties());
    inProps.addLocalProperties(new RequestedLocalProperties());
    this.inConn.setInterestingProperties(inProps);
}
Also used : RequestedGlobalProperties(org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties) RequestedLocalProperties(org.apache.flink.optimizer.dataproperties.RequestedLocalProperties) InterestingProperties(org.apache.flink.optimizer.dataproperties.InterestingProperties) InterestingPropertyVisitor(org.apache.flink.optimizer.traversals.InterestingPropertyVisitor)
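
To see this two-pass propagation run, the compiled plan must contain a bulk iteration, which the DataSet API creates via iterate()/closeWith(). Below is a hedged sketch of such a program; the class name BulkIterationCompileSketch, the constants, and the job name are illustrative, and the optimizer is driven directly with the same constructor assumed in the first example.

import org.apache.flink.api.common.Plan;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.api.java.operators.IterativeDataSet;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.optimizer.DataStatistics;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.costs.DefaultCostEstimator;
import org.apache.flink.optimizer.plan.OptimizedPlan;

public class BulkIterationCompileSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();

        // A trivial bulk iteration: increment every element for 10 supersteps.
        IterativeDataSet<Long> iteration = env.generateSequence(1, 1000).iterate(10);
        DataSet<Long> nextPartialSolution = iteration.map(new MapFunction<Long, Long>() {
            @Override
            public Long map(Long value) {
                return value + 1;
            }
        });
        iteration.closeWith(nextPartialSolution).output(new DiscardingOutputFormat<Long>());

        // Compiling the plan runs the InterestingPropertyVisitor traversal, which invokes
        // BulkIterationNode#computeInterestingPropertiesForInputs shown above.
        Plan plan = env.createProgramPlan("bulk-iteration-sketch");
        OptimizedPlan optimized =
                new Optimizer(new DataStatistics(), new DefaultCostEstimator(), new Configuration()).compile(plan);
        System.out.println("Compiled plan with " + optimized.getDataSinks().size() + " sink(s)");
    }
}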

Example 3 with InterestingPropertyVisitor

Use of org.apache.flink.optimizer.traversals.InterestingPropertyVisitor in project flink by apache.

In the class WorksetIterationNode, method computeInterestingPropertiesForInputs.

@Override
public void computeInterestingPropertiesForInputs(CostEstimator estimator) {
    // Our own solution (the solution set) is always partitioned, and this cannot be adjusted
    // depending on what the successor to the workset iteration requests. For that reason,
    // we ignore incoming interesting properties.
    // In addition, we need to make two interesting-property passes, because the root of the step
    // function that computes the next workset needs the interesting properties as generated by
    // the workset source of the step function. The second pass concerns only the workset path.
    // As initial interesting properties, we have the trivial ones for the step function,
    // and "partitioned on the solution set key" for the solution set delta.
    RequestedGlobalProperties partitionedProperties = new RequestedGlobalProperties();
    partitionedProperties.setHashPartitioned(this.solutionSetKeyFields);
    InterestingProperties partitionedIP = new InterestingProperties();
    partitionedIP.addGlobalProperties(partitionedProperties);
    partitionedIP.addLocalProperties(new RequestedLocalProperties());
    this.nextWorksetRootConnection.setInterestingProperties(new InterestingProperties());
    this.solutionSetDeltaRootConnection.setInterestingProperties(partitionedIP.clone());
    InterestingPropertyVisitor ipv = new InterestingPropertyVisitor(estimator);
    this.nextWorkset.accept(ipv);
    this.solutionSetDelta.accept(ipv);
    // take the interesting properties of the partial solution and add them to the root interesting properties
    InterestingProperties worksetIntProps = this.worksetNode.getInterestingProperties();
    InterestingProperties intProps = new InterestingProperties();
    intProps.getGlobalProperties().addAll(worksetIntProps.getGlobalProperties());
    intProps.getLocalProperties().addAll(worksetIntProps.getLocalProperties());
    // clear all interesting properties to prepare the second traversal
    this.nextWorksetRootConnection.clearInterestingProperties();
    this.nextWorkset.accept(InterestingPropertiesClearer.INSTANCE);
    // 2nd pass
    this.nextWorksetRootConnection.setInterestingProperties(intProps);
    this.nextWorkset.accept(ipv);
    // now add the interesting properties of the workset to the workset input
    final InterestingProperties inProps = this.worksetNode.getInterestingProperties().clone();
    inProps.addGlobalProperties(new RequestedGlobalProperties());
    inProps.addLocalProperties(new RequestedLocalProperties());
    this.input2.setInterestingProperties(inProps);
    // the partial solution must be hash partitioned, so it has only that as interesting properties
    this.input1.setInterestingProperties(partitionedIP);
}
Also used : RequestedGlobalProperties(org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties) RequestedLocalProperties(org.apache.flink.optimizer.dataproperties.RequestedLocalProperties) InterestingProperties(org.apache.flink.optimizer.dataproperties.InterestingProperties) InterestingPropertyVisitor(org.apache.flink.optimizer.traversals.InterestingPropertyVisitor)
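
The workset path above is exercised by a delta iteration (iterateDelta() in the DataSet API), whose step function joins the workset with the solution set. The sketch below is illustrative only: the class name DeltaIterationCompileSketch, the key field, the sample data, and the join logic are assumptions, and the plan is compiled the same way as in the previous sketches.

import org.apache.flink.api.common.Plan;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.api.java.operators.DeltaIteration;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.optimizer.DataStatistics;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.costs.DefaultCostEstimator;
import org.apache.flink.optimizer.plan.OptimizedPlan;

public class DeltaIterationCompileSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();

        DataSet<Tuple2<Long, Long>> initial =
                env.fromElements(Tuple2.of(1L, 10L), Tuple2.of(2L, 20L), Tuple2.of(3L, 30L));

        // Delta (workset) iteration keyed on field 0, at most 20 supersteps.
        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
                initial.iterateDelta(initial, 20, 0);

        // The step function joins the workset with the solution set on the key field.
        DataSet<Tuple2<Long, Long>> delta = iteration.getWorkset()
                .join(iteration.getSolutionSet())
                .where(0)
                .equalTo(0)
                .with(new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                    @Override
                    public Tuple2<Long, Long> join(Tuple2<Long, Long> workset, Tuple2<Long, Long> solution) {
                        return Tuple2.of(workset.f0, Math.min(workset.f1, solution.f1));
                    }
                });

        // Use the delta both as the solution set delta and as the next workset.
        iteration.closeWith(delta, delta).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        // Compiling this plan exercises WorksetIterationNode#computeInterestingPropertiesForInputs:
        // the solution set input receives hash partitioning on the key field as its interesting property.
        Plan plan = env.createProgramPlan("delta-iteration-sketch");
        OptimizedPlan optimized =
                new Optimizer(new DataStatistics(), new DefaultCostEstimator(), new Configuration()).compile(plan);
        System.out.println("Compiled plan with " + optimized.getDataSinks().size() + " sink(s)");
    }
}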

Aggregations

InterestingPropertyVisitor (org.apache.flink.optimizer.traversals.InterestingPropertyVisitor): 3 usages
InterestingProperties (org.apache.flink.optimizer.dataproperties.InterestingProperties): 2 usages
RequestedGlobalProperties (org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties): 2 usages
RequestedLocalProperties (org.apache.flink.optimizer.dataproperties.RequestedLocalProperties): 2 usages
ArrayList (java.util.ArrayList): 1 usage
Iterator (java.util.Iterator): 1 usage
ExecutionMode (org.apache.flink.api.common.ExecutionMode): 1 usage
OptimizerNode (org.apache.flink.optimizer.dag.OptimizerNode): 1 usage
SinkJoiner (org.apache.flink.optimizer.dag.SinkJoiner): 1 usage
OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 1 usage
PlanNode (org.apache.flink.optimizer.plan.PlanNode): 1 usage
SinkJoinerPlanNode (org.apache.flink.optimizer.plan.SinkJoinerPlanNode): 1 usage
SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 1 usage
BinaryUnionReplacer (org.apache.flink.optimizer.traversals.BinaryUnionReplacer): 1 usage
BranchesVisitor (org.apache.flink.optimizer.traversals.BranchesVisitor): 1 usage
GraphCreatingVisitor (org.apache.flink.optimizer.traversals.GraphCreatingVisitor): 1 usage
IdAndEstimatesVisitor (org.apache.flink.optimizer.traversals.IdAndEstimatesVisitor): 1 usage
PlanFinalizer (org.apache.flink.optimizer.traversals.PlanFinalizer): 1 usage
RangePartitionRewriter (org.apache.flink.optimizer.traversals.RangePartitionRewriter): 1 usage
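
All of the traversals collected here implement Flink's generic Visitor interface (org.apache.flink.util.Visitor) and are applied with accept(...), as seen in the compile method above. As a sketch of the same pattern, the hypothetical visitor below walks an OptimizedPlan from its sinks and counts the plan nodes it reaches; the class name and the counting logic are illustrative only.

import org.apache.flink.optimizer.plan.OptimizedPlan;
import org.apache.flink.optimizer.plan.PlanNode;
import org.apache.flink.util.Visitor;

// A hypothetical traversal in the same style: it counts the plan nodes reached while walking
// an OptimizedPlan from its sinks. In a DAG, nodes on shared branches may be visited more
// than once unless the visitor tracks them itself.
public class PlanNodeCounter implements Visitor<PlanNode> {

    private int count;

    @Override
    public boolean preVisit(PlanNode node) {
        count++;
        // Returning true continues the descent into this node's inputs.
        return true;
    }

    @Override
    public void postVisit(PlanNode node) {
        // No work on the way back up.
    }

    public int getCount() {
        return count;
    }

    public static int countNodes(OptimizedPlan plan) {
        PlanNodeCounter counter = new PlanNodeCounter();
        plan.accept(counter);
        return counter.getCount();
    }
}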