Example 1 with OptimizerNode

Use of org.apache.flink.optimizer.dag.OptimizerNode in project flink by apache.

The class JsonMapper, method getOptimizerPropertiesJson:

public static String getOptimizerPropertiesJson(JsonFactory jsonFactory, PlanNode node) {
    try {
        final StringWriter writer = new StringWriter(256);
        final JsonGenerator gen = jsonFactory.createGenerator(writer);
        final OptimizerNode optNode = node.getOptimizerNode();
        gen.writeStartObject();
        // global properties
        if (node.getGlobalProperties() != null) {
            GlobalProperties gp = node.getGlobalProperties();
            gen.writeArrayFieldStart("global_properties");
            addProperty(gen, "Partitioning", gp.getPartitioning().name());
            if (gp.getPartitioningFields() != null) {
                addProperty(gen, "Partitioned on", gp.getPartitioningFields().toString());
            }
            if (gp.getPartitioningOrdering() != null) {
                addProperty(gen, "Partitioning Order", gp.getPartitioningOrdering().toString());
            } else {
                addProperty(gen, "Partitioning Order", "(none)");
            }
            if (optNode.getUniqueFields() == null || optNode.getUniqueFields().size() == 0) {
                addProperty(gen, "Uniqueness", "not unique");
            } else {
                addProperty(gen, "Uniqueness", optNode.getUniqueFields().toString());
            }
            gen.writeEndArray();
        }
        // local properties
        if (node.getLocalProperties() != null) {
            LocalProperties lp = node.getLocalProperties();
            gen.writeArrayFieldStart("local_properties");
            if (lp.getOrdering() != null) {
                addProperty(gen, "Order", lp.getOrdering().toString());
            } else {
                addProperty(gen, "Order", "(none)");
            }
            if (lp.getGroupedFields() != null && lp.getGroupedFields().size() > 0) {
                addProperty(gen, "Grouped on", lp.getGroupedFields().toString());
            } else {
                addProperty(gen, "Grouping", "not grouped");
            }
            if (optNode.getUniqueFields() == null || optNode.getUniqueFields().size() == 0) {
                addProperty(gen, "Uniqueness", "not unique");
            } else {
                addProperty(gen, "Uniqueness", optNode.getUniqueFields().toString());
            }
            gen.writeEndArray();
        }
        // output size estimates
        {
            gen.writeArrayFieldStart("estimates");
            addProperty(gen, "Est. Output Size", optNode.getEstimatedOutputSize() == -1 ? "(unknown)" : formatNumber(optNode.getEstimatedOutputSize(), "B"));
            addProperty(gen, "Est. Cardinality", optNode.getEstimatedNumRecords() == -1 ? "(unknown)" : formatNumber(optNode.getEstimatedNumRecords()));
            gen.writeEndArray();
        }
        // output node cost
        if (node.getNodeCosts() != null) {
            gen.writeArrayFieldStart("costs");
            addProperty(gen, "Network", node.getNodeCosts().getNetworkCost() == -1 ? "(unknown)" : formatNumber(node.getNodeCosts().getNetworkCost(), "B"));
            addProperty(gen, "Disk I/O", node.getNodeCosts().getDiskCost() == -1 ? "(unknown)" : formatNumber(node.getNodeCosts().getDiskCost(), "B"));
            addProperty(gen, "CPU", node.getNodeCosts().getCpuCost() == -1 ? "(unknown)" : formatNumber(node.getNodeCosts().getCpuCost(), ""));
            addProperty(gen, "Cumulative Network", node.getCumulativeCosts().getNetworkCost() == -1 ? "(unknown)" : formatNumber(node.getCumulativeCosts().getNetworkCost(), "B"));
            addProperty(gen, "Cumulative Disk I/O", node.getCumulativeCosts().getDiskCost() == -1 ? "(unknown)" : formatNumber(node.getCumulativeCosts().getDiskCost(), "B"));
            addProperty(gen, "Cumulative CPU", node.getCumulativeCosts().getCpuCost() == -1 ? "(unknown)" : formatNumber(node.getCumulativeCosts().getCpuCost(), ""));
            gen.writeEndArray();
        }
        // compiler hints
        if (optNode.getOperator().getCompilerHints() != null) {
            CompilerHints hints = optNode.getOperator().getCompilerHints();
            CompilerHints defaults = new CompilerHints();
            String size = hints.getOutputSize() == defaults.getOutputSize() ? "(none)" : String.valueOf(hints.getOutputSize());
            String card = hints.getOutputCardinality() == defaults.getOutputCardinality() ? "(none)" : String.valueOf(hints.getOutputCardinality());
            String width = hints.getAvgOutputRecordSize() == defaults.getAvgOutputRecordSize() ? "(none)" : String.valueOf(hints.getAvgOutputRecordSize());
            String filter = hints.getFilterFactor() == defaults.getFilterFactor() ? "(none)" : String.valueOf(hints.getFilterFactor());
            gen.writeArrayFieldStart("compiler_hints");
            addProperty(gen, "Output Size (bytes)", size);
            addProperty(gen, "Output Cardinality", card);
            addProperty(gen, "Avg. Output Record Size (bytes)", width);
            addProperty(gen, "Filter Factor", filter);
            gen.writeEndArray();
        }
        gen.writeEndObject();
        gen.close();
        return writer.toString();
    } catch (Exception e) {
        return "{}";
    }
}
Also used: StringWriter (java.io.StringWriter), OptimizerNode (org.apache.flink.optimizer.dag.OptimizerNode), GlobalProperties (org.apache.flink.optimizer.dataproperties.GlobalProperties), CompilerHints (org.apache.flink.api.common.operators.CompilerHints), JsonGenerator (org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonGenerator), LocalProperties (org.apache.flink.optimizer.dataproperties.LocalProperties), IOException (java.io.IOException)
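
The addProperty helper used throughout the method is not shown in the snippet. A minimal sketch of what it presumably looks like, assuming each property is written as a small object with "name" and "value" fields into the enclosing array (which is consistent with the writeArrayFieldStart calls above):

private static void addProperty(JsonGenerator gen, String name, String value) throws IOException {
    // one property = one {"name": ..., "value": ...} object in the surrounding array
    gen.writeStartObject();
    gen.writeStringField("name", name);
    gen.writeStringField("value", value);
    gen.writeEndObject();
}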

Example 2 with OptimizerNode

Use of org.apache.flink.optimizer.dag.OptimizerNode in project flink by apache.

The class PlanNode, method setBroadcastInputs:

/**
 * Sets a list of all broadcast inputs attached to this node.
 */
public void setBroadcastInputs(List<NamedChannel> broadcastInputs) {
    if (broadcastInputs != null) {
        this.broadcastInputs = broadcastInputs;
        // update the branch map
        for (NamedChannel nc : broadcastInputs) {
            PlanNode source = nc.getSource();
            mergeBranchPlanMaps(branchPlan, source.branchPlan);
        }
    }
    // do a sanity check that, if we are branching, we now have candidates for each branch point
    if (this.template.hasUnclosedBranches()) {
        if (this.branchPlan == null) {
            throw new CompilerException("Branching and rejoining logic did not find a candidate for the branching point.");
        }
        for (UnclosedBranchDescriptor uc : this.template.getOpenBranches()) {
            OptimizerNode brancher = uc.getBranchingNode();
            if (this.branchPlan.get(brancher) == null) {
                throw new CompilerException("Branching and rejoining logic did not find a candidate for the branching point.");
            }
        }
    }
}
Also used: OptimizerNode (org.apache.flink.optimizer.dag.OptimizerNode), CompilerException (org.apache.flink.optimizer.CompilerException), UnclosedBranchDescriptor (org.apache.flink.optimizer.dag.OptimizerNode.UnclosedBranchDescriptor)
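
The two-argument mergeBranchPlanMaps called above is not part of the snippet. A minimal sketch of a plausible implementation, assuming it only folds the source's branch decisions into this node's map and leaves validation to the sanity check shown above (parameter names are illustrative; Map and HashMap are java.util):

protected void mergeBranchPlanMaps(Map<OptimizerNode, PlanNode> ownPlan, Map<OptimizerNode, PlanNode> sourcePlan) {
    if (sourcePlan == null || sourcePlan.isEmpty()) {
        return;
    }
    if (ownPlan == null) {
        // this node had no branch decisions yet; start a fresh map
        ownPlan = new HashMap<>();
        this.branchPlan = ownPlan;
    }
    // keep existing decisions; only adopt decisions this node does not have yet
    for (Map.Entry<OptimizerNode, PlanNode> entry : sourcePlan.entrySet()) {
        ownPlan.putIfAbsent(entry.getKey(), entry.getValue());
    }
}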

Example 3 with OptimizerNode

Use of org.apache.flink.optimizer.dag.OptimizerNode in project flink by apache.

The class WorksetIterationPlanNode, method mergeBranchPlanMaps:

protected void mergeBranchPlanMaps() {
    Map<OptimizerNode, PlanNode> branchPlan1 = input1.getSource().branchPlan;
    Map<OptimizerNode, PlanNode> branchPlan2 = input2.getSource().branchPlan;
    // merge the branchPlan maps according to the template's stack of unclosed branches
    if (this.template.hasUnclosedBranches()) {
        if (this.branchPlan == null) {
            this.branchPlan = new HashMap<OptimizerNode, PlanNode>(8);
        }
        for (OptimizerNode.UnclosedBranchDescriptor uc : this.template.getOpenBranches()) {
            OptimizerNode brancher = uc.getBranchingNode();
            PlanNode selectedCandidate = null;
            if (branchPlan1 != null) {
                // predecessor 1 has branching children; see if it got the branch we are looking for
                selectedCandidate = branchPlan1.get(brancher);
            }
            if (selectedCandidate == null && branchPlan2 != null) {
                // predecessor 2 has branching children; see if it got the branch we are looking for
                selectedCandidate = branchPlan2.get(brancher);
            }
            if (selectedCandidate == null && getSolutionSetDeltaPlanNode() != null && getSolutionSetDeltaPlanNode().branchPlan != null) {
                selectedCandidate = getSolutionSetDeltaPlanNode().branchPlan.get(brancher);
            }
            if (selectedCandidate == null && getNextWorkSetPlanNode() != null && getNextWorkSetPlanNode().branchPlan != null) {
                selectedCandidate = getNextWorkSetPlanNode().branchPlan.get(brancher);
            }
            if (selectedCandidate == null) {
                throw new CompilerException("Candidates for a node with open branches are missing information about the selected candidate ");
            }
            this.branchPlan.put(brancher, selectedCandidate);
        }
    }
}
Also used: OptimizerNode (org.apache.flink.optimizer.dag.OptimizerNode), CompilerException (org.apache.flink.optimizer.CompilerException)
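
The chain of null-guarded lookups above amounts to "take the first candidate map that knows this branching node". A compact equivalent, shown purely as a clarifying sketch (firstCandidate is not a method of the project):

@SafeVarargs
private static PlanNode firstCandidate(OptimizerNode brancher, Map<OptimizerNode, PlanNode>... branchPlans) {
    for (Map<OptimizerNode, PlanNode> plan : branchPlans) {
        if (plan != null) {
            PlanNode candidate = plan.get(brancher);
            if (candidate != null) {
                return candidate;  // first map that resolved the branching node wins
            }
        }
    }
    return null;  // caller decides whether a missing candidate is an error
}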

Example 4 with OptimizerNode

Use of org.apache.flink.optimizer.dag.OptimizerNode in project flink by apache.

The class PipelineBreakingTest, method convertPlan:

private static List<DataSinkNode> convertPlan(Plan p) {
    GraphCreatingVisitor dagCreator = new GraphCreatingVisitor(17, p.getExecutionConfig().getExecutionMode());
    // create the DAG
    p.accept(dagCreator);
    List<DataSinkNode> sinks = dagCreator.getSinks();
    // build a single root and run the branch tracking logic
    OptimizerNode rootNode;
    if (sinks.size() == 1) {
        rootNode = sinks.get(0);
    } else {
        Iterator<DataSinkNode> iter = sinks.iterator();
        rootNode = iter.next();
        while (iter.hasNext()) {
            rootNode = new SinkJoiner(rootNode, iter.next());
        }
    }
    rootNode.accept(new IdAndEstimatesVisitor(null));
    rootNode.accept(new BranchesVisitor());
    return sinks;
}
Also used: OptimizerNode (org.apache.flink.optimizer.dag.OptimizerNode), DataSinkNode (org.apache.flink.optimizer.dag.DataSinkNode), BranchesVisitor (org.apache.flink.optimizer.traversals.BranchesVisitor), IdAndEstimatesVisitor (org.apache.flink.optimizer.traversals.IdAndEstimatesVisitor), SinkJoiner (org.apache.flink.optimizer.dag.SinkJoiner), GraphCreatingVisitor (org.apache.flink.optimizer.traversals.GraphCreatingVisitor)
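
A minimal sketch of how convertPlan might be driven from a test, assuming the classic Flink 1.x DataSet API (ExecutionEnvironment from org.apache.flink.api.java, DiscardingOutputFormat from org.apache.flink.api.java.io) and JUnit 4; the toy program is illustrative, not taken from PipelineBreakingTest:

@Test
public void convertsSingleSinkPlan() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // a tiny program with exactly one sink
    env.fromElements(1, 2, 3).output(new DiscardingOutputFormat<Integer>());
    // translate the program's Plan into the optimizer DAG and inspect the sinks
    List<DataSinkNode> sinks = convertPlan(env.createProgramPlan());
    assertEquals(1, sinks.size());
}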

Example 5 with OptimizerNode

Use of org.apache.flink.optimizer.dag.OptimizerNode in project flink by apache.

The class GraphCreatingVisitor, method postVisit:

@Override
public void postVisit(Operator<?> c) {
    OptimizerNode n = this.con2node.get(c);
    // first connect to the predecessors
    n.setInput(this.con2node, this.defaultDataExchangeMode);
    n.setBroadcastInputs(this.con2node, this.defaultDataExchangeMode);
    // if the node represents a bulk iteration, we recursively translate the data flow now
    if (n instanceof BulkIterationNode) {
        final BulkIterationNode iterNode = (BulkIterationNode) n;
        final BulkIterationBase<?> iter = iterNode.getIterationContract();
        // pass a copy of the non-iterative part into the iteration translation,
        // in case the iteration references its closure
        HashMap<Operator<?>, OptimizerNode> closure = new HashMap<Operator<?>, OptimizerNode>(con2node);
        // first, recursively build the data flow for the step function
        final GraphCreatingVisitor recursiveCreator = new GraphCreatingVisitor(this, true, iterNode.getParallelism(), defaultDataExchangeMode, closure);
        BulkPartialSolutionNode partialSolution;
        iter.getNextPartialSolution().accept(recursiveCreator);
        partialSolution = (BulkPartialSolutionNode) recursiveCreator.con2node.get(iter.getPartialSolution());
        OptimizerNode rootOfStepFunction = recursiveCreator.con2node.get(iter.getNextPartialSolution());
        if (partialSolution == null) {
            throw new CompilerException("Error: The step functions result does not depend on the partial solution.");
        }
        OptimizerNode terminationCriterion = null;
        if (iter.getTerminationCriterion() != null) {
            terminationCriterion = recursiveCreator.con2node.get(iter.getTerminationCriterion());
            // not yet translated as part of the step function; traverse from the
            // termination criterion to build the missing parts
            if (terminationCriterion == null) {
                iter.getTerminationCriterion().accept(recursiveCreator);
                terminationCriterion = recursiveCreator.con2node.get(iter.getTerminationCriterion());
            }
        }
        iterNode.setPartialSolution(partialSolution);
        iterNode.setNextPartialSolution(rootOfStepFunction, terminationCriterion);
        // go over the contained data flow and mark the dynamic path nodes
        StaticDynamicPathIdentifier identifier = new StaticDynamicPathIdentifier(iterNode.getCostWeight());
        iterNode.acceptForStepFunction(identifier);
    } else if (n instanceof WorksetIterationNode) {
        final WorksetIterationNode iterNode = (WorksetIterationNode) n;
        final DeltaIterationBase<?, ?> iter = iterNode.getIterationContract();
        // we need to ensure that both the next-workset and the solution-set-delta
        // depend on the workset. One check comes for free during the translation;
        // we do the other check here as a precondition.
        {
            StepFunctionValidator wsf = new StepFunctionValidator();
            iter.getNextWorkset().accept(wsf);
            if (!wsf.hasFoundWorkset()) {
                throw new CompilerException("In the given program, the next workset does not depend on the workset. " + "This is a prerequisite in delta iterations.");
            }
        }
        // calculate the closure of the anonymous function
        HashMap<Operator<?>, OptimizerNode> closure = new HashMap<Operator<?>, OptimizerNode>(con2node);
        // first, recursively build the data flow for the step function
        final GraphCreatingVisitor recursiveCreator = new GraphCreatingVisitor(this, true, iterNode.getParallelism(), defaultDataExchangeMode, closure);
        // descend from the solution set delta and check that it depends on both the
        // workset and the solution set. If it depends on both, this descent creates
        // both nodes.
        iter.getSolutionSetDelta().accept(recursiveCreator);
        final WorksetNode worksetNode = (WorksetNode) recursiveCreator.con2node.get(iter.getWorkset());
        if (worksetNode == null) {
            throw new CompilerException("In the given program, the solution set delta does not depend on the workset." + "This is a prerequisite in delta iterations.");
        }
        iter.getNextWorkset().accept(recursiveCreator);
        SolutionSetNode solutionSetNode = (SolutionSetNode) recursiveCreator.con2node.get(iter.getSolutionSet());
        if (solutionSetNode == null || solutionSetNode.getOutgoingConnections() == null || solutionSetNode.getOutgoingConnections().isEmpty()) {
            solutionSetNode = new SolutionSetNode((DeltaIterationBase.SolutionSetPlaceHolder<?>) iter.getSolutionSet(), iterNode);
        } else {
            for (DagConnection conn : solutionSetNode.getOutgoingConnections()) {
                OptimizerNode successor = conn.getTarget();
                if (successor.getClass() == JoinNode.class) {
                    // find out which input of the match the solution set is
                    JoinNode mn = (JoinNode) successor;
                    if (mn.getFirstPredecessorNode() == solutionSetNode) {
                        mn.makeJoinWithSolutionSet(0);
                    } else if (mn.getSecondPredecessorNode() == solutionSetNode) {
                        mn.makeJoinWithSolutionSet(1);
                    } else {
                        throw new CompilerException();
                    }
                } else if (successor.getClass() == CoGroupNode.class) {
                    CoGroupNode cg = (CoGroupNode) successor;
                    if (cg.getFirstPredecessorNode() == solutionSetNode) {
                        cg.makeCoGroupWithSolutionSet(0);
                    } else if (cg.getSecondPredecessorNode() == solutionSetNode) {
                        cg.makeCoGroupWithSolutionSet(1);
                    } else {
                        throw new CompilerException();
                    }
                } else {
                    throw new InvalidProgramException("Error: The only operations allowed on the solution set are Join and CoGroup.");
                }
            }
        }
        final OptimizerNode nextWorksetNode = recursiveCreator.con2node.get(iter.getNextWorkset());
        final OptimizerNode solutionSetDeltaNode = recursiveCreator.con2node.get(iter.getSolutionSetDelta());
        // set the step function nodes to the iteration node
        iterNode.setPartialSolution(solutionSetNode, worksetNode);
        iterNode.setNextPartialSolution(solutionSetDeltaNode, nextWorksetNode, defaultDataExchangeMode);
        // go over the contained data flow and mark the dynamic path nodes
        StaticDynamicPathIdentifier pathIdentifier = new StaticDynamicPathIdentifier(iterNode.getCostWeight());
        iterNode.acceptForStepFunction(pathIdentifier);
    }
}
Also used: Operator (org.apache.flink.api.common.operators.Operator), HashMap (java.util.HashMap), WorksetNode (org.apache.flink.optimizer.dag.WorksetNode), JoinNode (org.apache.flink.optimizer.dag.JoinNode), OuterJoinNode (org.apache.flink.optimizer.dag.OuterJoinNode), CoGroupNode (org.apache.flink.optimizer.dag.CoGroupNode), BulkIterationNode (org.apache.flink.optimizer.dag.BulkIterationNode), SolutionSetNode (org.apache.flink.optimizer.dag.SolutionSetNode), OptimizerNode (org.apache.flink.optimizer.dag.OptimizerNode), WorksetIterationNode (org.apache.flink.optimizer.dag.WorksetIterationNode), InvalidProgramException (org.apache.flink.api.common.InvalidProgramException), BulkPartialSolutionNode (org.apache.flink.optimizer.dag.BulkPartialSolutionNode), CompilerException (org.apache.flink.optimizer.CompilerException), DeltaIterationBase (org.apache.flink.api.common.operators.base.DeltaIterationBase), DagConnection (org.apache.flink.optimizer.dag.DagConnection)
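
postVisit can safely wire a node to its predecessors because Flink's visitor contract calls preVisit on the way down and postVisit on the way back up, so every input has already been translated when postVisit runs. A minimal sketch of that contract with a custom visitor, assuming org.apache.flink.util.Visitor (the counting logic is illustrative, not project code):

import org.apache.flink.api.common.operators.Operator;
import org.apache.flink.util.Visitor;

public class OperatorCounter implements Visitor<Operator<?>> {
    private int count;

    @Override
    public boolean preVisit(Operator<?> operator) {
        // returning true descends into the operator's inputs;
        // a real visitor would return false for already-seen nodes
        return true;
    }

    @Override
    public void postVisit(Operator<?> operator) {
        // invoked after all of the operator's inputs were visited,
        // the same guarantee the postVisit above relies on
        count++;
    }

    public int getCount() {
        return count;
    }
}

A Plan would drive it with plan.accept(new OperatorCounter()), the same entry point used in Example 4.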

Aggregations

OptimizerNode (org.apache.flink.optimizer.dag.OptimizerNode): 10 usages
CompilerException (org.apache.flink.optimizer.CompilerException): 5 usages
BulkIterationNode (org.apache.flink.optimizer.dag.BulkIterationNode): 3 usages
DataSinkNode (org.apache.flink.optimizer.dag.DataSinkNode): 3 usages
WorksetIterationNode (org.apache.flink.optimizer.dag.WorksetIterationNode): 3 usages
InvalidProgramException (org.apache.flink.api.common.InvalidProgramException): 2 usages
CompilerHints (org.apache.flink.api.common.operators.CompilerHints): 2 usages
DeltaIterationBase (org.apache.flink.api.common.operators.base.DeltaIterationBase): 2 usages
BinaryUnionNode (org.apache.flink.optimizer.dag.BinaryUnionNode): 2 usages
BulkPartialSolutionNode (org.apache.flink.optimizer.dag.BulkPartialSolutionNode): 2 usages
CoGroupNode (org.apache.flink.optimizer.dag.CoGroupNode): 2 usages
DataSourceNode (org.apache.flink.optimizer.dag.DataSourceNode): 2 usages
JoinNode (org.apache.flink.optimizer.dag.JoinNode): 2 usages
OuterJoinNode (org.apache.flink.optimizer.dag.OuterJoinNode): 2 usages
SolutionSetNode (org.apache.flink.optimizer.dag.SolutionSetNode): 2 usages
WorksetNode (org.apache.flink.optimizer.dag.WorksetNode): 2 usages
PlanNode (org.apache.flink.optimizer.plan.PlanNode): 2 usages
SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 2 usages
IOException (java.io.IOException): 1 usage
StringWriter (java.io.StringWriter): 1 usage