Search in sources :

Example 16 with PlanNode

use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.

the class JobGraphGenerator method finalizeBulkIteration.

/**
 * Completes the job graph wiring of a bulk iteration: finishes the head task configuration,
 * creates the synchronization task, marks the iteration tail(s), and registers the aggregators
 * and the optional convergence criterion.
 *
 * @param descr The descriptor of the bulk iteration whose head task has already been created.
 * @throws CompilerException If the step function is empty, no back channel memory was assigned,
 *         the maximum number of iterations is smaller than one, a tail cannot be located, or the
 *         convergence criterion and its aggregator name are not both set.
 */
private void finalizeBulkIteration(IterationDescriptor descr) {
    final BulkIterationPlanNode iteration = (BulkIterationPlanNode) descr.getIterationNode();
    final JobVertex head = descr.getHeadTask();
    final TaskConfig headCfg = new TaskConfig(head.getConfiguration());
    final TaskConfig finalOutCfg = descr.getHeadFinalResultConfig();

    // ---- head: attach the final outputs and position the sync gate behind all other outputs ----
    final int stepOutputs = headCfg.getNumOutputs();
    final int finalOutputs = finalOutCfg.getNumOutputs();
    if (stepOutputs == 0) {
        throw new CompilerException("The iteration has no operation inside the step function.");
    }
    headCfg.setIterationHeadFinalOutputConfig(finalOutCfg);
    headCfg.setIterationHeadIndexOfSyncOutput(stepOutputs + finalOutputs);

    final double backChannelMemory = iteration.getRelativeMemoryPerSubTask();
    if (backChannelMemory <= 0) {
        throw new CompilerException("Bug: No memory has been assigned to the iteration back channel.");
    }
    headCfg.setRelativeBackChannelMemory(backChannelMemory);

    // ---- sync task: a single-parallelism auxiliary vertex fed by the head ----
    final JobVertex syncVertex = new JobVertex("Sync(" + iteration.getNodeName() + ")");
    syncVertex.setResources(iteration.getMinResources(), iteration.getPreferredResources());
    syncVertex.setInvokableClass(IterationSynchronizationSinkTask.class);
    syncVertex.setParallelism(1);
    syncVertex.setMaxParallelism(1);
    this.auxVertices.add(syncVertex);

    final TaskConfig syncCfg = new TaskConfig(syncVertex.getConfiguration());
    syncCfg.setGateIterativeWithNumberOfEventsUntilInterrupt(0, head.getParallelism());

    // the sync task needs the upper bound on the number of supersteps
    final int iterationLimit = iteration.getIterationNode().getIterationContract().getMaximumNumberOfIterations();
    if (iterationLimit < 1) {
        throw new CompilerException("Cannot create bulk iteration with unspecified maximum number of iterations.");
    }
    syncCfg.setNumberOfIterations(iterationLimit);

    // wire head -> sync
    syncVertex.connectNewDataSetAsInput(head, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);

    // ---- iteration tail of the step function ----
    final PlanNode terminationRoot = iteration.getRootOfTerminationCriterion();
    final PlanNode stepRoot = iteration.getRootOfStepFunction();

    final TaskConfig stepTailCfg;
    JobVertex stepTailVertex = this.vertices.get(stepRoot);
    if (stepTailVertex != null) {
        stepTailCfg = new TaskConfig(stepTailVertex.getConfiguration());
    } else {
        // the last operator of the step function lives inside a chain
        final TaskInChain chainedStepTail = this.chainedTasks.get(stepRoot);
        if (chainedStepTail == null) {
            throw new CompilerException("Bug: Tail of step function not found as vertex or chained task.");
        }
        stepTailVertex = chainedStepTail.getContainingVertex();
        stepTailCfg = chainedStepTail.getTaskConfig();
    }
    stepTailCfg.setIsWorksetUpdate();

    // only a step function root without consumers becomes the tail task itself
    final boolean stepRootIsTerminal = stepRoot.getOutgoingChannels().isEmpty();
    if (stepRootIsTerminal) {
        stepTailVertex.setInvokableClass(IterationTailTask.class);
        stepTailCfg.setOutputSerializer(iteration.getSerializerForIterationChannel());
    }

    // ---- tail of the termination criterion, if there is one and it is not an intermediate node ----
    if (terminationRoot != null && terminationRoot.getOutgoingChannels().isEmpty()) {
        final TaskConfig terminationTailCfg;
        JobVertex terminationTailVertex = this.vertices.get(terminationRoot);
        if (terminationTailVertex != null) {
            terminationTailCfg = new TaskConfig(terminationTailVertex.getConfiguration());
        } else {
            // the last operator of the termination criterion lives inside a chain
            final TaskInChain chainedTerminationTail = this.chainedTasks.get(terminationRoot);
            if (chainedTerminationTail == null) {
                throw new CompilerException("Bug: Tail of termination criterion not found as vertex or chained task.");
            }
            terminationTailVertex = chainedTerminationTail.getContainingVertex();
            terminationTailCfg = chainedTerminationTail.getTaskConfig();
        }
        terminationTailVertex.setInvokableClass(IterationTailTask.class);
        // Hack: the termination criterion tail is flagged as a solution set update
        terminationTailCfg.setIsSolutionSetUpdate();
        terminationTailCfg.setOutputSerializer(iteration.getSerializerForIterationChannel());
        // the head has to wait for that second tail as well
        headCfg.setWaitForSolutionSetUpdate();
    }

    // ---- aggregators and convergence criterion ----
    final AggregatorRegistry registry = iteration.getIterationNode().getIterationContract().getAggregators();
    final Collection<AggregatorWithName<?>> registered = registry.getAllRegisteredAggregators();
    headCfg.addIterationAggregators(registered);
    syncCfg.addIterationAggregators(registered);

    final String criterionAggregatorName = registry.getConvergenceCriterionAggregatorName();
    final ConvergenceCriterion<?> criterion = registry.getConvergenceCriterion();
    final boolean hasCriterion = criterion != null;
    final boolean hasAggregatorName = criterionAggregatorName != null;
    if (hasCriterion || hasAggregatorName) {
        // both pieces must be present together
        if (!hasCriterion) {
            throw new CompilerException("Error: Convergence criterion aggregator set, but criterion is null.");
        }
        if (!hasAggregatorName) {
            throw new CompilerException("Error: Aggregator convergence criterion set, but aggregator is null.");
        }
        syncCfg.setConvergenceCriterion(criterionAggregatorName, criterion);
    }
}
Also used : TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) CompilerException(org.apache.flink.optimizer.CompilerException) AggregatorWithName(org.apache.flink.api.common.aggregators.AggregatorWithName) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) AggregatorRegistry(org.apache.flink.api.common.aggregators.AggregatorRegistry)

Example 17 with PlanNode

use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.

the class JobGraphGenerator method preVisit.

/**
 * Implements the pre-visit step of the depth-first plan traversal. Depending on the node type
 * it creates the job vertex for the node, registers an iteration descriptor, or rewires the
 * drivers of the operators that access the solution set.
 *
 * @param node
 *        The node that is currently processed.
 * @return True, if the visitor should descend to the node's children, false if the node was
 *         already handled in an earlier visit.
 * @throws CompilerException
 *         If the node cannot be translated. Failures raised while creating the vertex are
 *         wrapped together with the node's string representation.
 * @see org.apache.flink.util.Visitor#preVisit(org.apache.flink.util.Visitable)
 */
@Override
public boolean preVisit(PlanNode node) {
    // check if we have visited this node before. in non-tree graphs, this happens
    if (this.vertices.containsKey(node) || this.chainedTasks.containsKey(node) || this.iterations.containsKey(node)) {
        // return false to prevent further descend
        return false;
    }
    // the vertex to be created for the current node
    final JobVertex vertex;
    try {
        if (node instanceof SinkPlanNode) {
            vertex = createDataSinkVertex((SinkPlanNode) node);
        } else if (node instanceof SourcePlanNode) {
            vertex = createDataSourceVertex((SourcePlanNode) node);
        } else if (node instanceof BulkIterationPlanNode) {
            BulkIterationPlanNode iterationNode = (BulkIterationPlanNode) node;
            // for the bulk iteration, we skip creating anything for now. we create the graph
            // for the step function in the post visit.
            // check that the root of the step function has the same parallelism as the iteration.
            // because the tail must have the same parallelism as the head, we can only merge the last
            // operator with the tail, if they have the same parallelism. not merging is currently not
            // implemented
            PlanNode root = iterationNode.getRootOfStepFunction();
            if (root.getParallelism() != node.getParallelism()) {
                throw new CompilerException("Error: The final operator of the step function has a different parallelism than the iteration operator itself.");
            }
            IterationDescriptor descr = new IterationDescriptor(iterationNode, this.iterationIdEnumerator++);
            this.iterations.put(iterationNode, descr);
            vertex = null;
        } else if (node instanceof WorksetIterationPlanNode) {
            WorksetIterationPlanNode iterationNode = (WorksetIterationPlanNode) node;
            // we have the same constraints as for the bulk iteration
            PlanNode nextWorkSet = iterationNode.getNextWorkSetPlanNode();
            PlanNode solutionSetDelta = iterationNode.getSolutionSetDeltaPlanNode();
            if (nextWorkSet.getParallelism() != node.getParallelism()) {
                throw new CompilerException("It is currently not supported that the final operator of the step function has a different parallelism than the iteration operator itself.");
            }
            if (solutionSetDelta.getParallelism() != node.getParallelism()) {
                throw new CompilerException("It is currently not supported that the final operator of the step function has a different parallelism than the iteration operator itself.");
            }
            IterationDescriptor descr = new IterationDescriptor(iterationNode, this.iterationIdEnumerator++);
            this.iterations.put(iterationNode, descr);
            vertex = null;
        } else if (node instanceof SingleInputPlanNode) {
            vertex = createSingleInputVertex((SingleInputPlanNode) node);
        } else if (node instanceof DualInputPlanNode) {
            vertex = createDualInputVertex((DualInputPlanNode) node);
        } else if (node instanceof NAryUnionPlanNode) {
            // skip the union for now
            vertex = null;
        } else if (node instanceof BulkPartialSolutionPlanNode) {
            // create a head node (or not, if it is merged into its successor)
            vertex = createBulkIterationHead((BulkPartialSolutionPlanNode) node);
        } else if (node instanceof SolutionSetPlanNode) {
            // we adjust the joins / cogroups that go into the solution set here
            for (Channel c : node.getOutgoingChannels()) {
                DualInputPlanNode target = (DualInputPlanNode) c.getTarget();
                JobVertex accessingVertex = this.vertices.get(target);
                // fail with a descriptive message instead of an opaque NullPointerException
                if (accessingVertex == null) {
                    throw new CompilerException("Bug: No job vertex found for the operator that accesses the solution set.");
                }
                TaskConfig conf = new TaskConfig(accessingVertex.getConfiguration());
                // determine through which of the two inputs the solution set is accessed
                final int inputNum;
                if (c == target.getInput1()) {
                    inputNum = 0;
                } else if (c == target.getInput2()) {
                    inputNum = 1;
                } else {
                    // sanity check: a message is required here, otherwise the wrapping
                    // exception below would report "null" as the cause description
                    throw new CompilerException("Bug: The solution set channel is not an input of the operator it targets.");
                }
                // adjust the driver
                if (conf.getDriver().equals(JoinDriver.class)) {
                    conf.setDriver(inputNum == 0 ? JoinWithSolutionSetFirstDriver.class : JoinWithSolutionSetSecondDriver.class);
                } else if (conf.getDriver().equals(CoGroupDriver.class)) {
                    conf.setDriver(inputNum == 0 ? CoGroupWithSolutionSetFirstDriver.class : CoGroupWithSolutionSetSecondDriver.class);
                } else {
                    throw new CompilerException("Found join with solution set using incompatible operator (only Join/CoGroup are valid).");
                }
            }
            // make sure we do not visit this node again. for that, we add a 'already seen' entry into one of the sets
            this.chainedTasks.put(node, ALREADY_VISITED_PLACEHOLDER);
            vertex = null;
        } else if (node instanceof WorksetPlanNode) {
            // create the iteration head here
            vertex = createWorksetIterationHead((WorksetPlanNode) node);
        } else {
            throw new CompilerException("Unrecognized node type: " + node.getClass().getName());
        }
    } catch (Exception e) {
        // every failure (including the CompilerExceptions above) is reported with the offending node
        throw new CompilerException("Error translating node '" + node + "': " + e.getMessage(), e);
    }
    // check if a vertex was created, or if it was chained or skipped
    if (vertex != null) {
        // set parallelism
        int pd = node.getParallelism();
        vertex.setParallelism(pd);
        vertex.setMaxParallelism(pd);
        vertex.setSlotSharingGroup(sharingGroup);
        // check whether this vertex is part of an iteration step function
        if (this.currentIteration != null) {
            // the task may not run with a higher parallelism than the iteration itself
            PlanNode iterationNode = (PlanNode) this.currentIteration;
            if (iterationNode.getParallelism() < pd) {
                throw new CompilerException("Error: All functions that are part of an iteration must have the same, or a lower, parallelism than the iteration operator.");
            }
            // store the id of the iterations the step functions participate in
            IterationDescriptor descr = this.iterations.get(this.currentIteration);
            new TaskConfig(vertex.getConfiguration()).setIterationId(descr.getId());
        }
        // store in the map
        this.vertices.put(node, vertex);
    }
    // returning true causes deeper descend
    return true;
}
Also used : SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) JoinWithSolutionSetFirstDriver(org.apache.flink.runtime.operators.JoinWithSolutionSetFirstDriver) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) Channel(org.apache.flink.optimizer.plan.Channel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig) JoinWithSolutionSetSecondDriver(org.apache.flink.runtime.operators.JoinWithSolutionSetSecondDriver) IOException(java.io.IOException) CompilerException(org.apache.flink.optimizer.CompilerException) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) 
CompilerException(org.apache.flink.optimizer.CompilerException) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) CoGroupDriver(org.apache.flink.runtime.operators.CoGroupDriver)

Example 18 with PlanNode

use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.

the class WorksetIterationNode method instantiate.

/**
 * Creates the plan candidates of this workset iteration for one combination of solution set and
 * workset input channels and appends them to {@code target}.
 *
 * <p>The step function plans are re-enumerated on every call. Workset candidates whose
 * properties at the end of the step function do not meet the requirements of the workset are
 * either replaced by a candidate with an appended no-op node that re-establishes the requested
 * properties, or dropped. Every remaining branch-compatible pair of solution set delta candidate
 * and workset candidate yields one {@link WorksetIterationPlanNode}.
 *
 * @param operator Not read by this implementation.
 * @param solutionSetIn The channel of the initial solution set.
 * @param worksetIn The channel of the initial workset.
 * @param broadcastPlanChannels Not read by this implementation.
 * @param target The list that receives the created iteration plan candidates.
 * @param estimator The cost estimator used to enumerate and cost the step function plans.
 * @param globPropsReqSolutionSet Not read by this implementation.
 * @param globPropsReqWorkset The global properties requested for the workset; used to parameterize
 *        the channel into the property-rebuilding no-op node.
 * @param locPropsReqSolutionSet Not read by this implementation.
 * @param locPropsReqWorkset The local properties requested for the workset; used to parameterize
 *        the channel into the property-rebuilding no-op node.
 * @throws CompilerException If a solution set delta candidate is not hash partitioned on the
 *         solution set key fields, or if a delta node that is to be skipped is not a plain no-op.
 */
@SuppressWarnings("unchecked")
@Override
protected void instantiate(OperatorDescriptorDual operator, Channel solutionSetIn, Channel worksetIn, List<Set<? extends NamedChannel>> broadcastPlanChannels, List<PlanNode> target, CostEstimator estimator, RequestedGlobalProperties globPropsReqSolutionSet, RequestedGlobalProperties globPropsReqWorkset, RequestedLocalProperties locPropsReqSolutionSet, RequestedLocalProperties locPropsReqWorkset) {
    // check for pipeline breaking using hash join with build on the solution set side
    placePipelineBreakersIfNecessary(DriverStrategy.HYBRIDHASH_BUILD_FIRST, solutionSetIn, worksetIn);
    // NOTES ON THE ENUMERATION OF THE STEP FUNCTION PLANS:
    // Whenever we instantiate the iteration, we enumerate new candidates for the step function.
    // That way, we make sure we have an appropriate plan for each candidate for the initial partial solution,
    // we have a fitting candidate for the step function (often, work is pushed out of the step function).
    // Among the candidates of the step function, we keep only those that meet the requested properties of the
    // current candidate initial partial solution. That makes sure these properties exist at the beginning of
    // every iteration.
    // 1) Because we enumerate multiple times, we may need to clean the cached plans
    //    before starting another enumeration
    this.nextWorkset.accept(PlanCacheCleaner.INSTANCE);
    this.solutionSetDelta.accept(PlanCacheCleaner.INSTANCE);
    // 2) Give the partial solution the properties of the current candidate for the initial partial solution
    //    This concerns currently only the workset.
    this.worksetNode.setCandidateProperties(worksetIn.getGlobalProperties(), worksetIn.getLocalProperties(), worksetIn);
    this.solutionSetNode.setCandidateProperties(this.partitionedProperties, new LocalProperties(), solutionSetIn);
    final SolutionSetPlanNode sspn = this.solutionSetNode.getCurrentSolutionSetPlanNode();
    final WorksetPlanNode wspn = this.worksetNode.getCurrentWorksetPlanNode();
    // 3) Get the alternative plans
    List<PlanNode> solutionSetDeltaCandidates = this.solutionSetDelta.getAlternativePlans(estimator);
    List<PlanNode> worksetCandidates = this.nextWorkset.getAlternativePlans(estimator);
    // 4) Throw away all that are not compatible with the properties currently requested to the
    //    initial partial solution
    // Make sure that the workset candidates fulfill the input requirements
    {
        // replacements are collected separately because the list is being iterated
        List<PlanNode> newCandidates = new ArrayList<PlanNode>();
        for (Iterator<PlanNode> planDeleter = worksetCandidates.iterator(); planDeleter.hasNext(); ) {
            PlanNode candidate = planDeleter.next();
            GlobalProperties atEndGlobal = candidate.getGlobalProperties();
            LocalProperties atEndLocal = candidate.getLocalProperties();
            FeedbackPropertiesMeetRequirementsReport report = candidate.checkPartialSolutionPropertiesMet(wspn, atEndGlobal, atEndLocal);
            if (report == FeedbackPropertiesMeetRequirementsReport.NO_PARTIAL_SOLUTION) {
                // depends only through broadcast variable on the workset solution; keep the candidate as is
            } else if (report == FeedbackPropertiesMeetRequirementsReport.NOT_MET) {
                // attach a no-op node through which we create the properties of the original input
                Channel toNoOp = new Channel(candidate);
                globPropsReqWorkset.parameterizeChannel(toNoOp, false, nextWorksetRootConnection.getDataExchangeMode(), false);
                locPropsReqWorkset.parameterizeChannel(toNoOp);
                NoOpUnaryUdfOp noOpUnaryUdfOp = new NoOpUnaryUdfOp<>();
                noOpUnaryUdfOp.setInput(candidate.getProgramOperator());
                UnaryOperatorNode rebuildWorksetPropertiesNode = new UnaryOperatorNode("Rebuild Workset Properties", noOpUnaryUdfOp, true);
                rebuildWorksetPropertiesNode.setParallelism(candidate.getParallelism());
                SingleInputPlanNode rebuildWorksetPropertiesPlanNode = new SingleInputPlanNode(rebuildWorksetPropertiesNode, "Rebuild Workset Properties", toNoOp, DriverStrategy.UNARY_NO_OP);
                rebuildWorksetPropertiesPlanNode.initProperties(toNoOp.getGlobalProperties(), toNoOp.getLocalProperties());
                estimator.costOperator(rebuildWorksetPropertiesPlanNode);
                GlobalProperties atEndGlobalModified = rebuildWorksetPropertiesPlanNode.getGlobalProperties();
                LocalProperties atEndLocalModified = rebuildWorksetPropertiesPlanNode.getLocalProperties();
                // only re-check when the no-op node actually changed the properties
                if (!(atEndGlobalModified.equals(atEndGlobal) && atEndLocalModified.equals(atEndLocal))) {
                    FeedbackPropertiesMeetRequirementsReport report2 = candidate.checkPartialSolutionPropertiesMet(wspn, atEndGlobalModified, atEndLocalModified);
                    if (report2 != FeedbackPropertiesMeetRequirementsReport.NOT_MET) {
                        newCandidates.add(rebuildWorksetPropertiesPlanNode);
                    }
                }
                // remove the original operator and add the modified candidate
                planDeleter.remove();
            }
        }
        worksetCandidates.addAll(newCandidates);
    }
    if (worksetCandidates.isEmpty()) {
        return;
    }
    // sanity check the solution set delta
    for (PlanNode solutionSetDeltaCandidate : solutionSetDeltaCandidates) {
        SingleInputPlanNode candidate = (SingleInputPlanNode) solutionSetDeltaCandidate;
        GlobalProperties gp = candidate.getGlobalProperties();
        if (gp.getPartitioning() != PartitioningProperty.HASH_PARTITIONED || gp.getPartitioningFields() == null || !gp.getPartitioningFields().equals(this.solutionSetKeyFields)) {
            throw new CompilerException("Bug: The solution set delta is not partitioned.");
        }
    }
    // 5) Create a candidate for the Iteration Node for every remaining plan of the step function.
    final GlobalProperties gp = new GlobalProperties();
    gp.setHashPartitioned(this.solutionSetKeyFields);
    gp.addUniqueFieldCombination(this.solutionSetKeyFields);
    LocalProperties lp = LocalProperties.EMPTY.addUniqueFields(this.solutionSetKeyFields);
    // take all combinations of solution set delta and workset plans
    for (PlanNode solutionSetCandidate : solutionSetDeltaCandidates) {
        for (PlanNode worksetCandidate : worksetCandidates) {
            // check whether they have the same operator at their latest branching point
            if (this.singleRoot.areBranchCompatible(solutionSetCandidate, worksetCandidate)) {
                SingleInputPlanNode siSolutionDeltaCandidate = (SingleInputPlanNode) solutionSetCandidate;
                boolean immediateDeltaUpdate;
                // The node handed to the iteration plan node. It is kept in its own variable so that
                // the outer loop variable is never overwritten: every workset candidate must be
                // paired with the original delta candidate, not with the delta's source.
                final PlanNode solutionSetDeltaForPlan;
                // check whether we need a dedicated solution set delta operator, or whether we can update on the fly
                if (siSolutionDeltaCandidate.getInput().getShipStrategy() == ShipStrategyType.FORWARD && this.solutionDeltaImmediatelyAfterSolutionJoin) {
                    // sanity check the node and connection
                    if (siSolutionDeltaCandidate.getDriverStrategy() != DriverStrategy.UNARY_NO_OP || siSolutionDeltaCandidate.getInput().getLocalStrategy() != LocalStrategy.NONE) {
                        throw new CompilerException("Invalid Solution set delta node.");
                    }
                    // skip the delta node and use its producer directly
                    solutionSetDeltaForPlan = siSolutionDeltaCandidate.getInput().getSource();
                    immediateDeltaUpdate = true;
                } else {
                    // was not partitioned, we need to keep this node.
                    // mark that we materialize the input
                    siSolutionDeltaCandidate.getInput().setTempMode(TempMode.PIPELINE_BREAKER);
                    solutionSetDeltaForPlan = solutionSetCandidate;
                    immediateDeltaUpdate = false;
                }
                WorksetIterationPlanNode wsNode = new WorksetIterationPlanNode(this, this.getOperator().getName(), solutionSetIn, worksetIn, sspn, wspn, worksetCandidate, solutionSetDeltaForPlan);
                wsNode.setImmediateSolutionSetUpdate(immediateDeltaUpdate);
                wsNode.initProperties(gp, lp);
                target.add(wsNode);
            }
        }
    }
}
Also used : SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) FeedbackPropertiesMeetRequirementsReport(org.apache.flink.optimizer.plan.PlanNode.FeedbackPropertiesMeetRequirementsReport) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) Channel(org.apache.flink.optimizer.plan.Channel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) RequestedGlobalProperties(org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties) GlobalProperties(org.apache.flink.optimizer.dataproperties.GlobalProperties) NoOpUnaryUdfOp(org.apache.flink.optimizer.util.NoOpUnaryUdfOp) Iterator(java.util.Iterator) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) CompilerException(org.apache.flink.optimizer.CompilerException) ArrayList(java.util.ArrayList) FieldList(org.apache.flink.api.common.operators.util.FieldList) List(java.util.List) RequestedLocalProperties(org.apache.flink.optimizer.dataproperties.RequestedLocalProperties) LocalProperties(org.apache.flink.optimizer.dataproperties.LocalProperties)

Example 19 with PlanNode

use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.

the class PlanFinalizer method createFinalPlan.

/**
 * Traverses the plan starting at the given sinks and afterwards distributes relative memory
 * shares to the nodes and channels that were collected during the traversal.
 *
 * @param sinks The sink nodes from which the traversal starts.
 * @param jobName The name handed to the resulting plan.
 * @param originalPlan The original program plan handed to the resulting plan.
 * @return The optimized plan built from the sources, sinks and nodes collected by this finalizer.
 */
public OptimizedPlan createFinalPlan(List<SinkPlanNode> sinks, String jobName, Plan originalPlan) {
    this.memoryConsumerWeights = 0;

    // walk the graph from every sink
    for (SinkPlanNode sink : sinks) {
        sink.accept(this);
    }

    // without any memory consumer there is nothing to distribute
    if (this.memoryConsumerWeights <= 0) {
        return new OptimizedPlan(this.sources, this.sinks, this.allNodes, jobName, originalPlan);
    }

    for (PlanNode planNode : this.allNodes) {
        // share for the driver strategy, proportional to the node's weight
        final int weight = planNode.getMemoryConsumerWeight();
        if (weight > 0) {
            final double driverShare = (double) weight / this.memoryConsumerWeights;
            planNode.setRelativeMemoryPerSubtask(driverShare);
            if (Optimizer.LOG.isDebugEnabled()) {
                Optimizer.LOG.debug("Assigned " + driverShare + " of total memory to each subtask of " + planNode.getProgramOperator().getName() + ".");
            }
        }

        // one weight unit each for a damming local strategy and for a temp table on an input
        for (Channel input : planNode.getInputs()) {
            if (input.getLocalStrategy().dams()) {
                final double localStrategyShare = 1.0 / this.memoryConsumerWeights;
                input.setRelativeMemoryLocalStrategy(localStrategyShare);
                if (Optimizer.LOG.isDebugEnabled()) {
                    Optimizer.LOG.debug("Assigned " + localStrategyShare + " of total memory to each local strategy instance of " + input + ".");
                }
            }
            if (input.getTempMode() != TempMode.NONE) {
                final double tempShare = 1.0 / this.memoryConsumerWeights;
                input.setRelativeTempMemory(tempShare);
                if (Optimizer.LOG.isDebugEnabled()) {
                    Optimizer.LOG.debug("Assigned " + tempShare + " of total memory to each instance of the temp table for " + input + ".");
                }
            }
        }
    }

    return new OptimizedPlan(this.sources, this.sinks, this.allNodes, jobName, originalPlan);
}
Also used : SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) BinaryUnionPlanNode(org.apache.flink.optimizer.plan.BinaryUnionPlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) Channel(org.apache.flink.optimizer.plan.Channel) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan)

Example 20 with PlanNode

use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.

the class RangePartitionRewriter method postVisit.

/**
 * Post-visit hook: descends once into the step function of every iteration node and replaces
 * each range-partitioned input channel that has no data distribution by the channels produced
 * by {@code rewriteRangePartitionChannel}.
 *
 * @param node The node whose input channels are inspected.
 * @throws InvalidProgramException If such a channel belongs to a node on the dynamic path.
 */
@Override
public void postVisit(PlanNode node) {
    // the step function of an iteration is traversed only on the first encounter
    if (node instanceof IterationPlanNode) {
        final IterationPlanNode iteration = (IterationPlanNode) node;
        final boolean alreadySeen = visitedIterationNodes.contains(iteration);
        if (!alreadySeen) {
            visitedIterationNodes.add(iteration);
            iteration.acceptForStepFunction(this);
        }
    }

    for (Channel input : node.getInputs()) {
        // only range partitioning is rewritten
        if (input.getShipStrategy() != ShipStrategyType.PARTITION_RANGE) {
            continue;
        }
        // a present data distribution means there is nothing (more) to rewrite
        if (input.getDataDistribution() != null) {
            continue;
        }
        if (node.isOnDynamicPath()) {
            throw new InvalidProgramException("Range Partitioning not supported within iterations if users do not supply the data distribution.");
        }

        // swap the original channel for its rewritten replacements on the producer side
        final PlanNode producer = input.getSource();
        final List<Channel> replacements = rewriteRangePartitionChannel(input);
        producer.getOutgoingChannels().remove(input);
        producer.getOutgoingChannels().addAll(replacements);
    }
}
Also used : IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) Channel(org.apache.flink.optimizer.plan.Channel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) ShipStrategyType(org.apache.flink.runtime.operators.shipping.ShipStrategyType)

Aggregations

PlanNode (org.apache.flink.optimizer.plan.PlanNode)43 SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)27 DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode)25 Channel (org.apache.flink.optimizer.plan.Channel)24 SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)22 WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode)21 CompilerException (org.apache.flink.optimizer.CompilerException)16 NamedChannel (org.apache.flink.optimizer.plan.NamedChannel)16 OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)15 BulkIterationPlanNode (org.apache.flink.optimizer.plan.BulkIterationPlanNode)14 BulkPartialSolutionPlanNode (org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode)13 IterationPlanNode (org.apache.flink.optimizer.plan.IterationPlanNode)13 NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode)13 SolutionSetPlanNode (org.apache.flink.optimizer.plan.SolutionSetPlanNode)13 SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode)13 WorksetPlanNode (org.apache.flink.optimizer.plan.WorksetPlanNode)13 Plan (org.apache.flink.api.common.Plan)12 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)12 Test (org.junit.Test)12 ArrayList (java.util.ArrayList)11