Search in sources :

Example 11 with PlanNode

use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.

the class SemanticPropertiesAPIToPlanTest method forwardFieldsTestMapReduce.

/**
 * Builds a map -> reduce -> map -> reduce program whose functions are annotated with forwarded
 * fields, compiles it, and walks the optimized plan. Every single-input plan node whose program
 * operator is a reduce or a map must receive its inputs with the FORWARD ship strategy and must
 * report hash partitioning on field 1 as well as grouping on field 1.
 */
@Test
public void forwardFieldsTestMapReduce() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    final DataSet<Tuple3<Integer, Integer, Integer>> source = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
    final DataSet<Tuple3<Integer, Integer, Integer>> result = source
            .map(new MockMapper()).withForwardedFields("*")
            .groupBy(0)
            .reduce(new MockReducer()).withForwardedFields("f0->f1")
            .map(new MockMapper()).withForwardedFields("*")
            .groupBy(1)
            .reduce(new MockReducer()).withForwardedFields("*");
    result.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());

    final Plan programPlan = env.createProgramPlan();
    final OptimizedPlan optimized = compileWithStats(programPlan);

    optimized.accept(new Visitor<PlanNode>() {

        @Override
        public boolean preVisit(PlanNode node) {
            // only single-input nodes are inspected; everything else is simply traversed
            if (!(node instanceof SingleInputPlanNode)) {
                return true;
            }
            if (node.getProgramOperator() instanceof ReduceOperatorBase) {
                verifyForwardedAndPartitioned(node, "Reduce", "Reducer");
            }
            if (node.getProgramOperator() instanceof MapOperatorBase) {
                verifyForwardedAndPartitioned(node, "Map", "Mapper");
                // the visitor does not descend below a mapper
                return false;
            }
            return true;
        }

        @Override
        public void postVisit(PlanNode node) {
            // nothing to do after the children have been visited
        }

        /**
         * Runs the shared set of checks once per input channel of the given node.
         *
         * @param node the plan node under inspection
         * @param operatorName operator label used in the ship strategy message
         * @param roleName operator label used in the properties messages
         */
        private void verifyForwardedAndPartitioned(PlanNode node, String operatorName, String roleName) {
            for (Channel in : node.getInputs()) {
                final GlobalProperties global = node.getGlobalProperties();
                final LocalProperties local = node.getLocalProperties();
                Assert.assertTrue(operatorName + " should just forward the input if it is already partitioned", in.getShipStrategy() == ShipStrategyType.FORWARD);
                Assert.assertTrue("Wrong GlobalProperties on " + roleName, global.isPartitionedOnFields(new FieldSet(1)));
                Assert.assertTrue("Wrong GlobalProperties on " + roleName, global.getPartitioning() == PartitioningProperty.HASH_PARTITIONED);
                Assert.assertTrue("Wrong LocalProperties on " + roleName, local.getGroupedFields().contains(1));
            }
        }
    });
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) ReduceOperatorBase(org.apache.flink.api.common.operators.base.ReduceOperatorBase) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) FieldSet(org.apache.flink.api.common.operators.util.FieldSet) MapOperatorBase(org.apache.flink.api.common.operators.base.MapOperatorBase) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) GlobalProperties(org.apache.flink.optimizer.dataproperties.GlobalProperties) Tuple3(org.apache.flink.api.java.tuple.Tuple3) LocalProperties(org.apache.flink.optimizer.dataproperties.LocalProperties) Test(org.junit.Test)

Example 12 with PlanNode

use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.

the class JobGraphGenerator method compileJobGraph.

/**
 * Translates the given optimized plan into a {@link JobGraph}.
 *
 * <p>The method resets the generator's working state, traverses the plan with this generator
 * as the visitor, finalizes all iterations collected during the traversal, writes the chained
 * task configurations into their containing vertices, and finally assembles the job graph
 * (execution config, session timeout, vertices, auxiliary vertices and cached files). The
 * working state is released again before the graph is returned.
 *
 * @param program the optimized plan to translate; must not be {@code null}
 * @param jobId the ID to give the job graph; a new ID is generated when {@code null}
 * @return the assembled job graph
 * @throws NullPointerException if {@code program} is {@code null}
 * @throws CompilerException if the traversal ends inside an unclosed iteration, if an
 *         iteration node is of an unsupported type, or if the execution config cannot be
 *         serialized
 */
public JobGraph compileJobGraph(OptimizedPlan program, JobID jobId) {
    if (program == null) {
        throw new NullPointerException("Program is null, did you called " + "ExecutionEnvironment.execute()");
    }
    if (jobId == null) {
        jobId = JobID.generate();
    }
    this.vertices = new HashMap<PlanNode, JobVertex>();
    this.chainedTasks = new HashMap<PlanNode, TaskInChain>();
    this.chainedTasksInSequence = new ArrayList<TaskInChain>();
    this.auxVertices = new ArrayList<JobVertex>();
    this.iterations = new HashMap<IterationPlanNode, IterationDescriptor>();
    this.iterationStack = new ArrayList<IterationPlanNode>();
    this.sharingGroup = new SlotSharingGroup();
    // this starts the traversal that generates the job graph
    program.accept(this);
    // sanity check that we are not somehow in an iteration at the end
    if (this.currentIteration != null) {
        throw new CompilerException("The graph translation ended prematurely, leaving an unclosed iteration.");
    }
    // finalize the iterations
    for (IterationDescriptor iteration : this.iterations.values()) {
        final IterationPlanNode iterationNode = iteration.getIterationNode();
        if (iterationNode instanceof BulkIterationPlanNode) {
            finalizeBulkIteration(iteration);
        } else if (iterationNode instanceof WorksetIterationPlanNode) {
            finalizeWorksetIteration(iteration);
        } else {
            // previously thrown without any message, which made the failure impossible to diagnose
            throw new CompilerException("Bug: Unsupported iteration plan node type: "
                    + (iterationNode == null ? "null" : iterationNode.getClass().getName()));
        }
    }
    // now that the traversal is done, let every chained task write its config into its
    // parent's (containing vertex's) configuration
    for (TaskInChain tic : this.chainedTasksInSequence) {
        TaskConfig t = new TaskConfig(tic.getContainingVertex().getConfiguration());
        t.addChainedTask(tic.getChainedTask(), tic.getTaskConfig(), tic.getTaskName());
    }
    // ----- attach the additional info to the job vertices, for display in the runtime monitor
    attachOperatorNamesAndDescriptions();
    // ----------- finalize the job graph -----------
    // create the job graph object
    JobGraph graph = new JobGraph(jobId, program.getJobName());
    try {
        graph.setExecutionConfig(program.getOriginalPlan().getExecutionConfig());
    } catch (IOException e) {
        // keep the original exception as the cause so the offending type can be identified
        throw new CompilerException("Could not serialize the ExecutionConfig. "
                + "This indicates that non-serializable types (like custom serializers) were registered", e);
    }
    graph.setAllowQueuedScheduling(false);
    graph.setSessionTimeout(program.getOriginalPlan().getSessionTimeout());
    // add vertices to the graph
    for (JobVertex vertex : this.vertices.values()) {
        graph.addVertex(vertex);
    }
    // auxiliary vertices are additionally placed into the generator's slot sharing group
    for (JobVertex vertex : this.auxVertices) {
        graph.addVertex(vertex);
        vertex.setSlotSharingGroup(sharingGroup);
    }
    // add registered cache file into job configuration
    for (Entry<String, DistributedCacheEntry> e : program.getOriginalPlan().getCachedFiles()) {
        DistributedCache.writeFileInfoToConfig(e.getKey(), e.getValue(), graph.getJobConfiguration());
    }
    // release all references again
    this.vertices = null;
    this.chainedTasks = null;
    this.chainedTasksInSequence = null;
    this.auxVertices = null;
    this.iterations = null;
    this.iterationStack = null;
    // return job graph
    return graph;
}
Also used : WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig) IOException(java.io.IOException) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) DistributedCacheEntry(org.apache.flink.api.common.cache.DistributedCache.DistributedCacheEntry) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) CompilerException(org.apache.flink.optimizer.CompilerException) SlotSharingGroup(org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode)

Example 13 with PlanNode

use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.

the class JobGraphGenerator method finalizeWorksetIteration.

/**
 * Completes the job graph setup of a workset iteration after the plan traversal.
 *
 * <p>In order, this method
 * <ol>
 *   <li>finishes the head task config (final output config, index of the sync output, memory
 *       split between back channel and solution set, solution set serializer/comparator),</li>
 *   <li>creates the synchronization sink vertex, registers it as auxiliary vertex and connects
 *       it to the head,</li>
 *   <li>marks the tasks that produce the next workset and the solution set delta, turning them
 *       into iteration tails where applicable,</li>
 *   <li>registers the aggregators and convergence criteria on head and sync task.</li>
 * </ol>
 *
 * @param descr descriptor of the iteration; its iteration node must be a
 *        {@link WorksetIterationPlanNode}
 * @throws CompilerException if the iteration is inconsistently configured (see the individual
 *         checks below)
 */
private void finalizeWorksetIteration(IterationDescriptor descr) {
    final WorksetIterationPlanNode worksetIteration = (WorksetIterationPlanNode) descr.getIterationNode();
    final JobVertex head = descr.getHeadTask();
    final TaskConfig headCfg = new TaskConfig(head.getConfiguration());
    final TaskConfig headFinalOutCfg = descr.getHeadFinalResultConfig();

    // ------------ head config: final outputs and the sync gate ------------
    final int stepFunctionOutputs = headCfg.getNumOutputs();
    final int finalOutputs = headFinalOutCfg.getNumOutputs();
    if (stepFunctionOutputs == 0) {
        throw new CompilerException("The workset iteration has no operation on the workset inside the step function.");
    }
    headCfg.setIterationHeadFinalOutputConfig(headFinalOutCfg);
    // the sync output is indexed after all step function outputs and all final outputs
    headCfg.setIterationHeadIndexOfSyncOutput(stepFunctionOutputs + finalOutputs);

    final double memoryShare = worksetIteration.getRelativeMemoryPerSubTask();
    if (memoryShare <= 0) {
        throw new CompilerException("Bug: No memory has been assigned to the workset iteration.");
    }
    headCfg.setIsWorksetIteration();
    // the memory is split evenly between the back channel and the solution set
    final double halfShare = memoryShare / 2;
    headCfg.setRelativeBackChannelMemory(halfShare);
    headCfg.setRelativeSolutionSetMemory(halfShare);
    headCfg.setSolutionSetSerializer(worksetIteration.getSolutionSetSerializer());
    headCfg.setSolutionSetComparator(worksetIteration.getSolutionSetComparator());

    // --------------------------- sync task ---------------------------
    final JobVertex syncVertex = new JobVertex("Sync (" + worksetIteration.getNodeName() + ")");
    syncVertex.setResources(worksetIteration.getMinResources(), worksetIteration.getPreferredResources());
    syncVertex.setInvokableClass(IterationSynchronizationSinkTask.class);
    syncVertex.setParallelism(1);
    syncVertex.setMaxParallelism(1);
    this.auxVertices.add(syncVertex);

    final TaskConfig syncCfg = new TaskConfig(syncVertex.getConfiguration());
    syncCfg.setGateIterativeWithNumberOfEventsUntilInterrupt(0, head.getParallelism());

    // the sync task needs an explicit upper bound on the number of iterations
    final int iterationLimit = worksetIteration.getIterationNode().getIterationContract().getMaximumNumberOfIterations();
    if (iterationLimit < 1) {
        throw new CompilerException("Cannot create workset iteration with unspecified maximum number of iterations.");
    }
    syncCfg.setNumberOfIterations(iterationLimit);
    syncVertex.connectNewDataSetAsInput(head, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);

    // ------------- iteration tails: next workset and solution set delta -------------
    // Three cases are possible:
    //   1) two tails, one for the workset update and one for the solution set update
    //   2) one tail for the workset update, solution set update in an intermediate task
    //   3) one tail for the solution set update, workset update in an intermediate task
    final PlanNode nextWorkset = worksetIteration.getNextWorkSetPlanNode();
    final PlanNode solutionDelta = worksetIteration.getSolutionSetDeltaPlanNode();
    final boolean worksetIsTail = nextWorkset.getOutgoingChannels().isEmpty();
    final boolean solutionSetIsTail = !(worksetIteration.isImmediateSolutionSetUpdate() && worksetIsTail);

    // locate the task producing the next workset: either its own vertex or a chained task
    JobVertex worksetVertex = this.vertices.get(nextWorkset);
    final TaskConfig worksetCfg;
    if (worksetVertex != null) {
        worksetCfg = new TaskConfig(worksetVertex.getConfiguration());
    } else {
        final TaskInChain chainedWorkset = this.chainedTasks.get(nextWorkset);
        if (chainedWorkset == null) {
            throw new CompilerException("Bug: Next workset node not found as vertex or chained task.");
        }
        worksetVertex = chainedWorkset.getContainingVertex();
        worksetCfg = chainedWorkset.getTaskConfig();
    }
    worksetCfg.setIsWorksetIteration();
    worksetCfg.setIsWorksetUpdate();
    if (worksetIsTail) {
        worksetVertex.setInvokableClass(IterationTailTask.class);
        worksetCfg.setOutputSerializer(worksetIteration.getWorksetSerializer());
    }

    // locate the task producing the solution set delta in the same way
    JobVertex deltaVertex = this.vertices.get(solutionDelta);
    final TaskConfig deltaCfg;
    if (deltaVertex != null) {
        deltaCfg = new TaskConfig(deltaVertex.getConfiguration());
    } else {
        final TaskInChain chainedDelta = this.chainedTasks.get(solutionDelta);
        if (chainedDelta == null) {
            throw new CompilerException("Bug: Solution Set Delta not found as vertex or chained task.");
        }
        deltaVertex = chainedDelta.getContainingVertex();
        deltaCfg = chainedDelta.getTaskConfig();
    }
    deltaCfg.setIsWorksetIteration();
    deltaCfg.setIsSolutionSetUpdate();
    if (solutionSetIsTail) {
        deltaVertex.setInvokableClass(IterationTailTask.class);
        deltaCfg.setOutputSerializer(worksetIteration.getSolutionSetSerializer());
        // the head has to wait until the solution set updates have arrived
        headCfg.setWaitForSolutionSetUpdate();
    } else {
        // without a dedicated tail the update happens in an intermediate task and must be immediate
        if (!worksetIteration.isImmediateSolutionSetUpdate()) {
            throw new CompilerException("A solution set update without dedicated tail is not set to perform immediate updates.");
        }
        deltaCfg.setIsSolutionSetUpdateWithoutReprobe();
    }

    // ------------------- aggregators and convergence -------------------
    final AggregatorRegistry registry = worksetIteration.getIterationNode().getIterationContract().getAggregators();
    final Collection<AggregatorWithName<?>> registered = registry.getAllRegisteredAggregators();
    // user aggregators must not clash with the built-in "workset empty" aggregator
    for (AggregatorWithName<?> candidate : registered) {
        if (candidate.getName().equals(WorksetEmptyConvergenceCriterion.AGGREGATOR_NAME)) {
            throw new CompilerException("User defined aggregator used the same name as built-in workset " + "termination check aggregator: " + WorksetEmptyConvergenceCriterion.AGGREGATOR_NAME);
        }
    }
    headCfg.addIterationAggregators(registered);
    syncCfg.addIterationAggregators(registered);

    // a user-defined convergence criterion is only valid together with its aggregator name
    final String criterionAggregatorName = registry.getConvergenceCriterionAggregatorName();
    final ConvergenceCriterion<?> criterion = registry.getConvergenceCriterion();
    final boolean hasCriterion = criterion != null;
    final boolean hasCriterionAggregator = criterionAggregatorName != null;
    if (hasCriterion || hasCriterionAggregator) {
        if (!hasCriterion) {
            throw new CompilerException("Error: Convergence criterion aggregator set, but criterion is null.");
        }
        if (!hasCriterionAggregator) {
            throw new CompilerException("Error: Aggregator convergence criterion set, but aggregator is null.");
        }
        syncCfg.setConvergenceCriterion(criterionAggregatorName, criterion);
    }

    // built-in aggregator and implicit criterion registered under the "workset empty" name
    headCfg.addIterationAggregator(WorksetEmptyConvergenceCriterion.AGGREGATOR_NAME, new LongSumAggregator());
    syncCfg.addIterationAggregator(WorksetEmptyConvergenceCriterion.AGGREGATOR_NAME, new LongSumAggregator());
    syncCfg.setImplicitConvergenceCriterion(WorksetEmptyConvergenceCriterion.AGGREGATOR_NAME, new WorksetEmptyConvergenceCriterion());
}
Also used : WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) LongSumAggregator(org.apache.flink.api.common.aggregators.LongSumAggregator) TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) IterationSynchronizationSinkTask(org.apache.flink.runtime.iterative.task.IterationSynchronizationSinkTask) WorksetEmptyConvergenceCriterion(org.apache.flink.runtime.iterative.convergence.WorksetEmptyConvergenceCriterion) CompilerException(org.apache.flink.optimizer.CompilerException) AggregatorWithName(org.apache.flink.api.common.aggregators.AggregatorWithName) AggregatorRegistry(org.apache.flink.api.common.aggregators.AggregatorRegistry)

Example 14 with PlanNode

use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.

the class JobGraphGenerator method translateChannel.

/**
 * Connects the source(s) of one optimizer channel to the target job vertex.
 *
 * <p>The channel is first expanded into the list of channels that actually have to be
 * connected: the inputs of a union node, the input of the containing iteration when the
 * source is a merged iteration head, or just the channel itself. Each of those channels is
 * then connected via {@code connectJobVertices}, and the number of senders on the dynamic
 * path is recorded on the target config as the iterative gate's event count.
 *
 * @param input the channel to translate
 * @param inputIndex index handed to the target config and to the vertex connection for this
 *        input
 * @param targetVertex the job vertex that receives the data
 * @param targetVertexConfig the task config of the receiving task
 * @param isBroadcast whether the channel is a broadcast input of the target
 * @return 0 if the channel's source is a {@link SolutionSetPlanNode} (nothing is connected),
 *         1 otherwise
 * @throws CompilerException if unioned channels use different serializers, if a source node
 *         can be resolved neither as vertex, chained task nor iteration, or if a union mixes
 *         channels on the static and the dynamic path
 * @throws IllegalStateException if a chained source task has no containing vertex yet
 * @throws Exception declared by the signature; NOTE(review): presumably propagated from
 *         {@code connectJobVertices} or {@code addLocalInfoFromChannelToConfig} - confirm
 */
private int translateChannel(Channel input, int inputIndex, JobVertex targetVertex, TaskConfig targetVertexConfig, boolean isBroadcast) throws Exception {
    final PlanNode inputPlanNode = input.getSource();
    // the channels that are really connected to the target; assigned exactly once per branch
    final Iterator<Channel> allInChannels;
    if (inputPlanNode instanceof NAryUnionPlanNode) {
        allInChannels = ((NAryUnionPlanNode) inputPlanNode).getListOfInputs().iterator();
        // Push the properties of the channel leaving the union down to the union's own inputs:
        // BATCH exchange mode if the outgoing channel uses it, and the BROADCAST ship strategy
        // for broadcast inputs. According to the original comment, not doing so can result in
        // deadlocks when closing a branching flow at runtime.
        for (Channel in : inputPlanNode.getInputs()) {
            if (input.getDataExchangeMode().equals(DataExchangeMode.BATCH)) {
                in.setDataExchangeMode(DataExchangeMode.BATCH);
            }
            if (isBroadcast) {
                in.setShipStrategy(ShipStrategyType.BROADCAST, in.getDataExchangeMode());
            }
        }
    } else if (inputPlanNode instanceof BulkPartialSolutionPlanNode) {
        if (this.vertices.get(inputPlanNode) == null) {
            // merged iteration head
            final BulkPartialSolutionPlanNode pspn = (BulkPartialSolutionPlanNode) inputPlanNode;
            final BulkIterationPlanNode iterationNode = pspn.getContainingIterationNode();
            // check if the iteration's input is a union
            if (iterationNode.getInput().getSource() instanceof NAryUnionPlanNode) {
                allInChannels = (iterationNode.getInput().getSource()).getInputs().iterator();
            } else {
                allInChannels = Collections.singletonList(iterationNode.getInput()).iterator();
            }
            // also, set the index of the gate with the partial solution
            targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(inputIndex);
        } else {
            // standalone iteration head
            allInChannels = Collections.singletonList(input).iterator();
        }
    } else if (inputPlanNode instanceof WorksetPlanNode) {
        if (this.vertices.get(inputPlanNode) == null) {
            // merged iteration head
            final WorksetPlanNode wspn = (WorksetPlanNode) inputPlanNode;
            final WorksetIterationPlanNode iterationNode = wspn.getContainingIterationNode();
            // check if the iteration's second input (the one read via getInput2()) is a union
            if (iterationNode.getInput2().getSource() instanceof NAryUnionPlanNode) {
                allInChannels = (iterationNode.getInput2().getSource()).getInputs().iterator();
            } else {
                allInChannels = Collections.singletonList(iterationNode.getInput2()).iterator();
            }
            // also, set the index of the gate with the workset
            targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(inputIndex);
        } else {
            // standalone iteration head
            allInChannels = Collections.singletonList(input).iterator();
        }
    } else if (inputPlanNode instanceof SolutionSetPlanNode) {
        // no edge is created for a solution set input; per the original comment it is handled
        // by other means rather than a vertex connection
        // NOTE(review): 0 presumably tells the caller that no input was added - confirm
        return 0;
    } else {
        allInChannels = Collections.singletonList(input).iterator();
    }
    // check that the type serializer is consistent
    TypeSerializerFactory<?> typeSerFact = null;
    // accounting for channels on the dynamic path
    int numChannelsTotal = 0;
    int numChannelsDynamicPath = 0;
    int numDynamicSenderTasksTotal = 0;
    // expand the channel to all the union channels, in case there is a union operator at its source
    while (allInChannels.hasNext()) {
        final Channel inConn = allInChannels.next();
        // sanity check the common serializer
        if (typeSerFact == null) {
            typeSerFact = inConn.getSerializer();
        } else if (!typeSerFact.equals(inConn.getSerializer())) {
            throw new CompilerException("Conflicting types in union operator.");
        }
        final PlanNode sourceNode = inConn.getSource();
        JobVertex sourceVertex = this.vertices.get(sourceNode);
        TaskConfig sourceVertexConfig;
        if (sourceVertex == null) {
            // this predecessor is chained to another task or an iteration
            final TaskInChain chainedTask;
            final IterationDescriptor iteration;
            if ((chainedTask = this.chainedTasks.get(sourceNode)) != null) {
                // push chained task
                if (chainedTask.getContainingVertex() == null) {
                    throw new IllegalStateException("Bug: Chained task has not been assigned its containing vertex when connecting.");
                }
                sourceVertex = chainedTask.getContainingVertex();
                sourceVertexConfig = chainedTask.getTaskConfig();
            } else if ((iteration = this.iterations.get(sourceNode)) != null) {
                // predecessor is an iteration
                sourceVertex = iteration.getHeadTask();
                sourceVertexConfig = iteration.getHeadFinalResultConfig();
            } else {
                throw new CompilerException("Bug: Could not resolve source node for a channel.");
            }
        } else {
            // predecessor is its own vertex
            sourceVertexConfig = new TaskConfig(sourceVertex.getConfiguration());
        }
        DistributionPattern pattern = connectJobVertices(inConn, inputIndex, sourceVertex, sourceVertexConfig, targetVertex, targetVertexConfig, isBroadcast);
        // accounting on channels and senders
        numChannelsTotal++;
        if (inConn.isOnDynamicPath()) {
            numChannelsDynamicPath++;
            numDynamicSenderTasksTotal += getNumberOfSendersPerReceiver(pattern, sourceVertex.getParallelism(), targetVertex.getParallelism());
        }
    }
    // reject the case where only some of the connected channels are on the dynamic path, i.e.
    // there is a union between nodes on the static and nodes on the dynamic path
    if (numChannelsDynamicPath > 0 && numChannelsTotal != numChannelsDynamicPath) {
        throw new CompilerException("Error: It is currently not supported to union between dynamic and static path in an iteration.");
    }
    // for inputs on the dynamic path, register the number of sender tasks as the event count
    // of the (broadcast) iterative gate
    if (numDynamicSenderTasksTotal > 0) {
        if (isBroadcast) {
            targetVertexConfig.setBroadcastGateIterativeWithNumberOfEventsUntilInterrupt(inputIndex, numDynamicSenderTasksTotal);
        } else {
            targetVertexConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(inputIndex, numDynamicSenderTasksTotal);
        }
    }
    // the local strategy is added only once. in non-union case that is the actual edge,
    // in the union case, it is the edge between union and the target node
    addLocalInfoFromChannelToConfig(input, targetVertexConfig, inputIndex, isBroadcast);
    return 1;
}
Also used : SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) Channel(org.apache.flink.optimizer.plan.Channel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) DistributionPattern(org.apache.flink.runtime.jobgraph.DistributionPattern) TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) CompilerException(org.apache.flink.optimizer.CompilerException) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode)

Example 15 with PlanNode

use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.

the class JobGraphGenerator method createBulkIterationHead.

/**
 * Sets up the head task of the bulk iteration that contains the given partial solution node
 * and registers it with the iteration's descriptor.
 *
 * <p>The head is either merged into the vertex of the partial solution's only successor, or
 * created as a separate vertex running a no-op driver. Merging requires all of the following:
 * <ol>
 *   <li>merging of auxiliary iteration tasks is enabled and the partial solution has exactly
 *       one outgoing channel,</li>
 *   <li>that channel is a FORWARD connection without local strategy and without temp mode,</li>
 *   <li>the successor has the same parallelism as the partial solution,</li>
 *   <li>the successor is not a union,</li>
 *   <li>the successor is not the root of the step function,</li>
 *   <li>the iteration's own input channel carries no local strategy.</li>
 * </ol>
 *
 * @param pspn the partial solution node of the bulk iteration
 * @return the newly created head vertex, or {@code null} if the head was merged into the
 *         already existing vertex of its successor
 * @throws CompilerException if the successor vertex to merge into does not exist yet, or if
 *         no descriptor has been registered for the iteration
 */
private JobVertex createBulkIterationHead(BulkPartialSolutionPlanNode pspn) {
    final BulkIterationPlanNode bulkIteration = pspn.getContainingIterationNode();

    // decide whether the head can piggyback on its successor; a non-null target means "merge"
    PlanNode mergeTarget = null;
    if (mergeIterationAuxTasks && pspn.getOutgoingChannels().size() == 1) {
        final Channel onlyOut = pspn.getOutgoingChannels().get(0);
        final PlanNode next = onlyOut.getTarget();
        final boolean mergeable = onlyOut.getShipStrategy() == ShipStrategyType.FORWARD
                && onlyOut.getLocalStrategy() == LocalStrategy.NONE
                && onlyOut.getTempMode() == TempMode.NONE
                && next.getParallelism() == pspn.getParallelism()
                && !(next instanceof NAryUnionPlanNode)
                && next != bulkIteration.getRootOfStepFunction()
                && bulkIteration.getInput().getLocalStrategy() == LocalStrategy.NONE;
        if (mergeable) {
            mergeTarget = next;
        }
    }

    final JobVertex head;
    final TaskConfig headCfg;
    final JobVertex created;
    if (mergeTarget == null) {
        // separate head vertex with a no-op driver; everything else happens in the post visit,
        // after the input (the initial partial solution) is connected
        head = new JobVertex("PartialSolution (" + bulkIteration.getNodeName() + ")");
        head.setResources(bulkIteration.getMinResources(), bulkIteration.getPreferredResources());
        head.setInvokableClass(IterationHeadTask.class);
        headCfg = new TaskConfig(head.getConfiguration());
        headCfg.setDriver(NoOpDriver.class);
        created = head;
    } else {
        // adopt the successor's vertex and turn it into the iteration head
        head = this.vertices.get(mergeTarget);
        if (head == null) {
            throw new CompilerException("Bug: Trying to merge solution set with its successor, but successor has not been created.");
        }
        head.setInvokableClass(IterationHeadTask.class);
        headCfg = new TaskConfig(head.getConfiguration());
        created = null;
    }

    // look up the descriptor registered for the iteration and attach the head to it
    final IterationDescriptor descriptor = this.iterations.get(bulkIteration);
    if (descriptor == null) {
        throw new CompilerException("Bug: Iteration descriptor was not created at when translating the iteration node.");
    }
    descriptor.setHeadTask(head, headCfg);
    return created;
}
Also used : NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) Channel(org.apache.flink.optimizer.plan.Channel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) CompilerException(org.apache.flink.optimizer.CompilerException) TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode)

Aggregations

PlanNode (org.apache.flink.optimizer.plan.PlanNode)43 SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)27 DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode)25 Channel (org.apache.flink.optimizer.plan.Channel)24 SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)22 WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode)21 CompilerException (org.apache.flink.optimizer.CompilerException)16 NamedChannel (org.apache.flink.optimizer.plan.NamedChannel)16 OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)15 BulkIterationPlanNode (org.apache.flink.optimizer.plan.BulkIterationPlanNode)14 BulkPartialSolutionPlanNode (org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode)13 IterationPlanNode (org.apache.flink.optimizer.plan.IterationPlanNode)13 NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode)13 SolutionSetPlanNode (org.apache.flink.optimizer.plan.SolutionSetPlanNode)13 SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode)13 WorksetPlanNode (org.apache.flink.optimizer.plan.WorksetPlanNode)13 Plan (org.apache.flink.api.common.Plan)12 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)12 Test (org.junit.Test)12 ArrayList (java.util.ArrayList)11