Examples with WorksetPlanNode - org.apache.flink.optimizer.plan.WorksetPlanNode

Example 6 with WorksetPlanNode

use of org.apache.flink.optimizer.plan.WorksetPlanNode in project flink by apache.

the class WorksetNode method setCandidateProperties.

// --------------------------------------------------------------------------------------------
public void setCandidateProperties(GlobalProperties gProps, LocalProperties lProps, Channel initialInput) {
    if (this.cachedPlans != null) {
        throw new IllegalStateException();
    } else {
        WorksetPlanNode wspn = new WorksetPlanNode(this, "Workset (" + this.getOperator().getName() + ")", gProps, lProps, initialInput);
        this.cachedPlans = Collections.<PlanNode>singletonList(wspn);
    }
}

Also used : WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode)

Example 7 with WorksetPlanNode

use of org.apache.flink.optimizer.plan.WorksetPlanNode in project flink by apache.

the class JobGraphGenerator method postVisit.

/**
	 * This method implements the post-visit during the depth-first traversal. When the post visit happens,
	 * all of the descendants have been processed, so this method connects all of the current node's
	 * predecessors to the current node.
	 * 
	 * @param node
	 *        The node currently processed during the post-visit.
	 * @see org.apache.flink.util.Visitor#postVisit(org.apache.flink.util.Visitable) t
	 */
@Override
public void postVisit(PlanNode node) {
    try {
        // solution sets have no input. the initial solution set input is connected when the iteration node is in its postVisit
        if (node instanceof SourcePlanNode || node instanceof NAryUnionPlanNode || node instanceof SolutionSetPlanNode) {
            return;
        }
        // check if we have an iteration. in that case, translate the step function now
        if (node instanceof IterationPlanNode) {
            // prevent nested iterations
            if (node.isOnDynamicPath()) {
                throw new CompilerException("Nested Iterations are not possible at the moment!");
            }
            // another one), we push the current one onto the stack
            if (this.currentIteration != null) {
                this.iterationStack.add(this.currentIteration);
            }
            this.currentIteration = (IterationPlanNode) node;
            this.currentIteration.acceptForStepFunction(this);
            // pop the current iteration from the stack
            if (this.iterationStack.isEmpty()) {
                this.currentIteration = null;
            } else {
                this.currentIteration = this.iterationStack.remove(this.iterationStack.size() - 1);
            }
            // connect the initial solution set now.
            if (node instanceof WorksetIterationPlanNode) {
                // connect the initial solution set
                WorksetIterationPlanNode wsNode = (WorksetIterationPlanNode) node;
                JobVertex headVertex = this.iterations.get(wsNode).getHeadTask();
                TaskConfig headConfig = new TaskConfig(headVertex.getConfiguration());
                int inputIndex = headConfig.getDriverStrategy().getNumInputs();
                headConfig.setIterationHeadSolutionSetInputIndex(inputIndex);
                translateChannel(wsNode.getInitialSolutionSetInput(), inputIndex, headVertex, headConfig, false);
            }
            return;
        }
        final JobVertex targetVertex = this.vertices.get(node);
        // check whether this node has its own task, or is merged with another one
        if (targetVertex == null) {
            // node's task is merged with another task. it is either chained, of a merged head vertex
            // from an iteration
            final TaskInChain chainedTask;
            if ((chainedTask = this.chainedTasks.get(node)) != null) {
                // Chained Task. Sanity check first...
                final Iterator<Channel> inConns = node.getInputs().iterator();
                if (!inConns.hasNext()) {
                    throw new CompilerException("Bug: Found chained task with no input.");
                }
                final Channel inConn = inConns.next();
                if (inConns.hasNext()) {
                    throw new CompilerException("Bug: Found a chained task with more than one input!");
                }
                if (inConn.getLocalStrategy() != null && inConn.getLocalStrategy() != LocalStrategy.NONE) {
                    throw new CompilerException("Bug: Found a chained task with an input local strategy.");
                }
                if (inConn.getShipStrategy() != null && inConn.getShipStrategy() != ShipStrategyType.FORWARD) {
                    throw new CompilerException("Bug: Found a chained task with an input ship strategy other than FORWARD.");
                }
                JobVertex container = chainedTask.getContainingVertex();
                if (container == null) {
                    final PlanNode sourceNode = inConn.getSource();
                    container = this.vertices.get(sourceNode);
                    if (container == null) {
                        // predecessor is itself chained
                        container = this.chainedTasks.get(sourceNode).getContainingVertex();
                        if (container == null) {
                            throw new IllegalStateException("Bug: Chained task predecessor has not been assigned its containing vertex.");
                        }
                    } else {
                        // predecessor is a proper task job vertex and this is the first chained task. add a forward connection entry.
                        new TaskConfig(container.getConfiguration()).addOutputShipStrategy(ShipStrategyType.FORWARD);
                    }
                    chainedTask.setContainingVertex(container);
                }
                // add info about the input serializer type
                chainedTask.getTaskConfig().setInputSerializer(inConn.getSerializer(), 0);
                // update name of container task
                String containerTaskName = container.getName();
                if (containerTaskName.startsWith("CHAIN ")) {
                    container.setName(containerTaskName + " -> " + chainedTask.getTaskName());
                } else {
                    container.setName("CHAIN " + containerTaskName + " -> " + chainedTask.getTaskName());
                }
                //update resource of container task
                container.setResources(container.getMinResources().merge(node.getMinResources()), container.getPreferredResources().merge(node.getPreferredResources()));
                this.chainedTasksInSequence.add(chainedTask);
                return;
            } else if (node instanceof BulkPartialSolutionPlanNode || node instanceof WorksetPlanNode) {
                // merged iteration head task. the task that the head is merged with will take care of it
                return;
            } else {
                throw new CompilerException("Bug: Unrecognized merged task vertex.");
            }
        }
        if (this.currentIteration != null) {
            JobVertex head = this.iterations.get(this.currentIteration).getHeadTask();
            // their execution determines the deployment slots of the co-location group
            if (node.isOnDynamicPath()) {
                targetVertex.setStrictlyCoLocatedWith(head);
            }
        }
        // create the config that will contain all the description of the inputs
        final TaskConfig targetVertexConfig = new TaskConfig(targetVertex.getConfiguration());
        // get the inputs. if this node is the head of an iteration, we obtain the inputs from the
        // enclosing iteration node, because the inputs are the initial inputs to the iteration.
        final Iterator<Channel> inConns;
        if (node instanceof BulkPartialSolutionPlanNode) {
            inConns = ((BulkPartialSolutionPlanNode) node).getContainingIterationNode().getInputs().iterator();
            // because the partial solution has its own vertex, is has only one (logical) input.
            // note this in the task configuration
            targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(0);
        } else if (node instanceof WorksetPlanNode) {
            WorksetPlanNode wspn = (WorksetPlanNode) node;
            // input that is the initial workset
            inConns = Collections.singleton(wspn.getContainingIterationNode().getInput2()).iterator();
            // because we have a stand-alone (non-merged) workset iteration head, the initial workset will
            // be input 0 and the solution set will be input 1
            targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(0);
            targetVertexConfig.setIterationHeadSolutionSetInputIndex(1);
        } else {
            inConns = node.getInputs().iterator();
        }
        if (!inConns.hasNext()) {
            throw new CompilerException("Bug: Found a non-source task with no input.");
        }
        int inputIndex = 0;
        while (inConns.hasNext()) {
            Channel input = inConns.next();
            inputIndex += translateChannel(input, inputIndex, targetVertex, targetVertexConfig, false);
        }
        // broadcast variables
        int broadcastInputIndex = 0;
        for (NamedChannel broadcastInput : node.getBroadcastInputs()) {
            int broadcastInputIndexDelta = translateChannel(broadcastInput, broadcastInputIndex, targetVertex, targetVertexConfig, true);
            targetVertexConfig.setBroadcastInputName(broadcastInput.getName(), broadcastInputIndex);
            targetVertexConfig.setBroadcastInputSerializer(broadcastInput.getSerializer(), broadcastInputIndex);
            broadcastInputIndex += broadcastInputIndexDelta;
        }
    } catch (Exception e) {
        throw new CompilerException("An error occurred while translating the optimized plan to a JobGraph: " + e.getMessage(), e);
    }
}

Also used : SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) Channel(org.apache.flink.optimizer.plan.Channel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) IOException(java.io.IOException) CompilerException(org.apache.flink.optimizer.CompilerException) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) CompilerException(org.apache.flink.optimizer.CompilerException) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode)

Example 8 with WorksetPlanNode

use of org.apache.flink.optimizer.plan.WorksetPlanNode in project flink by apache.

the class JobGraphGenerator method createWorksetIterationHead.

private JobVertex createWorksetIterationHead(WorksetPlanNode wspn) {
    // get the bulk iteration that corresponds to this partial solution node
    final WorksetIterationPlanNode iteration = wspn.getContainingIterationNode();
    // check whether we need an individual vertex for the partial solution, or whether we
    // attach ourselves to the vertex of the parent node. We can combine the head with a node of 
    // the step function, if
    // 1) There is one parent that the partial solution connects to via a forward pattern and no
    //    local strategy
    // 2) parallelism and the number of subtasks per instance does not change
    // 3) That successor is not a union
    // 4) That successor is not itself the last node of the step function
    // 5) There is no local strategy on the edge for the initial workset, as
    //    this translates to a local strategy that would only be executed in the first superstep
    final boolean merge;
    if (mergeIterationAuxTasks && wspn.getOutgoingChannels().size() == 1) {
        final Channel c = wspn.getOutgoingChannels().get(0);
        final PlanNode successor = c.getTarget();
        merge = c.getShipStrategy() == ShipStrategyType.FORWARD && c.getLocalStrategy() == LocalStrategy.NONE && c.getTempMode() == TempMode.NONE && successor.getParallelism() == wspn.getParallelism() && !(successor instanceof NAryUnionPlanNode) && successor != iteration.getNextWorkSetPlanNode() && iteration.getInitialWorksetInput().getLocalStrategy() == LocalStrategy.NONE;
    } else {
        merge = false;
    }
    // create or adopt the head vertex
    final JobVertex toReturn;
    final JobVertex headVertex;
    final TaskConfig headConfig;
    if (merge) {
        final PlanNode successor = wspn.getOutgoingChannels().get(0).getTarget();
        headVertex = this.vertices.get(successor);
        if (headVertex == null) {
            throw new CompilerException("Bug: Trying to merge solution set with its sucessor, but successor has not been created.");
        }
        // reset the vertex type to iteration head
        headVertex.setInvokableClass(IterationHeadTask.class);
        headConfig = new TaskConfig(headVertex.getConfiguration());
        toReturn = null;
    } else {
        // instantiate the head vertex and give it a no-op driver as the driver strategy.
        // everything else happens in the post visit, after the input (the initial partial solution)
        // is connected.
        headVertex = new JobVertex("IterationHead(" + iteration.getNodeName() + ")");
        headVertex.setResources(iteration.getMinResources(), iteration.getPreferredResources());
        headVertex.setInvokableClass(IterationHeadTask.class);
        headConfig = new TaskConfig(headVertex.getConfiguration());
        headConfig.setDriver(NoOpDriver.class);
        toReturn = headVertex;
    }
    headConfig.setSolutionSetUnmanaged(iteration.getIterationNode().getIterationContract().isSolutionSetUnManaged());
    // create the iteration descriptor and the iteration to it
    IterationDescriptor descr = this.iterations.get(iteration);
    if (descr == null) {
        throw new CompilerException("Bug: Iteration descriptor was not created at when translating the iteration node.");
    }
    descr.setHeadTask(headVertex, headConfig);
    return toReturn;
}

Also used : NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) Channel(org.apache.flink.optimizer.plan.Channel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) CompilerException(org.apache.flink.optimizer.CompilerException) TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig)

Example 9 with WorksetPlanNode

use of org.apache.flink.optimizer.plan.WorksetPlanNode in project flink by apache.

the class JobGraphGenerator method createSingleInputVertex.

// ------------------------------------------------------------------------
// Methods for creating individual vertices
// ------------------------------------------------------------------------
private JobVertex createSingleInputVertex(SingleInputPlanNode node) throws CompilerException {
    final String taskName = node.getNodeName();
    final DriverStrategy ds = node.getDriverStrategy();
    // check, whether chaining is possible
    boolean chaining;
    {
        Channel inConn = node.getInput();
        PlanNode pred = inConn.getSource();
        chaining = ds.getPushChainDriverClass() != null && // first op after union is stand-alone, because union is merged
        !(pred instanceof NAryUnionPlanNode) && // partial solution merges anyways
        !(pred instanceof BulkPartialSolutionPlanNode) && // workset merges anyways
        !(pred instanceof WorksetPlanNode) && // cannot chain with iteration heads currently
        !(pred instanceof IterationPlanNode) && inConn.getShipStrategy() == ShipStrategyType.FORWARD && inConn.getLocalStrategy() == LocalStrategy.NONE && pred.getOutgoingChannels().size() == 1 && node.getParallelism() == pred.getParallelism() && node.getBroadcastInputs().isEmpty();
        // in a tail 
        if (this.currentIteration != null && this.currentIteration instanceof WorksetIterationPlanNode && node.getOutgoingChannels().size() > 0) {
            WorksetIterationPlanNode wspn = (WorksetIterationPlanNode) this.currentIteration;
            if (wspn.getSolutionSetDeltaPlanNode() == pred || wspn.getNextWorkSetPlanNode() == pred) {
                chaining = false;
            }
        }
        // cannot chain the nodes that produce the next workset in a bulk iteration if a termination criterion follows
        if (this.currentIteration != null && this.currentIteration instanceof BulkIterationPlanNode) {
            BulkIterationPlanNode wspn = (BulkIterationPlanNode) this.currentIteration;
            if (node == wspn.getRootOfTerminationCriterion() && wspn.getRootOfStepFunction() == pred) {
                chaining = false;
            } else if (node.getOutgoingChannels().size() > 0 && (wspn.getRootOfStepFunction() == pred || wspn.getRootOfTerminationCriterion() == pred)) {
                chaining = false;
            }
        }
    }
    final JobVertex vertex;
    final TaskConfig config;
    if (chaining) {
        vertex = null;
        config = new TaskConfig(new Configuration());
        this.chainedTasks.put(node, new TaskInChain(node, ds.getPushChainDriverClass(), config, taskName));
    } else {
        // create task vertex
        vertex = new JobVertex(taskName);
        vertex.setResources(node.getMinResources(), node.getPreferredResources());
        vertex.setInvokableClass((this.currentIteration != null && node.isOnDynamicPath()) ? IterationIntermediateTask.class : BatchTask.class);
        config = new TaskConfig(vertex.getConfiguration());
        config.setDriver(ds.getDriverClass());
    }
    // set user code
    config.setStubWrapper(node.getProgramOperator().getUserCodeWrapper());
    config.setStubParameters(node.getProgramOperator().getParameters());
    // set the driver strategy
    config.setDriverStrategy(ds);
    for (int i = 0; i < ds.getNumRequiredComparators(); i++) {
        config.setDriverComparator(node.getComparator(i), i);
    }
    // assign memory, file-handles, etc.
    assignDriverResources(node, config);
    return vertex;
}

Also used : Configuration(org.apache.flink.configuration.Configuration) GlobalConfiguration(org.apache.flink.configuration.GlobalConfiguration) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) BatchTask(org.apache.flink.runtime.operators.BatchTask) Channel(org.apache.flink.optimizer.plan.Channel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) DriverStrategy(org.apache.flink.runtime.operators.DriverStrategy) IterationIntermediateTask(org.apache.flink.runtime.iterative.task.IterationIntermediateTask) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode)

Example 10 with WorksetPlanNode

use of org.apache.flink.optimizer.plan.WorksetPlanNode in project flink by apache.

the class GenericFlatTypePostPass method traverse.

@SuppressWarnings("unchecked")
protected void traverse(PlanNode node, T parentSchema, boolean createUtilities) {
    // distinguish the node types
    if (node instanceof SinkPlanNode) {
        SinkPlanNode sn = (SinkPlanNode) node;
        Channel inchannel = sn.getInput();
        T schema = createEmptySchema();
        sn.postPassHelper = schema;
        // add the sinks information to the schema
        try {
            getSinkSchema(sn, schema);
        } catch (ConflictingFieldTypeInfoException e) {
            throw new CompilerPostPassException("Conflicting type infomation for the data sink '" + sn.getSinkNode().getOperator().getName() + "'.");
        }
        // descend to the input channel
        try {
            propagateToChannel(schema, inchannel, createUtilities);
        } catch (MissingFieldTypeInfoException ex) {
            throw new CompilerPostPassException("Missing type infomation for the channel that inputs to the data sink '" + sn.getSinkNode().getOperator().getName() + "'.");
        }
    } else if (node instanceof SourcePlanNode) {
        if (createUtilities) {
            ((SourcePlanNode) node).setSerializer(createSerializer(parentSchema, node));
        // nothing else to be done here. the source has no input and no strategy itself
        }
    } else if (node instanceof BulkIterationPlanNode) {
        BulkIterationPlanNode iterationNode = (BulkIterationPlanNode) node;
        // get the nodes current schema
        T schema;
        if (iterationNode.postPassHelper == null) {
            schema = createEmptySchema();
            iterationNode.postPassHelper = schema;
        } else {
            schema = (T) iterationNode.postPassHelper;
        }
        schema.increaseNumConnectionsThatContributed();
        // add the parent schema to the schema
        if (propagateParentSchemaDown) {
            addSchemaToSchema(parentSchema, schema, iterationNode.getProgramOperator().getName());
        }
        // check whether all outgoing channels have not yet contributed. come back later if not.
        if (schema.getNumConnectionsThatContributed() < iterationNode.getOutgoingChannels().size()) {
            return;
        }
        if (iterationNode.getRootOfStepFunction() instanceof NAryUnionPlanNode) {
            throw new CompilerException("Optimizer cannot compile an iteration step function where next partial solution is created by a Union node.");
        }
        // traverse the termination criterion for the first time. create schema only, no utilities. Needed in case of intermediate termination criterion
        if (iterationNode.getRootOfTerminationCriterion() != null) {
            SingleInputPlanNode addMapper = (SingleInputPlanNode) iterationNode.getRootOfTerminationCriterion();
            traverse(addMapper.getInput().getSource(), createEmptySchema(), false);
            try {
                addMapper.getInput().setSerializer(createSerializer(createEmptySchema()));
            } catch (MissingFieldTypeInfoException e) {
                throw new RuntimeException(e);
            }
        }
        // traverse the step function for the first time. create schema only, no utilities
        traverse(iterationNode.getRootOfStepFunction(), schema, false);
        T pss = (T) iterationNode.getPartialSolutionPlanNode().postPassHelper;
        if (pss == null) {
            throw new CompilerException("Error in Optimizer Post Pass: Partial solution schema is null after first traversal of the step function.");
        }
        // traverse the step function for the second time, taking the schema of the partial solution
        traverse(iterationNode.getRootOfStepFunction(), pss, createUtilities);
        if (iterationNode.getRootOfTerminationCriterion() != null) {
            SingleInputPlanNode addMapper = (SingleInputPlanNode) iterationNode.getRootOfTerminationCriterion();
            traverse(addMapper.getInput().getSource(), createEmptySchema(), createUtilities);
            try {
                addMapper.getInput().setSerializer(createSerializer(createEmptySchema()));
            } catch (MissingFieldTypeInfoException e) {
                throw new RuntimeException(e);
            }
        }
        // take the schema from the partial solution node and add its fields to the iteration result schema.
        // input and output schema need to be identical, so this is essentially a sanity check
        addSchemaToSchema(pss, schema, iterationNode.getProgramOperator().getName());
        // set the serializer
        if (createUtilities) {
            iterationNode.setSerializerForIterationChannel(createSerializer(pss, iterationNode.getPartialSolutionPlanNode()));
        }
        // done, we can now propagate our info down
        try {
            propagateToChannel(schema, iterationNode.getInput(), createUtilities);
        } catch (MissingFieldTypeInfoException e) {
            throw new CompilerPostPassException("Could not set up runtime strategy for input channel to node '" + iterationNode.getProgramOperator().getName() + "'. Missing type information for key field " + e.getFieldNumber());
        }
    } else if (node instanceof WorksetIterationPlanNode) {
        WorksetIterationPlanNode iterationNode = (WorksetIterationPlanNode) node;
        // get the nodes current schema
        T schema;
        if (iterationNode.postPassHelper == null) {
            schema = createEmptySchema();
            iterationNode.postPassHelper = schema;
        } else {
            schema = (T) iterationNode.postPassHelper;
        }
        schema.increaseNumConnectionsThatContributed();
        // add the parent schema to the schema (which refers to the solution set schema)
        if (propagateParentSchemaDown) {
            addSchemaToSchema(parentSchema, schema, iterationNode.getProgramOperator().getName());
        }
        // check whether all outgoing channels have not yet contributed. come back later if not.
        if (schema.getNumConnectionsThatContributed() < iterationNode.getOutgoingChannels().size()) {
            return;
        }
        if (iterationNode.getNextWorkSetPlanNode() instanceof NAryUnionPlanNode) {
            throw new CompilerException("Optimizer cannot compile a workset iteration step function where the next workset is produced by a Union node.");
        }
        if (iterationNode.getSolutionSetDeltaPlanNode() instanceof NAryUnionPlanNode) {
            throw new CompilerException("Optimizer cannot compile a workset iteration step function where the solution set delta is produced by a Union node.");
        }
        // traverse the step function
        // pass an empty schema to the next workset and the parent schema to the solution set delta
        // these first traversals are schema only
        traverse(iterationNode.getNextWorkSetPlanNode(), createEmptySchema(), false);
        traverse(iterationNode.getSolutionSetDeltaPlanNode(), schema, false);
        T wss = (T) iterationNode.getWorksetPlanNode().postPassHelper;
        T sss = (T) iterationNode.getSolutionSetPlanNode().postPassHelper;
        if (wss == null) {
            throw new CompilerException("Error in Optimizer Post Pass: Workset schema is null after first traversal of the step function.");
        }
        if (sss == null) {
            throw new CompilerException("Error in Optimizer Post Pass: Solution set schema is null after first traversal of the step function.");
        }
        // make the second pass and instantiate the utilities
        traverse(iterationNode.getNextWorkSetPlanNode(), wss, createUtilities);
        traverse(iterationNode.getSolutionSetDeltaPlanNode(), sss, createUtilities);
        // the solution set input and the result must have the same schema, this acts as a sanity check.
        try {
            for (Map.Entry<Integer, X> entry : sss) {
                Integer pos = entry.getKey();
                schema.addType(pos, entry.getValue());
            }
        } catch (ConflictingFieldTypeInfoException e) {
            throw new CompilerPostPassException("Conflicting type information for field " + e.getFieldNumber() + " in node '" + iterationNode.getProgramOperator().getName() + "'. Contradicting types between the " + "result of the iteration and the solution set schema: " + e.getPreviousType() + " and " + e.getNewType() + ". Most probable cause: Invalid constant field annotations.");
        }
        // set the serializers and comparators
        if (createUtilities) {
            WorksetIterationNode optNode = iterationNode.getIterationNode();
            iterationNode.setWorksetSerializer(createSerializer(wss, iterationNode.getWorksetPlanNode()));
            iterationNode.setSolutionSetSerializer(createSerializer(sss, iterationNode.getSolutionSetPlanNode()));
            try {
                iterationNode.setSolutionSetComparator(createComparator(optNode.getSolutionSetKeyFields(), null, sss));
            } catch (MissingFieldTypeInfoException ex) {
                throw new CompilerPostPassException("Could not set up the solution set for workset iteration '" + optNode.getOperator().getName() + "'. Missing type information for key field " + ex.getFieldNumber() + '.');
            }
        }
        // done, we can now propagate our info down
        try {
            propagateToChannel(schema, iterationNode.getInitialSolutionSetInput(), createUtilities);
            propagateToChannel(wss, iterationNode.getInitialWorksetInput(), createUtilities);
        } catch (MissingFieldTypeInfoException ex) {
            throw new CompilerPostPassException("Could not set up runtime strategy for input channel to node '" + iterationNode.getProgramOperator().getName() + "'. Missing type information for key field " + ex.getFieldNumber());
        }
    } else if (node instanceof SingleInputPlanNode) {
        SingleInputPlanNode sn = (SingleInputPlanNode) node;
        // get the nodes current schema
        T schema;
        if (sn.postPassHelper == null) {
            schema = createEmptySchema();
            sn.postPassHelper = schema;
        } else {
            schema = (T) sn.postPassHelper;
        }
        schema.increaseNumConnectionsThatContributed();
        SingleInputNode optNode = sn.getSingleInputNode();
        // add the parent schema to the schema
        if (propagateParentSchemaDown) {
            addSchemaToSchema(parentSchema, schema, optNode, 0);
        }
        // check whether all outgoing channels have not yet contributed. come back later if not.
        if (schema.getNumConnectionsThatContributed() < sn.getOutgoingChannels().size()) {
            return;
        }
        // add the nodes local information
        try {
            getSingleInputNodeSchema(sn, schema);
        } catch (ConflictingFieldTypeInfoException e) {
            throw new CompilerPostPassException(getConflictingTypeErrorMessage(e, optNode.getOperator().getName()));
        }
        if (createUtilities) {
            // parameterize the node's driver strategy
            for (int i = 0; i < sn.getDriverStrategy().getNumRequiredComparators(); i++) {
                try {
                    sn.setComparator(createComparator(sn.getKeys(i), sn.getSortOrders(i), schema), i);
                } catch (MissingFieldTypeInfoException e) {
                    throw new CompilerPostPassException("Could not set up runtime strategy for node '" + optNode.getOperator().getName() + "'. Missing type information for key field " + e.getFieldNumber());
                }
            }
        }
        // done, we can now propagate our info down
        try {
            propagateToChannel(schema, sn.getInput(), createUtilities);
        } catch (MissingFieldTypeInfoException e) {
            throw new CompilerPostPassException("Could not set up runtime strategy for input channel to node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
        }
        // don't forget the broadcast inputs
        for (Channel c : sn.getBroadcastInputs()) {
            try {
                propagateToChannel(createEmptySchema(), c, createUtilities);
            } catch (MissingFieldTypeInfoException e) {
                throw new CompilerPostPassException("Could not set up runtime strategy for broadcast channel in node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
            }
        }
    } else if (node instanceof DualInputPlanNode) {
        DualInputPlanNode dn = (DualInputPlanNode) node;
        // get the nodes current schema
        T schema1;
        T schema2;
        if (dn.postPassHelper1 == null) {
            schema1 = createEmptySchema();
            schema2 = createEmptySchema();
            dn.postPassHelper1 = schema1;
            dn.postPassHelper2 = schema2;
        } else {
            schema1 = (T) dn.postPassHelper1;
            schema2 = (T) dn.postPassHelper2;
        }
        schema1.increaseNumConnectionsThatContributed();
        schema2.increaseNumConnectionsThatContributed();
        TwoInputNode optNode = dn.getTwoInputNode();
        // add the parent schema to the schema
        if (propagateParentSchemaDown) {
            addSchemaToSchema(parentSchema, schema1, optNode, 0);
            addSchemaToSchema(parentSchema, schema2, optNode, 1);
        }
        // check whether all outgoing channels have not yet contributed. come back later if not.
        if (schema1.getNumConnectionsThatContributed() < dn.getOutgoingChannels().size()) {
            return;
        }
        // add the nodes local information
        try {
            getDualInputNodeSchema(dn, schema1, schema2);
        } catch (ConflictingFieldTypeInfoException e) {
            throw new CompilerPostPassException(getConflictingTypeErrorMessage(e, optNode.getOperator().getName()));
        }
        // parameterize the node's driver strategy
        if (createUtilities) {
            if (dn.getDriverStrategy().getNumRequiredComparators() > 0) {
                // set the individual comparators
                try {
                    dn.setComparator1(createComparator(dn.getKeysForInput1(), dn.getSortOrders(), schema1));
                    dn.setComparator2(createComparator(dn.getKeysForInput2(), dn.getSortOrders(), schema2));
                } catch (MissingFieldTypeInfoException e) {
                    throw new CompilerPostPassException("Could not set up runtime strategy for node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
                }
                // set the pair comparator
                try {
                    dn.setPairComparator(createPairComparator(dn.getKeysForInput1(), dn.getKeysForInput2(), dn.getSortOrders(), schema1, schema2));
                } catch (MissingFieldTypeInfoException e) {
                    throw new CompilerPostPassException("Could not set up runtime strategy for node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
                }
            }
        }
        // done, we can now propagate our info down
        try {
            propagateToChannel(schema1, dn.getInput1(), createUtilities);
        } catch (MissingFieldTypeInfoException e) {
            throw new CompilerPostPassException("Could not set up runtime strategy for the first input channel to node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
        }
        try {
            propagateToChannel(schema2, dn.getInput2(), createUtilities);
        } catch (MissingFieldTypeInfoException e) {
            throw new CompilerPostPassException("Could not set up runtime strategy for the second input channel to node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
        }
        // don't forget the broadcast inputs
        for (Channel c : dn.getBroadcastInputs()) {
            try {
                propagateToChannel(createEmptySchema(), c, createUtilities);
            } catch (MissingFieldTypeInfoException e) {
                throw new CompilerPostPassException("Could not set up runtime strategy for broadcast channel in node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
            }
        }
    } else if (node instanceof NAryUnionPlanNode) {
        // only propagate the info down
        try {
            for (Channel channel : node.getInputs()) {
                propagateToChannel(parentSchema, channel, createUtilities);
            }
        } catch (MissingFieldTypeInfoException ex) {
            throw new CompilerPostPassException("Could not set up runtime strategy for the input channel to " + " a union node. Missing type information for field " + ex.getFieldNumber());
        }
    } else // catch the sources of the iterative step functions
    if (node instanceof BulkPartialSolutionPlanNode || node instanceof SolutionSetPlanNode || node instanceof WorksetPlanNode) {
        // get the nodes current schema
        T schema;
        String name;
        if (node instanceof BulkPartialSolutionPlanNode) {
            BulkPartialSolutionPlanNode psn = (BulkPartialSolutionPlanNode) node;
            if (psn.postPassHelper == null) {
                schema = createEmptySchema();
                psn.postPassHelper = schema;
            } else {
                schema = (T) psn.postPassHelper;
            }
            name = "partial solution of bulk iteration '" + psn.getPartialSolutionNode().getIterationNode().getOperator().getName() + "'";
        } else if (node instanceof SolutionSetPlanNode) {
            SolutionSetPlanNode ssn = (SolutionSetPlanNode) node;
            if (ssn.postPassHelper == null) {
                schema = createEmptySchema();
                ssn.postPassHelper = schema;
            } else {
                schema = (T) ssn.postPassHelper;
            }
            name = "solution set of workset iteration '" + ssn.getSolutionSetNode().getIterationNode().getOperator().getName() + "'";
        } else if (node instanceof WorksetPlanNode) {
            WorksetPlanNode wsn = (WorksetPlanNode) node;
            if (wsn.postPassHelper == null) {
                schema = createEmptySchema();
                wsn.postPassHelper = schema;
            } else {
                schema = (T) wsn.postPassHelper;
            }
            name = "workset of workset iteration '" + wsn.getWorksetNode().getIterationNode().getOperator().getName() + "'";
        } else {
            throw new CompilerException();
        }
        schema.increaseNumConnectionsThatContributed();
        // add the parent schema to the schema
        addSchemaToSchema(parentSchema, schema, name);
    } else {
        throw new CompilerPostPassException("Unknown node type encountered: " + node.getClass().getName());
    }
}

Also used : SingleInputNode(org.apache.flink.optimizer.dag.SingleInputNode) SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) Channel(org.apache.flink.optimizer.plan.Channel) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) WorksetIterationNode(org.apache.flink.optimizer.dag.WorksetIterationNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) CompilerException(org.apache.flink.optimizer.CompilerException) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) CompilerPostPassException(org.apache.flink.optimizer.CompilerPostPassException) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) TwoInputNode(org.apache.flink.optimizer.dag.TwoInputNode)

Aggregations

WorksetPlanNode (org.apache.flink.optimizer.plan.WorksetPlanNode)10 Channel (org.apache.flink.optimizer.plan.Channel)9 SolutionSetPlanNode (org.apache.flink.optimizer.plan.SolutionSetPlanNode)9 WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode)9 CompilerException (org.apache.flink.optimizer.CompilerException)8 BulkIterationPlanNode (org.apache.flink.optimizer.plan.BulkIterationPlanNode)8 BulkPartialSolutionPlanNode (org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode)8 DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode)8 SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)8 SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)8 SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode)8 NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode)7 IterationPlanNode (org.apache.flink.optimizer.plan.IterationPlanNode)6 NamedChannel (org.apache.flink.optimizer.plan.NamedChannel)6 PlanNode (org.apache.flink.optimizer.plan.PlanNode)6 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)5 TaskConfig (org.apache.flink.runtime.operators.util.TaskConfig)5 IOException (java.io.IOException)2 CompilerPostPassException (org.apache.flink.optimizer.CompilerPostPassException)2 NoOpUnaryUdfOp (org.apache.flink.optimizer.util.NoOpUnaryUdfOp)2