Usage of org.apache.flink.optimizer.plan.IterationPlanNode in the Apache Flink project.
Class JobGraphGenerator, method compileJobGraph.
/**
 * Translates the given optimized plan into a {@link JobGraph}.
 *
 * <p>The method (re-)initializes the generator's translation state, traverses the plan with this
 * object as the visitor, finalizes all iterations collected during the traversal, registers the
 * chained tasks with their containing vertices, and finally assembles the job graph from the
 * created vertices. The translation state is released again before the graph is returned.
 *
 * @param program The optimized plan to translate. Must not be {@code null}.
 * @param jobId The ID to assign to the job graph. If {@code null}, a new ID is generated.
 * @return The job graph created from the optimized plan.
 * @throws NullPointerException Thrown, if {@code program} is {@code null}.
 * @throws CompilerException Thrown, if the traversal ends inside an unclosed iteration, if an
 *         iteration of an unrecognized type is encountered, or if the execution config cannot
 *         be serialized.
 */
public JobGraph compileJobGraph(OptimizedPlan program, JobID jobId) {
    if (program == null) {
        throw new NullPointerException("Program is null, did you call ExecutionEnvironment.execute()?");
    }
    if (jobId == null) {
        jobId = JobID.generate();
    }

    // set up the state for this translation run
    this.vertices = new HashMap<PlanNode, JobVertex>();
    this.chainedTasks = new HashMap<PlanNode, TaskInChain>();
    this.chainedTasksInSequence = new ArrayList<TaskInChain>();
    this.auxVertices = new ArrayList<JobVertex>();
    this.iterations = new HashMap<IterationPlanNode, IterationDescriptor>();
    this.iterationStack = new ArrayList<IterationPlanNode>();
    this.sharingGroup = new SlotSharingGroup();

    // this starts the traversal that generates the job graph
    program.accept(this);

    // sanity check that we are not somehow in an iteration at the end
    if (this.currentIteration != null) {
        throw new CompilerException("The graph translation ended prematurely, leaving an unclosed iteration.");
    }

    // finalize the iterations
    for (IterationDescriptor iteration : this.iterations.values()) {
        if (iteration.getIterationNode() instanceof BulkIterationPlanNode) {
            finalizeBulkIteration(iteration);
        } else if (iteration.getIterationNode() instanceof WorksetIterationPlanNode) {
            finalizeWorksetIteration(iteration);
        } else {
            // name the offending type so that the failure can be diagnosed
            final Object unknown = iteration.getIterationNode();
            throw new CompilerException("Bug: Unrecognized iteration node type: "
                    + (unknown == null ? "null" : unknown.getClass().getName()));
        }
    }

    // now that the traversal is done, the chained tasks write their configs into their
    // parents' configurations
    for (TaskInChain tic : this.chainedTasksInSequence) {
        TaskConfig t = new TaskConfig(tic.getContainingVertex().getConfiguration());
        t.addChainedTask(tic.getChainedTask(), tic.getTaskConfig(), tic.getTaskName());
    }

    // ----- attach the additional info to the job vertices, for display in the runtime monitor
    attachOperatorNamesAndDescriptions();

    // ----------- finalize the job graph -----------

    // create the job graph object
    JobGraph graph = new JobGraph(jobId, program.getJobName());
    try {
        graph.setExecutionConfig(program.getOriginalPlan().getExecutionConfig());
    } catch (IOException e) {
        // keep the original exception as the cause, it names the type that failed to serialize
        throw new CompilerException("Could not serialize the ExecutionConfig. "
                + "This indicates that non-serializable types (like custom serializers) were registered", e);
    }
    graph.setAllowQueuedScheduling(false);
    graph.setSessionTimeout(program.getOriginalPlan().getSessionTimeout());

    // add vertices to the graph
    for (JobVertex vertex : this.vertices.values()) {
        graph.addVertex(vertex);
    }
    // auxiliary vertices are additionally assigned to the slot sharing group of this run
    for (JobVertex vertex : this.auxVertices) {
        graph.addVertex(vertex);
        vertex.setSlotSharingGroup(sharingGroup);
    }

    // add registered cache file into job configuration
    for (Entry<String, DistributedCacheEntry> e : program.getOriginalPlan().getCachedFiles()) {
        DistributedCache.writeFileInfoToConfig(e.getKey(), e.getValue(), graph.getJobConfiguration());
    }

    // release all references again
    this.vertices = null;
    this.chainedTasks = null;
    this.chainedTasksInSequence = null;
    this.auxVertices = null;
    this.iterations = null;
    this.iterationStack = null;

    // return job graph
    return graph;
}
Usage of org.apache.flink.optimizer.plan.IterationPlanNode in the Apache Flink project.
Class PlanFinalizer, method preVisit.
/**
 * Pre-visit step of the plan finalization. Registers the node, records sinks and sources,
 * links iteration placeholder nodes (partial solution, workset, solution set) to the iteration
 * that is currently on top of the iteration stack, wires the input channels to their targets
 * and sources, accumulates the memory consumer weights, and descends into the step function
 * of iteration nodes.
 *
 * @param visitable The plan node that is visited.
 * @return {@code false} if the node was visited before (no further descent), {@code true} otherwise.
 */
@Override
public boolean preVisit(PlanNode visitable) {
    // a node that is already registered has been handled; do not descend a second time
    final boolean firstVisit = this.allNodes.add(visitable);
    if (!firstVisit) {
        return false;
    }

    if (visitable instanceof SinkPlanNode) {
        this.sinks.add((SinkPlanNode) visitable);
    } else if (visitable instanceof SourcePlanNode) {
        this.sources.add((SourcePlanNode) visitable);
    } else if (visitable instanceof BinaryUnionPlanNode) {
        final BinaryUnionPlanNode union = (BinaryUnionPlanNode) visitable;
        if (union.unionsStaticAndDynamicPath()) {
            union.setDriverStrategy(DriverStrategy.UNION_WITH_CACHED);
        }
    } else if (visitable instanceof BulkPartialSolutionPlanNode) {
        // the partial solution must learn which bulk iteration encloses it
        final BulkPartialSolutionPlanNode partialSolution = (BulkPartialSolutionPlanNode) visitable;
        final IterationPlanNode enclosing = this.stackOfIterationNodes.peekLast();
        // instanceof is false for null, so this also covers an empty stack
        if (!(enclosing instanceof BulkIterationPlanNode)) {
            throw new CompilerException("Bug: Error finalizing the plan. " + "Cannot associate the node for a partial solutions with its containing iteration.");
        }
        partialSolution.setContainingIterationNode((BulkIterationPlanNode) enclosing);
    } else if (visitable instanceof WorksetPlanNode) {
        // the workset must learn which workset iteration encloses it
        final WorksetPlanNode workset = (WorksetPlanNode) visitable;
        final IterationPlanNode enclosing = this.stackOfIterationNodes.peekLast();
        if (!(enclosing instanceof WorksetIterationPlanNode)) {
            throw new CompilerException("Bug: Error finalizing the plan. " + "Cannot associate the node for a partial solutions with its containing iteration.");
        }
        workset.setContainingIterationNode((WorksetIterationPlanNode) enclosing);
    } else if (visitable instanceof SolutionSetPlanNode) {
        // the solution set must learn which workset iteration encloses it
        final SolutionSetPlanNode solutionSet = (SolutionSetPlanNode) visitable;
        final IterationPlanNode enclosing = this.stackOfIterationNodes.peekLast();
        if (!(enclosing instanceof WorksetIterationPlanNode)) {
            throw new CompilerException("Bug: Error finalizing the plan. " + "Cannot associate the node for a partial solutions with its containing iteration.");
        }
        solutionSet.setContainingIterationNode((WorksetIterationPlanNode) enclosing);
    }

    // Wire up the regular inputs: the channel gets this node as its target and is registered
    // as an outgoing channel at its source.
    for (Channel regularInput : visitable.getInputs()) {
        regularInput.setTarget(visitable);
        regularInput.getSource().addOutgoingChannel(regularInput);
    }
    // same wiring for the broadcast inputs
    for (Channel broadcastInput : visitable.getBroadcastInputs()) {
        broadcastInput.setTarget(visitable);
        broadcastInput.getSource().addOutgoingChannel(broadcastInput);
    }

    // Memory accounting: the node's own weight, plus one unit for every damming local strategy
    // and one unit for every temp mode other than NONE on any regular or broadcast input.
    this.memoryConsumerWeights += visitable.getMemoryConsumerWeight();
    for (Channel regularInput : visitable.getInputs()) {
        if (regularInput.getLocalStrategy().dams()) {
            this.memoryConsumerWeights += 1;
        }
        if (regularInput.getTempMode() != TempMode.NONE) {
            this.memoryConsumerWeights += 1;
        }
    }
    for (Channel broadcastInput : visitable.getBroadcastInputs()) {
        if (broadcastInput.getLocalStrategy().dams()) {
            this.memoryConsumerWeights += 1;
        }
        if (broadcastInput.getTempMode() != TempMode.NONE) {
            this.memoryConsumerWeights += 1;
        }
    }

    // Iterations: run this visitor over the step function while the iteration node is on top
    // of the stack, so that nested placeholder nodes can find it.
    if (visitable instanceof IterationPlanNode) {
        final IterationPlanNode iterationNode = (IterationPlanNode) visitable;
        this.stackOfIterationNodes.addLast(iterationNode);
        iterationNode.acceptForStepFunction(this);
        this.stackOfIterationNodes.removeLast();
    }

    return true;
}
Usage of org.apache.flink.optimizer.plan.IterationPlanNode in the Apache Flink project.
Class RangePartitionRewriter, method postVisit.
/**
 * Post-visit step that rewrites range-partitioned input channels of the given node. Iteration
 * nodes additionally have their step function traversed by this visitor, once per node.
 *
 * @param node The plan node whose input channels are inspected.
 * @throws InvalidProgramException Thrown, if a range partitioning without a user-supplied data
 *         distribution is found on a node that lies on a dynamic path.
 */
@Override
public void postVisit(PlanNode node) {
    if (node instanceof IterationPlanNode) {
        final IterationPlanNode iteration = (IterationPlanNode) node;
        final boolean seenBefore = visitedIterationNodes.contains(iteration);
        if (!seenBefore) {
            // mark first, then descend into the step function
            visitedIterationNodes.add(iteration);
            iteration.acceptForStepFunction(this);
        }
    }

    for (Channel input : node.getInputs()) {
        // only range partitioning is of interest here
        if (input.getShipStrategy() != ShipStrategyType.PARTITION_RANGE) {
            continue;
        }
        // a channel that carries a data distribution is left as it is
        if (input.getDataDistribution() != null) {
            continue;
        }
        if (node.isOnDynamicPath()) {
            throw new InvalidProgramException("Range Partitioning not supported within iterations if users do not supply the data distribution.");
        }

        // swap the original channel at its source for the channels produced by the rewrite
        final PlanNode producer = input.getSource();
        final List<Channel> replacementChannels = rewriteRangePartitionChannel(input);
        producer.getOutgoingChannels().remove(input);
        producer.getOutgoingChannels().addAll(replacementChannels);
    }
}
Usage of org.apache.flink.optimizer.plan.IterationPlanNode in the Apache Flink project.
Class JobGraphGenerator, method postVisit.
/**
 * This method implements the post-visit during the depth-first traversal. When the post visit happens,
 * all of the descendants have been processed, so this method connects all of the current node's
 * predecessors to the current node.
 *
 * <p>Three kinds of nodes are handled differently from the regular case: sources, n-ary unions and
 * solution sets are skipped; iteration nodes trigger the translation of their step function; and
 * nodes without a job vertex of their own are either registered as chained tasks or skipped as
 * merged iteration heads.
 *
 * <p>Any exception raised during the translation is wrapped into a {@code CompilerException}.
 *
 * @param node
 *        The node currently processed during the post-visit.
 * @see org.apache.flink.util.Visitor#postVisit(org.apache.flink.util.Visitable)
 */
@Override
public void postVisit(PlanNode node) {
try {
// solution sets have no input. the initial solution set input is connected when the iteration node is in its postVisit
// NOTE(review): sources and n-ary unions are skipped here as well; unions are presumably
// handled when the channels of their consumers are translated - confirm against translateChannel
if (node instanceof SourcePlanNode || node instanceof NAryUnionPlanNode || node instanceof SolutionSetPlanNode) {
return;
}
// check if we have an iteration. in that case, translate the step function now
if (node instanceof IterationPlanNode) {
// prevent nested iterations
if (node.isOnDynamicPath()) {
throw new CompilerException("Nested Iterations are not possible at the moment!");
}
// if an iteration is already being translated (we are inside the step function of
// another one), we push the current one onto the stack
if (this.currentIteration != null) {
this.iterationStack.add(this.currentIteration);
}
// translate the step function with this node as the current iteration
this.currentIteration = (IterationPlanNode) node;
this.currentIteration.acceptForStepFunction(this);
// pop the current iteration from the stack
if (this.iterationStack.isEmpty()) {
this.currentIteration = null;
} else {
this.currentIteration = this.iterationStack.remove(this.iterationStack.size() - 1);
}
// connect the initial solution set now.
if (node instanceof WorksetIterationPlanNode) {
// connect the initial solution set
WorksetIterationPlanNode wsNode = (WorksetIterationPlanNode) node;
JobVertex headVertex = this.iterations.get(wsNode).getHeadTask();
TaskConfig headConfig = new TaskConfig(headVertex.getConfiguration());
// the solution set input index is the number of inputs of the head's driver strategy,
// i.e. it comes directly after the driver's own inputs
int inputIndex = headConfig.getDriverStrategy().getNumInputs();
headConfig.setIterationHeadSolutionSetInputIndex(inputIndex);
translateChannel(wsNode.getInitialSolutionSetInput(), inputIndex, headVertex, headConfig, false);
}
return;
}
final JobVertex targetVertex = this.vertices.get(node);
// check whether this node has its own task, or is merged with another one
if (targetVertex == null) {
// node's task is merged with another task. it is either chained, or a merged head vertex
// from an iteration
final TaskInChain chainedTask;
if ((chainedTask = this.chainedTasks.get(node)) != null) {
// Chained Task. Sanity check first...
// a chained task must have exactly one input, without local strategy and with FORWARD shipping
final Iterator<Channel> inConns = node.getInputs().iterator();
if (!inConns.hasNext()) {
throw new CompilerException("Bug: Found chained task with no input.");
}
final Channel inConn = inConns.next();
if (inConns.hasNext()) {
throw new CompilerException("Bug: Found a chained task with more than one input!");
}
if (inConn.getLocalStrategy() != null && inConn.getLocalStrategy() != LocalStrategy.NONE) {
throw new CompilerException("Bug: Found a chained task with an input local strategy.");
}
if (inConn.getShipStrategy() != null && inConn.getShipStrategy() != ShipStrategyType.FORWARD) {
throw new CompilerException("Bug: Found a chained task with an input ship strategy other than FORWARD.");
}
// resolve the vertex that contains the chained task, if it has not been assigned yet
JobVertex container = chainedTask.getContainingVertex();
if (container == null) {
final PlanNode sourceNode = inConn.getSource();
container = this.vertices.get(sourceNode);
if (container == null) {
// predecessor is itself chained
// NOTE(review): if the predecessor is neither a vertex nor a chained task, the lookup
// below yields null and fails with a NullPointerException (wrapped by the catch below)
container = this.chainedTasks.get(sourceNode).getContainingVertex();
if (container == null) {
throw new IllegalStateException("Bug: Chained task predecessor has not been assigned its containing vertex.");
}
} else {
// predecessor is a proper task job vertex and this is the first chained task. add a forward connection entry.
new TaskConfig(container.getConfiguration()).addOutputShipStrategy(ShipStrategyType.FORWARD);
}
chainedTask.setContainingVertex(container);
}
// add info about the input serializer type
chainedTask.getTaskConfig().setInputSerializer(inConn.getSerializer(), 0);
// update name of container task
// the "CHAIN " prefix is added only once, subsequent chained tasks are appended with " -> "
String containerTaskName = container.getName();
if (containerTaskName.startsWith("CHAIN ")) {
container.setName(containerTaskName + " -> " + chainedTask.getTaskName());
} else {
container.setName("CHAIN " + containerTaskName + " -> " + chainedTask.getTaskName());
}
// update resource of container task: merge the chained node's resources into the container's
container.setResources(container.getMinResources().merge(node.getMinResources()), container.getPreferredResources().merge(node.getPreferredResources()));
// remember the chained task in traversal order
this.chainedTasksInSequence.add(chainedTask);
return;
} else if (node instanceof BulkPartialSolutionPlanNode || node instanceof WorksetPlanNode) {
// merged iteration head task. the task that the head is merged with will take care of it
return;
} else {
throw new CompilerException("Bug: Unrecognized merged task vertex.");
}
}
// from here on, the node has its own job vertex
if (this.currentIteration != null) {
JobVertex head = this.iterations.get(this.currentIteration).getHeadTask();
// only nodes on the dynamic path are strictly co-located with the iteration head.
// NOTE(review): static-path nodes are presumably excluded because otherwise
// their execution determines the deployment slots of the co-location group
if (node.isOnDynamicPath()) {
targetVertex.setStrictlyCoLocatedWith(head);
}
}
// create the config that will contain all the description of the inputs
final TaskConfig targetVertexConfig = new TaskConfig(targetVertex.getConfiguration());
// get the inputs. if this node is the head of an iteration, we obtain the inputs from the
// enclosing iteration node, because the inputs are the initial inputs to the iteration.
final Iterator<Channel> inConns;
if (node instanceof BulkPartialSolutionPlanNode) {
inConns = ((BulkPartialSolutionPlanNode) node).getContainingIterationNode().getInputs().iterator();
// because the partial solution has its own vertex, it has only one (logical) input.
// note this in the task configuration
targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(0);
} else if (node instanceof WorksetPlanNode) {
WorksetPlanNode wspn = (WorksetPlanNode) node;
// input that is the initial workset
inConns = Collections.singleton(wspn.getContainingIterationNode().getInput2()).iterator();
// because we have a stand-alone (non-merged) workset iteration head, the initial workset will
// be input 0 and the solution set will be input 1
targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(0);
targetVertexConfig.setIterationHeadSolutionSetInputIndex(1);
} else {
inConns = node.getInputs().iterator();
}
if (!inConns.hasNext()) {
throw new CompilerException("Bug: Found a non-source task with no input.");
}
// translate the regular inputs. the index advances by the value returned from translateChannel
// (presumably the number of logical inputs the channel was translated into - TODO confirm)
int inputIndex = 0;
while (inConns.hasNext()) {
Channel input = inConns.next();
inputIndex += translateChannel(input, inputIndex, targetVertex, targetVertexConfig, false);
}
// broadcast variables
// name and serializer are registered under the index at which the broadcast input starts
int broadcastInputIndex = 0;
for (NamedChannel broadcastInput : node.getBroadcastInputs()) {
int broadcastInputIndexDelta = translateChannel(broadcastInput, broadcastInputIndex, targetVertex, targetVertexConfig, true);
targetVertexConfig.setBroadcastInputName(broadcastInput.getName(), broadcastInputIndex);
targetVertexConfig.setBroadcastInputSerializer(broadcastInput.getSerializer(), broadcastInputIndex);
broadcastInputIndex += broadcastInputIndexDelta;
}
} catch (Exception e) {
// wrap everything (including the CompilerExceptions thrown above) and keep the cause
throw new CompilerException("An error occurred while translating the optimized plan to a JobGraph: " + e.getMessage(), e);
}
}
Usage of org.apache.flink.optimizer.plan.IterationPlanNode in the Apache Flink project.
Class JobGraphGenerator, method createSingleInputVertex.
// ------------------------------------------------------------------------
// Methods for creating individual vertices
// ------------------------------------------------------------------------

/**
 * Creates the task for a single-input plan node. Depending on the driver strategy, the
 * predecessor and the connecting channel, the node is either registered as a chained task
 * (no vertex of its own) or gets a dedicated job vertex.
 *
 * @param node The single-input plan node to translate.
 * @return The job vertex created for the node, or {@code null} if the node was registered as a
 *         chained task.
 * @throws CompilerException Declared by the original signature; not thrown directly in this method.
 */
private JobVertex createSingleInputVertex(SingleInputPlanNode node) throws CompilerException {
    final String taskName = node.getNodeName();
    final DriverStrategy ds = node.getDriverStrategy();

    final Channel inputChannel = node.getInput();
    final PlanNode predecessor = inputChannel.getSource();

    // --- decide whether the node can be chained to its predecessor ---

    // the driver needs a chainable (push) variant
    boolean chaining = ds.getPushChainDriverClass() != null;

    // predecessors that rule out chaining: n-ary unions (the union is merged, so the first
    // operator after it is stand-alone), partial solutions and worksets (they merge anyways),
    // and iteration nodes (chaining with iteration heads is currently not possible)
    chaining = chaining
            && !(predecessor instanceof NAryUnionPlanNode
                    || predecessor instanceof BulkPartialSolutionPlanNode
                    || predecessor instanceof WorksetPlanNode
                    || predecessor instanceof IterationPlanNode);

    // the connection must be a plain forward connection without a local strategy
    chaining = chaining
            && inputChannel.getShipStrategy() == ShipStrategyType.FORWARD
            && inputChannel.getLocalStrategy() == LocalStrategy.NONE;

    // the predecessor may only feed this node, parallelism must match, no broadcast inputs
    chaining = chaining
            && predecessor.getOutgoingChannels().size() == 1
            && node.getParallelism() == predecessor.getParallelism()
            && node.getBroadcastInputs().isEmpty();

    // workset iteration: a node with outgoing channels is not chained to the node that produces
    // the solution set delta or the next workset
    if (this.currentIteration instanceof WorksetIterationPlanNode && node.getOutgoingChannels().size() > 0) {
        final WorksetIterationPlanNode worksetIteration = (WorksetIterationPlanNode) this.currentIteration;
        if (worksetIteration.getSolutionSetDeltaPlanNode() == predecessor
                || worksetIteration.getNextWorkSetPlanNode() == predecessor) {
            chaining = false;
        }
    }

    // bulk iteration: the root of the termination criterion is not chained to the root of the
    // step function, and a node with outgoing channels is not chained to either of these roots
    if (this.currentIteration instanceof BulkIterationPlanNode) {
        final BulkIterationPlanNode bulkIteration = (BulkIterationPlanNode) this.currentIteration;
        final boolean terminationRootAfterStepRoot =
                node == bulkIteration.getRootOfTerminationCriterion()
                        && bulkIteration.getRootOfStepFunction() == predecessor;
        if (terminationRootAfterStepRoot
                || (node.getOutgoingChannels().size() > 0
                        && (bulkIteration.getRootOfStepFunction() == predecessor
                                || bulkIteration.getRootOfTerminationCriterion() == predecessor))) {
            chaining = false;
        }
    }

    // --- create either the chained task entry or the stand-alone vertex ---

    final JobVertex vertex;
    final TaskConfig config;
    if (!chaining) {
        // stand-alone task vertex
        vertex = new JobVertex(taskName);
        vertex.setResources(node.getMinResources(), node.getPreferredResources());
        // nodes on the dynamic path of an iteration run as iteration intermediate tasks
        if (this.currentIteration != null && node.isOnDynamicPath()) {
            vertex.setInvokableClass(IterationIntermediateTask.class);
        } else {
            vertex.setInvokableClass(BatchTask.class);
        }
        config = new TaskConfig(vertex.getConfiguration());
        config.setDriver(ds.getDriverClass());
    } else {
        // chained task: no vertex, the configuration lives in a fresh Configuration object
        vertex = null;
        config = new TaskConfig(new Configuration());
        this.chainedTasks.put(node, new TaskInChain(node, ds.getPushChainDriverClass(), config, taskName));
    }

    // set user code
    config.setStubWrapper(node.getProgramOperator().getUserCodeWrapper());
    config.setStubParameters(node.getProgramOperator().getParameters());

    // set the driver strategy and the comparators it requires
    config.setDriverStrategy(ds);
    final int requiredComparators = ds.getNumRequiredComparators();
    for (int comparatorIndex = 0; comparatorIndex < requiredComparators; comparatorIndex++) {
        config.setDriverComparator(node.getComparator(comparatorIndex), comparatorIndex);
    }

    // assign memory, file-handles, etc.
    assignDriverResources(node, config);
    return vertex;
}
Aggregations