Use of org.apache.flink.optimizer.plan.BulkIterationPlanNode in project flink by apache.
In class JobGraphGenerator, method finalizeBulkIteration.
private void finalizeBulkIteration(IterationDescriptor descr) {
final BulkIterationPlanNode bulkNode = (BulkIterationPlanNode) descr.getIterationNode();
final JobVertex headVertex = descr.getHeadTask();
final TaskConfig headConfig = new TaskConfig(headVertex.getConfiguration());
final TaskConfig headFinalOutputConfig = descr.getHeadFinalResultConfig();
// ------------ finalize the head config with the final outputs and the sync gate ------------
final int numStepFunctionOuts = headConfig.getNumOutputs();
final int numFinalOuts = headFinalOutputConfig.getNumOutputs();
if (numStepFunctionOuts == 0) {
throw new CompilerException("The iteration has no operation inside the step function.");
}
headConfig.setIterationHeadFinalOutputConfig(headFinalOutputConfig);
headConfig.setIterationHeadIndexOfSyncOutput(numStepFunctionOuts + numFinalOuts);
final double relativeMemForBackChannel = bulkNode.getRelativeMemoryPerSubTask();
if (relativeMemForBackChannel <= 0) {
throw new CompilerException("Bug: No memory has been assigned to the iteration back channel.");
}
headConfig.setRelativeBackChannelMemory(relativeMemForBackChannel);
// --------------------------- create the sync task ---------------------------
final JobVertex sync = new JobVertex("Sync (" + bulkNode.getNodeName() + ")");
sync.setResources(bulkNode.getMinResources(), bulkNode.getPreferredResources());
sync.setInvokableClass(IterationSynchronizationSinkTask.class);
sync.setParallelism(1);
sync.setMaxParallelism(1);
this.auxVertices.add(sync);
final TaskConfig syncConfig = new TaskConfig(sync.getConfiguration());
syncConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, headVertex.getParallelism());
// set the number of iterations / the convergence criterion for the sync
final int maxNumIterations = bulkNode.getIterationNode().getIterationContract().getMaximumNumberOfIterations();
if (maxNumIterations < 1) {
throw new CompilerException("Cannot create bulk iteration with unspecified maximum number of iterations.");
}
syncConfig.setNumberOfIterations(maxNumIterations);
// connect the sync task
sync.connectNewDataSetAsInput(headVertex, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
// ----------------------------- create the iteration tail ------------------------------
final PlanNode rootOfTerminationCriterion = bulkNode.getRootOfTerminationCriterion();
final PlanNode rootOfStepFunction = bulkNode.getRootOfStepFunction();
final TaskConfig tailConfig;
JobVertex rootOfStepFunctionVertex = this.vertices.get(rootOfStepFunction);
if (rootOfStepFunctionVertex == null) {
// last op is chained
final TaskInChain taskInChain = this.chainedTasks.get(rootOfStepFunction);
if (taskInChain == null) {
throw new CompilerException("Bug: Tail of step function not found as vertex or chained task.");
}
rootOfStepFunctionVertex = taskInChain.getContainingVertex();
// the fake channel is statically typed to pact record. no data is sent over this channel anyway.
tailConfig = taskInChain.getTaskConfig();
} else {
tailConfig = new TaskConfig(rootOfStepFunctionVertex.getConfiguration());
}
tailConfig.setIsWorksetUpdate();
// No following termination criterion
if (rootOfStepFunction.getOutgoingChannels().isEmpty()) {
rootOfStepFunctionVertex.setInvokableClass(IterationTailTask.class);
tailConfig.setOutputSerializer(bulkNode.getSerializerForIterationChannel());
}
// create the fake output task for termination criterion, if needed
final TaskConfig tailConfigOfTerminationCriterion;
// If we have a termination criterion and it is not an intermediate node
if (rootOfTerminationCriterion != null && rootOfTerminationCriterion.getOutgoingChannels().isEmpty()) {
JobVertex rootOfTerminationCriterionVertex = this.vertices.get(rootOfTerminationCriterion);
if (rootOfTerminationCriterionVertex == null) {
// last op is chained
final TaskInChain taskInChain = this.chainedTasks.get(rootOfTerminationCriterion);
if (taskInChain == null) {
throw new CompilerException("Bug: Tail of termination criterion not found as vertex or chained task.");
}
rootOfTerminationCriterionVertex = taskInChain.getContainingVertex();
// the fake channel is statically typed to pact record. no data is sent over this channel anyway.
tailConfigOfTerminationCriterion = taskInChain.getTaskConfig();
} else {
tailConfigOfTerminationCriterion = new TaskConfig(rootOfTerminationCriterionVertex.getConfiguration());
}
rootOfTerminationCriterionVertex.setInvokableClass(IterationTailTask.class);
// Hack
tailConfigOfTerminationCriterion.setIsSolutionSetUpdate();
tailConfigOfTerminationCriterion.setOutputSerializer(bulkNode.getSerializerForIterationChannel());
// tell the head that it needs to wait for the solution set updates
headConfig.setWaitForSolutionSetUpdate();
}
// ------------------- register the aggregators -------------------
AggregatorRegistry aggs = bulkNode.getIterationNode().getIterationContract().getAggregators();
Collection<AggregatorWithName<?>> allAggregators = aggs.getAllRegisteredAggregators();
headConfig.addIterationAggregators(allAggregators);
syncConfig.addIterationAggregators(allAggregators);
String convAggName = aggs.getConvergenceCriterionAggregatorName();
ConvergenceCriterion<?> convCriterion = aggs.getConvergenceCriterion();
if (convCriterion != null || convAggName != null) {
if (convCriterion == null) {
throw new CompilerException("Error: Convergence criterion aggregator set, but criterion is null.");
}
if (convAggName == null) {
throw new CompilerException("Error: Aggregator convergence criterion set, but aggregator is null.");
}
syncConfig.setConvergenceCriterion(convAggName, convCriterion);
}
}
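For orientation, here is a minimal DataSet program that compiles through this code path; the program itself is an illustrative sketch and not taken from the Flink sources. The argument to iterate() supplies the maximum iteration count that ends up in syncConfig.setNumberOfIterations, and the root of the step function becomes the IterationTailTask wired up above.
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.IterativeDataSet;
public class BulkIterationExample {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // the argument to iterate() is the maximum number of iterations checked by the sync task
        IterativeDataSet<Long> iteration = env.fromElements(0L).iterate(10);
        // the step function; its root operator is turned into the iteration tail
        DataSet<Long> step = iteration.map(new MapFunction<Long, Long>() {
            @Override
            public Long map(Long value) {
                return value + 1;
            }
        });
        iteration.closeWith(step).print();
    }
}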
Use of org.apache.flink.optimizer.plan.BulkIterationPlanNode in project flink by apache.
In class JobGraphGenerator, method translateChannel.
private int translateChannel(Channel input, int inputIndex, JobVertex targetVertex, TaskConfig targetVertexConfig, boolean isBroadcast) throws Exception {
final PlanNode inputPlanNode = input.getSource();
final Iterator<Channel> allInChannels;
if (inputPlanNode instanceof NAryUnionPlanNode) {
allInChannels = ((NAryUnionPlanNode) inputPlanNode).getListOfInputs().iterator();
// if the union's outgoing channel is a batch exchange, propagate the batch exchange mode
// to all of its inputs as well; otherwise we risk deadlocks when closing a branching flow at runtime.
for (Channel in : inputPlanNode.getInputs()) {
if (input.getDataExchangeMode().equals(DataExchangeMode.BATCH)) {
in.setDataExchangeMode(DataExchangeMode.BATCH);
}
if (isBroadcast) {
in.setShipStrategy(ShipStrategyType.BROADCAST, in.getDataExchangeMode());
}
}
// The outgoing connection of an NAryUnion must be a forward connection.
if (input.getShipStrategy() != ShipStrategyType.FORWARD && !isBroadcast) {
throw new CompilerException("Optimized plan contains Union with non-forward outgoing ship strategy.");
}
} else if (inputPlanNode instanceof BulkPartialSolutionPlanNode) {
if (this.vertices.get(inputPlanNode) == null) {
// merged iteration head
final BulkPartialSolutionPlanNode pspn = (BulkPartialSolutionPlanNode) inputPlanNode;
final BulkIterationPlanNode iterationNode = pspn.getContainingIterationNode();
// check if the iteration's input is a union
if (iterationNode.getInput().getSource() instanceof NAryUnionPlanNode) {
allInChannels = (iterationNode.getInput().getSource()).getInputs().iterator();
} else {
allInChannels = Collections.singletonList(iterationNode.getInput()).iterator();
}
// also, set the index of the gate with the partial solution
targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(inputIndex);
} else {
// standalone iteration head
allInChannels = Collections.singletonList(input).iterator();
}
} else if (inputPlanNode instanceof WorksetPlanNode) {
if (this.vertices.get(inputPlanNode) == null) {
// merged iteration head
final WorksetPlanNode wspn = (WorksetPlanNode) inputPlanNode;
final WorksetIterationPlanNode iterationNode = wspn.getContainingIterationNode();
// check if the iteration's input is a union
if (iterationNode.getInput2().getSource() instanceof NAryUnionPlanNode) {
allInChannels = (iterationNode.getInput2().getSource()).getInputs().iterator();
} else {
allInChannels = Collections.singletonList(iterationNode.getInput2()).iterator();
}
// also, set the index of the gate with the workset
targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(inputIndex);
} else {
// standalone iteration head
allInChannels = Collections.singletonList(input).iterator();
}
} else if (inputPlanNode instanceof SolutionSetPlanNode) {
// the solution set is accessed as a local index lookup rather than a vertex connection, so no edge is translated here
return 0;
} else {
allInChannels = Collections.singletonList(input).iterator();
}
// check that the type serializer is consistent
TypeSerializerFactory<?> typeSerFact = null;
// accounting for channels on the dynamic path
int numChannelsTotal = 0;
int numChannelsDynamicPath = 0;
int numDynamicSenderTasksTotal = 0;
// expand the channel to all the union channels, in case there is a union operator at its source
while (allInChannels.hasNext()) {
final Channel inConn = allInChannels.next();
// sanity check the common serializer
if (typeSerFact == null) {
typeSerFact = inConn.getSerializer();
} else if (!typeSerFact.equals(inConn.getSerializer())) {
throw new CompilerException("Conflicting types in union operator.");
}
final PlanNode sourceNode = inConn.getSource();
JobVertex sourceVertex = this.vertices.get(sourceNode);
TaskConfig sourceVertexConfig;
if (sourceVertex == null) {
// this predecessor is chained to another task or an iteration
final TaskInChain chainedTask;
final IterationDescriptor iteration;
if ((chainedTask = this.chainedTasks.get(sourceNode)) != null) {
// push chained task
if (chainedTask.getContainingVertex() == null) {
throw new IllegalStateException("Bug: Chained task has not been assigned its containing vertex when connecting.");
}
sourceVertex = chainedTask.getContainingVertex();
sourceVertexConfig = chainedTask.getTaskConfig();
} else if ((iteration = this.iterations.get(sourceNode)) != null) {
// predecessor is an iteration
sourceVertex = iteration.getHeadTask();
sourceVertexConfig = iteration.getHeadFinalResultConfig();
} else {
throw new CompilerException("Bug: Could not resolve source node for a channel.");
}
} else {
// predecessor is its own vertex
sourceVertexConfig = new TaskConfig(sourceVertex.getConfiguration());
}
DistributionPattern pattern = connectJobVertices(inConn, inputIndex, sourceVertex, sourceVertexConfig, targetVertex, targetVertexConfig, isBroadcast);
// accounting on channels and senders
numChannelsTotal++;
if (inConn.isOnDynamicPath()) {
numChannelsDynamicPath++;
numDynamicSenderTasksTotal += getNumberOfSendersPerReceiver(pattern, sourceVertex.getParallelism(), targetVertex.getParallelism());
}
}
// reject the unsupported case where the input is a union between nodes on the static and nodes on the dynamic path
if (numChannelsDynamicPath > 0 && numChannelsTotal != numChannelsDynamicPath) {
throw new CompilerException("Error: It is currently not supported to union between dynamic and static path in an iteration.");
}
if (numDynamicSenderTasksTotal > 0) {
if (isBroadcast) {
targetVertexConfig.setBroadcastGateIterativeWithNumberOfEventsUntilInterrupt(inputIndex, numDynamicSenderTasksTotal);
} else {
targetVertexConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(inputIndex, numDynamicSenderTasksTotal);
}
}
// the local strategy is added only once. in the non-union case that is the actual edge;
// in the union case, it is the edge between the union and the target node
addLocalInfoFromChannelToConfig(input, targetVertexConfig, inputIndex, isBroadcast);
return 1;
}
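As a concrete illustration of the union rules enforced above (the program is an invented sketch): the NAryUnionPlanNode is dissolved during translation, so both union inputs are connected directly to the consuming vertex, which is why the union's outgoing ship strategy must be FORWARD unless the channel is a broadcast.
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
public class UnionTranslationExample {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Long> first = env.fromElements(1L, 2L);
        DataSet<Long> second = env.fromElements(3L, 4L);
        // the union produces no vertex of its own; its two input channels
        // are translated straight into inputs of the map's vertex
        DataSet<Long> doubled = first.union(second).map(new MapFunction<Long, Long>() {
            @Override
            public Long map(Long value) {
                return value * 2;
            }
        });
        doubled.print();
    }
}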
Use of org.apache.flink.optimizer.plan.BulkIterationPlanNode in project flink by apache.
In class JobGraphGenerator, method compileJobGraph.
public JobGraph compileJobGraph(OptimizedPlan program, JobID jobId) {
if (program == null) {
throw new NullPointerException("Program is null, did you call " + "ExecutionEnvironment.execute()?");
}
if (jobId == null) {
jobId = JobID.generate();
}
this.vertices = new HashMap<PlanNode, JobVertex>();
this.chainedTasks = new HashMap<PlanNode, TaskInChain>();
this.chainedTasksInSequence = new ArrayList<TaskInChain>();
this.auxVertices = new ArrayList<JobVertex>();
this.iterations = new HashMap<IterationPlanNode, IterationDescriptor>();
this.iterationStack = new ArrayList<IterationPlanNode>();
this.sharingGroup = new SlotSharingGroup();
// this starts the traversal that generates the job graph
program.accept(this);
// sanity check that we are not somehow in an iteration at the end
if (this.currentIteration != null) {
throw new CompilerException("The graph translation ended prematurely, leaving an unclosed iteration.");
}
// finalize the iterations
for (IterationDescriptor iteration : this.iterations.values()) {
if (iteration.getIterationNode() instanceof BulkIterationPlanNode) {
finalizeBulkIteration(iteration);
} else if (iteration.getIterationNode() instanceof WorksetIterationPlanNode) {
finalizeWorksetIteration(iteration);
} else {
throw new CompilerException("Unrecognized iteration node type: " + iteration.getIterationNode());
}
}
// now that the traversal is done, we have the chained tasks write their configs into their
// parents' configurations
for (TaskInChain tic : this.chainedTasksInSequence) {
TaskConfig t = new TaskConfig(tic.getContainingVertex().getConfiguration());
t.addChainedTask(tic.getChainedTask(), tic.getTaskConfig(), tic.getTaskName());
}
// ----- attach the additional info to the job vertices, for display in the runtime monitor
attachOperatorNamesAndDescriptions();
for (JobVertex vertex : this.auxVertices) {
vertex.setSlotSharingGroup(sharingGroup);
}
final Map<String, DistributedCache.DistributedCacheEntry> userArtifacts =
        JobGraphUtils.prepareUserArtifactEntries(
                program.getOriginalPlan().getCachedFiles().stream()
                        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)),
                jobId);
// create the job graph object
final JobGraph graph;
try {
graph = JobGraphBuilder.newBatchJobGraphBuilder()
        .setJobId(jobId)
        .setJobName(program.getJobName())
        .setExecutionConfig(program.getOriginalPlan().getExecutionConfig())
        .addJobVertices(vertices.values())
        .addJobVertices(auxVertices)
        .addUserArtifacts(userArtifacts)
        .build();
} catch (IOException e) {
throw new CompilerException("Could not serialize the ExecutionConfig. " + "This indicates that non-serializable types (like custom serializers) were registered.");
}
// release all references again
this.vertices = null;
this.chainedTasks = null;
this.chainedTasksInSequence = null;
this.auxVertices = null;
this.iterations = null;
this.iterationStack = null;
// return job graph
return graph;
}
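A sketch of how this entry point is typically driven, assuming the standard Optimizer from flink-optimizer (the constructor arguments are defaults chosen for the example):
import org.apache.flink.api.common.Plan;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.optimizer.DataStatistics;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.plan.OptimizedPlan;
import org.apache.flink.optimizer.plantranslate.JobGraphGenerator;
import org.apache.flink.runtime.jobgraph.JobGraph;
public class CompileToJobGraph {
    // compiles a user Plan (e.g. from ExecutionEnvironment#createProgramPlan)
    // down to the JobGraph form that the runtime executes
    public static JobGraph compile(Plan plan) {
        Optimizer optimizer = new Optimizer(new DataStatistics(), new Configuration());
        OptimizedPlan optimizedPlan = optimizer.compile(plan);
        // the single-argument overload generates a fresh JobID, as seen above
        return new JobGraphGenerator().compileJobGraph(optimizedPlan);
    }
}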
Use of org.apache.flink.optimizer.plan.BulkIterationPlanNode in project flink by apache.
In class JobGraphGenerator, method createBulkIterationHead.
private JobVertex createBulkIterationHead(BulkPartialSolutionPlanNode pspn) {
// get the bulk iteration that corresponds to this partial solution node
final BulkIterationPlanNode iteration = pspn.getContainingIterationNode();
// check whether we need an individual vertex for the partial solution, or whether we
// attach ourselves to the vertex of the parent node. We can combine the head with a node of
// the step function, if
// 1) There is one parent that the partial solution connects to via a forward pattern and no
// local strategy
// 2) parallelism and the number of subtasks per instance do not change
// 3) That successor is not a union
// 4) That successor is not itself the last node of the step function
// 5) There is no local strategy on the edge for the initial partial solution, as
// this translates to a local strategy that would only be executed in the first iteration
final boolean merge;
if (mergeIterationAuxTasks && pspn.getOutgoingChannels().size() == 1) {
final Channel c = pspn.getOutgoingChannels().get(0);
final PlanNode successor = c.getTarget();
merge = c.getShipStrategy() == ShipStrategyType.FORWARD
        && c.getLocalStrategy() == LocalStrategy.NONE
        && c.getTempMode() == TempMode.NONE
        && successor.getParallelism() == pspn.getParallelism()
        && !(successor instanceof NAryUnionPlanNode)
        && successor != iteration.getRootOfStepFunction()
        && iteration.getInput().getLocalStrategy() == LocalStrategy.NONE;
} else {
merge = false;
}
// create or adopt the head vertex
final JobVertex toReturn;
final JobVertex headVertex;
final TaskConfig headConfig;
if (merge) {
final PlanNode successor = pspn.getOutgoingChannels().get(0).getTarget();
headVertex = this.vertices.get(successor);
if (headVertex == null) {
throw new CompilerException("Bug: Trying to merge solution set with its successor, but successor has not been created.");
}
// reset the vertex type to iteration head
headVertex.setInvokableClass(IterationHeadTask.class);
headConfig = new TaskConfig(headVertex.getConfiguration());
toReturn = null;
} else {
// instantiate the head vertex and give it a no-op driver as the driver strategy.
// everything else happens in the post visit, after the input
// (the initial partial solution) is connected.
headVertex = new JobVertex("PartialSolution (" + iteration.getNodeName() + ")");
headVertex.setResources(iteration.getMinResources(), iteration.getPreferredResources());
headVertex.setInvokableClass(IterationHeadTask.class);
headConfig = new TaskConfig(headVertex.getConfiguration());
headConfig.setDriver(NoOpDriver.class);
toReturn = headVertex;
}
// look up the iteration descriptor and register the head task with it
IterationDescriptor descr = this.iterations.get(iteration);
if (descr == null) {
throw new CompilerException("Bug: Iteration descriptor was not created when translating the iteration node.");
}
descr.setHeadTask(headVertex, headConfig);
return toReturn;
}
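To make the merge conditions concrete, a hedged sketch (the operators are invented): a single forward map at the iteration's parallelism, with no local strategy, that is not a union and not itself the root of the step function, is a candidate to be hosted by the IterationHeadTask; giving that map a different parallelism would force a standalone PartialSolution vertex instead.
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.IterativeDataSet;
public class MergedHeadExample {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(4);
        IterativeDataSet<Long> iteration = env.fromElements(0L).iterate(5);
        // forward, no local strategy, same parallelism, not a union, and not
        // the step function root -> eligible to merge with the head vertex
        DataSet<Long> increment = iteration.map(new MapFunction<Long, Long>() {
            @Override
            public Long map(Long value) {
                return value + 1;
            }
        });
        // a second operator so the first map is not the root of the step function
        DataSet<Long> step = increment.map(new MapFunction<Long, Long>() {
            @Override
            public Long map(Long value) {
                return value;
            }
        });
        iteration.closeWith(step).print();
    }
}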
Use of org.apache.flink.optimizer.plan.BulkIterationPlanNode in project flink by apache.
In class PlanFinalizer, method preVisit.
@Override
public boolean preVisit(PlanNode visitable) {
// if we come here again, prevent a further descend
if (!this.allNodes.add(visitable)) {
return false;
}
if (visitable instanceof SinkPlanNode) {
this.sinks.add((SinkPlanNode) visitable);
} else if (visitable instanceof SourcePlanNode) {
this.sources.add((SourcePlanNode) visitable);
} else if (visitable instanceof BinaryUnionPlanNode) {
BinaryUnionPlanNode unionNode = (BinaryUnionPlanNode) visitable;
if (unionNode.unionsStaticAndDynamicPath()) {
unionNode.setDriverStrategy(DriverStrategy.UNION_WITH_CACHED);
}
} else if (visitable instanceof BulkPartialSolutionPlanNode) {
// tell the partial solution about the iteration node that contains it
final BulkPartialSolutionPlanNode pspn = (BulkPartialSolutionPlanNode) visitable;
final IterationPlanNode iteration = this.stackOfIterationNodes.peekLast();
// sanity check!
if (!(iteration instanceof BulkIterationPlanNode)) {
throw new CompilerException("Bug: Error finalizing the plan. " + "Cannot associate the node for a partial solution with its containing iteration.");
}
pspn.setContainingIterationNode((BulkIterationPlanNode) iteration);
} else if (visitable instanceof WorksetPlanNode) {
// tell the workset about the iteration node that contains it
final WorksetPlanNode wspn = (WorksetPlanNode) visitable;
final IterationPlanNode iteration = this.stackOfIterationNodes.peekLast();
// sanity check!
if (!(iteration instanceof WorksetIterationPlanNode)) {
throw new CompilerException("Bug: Error finalizing the plan. " + "Cannot associate the node for a workset with its containing iteration.");
}
wspn.setContainingIterationNode((WorksetIterationPlanNode) iteration);
} else if (visitable instanceof SolutionSetPlanNode) {
// tell the solution set about the iteration node that contains it
final SolutionSetPlanNode sspn = (SolutionSetPlanNode) visitable;
final IterationPlanNode iteration = this.stackOfIterationNodes.peekLast();
// sanity check!
if (!(iteration instanceof WorksetIterationPlanNode)) {
throw new CompilerException("Bug: Error finalizing the plan. " + "Cannot associate the node for a solution set with its containing iteration.");
}
sspn.setContainingIterationNode((WorksetIterationPlanNode) iteration);
}
// double-connect the connections. previously, only parents knew their children, because
// one child candidate could have been referenced by multiple parents.
for (Channel conn : visitable.getInputs()) {
conn.setTarget(visitable);
conn.getSource().addOutgoingChannel(conn);
}
for (Channel c : visitable.getBroadcastInputs()) {
c.setTarget(visitable);
c.getSource().addOutgoingChannel(c);
}
// count the memory consumption
this.memoryConsumerWeights += visitable.getMemoryConsumerWeight();
for (Channel c : visitable.getInputs()) {
if (c.getLocalStrategy().dams()) {
this.memoryConsumerWeights++;
}
if (c.getTempMode() != TempMode.NONE) {
this.memoryConsumerWeights++;
}
}
for (Channel c : visitable.getBroadcastInputs()) {
if (c.getLocalStrategy().dams()) {
this.memoryConsumerWeights++;
}
if (c.getTempMode() != TempMode.NONE) {
this.memoryConsumerWeights++;
}
}
// pass the visitor to the iteration's step function
if (visitable instanceof IterationPlanNode) {
// push the iteration node onto the stack
final IterationPlanNode iterNode = (IterationPlanNode) visitable;
this.stackOfIterationNodes.addLast(iterNode);
// recurse
((IterationPlanNode) visitable).acceptForStepFunction(this);
// pop the iteration node from the stack
this.stackOfIterationNodes.removeLast();
}
return true;
}
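PlanFinalizer implements org.apache.flink.util.Visitor over PlanNode, and returning false from preVisit is what prevents re-descending into a node reachable through several parents. A minimal visitor following the same contract (the counting logic is an invented example; like PlanFinalizer's default traversal, it only sees nodes reachable from the sinks, not those inside step functions unless acceptForStepFunction is called):
import java.util.HashSet;
import java.util.Set;
import org.apache.flink.optimizer.plan.OptimizedPlan;
import org.apache.flink.optimizer.plan.PlanNode;
import org.apache.flink.util.Visitor;
public class NodeCounter implements Visitor<PlanNode> {
    private final Set<PlanNode> seen = new HashSet<>();
    private int count;
    @Override
    public boolean preVisit(PlanNode node) {
        // same de-duplication idiom as PlanFinalizer.preVisit: a node
        // referenced by multiple parents is processed only once
        if (!seen.add(node)) {
            return false;
        }
        count++;
        return true;
    }
    @Override
    public void postVisit(PlanNode node) {
        // nothing to do on the way back up
    }
    public static int countNodes(OptimizedPlan plan) {
        NodeCounter counter = new NodeCounter();
        plan.accept(counter);
        return counter.count;
    }
}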