Use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.
The class JobGraphGenerator, method finalizeBulkIteration.
private void finalizeBulkIteration(IterationDescriptor descr) {
    final BulkIterationPlanNode bulkNode = (BulkIterationPlanNode) descr.getIterationNode();
    final JobVertex headVertex = descr.getHeadTask();
    final TaskConfig headConfig = new TaskConfig(headVertex.getConfiguration());
    final TaskConfig headFinalOutputConfig = descr.getHeadFinalResultConfig();
    // ------------ finalize the head config with the final outputs and the sync gate ------------
    final int numStepFunctionOuts = headConfig.getNumOutputs();
    final int numFinalOuts = headFinalOutputConfig.getNumOutputs();
    if (numStepFunctionOuts == 0) {
        throw new CompilerException("The iteration has no operation inside the step function.");
    }
    headConfig.setIterationHeadFinalOutputConfig(headFinalOutputConfig);
    headConfig.setIterationHeadIndexOfSyncOutput(numStepFunctionOuts + numFinalOuts);
    final double relativeMemForBackChannel = bulkNode.getRelativeMemoryPerSubTask();
    if (relativeMemForBackChannel <= 0) {
        throw new CompilerException("Bug: No memory has been assigned to the iteration back channel.");
    }
    headConfig.setRelativeBackChannelMemory(relativeMemForBackChannel);
    // --------------------------- create the sync task ---------------------------
    final JobVertex sync = new JobVertex("Sync(" + bulkNode.getNodeName() + ")");
    sync.setResources(bulkNode.getMinResources(), bulkNode.getPreferredResources());
    sync.setInvokableClass(IterationSynchronizationSinkTask.class);
    sync.setParallelism(1);
    sync.setMaxParallelism(1);
    this.auxVertices.add(sync);
    final TaskConfig syncConfig = new TaskConfig(sync.getConfiguration());
    syncConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, headVertex.getParallelism());
    // set the number of iterations / the convergence criterion for the sync
    final int maxNumIterations = bulkNode.getIterationNode().getIterationContract().getMaximumNumberOfIterations();
    if (maxNumIterations < 1) {
        throw new CompilerException("Cannot create bulk iteration with unspecified maximum number of iterations.");
    }
    syncConfig.setNumberOfIterations(maxNumIterations);
    // connect the sync task
    sync.connectNewDataSetAsInput(headVertex, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
    // ----------------------------- create the iteration tail ------------------------------
    final PlanNode rootOfTerminationCriterion = bulkNode.getRootOfTerminationCriterion();
    final PlanNode rootOfStepFunction = bulkNode.getRootOfStepFunction();
    final TaskConfig tailConfig;
    JobVertex rootOfStepFunctionVertex = this.vertices.get(rootOfStepFunction);
    if (rootOfStepFunctionVertex == null) {
        // last op is chained
        final TaskInChain taskInChain = this.chainedTasks.get(rootOfStepFunction);
        if (taskInChain == null) {
            throw new CompilerException("Bug: Tail of step function not found as vertex or chained task.");
        }
        rootOfStepFunctionVertex = taskInChain.getContainingVertex();
        // the fake channel is statically typed to pact record. no data is sent over this channel anyways.
        tailConfig = taskInChain.getTaskConfig();
    } else {
        tailConfig = new TaskConfig(rootOfStepFunctionVertex.getConfiguration());
    }
    tailConfig.setIsWorksetUpdate();
    // no termination criterion follows
    if (rootOfStepFunction.getOutgoingChannels().isEmpty()) {
        rootOfStepFunctionVertex.setInvokableClass(IterationTailTask.class);
        tailConfig.setOutputSerializer(bulkNode.getSerializerForIterationChannel());
    }
    // create the fake output task for the termination criterion, if needed
    final TaskConfig tailConfigOfTerminationCriterion;
    // if we have a termination criterion and it is not an intermediate node
    if (rootOfTerminationCriterion != null && rootOfTerminationCriterion.getOutgoingChannels().isEmpty()) {
        JobVertex rootOfTerminationCriterionVertex = this.vertices.get(rootOfTerminationCriterion);
        if (rootOfTerminationCriterionVertex == null) {
            // last op is chained
            final TaskInChain taskInChain = this.chainedTasks.get(rootOfTerminationCriterion);
            if (taskInChain == null) {
                throw new CompilerException("Bug: Tail of termination criterion not found as vertex or chained task.");
            }
            rootOfTerminationCriterionVertex = taskInChain.getContainingVertex();
            // the fake channel is statically typed to pact record. no data is sent over this channel anyways.
            tailConfigOfTerminationCriterion = taskInChain.getTaskConfig();
        } else {
            tailConfigOfTerminationCriterion = new TaskConfig(rootOfTerminationCriterionVertex.getConfiguration());
        }
        rootOfTerminationCriterionVertex.setInvokableClass(IterationTailTask.class);
        // hack: reuse the solution set update flag for the termination criterion tail
        tailConfigOfTerminationCriterion.setIsSolutionSetUpdate();
        tailConfigOfTerminationCriterion.setOutputSerializer(bulkNode.getSerializerForIterationChannel());
        // tell the head that it needs to wait for the solution set updates
        headConfig.setWaitForSolutionSetUpdate();
    }
    // ------------------- register the aggregators -------------------
    AggregatorRegistry aggs = bulkNode.getIterationNode().getIterationContract().getAggregators();
    Collection<AggregatorWithName<?>> allAggregators = aggs.getAllRegisteredAggregators();
    headConfig.addIterationAggregators(allAggregators);
    syncConfig.addIterationAggregators(allAggregators);
    String convAggName = aggs.getConvergenceCriterionAggregatorName();
    ConvergenceCriterion<?> convCriterion = aggs.getConvergenceCriterion();
    if (convCriterion != null || convAggName != null) {
        if (convCriterion == null) {
            throw new CompilerException("Error: Convergence criterion aggregator set, but criterion is null.");
        }
        if (convAggName == null) {
            throw new CompilerException("Error: Aggregator convergence criterion set, but aggregator is null.");
        }
        syncConfig.setConvergenceCriterion(convAggName, convCriterion);
    }
}
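
For context, the maximum iteration count and the aggregator registry consumed above originate in the user program's iteration contract. A minimal sketch using the DataSet API; the aggregator name "elements.seen" and the step function are illustrative, not taken from the snippet:

import org.apache.flink.api.common.aggregators.LongSumAggregator;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.IterativeDataSet;

public class BulkIterationSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Long> input = env.generateSequence(1, 1000);
        // the argument 100 becomes getMaximumNumberOfIterations() on the iteration contract
        IterativeDataSet<Long> iteration = input.iterate(100);
        // registered aggregators end up in the AggregatorRegistry that
        // finalizeBulkIteration copies into the head and sync task configs
        iteration.registerAggregator("elements.seen", new LongSumAggregator());
        DataSet<Long> step = iteration.map(new MapFunction<Long, Long>() {
            @Override
            public Long map(Long value) {
                return value + 1;
            }
        });
        iteration.closeWith(step).print();
    }
}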
Use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.
The class JobGraphGenerator, method preVisit.
/**
 * This method implements the pre-visiting during a depth-first traversal. It creates the job vertex and
 * sets the local strategy.
 *
 * @param node
 *        The node that is currently processed.
 * @return True, if the visitor should descend to the node's children, false if not.
 * @see org.apache.flink.util.Visitor#preVisit(org.apache.flink.util.Visitable)
 */
@Override
public boolean preVisit(PlanNode node) {
    // check if we have visited this node before. in non-tree graphs, this happens
    if (this.vertices.containsKey(node) || this.chainedTasks.containsKey(node) || this.iterations.containsKey(node)) {
        // return false to prevent further descent
        return false;
    }
    // the vertex to be created for the current node
    final JobVertex vertex;
    try {
        if (node instanceof SinkPlanNode) {
            vertex = createDataSinkVertex((SinkPlanNode) node);
        } else if (node instanceof SourcePlanNode) {
            vertex = createDataSourceVertex((SourcePlanNode) node);
        } else if (node instanceof BulkIterationPlanNode) {
            BulkIterationPlanNode iterationNode = (BulkIterationPlanNode) node;
            // for the bulk iteration, we skip creating anything for now. we create the graph
            // for the step function in the post visit.
            // check that the root of the step function has the same parallelism as the iteration.
            // because the tail must have the same parallelism as the head, we can only merge the last
            // operator with the tail if they have the same parallelism. not merging is currently not
            // implemented.
            PlanNode root = iterationNode.getRootOfStepFunction();
            if (root.getParallelism() != node.getParallelism()) {
                throw new CompilerException("Error: The final operator of the step function has a different parallelism than the iteration operator itself.");
            }
            IterationDescriptor descr = new IterationDescriptor(iterationNode, this.iterationIdEnumerator++);
            this.iterations.put(iterationNode, descr);
            vertex = null;
        } else if (node instanceof WorksetIterationPlanNode) {
            WorksetIterationPlanNode iterationNode = (WorksetIterationPlanNode) node;
            // we have the same constraints as for the bulk iteration
            PlanNode nextWorkSet = iterationNode.getNextWorkSetPlanNode();
            PlanNode solutionSetDelta = iterationNode.getSolutionSetDeltaPlanNode();
            if (nextWorkSet.getParallelism() != node.getParallelism()) {
                throw new CompilerException("It is currently not supported that the final operator of the step function has a different parallelism than the iteration operator itself.");
            }
            if (solutionSetDelta.getParallelism() != node.getParallelism()) {
                throw new CompilerException("It is currently not supported that the final operator of the step function has a different parallelism than the iteration operator itself.");
            }
            IterationDescriptor descr = new IterationDescriptor(iterationNode, this.iterationIdEnumerator++);
            this.iterations.put(iterationNode, descr);
            vertex = null;
        } else if (node instanceof SingleInputPlanNode) {
            vertex = createSingleInputVertex((SingleInputPlanNode) node);
        } else if (node instanceof DualInputPlanNode) {
            vertex = createDualInputVertex((DualInputPlanNode) node);
        } else if (node instanceof NAryUnionPlanNode) {
            // skip the union for now
            vertex = null;
        } else if (node instanceof BulkPartialSolutionPlanNode) {
            // create a head node (or not, if it is merged into its successor)
            vertex = createBulkIterationHead((BulkPartialSolutionPlanNode) node);
        } else if (node instanceof SolutionSetPlanNode) {
            // we adjust the joins / cogroups that go into the solution set here
            for (Channel c : node.getOutgoingChannels()) {
                DualInputPlanNode target = (DualInputPlanNode) c.getTarget();
                JobVertex accessingVertex = this.vertices.get(target);
                TaskConfig conf = new TaskConfig(accessingVertex.getConfiguration());
                int inputNum = c == target.getInput1() ? 0 : c == target.getInput2() ? 1 : -1;
                // sanity checks
                if (inputNum == -1) {
                    throw new CompilerException();
                }
                // adjust the driver
                if (conf.getDriver().equals(JoinDriver.class)) {
                    conf.setDriver(inputNum == 0 ? JoinWithSolutionSetFirstDriver.class : JoinWithSolutionSetSecondDriver.class);
                } else if (conf.getDriver().equals(CoGroupDriver.class)) {
                    conf.setDriver(inputNum == 0 ? CoGroupWithSolutionSetFirstDriver.class : CoGroupWithSolutionSetSecondDriver.class);
                } else {
                    throw new CompilerException("Found join with solution set using incompatible operator (only Join/CoGroup are valid).");
                }
            }
            // make sure we do not visit this node again. for that, we add an 'already seen' entry into one of the sets
            this.chainedTasks.put(node, ALREADY_VISITED_PLACEHOLDER);
            vertex = null;
        } else if (node instanceof WorksetPlanNode) {
            // create the iteration head here
            vertex = createWorksetIterationHead((WorksetPlanNode) node);
        } else {
            throw new CompilerException("Unrecognized node type: " + node.getClass().getName());
        }
    } catch (Exception e) {
        throw new CompilerException("Error translating node '" + node + "': " + e.getMessage(), e);
    }
    // check if a vertex was created, or if it was chained or skipped
    if (vertex != null) {
        // set parallelism
        int pd = node.getParallelism();
        vertex.setParallelism(pd);
        vertex.setMaxParallelism(pd);
        vertex.setSlotSharingGroup(sharingGroup);
        // check whether this vertex is part of an iteration step function
        if (this.currentIteration != null) {
            // check that the task has the same parallelism as the iteration as such
            PlanNode iterationNode = (PlanNode) this.currentIteration;
            if (iterationNode.getParallelism() < pd) {
                throw new CompilerException("Error: All functions that are part of an iteration must have the same, or a lower, parallelism than the iteration operator.");
            }
            // store the id of the iteration the step function participates in
            IterationDescriptor descr = this.iterations.get(this.currentIteration);
            new TaskConfig(vertex.getConfiguration()).setIterationId(descr.getId());
        }
        // store in the map
        this.vertices.put(node, vertex);
    }
    // returning true causes a deeper descent
    return true;
}
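
preVisit is one half of the org.apache.flink.util.Visitor contract referenced in the javadoc: preVisit fires on the way down and its return value controls descent, postVisit fires on the way back up. A hypothetical stand-alone visitor showing the same de-duplication idiom as the check at the top of the method:

import java.util.HashSet;
import java.util.Set;
import org.apache.flink.optimizer.plan.PlanNode;
import org.apache.flink.util.Visitor;

// hypothetical helper, not part of Flink: counts each distinct PlanNode once
class PlanNodeCounter implements Visitor<PlanNode> {
    private final Set<PlanNode> seen = new HashSet<>();

    @Override
    public boolean preVisit(PlanNode node) {
        // returning false prevents descent into already-visited subtrees,
        // which is what keeps non-tree (DAG-shaped) plans from being processed twice
        return this.seen.add(node);
    }

    @Override
    public void postVisit(PlanNode node) {
        // nothing to do on the way back up
    }

    public int count() {
        return this.seen.size();
    }
}
// usage sketch: optimizedPlan.accept(new PlanNodeCounter());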
Use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.
The class WorksetIterationNode, method instantiate.
@SuppressWarnings("unchecked")
@Override
protected void instantiate(
        OperatorDescriptorDual operator,
        Channel solutionSetIn,
        Channel worksetIn,
        List<Set<? extends NamedChannel>> broadcastPlanChannels,
        List<PlanNode> target,
        CostEstimator estimator,
        RequestedGlobalProperties globPropsReqSolutionSet,
        RequestedGlobalProperties globPropsReqWorkset,
        RequestedLocalProperties locPropsReqSolutionSet,
        RequestedLocalProperties locPropsReqWorkset) {
    // check for pipeline breaking using hash join with build on the solution set side
    placePipelineBreakersIfNecessary(DriverStrategy.HYBRIDHASH_BUILD_FIRST, solutionSetIn, worksetIn);
    // NOTES ON THE ENUMERATION OF THE STEP FUNCTION PLANS:
    // Whenever we instantiate the iteration, we enumerate new candidates for the step function.
    // That way, we make sure that for each candidate for the initial partial solution, we have
    // a fitting candidate for the step function (often, work is pushed out of the step function).
    // Among the candidates of the step function, we keep only those that meet the requested properties of the
    // current candidate initial partial solution. That makes sure these properties exist at the beginning of
    // every iteration.
    // 1) Because we enumerate multiple times, we may need to clean the cached plans
    //    before starting another enumeration.
    this.nextWorkset.accept(PlanCacheCleaner.INSTANCE);
    this.solutionSetDelta.accept(PlanCacheCleaner.INSTANCE);
    // 2) Give the partial solution the properties of the current candidate for the initial partial solution.
    //    This currently concerns only the workset.
    this.worksetNode.setCandidateProperties(worksetIn.getGlobalProperties(), worksetIn.getLocalProperties(), worksetIn);
    this.solutionSetNode.setCandidateProperties(this.partitionedProperties, new LocalProperties(), solutionSetIn);
    final SolutionSetPlanNode sspn = this.solutionSetNode.getCurrentSolutionSetPlanNode();
    final WorksetPlanNode wspn = this.worksetNode.getCurrentWorksetPlanNode();
    // 3) Get the alternative plans.
    List<PlanNode> solutionSetDeltaCandidates = this.solutionSetDelta.getAlternativePlans(estimator);
    List<PlanNode> worksetCandidates = this.nextWorkset.getAlternativePlans(estimator);
    // 4) Throw away all plans that are not compatible with the properties currently requested of the
    //    initial partial solution. Make sure that the workset candidates fulfill the input requirements.
    {
        List<PlanNode> newCandidates = new ArrayList<PlanNode>();
        for (Iterator<PlanNode> planDeleter = worksetCandidates.iterator(); planDeleter.hasNext(); ) {
            PlanNode candidate = planDeleter.next();
            GlobalProperties atEndGlobal = candidate.getGlobalProperties();
            LocalProperties atEndLocal = candidate.getLocalProperties();
            FeedbackPropertiesMeetRequirementsReport report = candidate.checkPartialSolutionPropertiesMet(wspn, atEndGlobal, atEndLocal);
            if (report == FeedbackPropertiesMeetRequirementsReport.NO_PARTIAL_SOLUTION) {
                // depends on the workset solution only through a broadcast variable; nothing to do
            } else if (report == FeedbackPropertiesMeetRequirementsReport.NOT_MET) {
                // attach a no-op node through which we create the properties of the original input
                Channel toNoOp = new Channel(candidate);
                globPropsReqWorkset.parameterizeChannel(toNoOp, false, nextWorksetRootConnection.getDataExchangeMode(), false);
                locPropsReqWorkset.parameterizeChannel(toNoOp);
                NoOpUnaryUdfOp noOpUnaryUdfOp = new NoOpUnaryUdfOp<>();
                noOpUnaryUdfOp.setInput(candidate.getProgramOperator());
                UnaryOperatorNode rebuildWorksetPropertiesNode = new UnaryOperatorNode("Rebuild Workset Properties", noOpUnaryUdfOp, true);
                rebuildWorksetPropertiesNode.setParallelism(candidate.getParallelism());
                SingleInputPlanNode rebuildWorksetPropertiesPlanNode = new SingleInputPlanNode(rebuildWorksetPropertiesNode, "Rebuild Workset Properties", toNoOp, DriverStrategy.UNARY_NO_OP);
                rebuildWorksetPropertiesPlanNode.initProperties(toNoOp.getGlobalProperties(), toNoOp.getLocalProperties());
                estimator.costOperator(rebuildWorksetPropertiesPlanNode);
                GlobalProperties atEndGlobalModified = rebuildWorksetPropertiesPlanNode.getGlobalProperties();
                LocalProperties atEndLocalModified = rebuildWorksetPropertiesPlanNode.getLocalProperties();
                if (!(atEndGlobalModified.equals(atEndGlobal) && atEndLocalModified.equals(atEndLocal))) {
                    FeedbackPropertiesMeetRequirementsReport report2 = candidate.checkPartialSolutionPropertiesMet(wspn, atEndGlobalModified, atEndLocalModified);
                    if (report2 != FeedbackPropertiesMeetRequirementsReport.NOT_MET) {
                        newCandidates.add(rebuildWorksetPropertiesPlanNode);
                    }
                }
                // remove the original operator and add the modified candidate
                planDeleter.remove();
            }
        }
        worksetCandidates.addAll(newCandidates);
    }
    if (worksetCandidates.isEmpty()) {
        return;
    }
    // sanity-check the solution set delta
    for (PlanNode solutionSetDeltaCandidate : solutionSetDeltaCandidates) {
        SingleInputPlanNode candidate = (SingleInputPlanNode) solutionSetDeltaCandidate;
        GlobalProperties gp = candidate.getGlobalProperties();
        if (gp.getPartitioning() != PartitioningProperty.HASH_PARTITIONED || gp.getPartitioningFields() == null || !gp.getPartitioningFields().equals(this.solutionSetKeyFields)) {
            throw new CompilerException("Bug: The solution set delta is not partitioned.");
        }
    }
    // 5) Create a candidate for the iteration node for every remaining plan of the step function.
    final GlobalProperties gp = new GlobalProperties();
    gp.setHashPartitioned(this.solutionSetKeyFields);
    gp.addUniqueFieldCombination(this.solutionSetKeyFields);
    LocalProperties lp = LocalProperties.EMPTY.addUniqueFields(this.solutionSetKeyFields);
    // take all combinations of solution set delta and workset plans
    for (PlanNode solutionSetCandidate : solutionSetDeltaCandidates) {
        for (PlanNode worksetCandidate : worksetCandidates) {
            // check whether they have the same operator at their latest branching point
            if (this.singleRoot.areBranchCompatible(solutionSetCandidate, worksetCandidate)) {
                SingleInputPlanNode siSolutionDeltaCandidate = (SingleInputPlanNode) solutionSetCandidate;
                boolean immediateDeltaUpdate;
                // check whether we need a dedicated solution set delta operator, or whether we can update on the fly
                if (siSolutionDeltaCandidate.getInput().getShipStrategy() == ShipStrategyType.FORWARD && this.solutionDeltaImmediatelyAfterSolutionJoin) {
                    // sanity-check the node and connection
                    if (siSolutionDeltaCandidate.getDriverStrategy() != DriverStrategy.UNARY_NO_OP || siSolutionDeltaCandidate.getInput().getLocalStrategy() != LocalStrategy.NONE) {
                        throw new CompilerException("Invalid Solution set delta node.");
                    }
                    solutionSetCandidate = siSolutionDeltaCandidate.getInput().getSource();
                    immediateDeltaUpdate = true;
                } else {
                    // no on-the-fly update is possible, so we need to keep this node
                    // and mark that we materialize the input
                    siSolutionDeltaCandidate.getInput().setTempMode(TempMode.PIPELINE_BREAKER);
                    immediateDeltaUpdate = false;
                }
                WorksetIterationPlanNode wsNode = new WorksetIterationPlanNode(this, this.getOperator().getName(), solutionSetIn, worksetIn, sspn, wspn, worksetCandidate, solutionSetCandidate);
                wsNode.setImmediateSolutionSetUpdate(immediateDeltaUpdate);
                wsNode.initProperties(gp, lp);
                target.add(wsNode);
            }
        }
    }
}
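
On the API side, the two feedback channels validated here (solution set delta and next workset) and the solutionSetKeyFields correspond to a delta iteration declared roughly as below; the data types, key position, and join logic are illustrative:

import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DeltaIteration;
import org.apache.flink.api.java.tuple.Tuple2;

public class DeltaIterationSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, Double>> initialSolution = env.fromElements(Tuple2.of(1L, 0.5));
        DataSet<Tuple2<Long, Double>> initialWorkset = env.fromElements(Tuple2.of(1L, 0.1));
        // key position 0 is what instantiate() later checks the solution set
        // delta to be hash-partitioned on (the solutionSetKeyFields)
        DeltaIteration<Tuple2<Long, Double>, Tuple2<Long, Double>> iteration =
                initialSolution.iterateDelta(initialWorkset, 20, 0);
        DataSet<Tuple2<Long, Double>> delta = iteration.getWorkset()
                .join(iteration.getSolutionSet())
                .where(0).equalTo(0)
                .with(new JoinFunction<Tuple2<Long, Double>, Tuple2<Long, Double>, Tuple2<Long, Double>>() {
                    @Override
                    public Tuple2<Long, Double> join(Tuple2<Long, Double> ws, Tuple2<Long, Double> ss) {
                        return Tuple2.of(ws.f0, ws.f1 + ss.f1);
                    }
                });
        // closeWith takes the solution set delta and the next workset, the two
        // plan roots whose candidates instantiate() enumerates and combines
        iteration.closeWith(delta, delta).print();
    }
}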
Use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.
The class PlanFinalizer, method createFinalPlan.
public OptimizedPlan createFinalPlan(List<SinkPlanNode> sinks, String jobName, Plan originalPlan) {
    this.memoryConsumerWeights = 0;
    // traverse the graph
    for (SinkPlanNode node : sinks) {
        node.accept(this);
    }
    // assign the memory to each node
    if (this.memoryConsumerWeights > 0) {
        for (PlanNode node : this.allNodes) {
            // assign memory to the driver strategy of the node
            final int consumerWeight = node.getMemoryConsumerWeight();
            if (consumerWeight > 0) {
                final double relativeMem = (double) consumerWeight / this.memoryConsumerWeights;
                node.setRelativeMemoryPerSubtask(relativeMem);
                if (Optimizer.LOG.isDebugEnabled()) {
                    Optimizer.LOG.debug("Assigned " + relativeMem + " of total memory to each subtask of " + node.getProgramOperator().getName() + ".");
                }
            }
            // assign memory to the local and global strategies of the channels
            for (Channel c : node.getInputs()) {
                if (c.getLocalStrategy().dams()) {
                    final double relativeMem = 1.0 / this.memoryConsumerWeights;
                    c.setRelativeMemoryLocalStrategy(relativeMem);
                    if (Optimizer.LOG.isDebugEnabled()) {
                        Optimizer.LOG.debug("Assigned " + relativeMem + " of total memory to each local strategy instance of " + c + ".");
                    }
                }
                if (c.getTempMode() != TempMode.NONE) {
                    final double relativeMem = 1.0 / this.memoryConsumerWeights;
                    c.setRelativeTempMemory(relativeMem);
                    if (Optimizer.LOG.isDebugEnabled()) {
                        Optimizer.LOG.debug("Assigned " + relativeMem + " of total memory to each instance of the temp table for " + c + ".");
                    }
                }
            }
        }
    }
    return new OptimizedPlan(this.sources, this.sinks, this.allNodes, jobName, originalPlan);
}
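
The assignment rule is purely proportional: each driver strategy receives consumerWeight / memoryConsumerWeights of the managed memory per subtask, and each damming local strategy or temp barrier counts as one unit of the total. A stand-alone sketch of just that arithmetic (assumed example weights, not Flink API):

public class MemoryShareSketch {
    public static void main(String[] args) {
        // assumed example: one hash join of weight 2 and two sorts of weight 1 each
        int[] consumerWeights = {2, 1, 1};
        int totalWeight = 0;
        for (int w : consumerWeights) {
            totalWeight += w; // plays the role of memoryConsumerWeights after the traversal
        }
        for (int w : consumerWeights) {
            // mirrors: relativeMem = (double) consumerWeight / memoryConsumerWeights
            double relativeMem = (double) w / totalWeight;
            System.out.printf("consumer of weight %d gets %.2f of managed memory per subtask%n", w, relativeMem);
        }
    }
}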
Use of org.apache.flink.optimizer.plan.PlanNode in project flink by apache.
The class RangePartitionRewriter, method postVisit.
@Override
public void postVisit(PlanNode node) {
    if (node instanceof IterationPlanNode) {
        IterationPlanNode iNode = (IterationPlanNode) node;
        if (!visitedIterationNodes.contains(iNode)) {
            visitedIterationNodes.add(iNode);
            iNode.acceptForStepFunction(this);
        }
    }
    final Iterable<Channel> inputChannels = node.getInputs();
    for (Channel channel : inputChannels) {
        ShipStrategyType shipStrategy = channel.getShipStrategy();
        // make sure we only optimize the DAG for range partitioning, and do not optimize it multiple times
        if (shipStrategy == ShipStrategyType.PARTITION_RANGE) {
            if (channel.getDataDistribution() == null) {
                if (node.isOnDynamicPath()) {
                    throw new InvalidProgramException("Range Partitioning not supported within iterations if users do not supply the data distribution.");
                }
                PlanNode channelSource = channel.getSource();
                List<Channel> newSourceOutputChannels = rewriteRangePartitionChannel(channel);
                channelSource.getOutgoingChannels().remove(channel);
                channelSource.getOutgoingChannels().addAll(newSourceOutputChannels);
            }
        }
    }
}
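
In user terms, the check above means range partitioning on a dynamic (iterative) path needs an explicitly supplied data distribution, because the sampling sub-plan that rewriteRangePartitionChannel injects cannot run inside an iteration. A sketch of the non-iterative case, where the rewrite applies automatically; the data and key are illustrative, and to the best of my knowledge PartitionOperator also offers a withDataDistribution hook for supplying a distribution by hand (verify against your Flink version):

import org.apache.flink.api.common.operators.Order;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;

public class RangePartitionSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Integer, String>> data = env.fromElements(
                Tuple2.of(3, "c"), Tuple2.of(1, "a"), Tuple2.of(2, "b"));
        // outside an iteration, the optimizer samples the data to build the
        // distribution; postVisit then swaps the PARTITION_RANGE channel for
        // the rewritten sampling sub-plan
        data.partitionByRange(0)
            .sortPartition(0, Order.ASCENDING)
            .print();
    }
}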