Use of org.apache.flink.optimizer.dag.OptimizerNode in project flink by apache.
The class GraphCreatingVisitor, method preVisit.
@SuppressWarnings("deprecation")
@Override
public boolean preVisit(Operator<?> c) {
// check if we have been here before
if (this.con2node.containsKey(c)) {
return false;
}
final OptimizerNode n;
// create a node for the operator (or sink or source) if we have not been here before
if (c instanceof GenericDataSinkBase) {
DataSinkNode dsn = new DataSinkNode((GenericDataSinkBase<?>) c);
this.sinks.add(dsn);
n = dsn;
} else if (c instanceof GenericDataSourceBase) {
n = new DataSourceNode((GenericDataSourceBase<?, ?>) c);
} else if (c instanceof MapOperatorBase) {
n = new MapNode((MapOperatorBase<?, ?, ?>) c);
} else if (c instanceof MapPartitionOperatorBase) {
n = new MapPartitionNode((MapPartitionOperatorBase<?, ?, ?>) c);
} else if (c instanceof FlatMapOperatorBase) {
n = new FlatMapNode((FlatMapOperatorBase<?, ?, ?>) c);
} else if (c instanceof FilterOperatorBase) {
n = new FilterNode((FilterOperatorBase<?, ?>) c);
} else if (c instanceof ReduceOperatorBase) {
n = new ReduceNode((ReduceOperatorBase<?, ?>) c);
} else if (c instanceof GroupCombineOperatorBase) {
n = new GroupCombineNode((GroupCombineOperatorBase<?, ?, ?>) c);
} else if (c instanceof GroupReduceOperatorBase) {
n = new GroupReduceNode((GroupReduceOperatorBase<?, ?, ?>) c);
} else if (c instanceof InnerJoinOperatorBase) {
n = new JoinNode((InnerJoinOperatorBase<?, ?, ?, ?>) c);
} else if (c instanceof OuterJoinOperatorBase) {
n = new OuterJoinNode((OuterJoinOperatorBase<?, ?, ?, ?>) c);
} else if (c instanceof CoGroupOperatorBase) {
n = new CoGroupNode((CoGroupOperatorBase<?, ?, ?, ?>) c);
} else if (c instanceof CoGroupRawOperatorBase) {
n = new CoGroupRawNode((CoGroupRawOperatorBase<?, ?, ?, ?>) c);
} else if (c instanceof CrossOperatorBase) {
n = new CrossNode((CrossOperatorBase<?, ?, ?, ?>) c);
} else if (c instanceof BulkIterationBase) {
n = new BulkIterationNode((BulkIterationBase<?>) c);
} else if (c instanceof DeltaIterationBase) {
n = new WorksetIterationNode((DeltaIterationBase<?, ?>) c);
} else if (c instanceof Union) {
n = new BinaryUnionNode((Union<?>) c);
} else if (c instanceof PartitionOperatorBase) {
n = new PartitionNode((PartitionOperatorBase<?>) c);
} else if (c instanceof SortPartitionOperatorBase) {
n = new SortPartitionNode((SortPartitionOperatorBase<?>) c);
} else if (c instanceof BulkIterationBase.PartialSolutionPlaceHolder) {
if (this.parent == null) {
throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
}
final BulkIterationBase.PartialSolutionPlaceHolder<?> holder = (BulkIterationBase.PartialSolutionPlaceHolder<?>) c;
final BulkIterationBase<?> enclosingIteration = holder.getContainingBulkIteration();
final BulkIterationNode containingIterationNode = (BulkIterationNode) this.parent.con2node.get(enclosingIteration);
// catch this for the recursive translation of step functions
BulkPartialSolutionNode p = new BulkPartialSolutionNode(holder, containingIterationNode);
p.setParallelism(containingIterationNode.getParallelism());
n = p;
} else if (c instanceof DeltaIterationBase.WorksetPlaceHolder) {
if (this.parent == null) {
throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
}
final DeltaIterationBase.WorksetPlaceHolder<?> holder = (DeltaIterationBase.WorksetPlaceHolder<?>) c;
final DeltaIterationBase<?, ?> enclosingIteration = holder.getContainingWorksetIteration();
final WorksetIterationNode containingIterationNode = (WorksetIterationNode) this.parent.con2node.get(enclosingIteration);
// catch this for the recursive translation of step functions
WorksetNode p = new WorksetNode(holder, containingIterationNode);
p.setParallelism(containingIterationNode.getParallelism());
n = p;
} else if (c instanceof DeltaIterationBase.SolutionSetPlaceHolder) {
if (this.parent == null) {
throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
}
final DeltaIterationBase.SolutionSetPlaceHolder<?> holder = (DeltaIterationBase.SolutionSetPlaceHolder<?>) c;
final DeltaIterationBase<?, ?> enclosingIteration = holder.getContainingWorksetIteration();
final WorksetIterationNode containingIterationNode = (WorksetIterationNode) this.parent.con2node.get(enclosingIteration);
// catch this for the recursive translation of step functions
SolutionSetNode p = new SolutionSetNode(holder, containingIterationNode);
p.setParallelism(containingIterationNode.getParallelism());
n = p;
} else {
throw new IllegalArgumentException("Unknown operator type: " + c);
}
this.con2node.put(c, n);
// set the parallelism only if it has not been set before; some nodes have a fixed parallelism, such as the key-less reducer (all-reduce)
if (n.getParallelism() < 1) {
// set the parallelism
int par = c.getParallelism();
if (n instanceof BinaryUnionNode) {
// Keep parallelism of union undefined for now.
// It will be determined based on the parallelism of its successor.
par = -1;
} else if (par > 0) {
if (this.forceParallelism && par != this.defaultParallelism) {
par = this.defaultParallelism;
Optimizer.LOG.warn("The parallelism of nested dataflows (such as step functions in iterations) is " + "currently fixed to the parallelism of the surrounding operator (the iteration).");
}
} else {
par = this.defaultParallelism;
}
n.setParallelism(par);
}
return true;
}
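The visitor above is driven by the Plan's accept(..) traversal (the compile(..) method further down shows the actual call site), and the con2node lookup at the top ensures that operators shared in a DAG are translated only once. Below is a minimal sketch of how the translation step is typically kicked off; the parallelism value 128 and the class name TranslationSketch are illustrative only, and since GraphCreatingVisitor is not public it assumes code living in the same package.

import org.apache.flink.api.common.ExecutionMode;
import org.apache.flink.api.common.Plan;

public final class TranslationSketch {
    public static GraphCreatingVisitor translate(Plan program) {
        // 128 is an arbitrary default parallelism chosen for this example
        GraphCreatingVisitor graphCreator = new GraphCreatingVisitor(128, ExecutionMode.PIPELINED);
        // depth-first traversal: preVisit(..) returns false for operators already
        // in con2node, so each operator of the DAG is translated exactly once
        program.accept(graphCreator);
        // graphCreator.getSinks() now holds one DataSinkNode per sink of the program
        return graphCreator;
    }
}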
Use of org.apache.flink.optimizer.dag.OptimizerNode in project flink by apache.
The class PlanJSONDumpGenerator, method visit.
private boolean visit(DumpableNode<?> node, PrintWriter writer, boolean first) {
// check for duplicate traversal
if (this.nodeIds.containsKey(node)) {
return false;
}
// assign an id first
this.nodeIds.put(node, this.nodeCnt++);
// then recurse
for (DumpableNode<?> child : node.getPredecessors()) {
// only flip 'first' to false if the child was actually visited; a child that was already in the graph must not set first to false
if (visit(child, writer, first)) {
first = false;
}
}
// check if this node should be skipped from the dump
final OptimizerNode n = node.getOptimizerNode();
// start a new node and output node id
if (!first) {
writer.print(",\n");
}
// open the node
writer.print("\t{\n");
// recurse, if it is an iteration node
if (node instanceof BulkIterationNode || node instanceof BulkIterationPlanNode) {
DumpableNode<?> innerChild = node instanceof BulkIterationNode ? ((BulkIterationNode) node).getNextPartialSolution() : ((BulkIterationPlanNode) node).getRootOfStepFunction();
DumpableNode<?> begin = node instanceof BulkIterationNode ? ((BulkIterationNode) node).getPartialSolution() : ((BulkIterationPlanNode) node).getPartialSolutionPlanNode();
writer.print("\t\t\"step_function\": [\n");
visit(innerChild, writer, true);
writer.print("\n\t\t],\n");
writer.print("\t\t\"partial_solution\": " + this.nodeIds.get(begin) + ",\n");
writer.print("\t\t\"next_partial_solution\": " + this.nodeIds.get(innerChild) + ",\n");
} else if (node instanceof WorksetIterationNode || node instanceof WorksetIterationPlanNode) {
DumpableNode<?> worksetRoot = node instanceof WorksetIterationNode ? ((WorksetIterationNode) node).getNextWorkset() : ((WorksetIterationPlanNode) node).getNextWorkSetPlanNode();
DumpableNode<?> solutionDelta = node instanceof WorksetIterationNode ? ((WorksetIterationNode) node).getSolutionSetDelta() : ((WorksetIterationPlanNode) node).getSolutionSetDeltaPlanNode();
DumpableNode<?> workset = node instanceof WorksetIterationNode ? ((WorksetIterationNode) node).getWorksetNode() : ((WorksetIterationPlanNode) node).getWorksetPlanNode();
DumpableNode<?> solutionSet = node instanceof WorksetIterationNode ? ((WorksetIterationNode) node).getSolutionSetNode() : ((WorksetIterationPlanNode) node).getSolutionSetPlanNode();
writer.print("\t\t\"step_function\": [\n");
visit(worksetRoot, writer, true);
visit(solutionDelta, writer, false);
writer.print("\n\t\t],\n");
writer.print("\t\t\"workset\": " + this.nodeIds.get(workset) + ",\n");
writer.print("\t\t\"solution_set\": " + this.nodeIds.get(solutionSet) + ",\n");
writer.print("\t\t\"next_workset\": " + this.nodeIds.get(worksetRoot) + ",\n");
writer.print("\t\t\"solution_delta\": " + this.nodeIds.get(solutionDelta) + ",\n");
}
// print the id
writer.print("\t\t\"id\": " + this.nodeIds.get(node));
final String type;
String contents;
if (n instanceof DataSinkNode) {
type = "sink";
contents = n.getOperator().toString();
} else if (n instanceof DataSourceNode) {
type = "source";
contents = n.getOperator().toString();
} else if (n instanceof BulkIterationNode) {
type = "bulk_iteration";
contents = n.getOperator().getName();
} else if (n instanceof WorksetIterationNode) {
type = "workset_iteration";
contents = n.getOperator().getName();
} else if (n instanceof BinaryUnionNode) {
type = "pact";
contents = "";
} else {
type = "pact";
contents = n.getOperator().getName();
}
contents = StringUtils.showControlCharacters(contents);
if (encodeForHTML) {
contents = StringEscapeUtils.escapeHtml4(contents);
contents = contents.replace("\\", "&#92;");
}
String name = n.getOperatorName();
if (name.equals("Reduce") && (node instanceof SingleInputPlanNode) && ((SingleInputPlanNode) node).getDriverStrategy() == DriverStrategy.SORTED_GROUP_COMBINE) {
name = "Combine";
}
// output the type identifier
writer.print(",\n\t\t\"type\": \"" + type + "\"");
// output node name
writer.print(",\n\t\t\"pact\": \"" + name + "\"");
// output node contents
writer.print(",\n\t\t\"contents\": \"" + contents + "\"");
// parallelism
writer.print(",\n\t\t\"parallelism\": \"" + (n.getParallelism() >= 1 ? n.getParallelism() : "default") + "\"");
// output node predecessors
Iterator<? extends DumpableConnection<?>> inConns = node.getDumpableInputs().iterator();
String child1name = "", child2name = "";
if (inConns != null && inConns.hasNext()) {
// start predecessor list
writer.print(",\n\t\t\"predecessors\": [");
int inputNum = 0;
while (inConns.hasNext()) {
final DumpableConnection<?> inConn = inConns.next();
final DumpableNode<?> source = inConn.getSource();
writer.print(inputNum == 0 ? "\n" : ",\n");
if (inputNum == 0) {
child1name += child1name.length() > 0 ? ", " : "";
child1name += source.getOptimizerNode().getOperator().getName() + " (id: " + this.nodeIds.get(source) + ")";
} else if (inputNum == 1) {
child2name += child2name.length() > 0 ? ", " : "";
child2name += source.getOptimizerNode().getOperator().getName() + " (id: " + this.nodeIds.get(source) + ")";
}
// output predecessor id
writer.print("\t\t\t{\"id\": " + this.nodeIds.get(source));
// output connection side
if (inConns.hasNext() || inputNum > 0) {
writer.print(", \"side\": \"" + (inputNum == 0 ? "first" : "second") + "\"");
}
// output shipping strategy and channel type
final Channel channel = (inConn instanceof Channel) ? (Channel) inConn : null;
final ShipStrategyType shipType = channel != null ? channel.getShipStrategy() : inConn.getShipStrategy();
String shipStrategy = null;
if (shipType != null) {
switch(shipType) {
case NONE:
// nothing
break;
case FORWARD:
shipStrategy = "Forward";
break;
case BROADCAST:
shipStrategy = "Broadcast";
break;
case PARTITION_HASH:
shipStrategy = "Hash Partition";
break;
case PARTITION_RANGE:
shipStrategy = "Range Partition";
break;
case PARTITION_RANDOM:
shipStrategy = "Redistribute";
break;
case PARTITION_FORCED_REBALANCE:
shipStrategy = "Rebalance";
break;
case PARTITION_CUSTOM:
shipStrategy = "Custom Partition";
break;
default:
throw new CompilerException("Unknown ship strategy '" + inConn.getShipStrategy().name() + "' in JSON generator.");
}
}
if (channel != null && channel.getShipStrategyKeys() != null && channel.getShipStrategyKeys().size() > 0) {
shipStrategy += " on " + (channel.getShipStrategySortOrder() == null ? channel.getShipStrategyKeys().toString() : Utils.createOrdering(channel.getShipStrategyKeys(), channel.getShipStrategySortOrder()).toString());
}
if (shipStrategy != null) {
writer.print(", \"ship_strategy\": \"" + shipStrategy + "\"");
}
if (channel != null) {
String localStrategy = null;
switch(channel.getLocalStrategy()) {
case NONE:
break;
case SORT:
localStrategy = "Sort";
break;
case COMBININGSORT:
localStrategy = "Sort (combining)";
break;
default:
throw new CompilerException("Unknown local strategy " + channel.getLocalStrategy().name());
}
if (channel != null && channel.getLocalStrategyKeys() != null && channel.getLocalStrategyKeys().size() > 0) {
localStrategy += " on " + (channel.getLocalStrategySortOrder() == null ? channel.getLocalStrategyKeys().toString() : Utils.createOrdering(channel.getLocalStrategyKeys(), channel.getLocalStrategySortOrder()).toString());
}
if (localStrategy != null) {
writer.print(", \"local_strategy\": \"" + localStrategy + "\"");
}
if (channel != null && channel.getTempMode() != TempMode.NONE) {
String tempMode = channel.getTempMode().toString();
writer.print(", \"temp_mode\": \"" + tempMode + "\"");
}
if (channel != null) {
String exchangeMode = channel.getDataExchangeMode().toString();
writer.print(", \"exchange_mode\": \"" + exchangeMode + "\"");
}
}
writer.print('}');
inputNum++;
}
// finish predecessors
writer.print("\n\t\t]");
}
// ---------------------------------------------------------------------------------------
// the part below here is relevant only to plan nodes with concrete strategies, etc
// ---------------------------------------------------------------------------------------
final PlanNode p = node.getPlanNode();
if (p == null) {
// finish node
writer.print("\n\t}");
return true;
}
// local strategy
String locString = null;
if (p.getDriverStrategy() != null) {
switch(p.getDriverStrategy()) {
case NONE:
case BINARY_NO_OP:
break;
case UNARY_NO_OP:
locString = "No-Op";
break;
case MAP:
locString = "Map";
break;
case FLAT_MAP:
locString = "FlatMap";
break;
case MAP_PARTITION:
locString = "Map Partition";
break;
case ALL_REDUCE:
locString = "Reduce All";
break;
case ALL_GROUP_REDUCE:
case ALL_GROUP_REDUCE_COMBINE:
locString = "Group Reduce All";
break;
case SORTED_REDUCE:
locString = "Sorted Reduce";
break;
case SORTED_PARTIAL_REDUCE:
locString = "Sorted Combine/Reduce";
break;
case SORTED_GROUP_REDUCE:
locString = "Sorted Group Reduce";
break;
case SORTED_GROUP_COMBINE:
locString = "Sorted Combine";
break;
case HYBRIDHASH_BUILD_FIRST:
locString = "Hybrid Hash (build: " + child1name + ")";
break;
case HYBRIDHASH_BUILD_SECOND:
locString = "Hybrid Hash (build: " + child2name + ")";
break;
case HYBRIDHASH_BUILD_FIRST_CACHED:
locString = "Hybrid Hash (CACHED) (build: " + child1name + ")";
break;
case HYBRIDHASH_BUILD_SECOND_CACHED:
locString = "Hybrid Hash (CACHED) (build: " + child2name + ")";
break;
case NESTEDLOOP_BLOCKED_OUTER_FIRST:
locString = "Nested Loops (Blocked Outer: " + child1name + ")";
break;
case NESTEDLOOP_BLOCKED_OUTER_SECOND:
locString = "Nested Loops (Blocked Outer: " + child2name + ")";
break;
case NESTEDLOOP_STREAMED_OUTER_FIRST:
locString = "Nested Loops (Streamed Outer: " + child1name + ")";
break;
case NESTEDLOOP_STREAMED_OUTER_SECOND:
locString = "Nested Loops (Streamed Outer: " + child2name + ")";
break;
case INNER_MERGE:
locString = "Merge";
break;
case CO_GROUP:
locString = "Co-Group";
break;
default:
locString = p.getDriverStrategy().name();
break;
}
if (locString != null) {
writer.print(",\n\t\t\"driver_strategy\": \"");
writer.print(locString);
writer.print("\"");
}
}
{
// output node global properties
final GlobalProperties gp = p.getGlobalProperties();
writer.print(",\n\t\t\"global_properties\": [\n");
addProperty(writer, "Partitioning", gp.getPartitioning().name(), true);
if (gp.getPartitioningFields() != null) {
addProperty(writer, "Partitioned on", gp.getPartitioningFields().toString(), false);
}
if (gp.getPartitioningOrdering() != null) {
addProperty(writer, "Partitioning Order", gp.getPartitioningOrdering().toString(), false);
} else {
addProperty(writer, "Partitioning Order", "(none)", false);
}
if (n.getUniqueFields() == null || n.getUniqueFields().size() == 0) {
addProperty(writer, "Uniqueness", "not unique", false);
} else {
addProperty(writer, "Uniqueness", n.getUniqueFields().toString(), false);
}
writer.print("\n\t\t]");
}
{
// output node local properties
LocalProperties lp = p.getLocalProperties();
writer.print(",\n\t\t\"local_properties\": [\n");
if (lp.getOrdering() != null) {
addProperty(writer, "Order", lp.getOrdering().toString(), true);
} else {
addProperty(writer, "Order", "(none)", true);
}
if (lp.getGroupedFields() != null && lp.getGroupedFields().size() > 0) {
addProperty(writer, "Grouped on", lp.getGroupedFields().toString(), false);
} else {
addProperty(writer, "Grouping", "not grouped", false);
}
if (n.getUniqueFields() == null || n.getUniqueFields().size() == 0) {
addProperty(writer, "Uniqueness", "not unique", false);
} else {
addProperty(writer, "Uniqueness", n.getUniqueFields().toString(), false);
}
writer.print("\n\t\t]");
}
// output node size estimates
writer.print(",\n\t\t\"estimates\": [\n");
addProperty(writer, "Est. Output Size", n.getEstimatedOutputSize() == -1 ? "(unknown)" : formatNumber(n.getEstimatedOutputSize(), "B"), true);
addProperty(writer, "Est. Cardinality", n.getEstimatedNumRecords() == -1 ? "(unknown)" : formatNumber(n.getEstimatedNumRecords()), false);
writer.print("\t\t]");
// output node cost
if (p.getNodeCosts() != null) {
writer.print(",\n\t\t\"costs\": [\n");
addProperty(writer, "Network", p.getNodeCosts().getNetworkCost() == -1 ? "(unknown)" : formatNumber(p.getNodeCosts().getNetworkCost(), "B"), true);
addProperty(writer, "Disk I/O", p.getNodeCosts().getDiskCost() == -1 ? "(unknown)" : formatNumber(p.getNodeCosts().getDiskCost(), "B"), false);
addProperty(writer, "CPU", p.getNodeCosts().getCpuCost() == -1 ? "(unknown)" : formatNumber(p.getNodeCosts().getCpuCost(), ""), false);
addProperty(writer, "Cumulative Network", p.getCumulativeCosts().getNetworkCost() == -1 ? "(unknown)" : formatNumber(p.getCumulativeCosts().getNetworkCost(), "B"), false);
addProperty(writer, "Cumulative Disk I/O", p.getCumulativeCosts().getDiskCost() == -1 ? "(unknown)" : formatNumber(p.getCumulativeCosts().getDiskCost(), "B"), false);
addProperty(writer, "Cumulative CPU", p.getCumulativeCosts().getCpuCost() == -1 ? "(unknown)" : formatNumber(p.getCumulativeCosts().getCpuCost(), ""), false);
writer.print("\n\t\t]");
}
// output the node compiler hints
if (n.getOperator().getCompilerHints() != null) {
CompilerHints hints = n.getOperator().getCompilerHints();
CompilerHints defaults = new CompilerHints();
String size = hints.getOutputSize() == defaults.getOutputSize() ? "(none)" : String.valueOf(hints.getOutputSize());
String card = hints.getOutputCardinality() == defaults.getOutputCardinality() ? "(none)" : String.valueOf(hints.getOutputCardinality());
String width = hints.getAvgOutputRecordSize() == defaults.getAvgOutputRecordSize() ? "(none)" : String.valueOf(hints.getAvgOutputRecordSize());
String filter = hints.getFilterFactor() == defaults.getFilterFactor() ? "(none)" : String.valueOf(hints.getFilterFactor());
writer.print(",\n\t\t\"compiler_hints\": [\n");
addProperty(writer, "Output Size (bytes)", size, true);
addProperty(writer, "Output Cardinality", card, false);
addProperty(writer, "Avg. Output Record Size (bytes)", width, false);
addProperty(writer, "Filter Factor", filter, false);
writer.print("\t\t]");
}
// finish node
writer.print("\n\t}");
return true;
}
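The recursive visit(..) above emits one JSON object per node, wiring up predecessors, ship and local strategies, properties, and cost estimates. A hedged usage sketch follows; the methods setEncodeForHTML(..) and getOptimizerPlanAsJSON(..) are inferred from the encodeForHTML flag used above and may differ by Flink version, so treat them as assumptions.

import org.apache.flink.optimizer.plan.OptimizedPlan;
import org.apache.flink.optimizer.plandump.PlanJSONDumpGenerator;

public final class PlanDumpSketch {
    public static String dumpAsJson(OptimizedPlan optimizedPlan) {
        PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
        // escape operator names and contents when the JSON is embedded in a web page
        dumper.setEncodeForHTML(true);
        return dumper.getOptimizerPlanAsJSON(optimizedPlan);
    }
}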
Use of org.apache.flink.optimizer.dag.OptimizerNode in project flink by apache.
The class BulkIterationPlanNode, method mergeBranchPlanMaps.
private void mergeBranchPlanMaps() {
for (OptimizerNode.UnclosedBranchDescriptor desc : template.getOpenBranches()) {
OptimizerNode brancher = desc.getBranchingNode();
if (branchPlan == null) {
branchPlan = new HashMap<OptimizerNode, PlanNode>(6);
}
if (!branchPlan.containsKey(brancher)) {
PlanNode selectedCandidate = null;
if (rootOfStepFunction.branchPlan != null) {
selectedCandidate = rootOfStepFunction.branchPlan.get(brancher);
}
if (selectedCandidate == null) {
throw new CompilerException("Candidates for a node with open branches are missing information about the selected candidate ");
}
this.branchPlan.put(brancher, selectedCandidate);
}
}
}
Use of org.apache.flink.optimizer.dag.OptimizerNode in project flink by apache.
The class PlanNode, method mergeBranchPlanMaps.
protected void mergeBranchPlanMaps(Map<OptimizerNode, PlanNode> branchPlan1, Map<OptimizerNode, PlanNode> branchPlan2) {
// merge the branchPlan maps according to the template's unclosed branches stack
if (this.template.hasUnclosedBranches()) {
if (this.branchPlan == null) {
this.branchPlan = new HashMap<OptimizerNode, PlanNode>(8);
}
for (UnclosedBranchDescriptor uc : this.template.getOpenBranches()) {
OptimizerNode brancher = uc.getBranchingNode();
PlanNode selectedCandidate = null;
if (branchPlan1 != null) {
// predecessor 1 has branching children, see if it got the branch we are looking
// for
selectedCandidate = branchPlan1.get(brancher);
}
if (selectedCandidate == null && branchPlan2 != null) {
// predecessor 2 has branching children, see if it got the branch we are looking
// for
selectedCandidate = branchPlan2.get(brancher);
}
// it may be that the branch candidate is only found once the broadcast variables are set
if (selectedCandidate != null) {
this.branchPlan.put(brancher, selectedCandidate);
}
}
}
}
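Both mergeBranchPlanMaps variants implement the same rule: for every branch of the template DAG that is still open at this node, record which concrete plan candidate the chosen inputs carry for that branch, preferring the first input and falling back to the second. The self-contained sketch below restates that rule on plain maps; all names are hypothetical and only the control flow mirrors the method above.

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public final class BranchPlanMergeSketch {
    // openBranches stands in for the template's unclosed branching nodes;
    // fromInput1/fromInput2 stand in for the predecessors' branchPlan maps
    public static <K, V> Map<K, V> merge(List<K> openBranches, Map<K, V> fromInput1, Map<K, V> fromInput2) {
        Map<K, V> merged = new HashMap<>();
        for (K brancher : openBranches) {
            V candidate = (fromInput1 != null) ? fromInput1.get(brancher) : null;
            if (candidate == null && fromInput2 != null) {
                candidate = fromInput2.get(brancher);
            }
            // like PlanNode.mergeBranchPlanMaps, tolerate a missing candidate instead of failing
            if (candidate != null) {
                merged.put(brancher, candidate);
            }
        }
        return merged;
    }
}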
Use of org.apache.flink.optimizer.dag.OptimizerNode in project flink by apache.
The class Optimizer, method compile.
/**
* Translates the given program to an OptimizedPlan. The optimized plan describes for each
* operator which strategy to use (such as hash join versus sort-merge join), what data exchange
* method to use (local pipe forward, shuffle, broadcast), what exchange mode to use (pipelined,
batch), where to cache intermediate results, etc.
*
* <p>The optimization happens in multiple phases:
*
* <ol>
* <li>Create optimizer dag implementation of the program.
<p>Create <tt>OptimizerNode</tt> representations of the PACTs, assign parallelism, and compute
* size estimates.
* <li>Compute interesting properties and auxiliary structures.
* <li>Enumerate plan alternatives. This cannot be done in the same step as the interesting
* property computation (as opposed to the Database approaches), because we support plans
* that are not trees.
* </ol>
*
* @param program The program to be translated.
* @param postPasser The function to be used for post passing the optimizer's plan and setting
* the data type specific serialization routines.
* @return The optimized plan.
* @throws CompilerException Thrown, if the plan is invalid or the optimizer encountered an
* inconsistent situation during the compilation process.
*/
private OptimizedPlan compile(Plan program, OptimizerPostPass postPasser) throws CompilerException {
if (program == null || postPasser == null) {
throw new NullPointerException();
}
if (LOG.isDebugEnabled()) {
LOG.debug("Beginning compilation of program '" + program.getJobName() + '\'');
}
final ExecutionMode defaultDataExchangeMode = program.getExecutionConfig().getExecutionMode();
final int defaultParallelism = program.getDefaultParallelism() > 0 ? program.getDefaultParallelism() : this.defaultParallelism;
// log the default settings
LOG.debug("Using a default parallelism of {}", defaultParallelism);
LOG.debug("Using default data exchange mode {}", defaultDataExchangeMode);
// the first step in the compilation is to create the optimizer plan representation
// this step does the following:
// 1) It creates an optimizer plan node for each operator
// 2) It connects them via channels
// 3) It looks for hints about local strategies and channel types and
// sets the types and strategies accordingly
// 4) It makes estimates about the data volume of the data sources and
// propagates those estimates through the plan
GraphCreatingVisitor graphCreator = new GraphCreatingVisitor(defaultParallelism, defaultDataExchangeMode);
program.accept(graphCreator);
// if we have a plan with multiple data sinks, add logical optimizer nodes that have two
// data sinks as children each, until we have only a single root node. This allows us to
// transparently deal with nodes that have multiple outputs
OptimizerNode rootNode;
if (graphCreator.getSinks().size() == 1) {
rootNode = graphCreator.getSinks().get(0);
} else if (graphCreator.getSinks().size() > 1) {
Iterator<DataSinkNode> iter = graphCreator.getSinks().iterator();
rootNode = iter.next();
while (iter.hasNext()) {
rootNode = new SinkJoiner(rootNode, iter.next());
}
} else {
throw new CompilerException("Bug: The optimizer plan representation has no sinks.");
}
// now that we have all nodes created and recorded which ones consume memory, tell the nodes
// their minimal guaranteed memory, for further cost estimations. We assume an equal
// distribution of memory among consumer tasks
rootNode.accept(new IdAndEstimatesVisitor(this.statistics));
// We need to enforce that union nodes always forward their output to their successor.
// Any partitioning must be either pushed before or done after the union, but not on the
// union's output.
UnionParallelismAndForwardEnforcer unionEnforcer = new UnionParallelismAndForwardEnforcer();
rootNode.accept(unionEnforcer);
// We are dealing with operator DAGs, rather than operator trees.
// That requires us to deviate at some points from the classical DB optimizer algorithms.
// This step builds auxiliary structures to help track branches and joins in the DAG
BranchesVisitor branchingVisitor = new BranchesVisitor();
rootNode.accept(branchingVisitor);
// Propagate the interesting properties top-down through the graph
InterestingPropertyVisitor propsVisitor = new InterestingPropertyVisitor(this.costEstimator);
rootNode.accept(propsVisitor);
// perform a sanity check: the root may not have any unclosed branches
if (rootNode.getOpenBranches() != null && rootNode.getOpenBranches().size() > 0) {
throw new CompilerException("Bug: Logic for branching plans (non-tree plans) has an error, and does not " + "track the re-joining of branches correctly.");
}
// the final step is now to generate the actual plan alternatives
List<PlanNode> bestPlan = rootNode.getAlternativePlans(this.costEstimator);
if (bestPlan.size() != 1) {
throw new CompilerException("Error in compiler: more than one best plan was created!");
}
// check if the best plan's root is a data sink (single sink plan)
// if so, directly take it. if it is a sink joiner node, get its contained sinks
PlanNode bestPlanRoot = bestPlan.get(0);
List<SinkPlanNode> bestPlanSinks = new ArrayList<SinkPlanNode>(4);
if (bestPlanRoot instanceof SinkPlanNode) {
bestPlanSinks.add((SinkPlanNode) bestPlanRoot);
} else if (bestPlanRoot instanceof SinkJoinerPlanNode) {
((SinkJoinerPlanNode) bestPlanRoot).getDataSinks(bestPlanSinks);
}
// finalize the plan
OptimizedPlan plan = new PlanFinalizer().createFinalPlan(bestPlanSinks, program.getJobName(), program);
plan.accept(new BinaryUnionReplacer());
plan.accept(new RangePartitionRewriter(plan));
// post pass the plan. this is the phase where the serialization and comparator code is set
postPasser.postPass(plan);
return plan;
}
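For context, the private compile(Plan, OptimizerPostPass) method above is normally reached through the Optimizer's public API. A hedged sketch follows, assuming the Optimizer(DataStatistics, CostEstimator, Configuration) constructor and a public compile(Plan) overload that resolves the post pass from the plan itself; both should be checked against the Flink version in use.

import org.apache.flink.api.common.Plan;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.optimizer.DataStatistics;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.costs.DefaultCostEstimator;
import org.apache.flink.optimizer.plan.OptimizedPlan;

public final class CompileSketch {
    public static OptimizedPlan optimize(Plan program) {
        Optimizer optimizer =
                new Optimizer(new DataStatistics(), new DefaultCostEstimator(), new Configuration());
        // assumed to delegate to the private compile(Plan, OptimizerPostPass) shown above
        return optimizer.compile(program);
    }
}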