use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.
the class PregelCompilerTest method testPregelWithCombiner.
/**
 * Compiles a small vertex-centric ("Pregel") iteration that uses a message combiner and
 * verifies the shape of the resulting optimized plan.
 *
 * <p>The assertions cover, in order: the data sink, the workset iteration node, the combiner
 * feeding the iteration's second input, the solution set delta, the computation co-group,
 * and the hash partitioning on the iteration's first input.
 *
 * @throws Exception if composing or compiling the program fails. The exception is propagated
 *     unchanged so that the test framework reports it with its original type, message and
 *     stack trace instead of a bare {@code fail(e.getMessage())}.
 */
@SuppressWarnings("serial")
@Test
public void testPregelWithCombiner() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    // compose test program
    {
        DataSet<Vertex<Long, Long>> initialVertices = env.fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)).map(new Tuple2ToVertexMap<Long, Long>());
        DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L)).map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {
            public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) {
                return new Edge<>(edge.f0, edge.f1, NullValue.getInstance());
            }
        });
        Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);
        DataSet<Vertex<Long, Long>> result = graph.runVertexCentricIteration(new CCCompute(), new CCCombiner(), 100).getVertices();
        result.output(new DiscardingOutputFormat<Vertex<Long, Long>>());
    }

    Plan p = env.createProgramPlan("Pregel Connected Components");
    OptimizedPlan op = compileNoStats(p);

    // check the sink
    SinkPlanNode sink = op.getDataSinks().iterator().next();
    assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
    assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());

    // check the iteration
    WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
    assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());

    // check the combiner
    SingleInputPlanNode combiner = (SingleInputPlanNode) iteration.getInput2().getSource();
    assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());

    // check the solution set delta
    PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
    assertTrue(ssDelta instanceof SingleInputPlanNode);
    SingleInputPlanNode ssFlatMap = (SingleInputPlanNode) ((SingleInputPlanNode) (ssDelta)).getInput().getSource();
    assertEquals(DEFAULT_PARALLELISM, ssFlatMap.getParallelism());
    assertEquals(ShipStrategyType.FORWARD, ssFlatMap.getInput().getShipStrategy());

    // check the computation coGroup
    DualInputPlanNode computationCoGroup = (DualInputPlanNode) (ssFlatMap.getInput().getSource());
    assertEquals(DEFAULT_PARALLELISM, computationCoGroup.getParallelism());
    assertEquals(ShipStrategyType.FORWARD, computationCoGroup.getInput1().getShipStrategy());
    assertEquals(ShipStrategyType.PARTITION_HASH, computationCoGroup.getInput2().getShipStrategy());
    assertTrue(computationCoGroup.getInput2().getTempMode().isCached());
    assertEquals(new FieldList(0), computationCoGroup.getInput2().getShipStrategyKeys());

    // check that the initial partitioning is pushed out of the loop
    assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy());
    assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys());
}
use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.
the class RangePartitionRewriter method rewriteRangePartitionChannel.
/**
 * Rewrites a range-partitioned channel into an explicit sub-plan that samples the source
 * data, derives range boundaries from the sample, tags every record with a range index,
 * partitions on that index with a custom partitioner, and finally strips the index again.
 *
 * <p>All newly created plan nodes are registered in {@code this.plan.getAllNodes()}, and
 * {@code channel} is re-wired so that its source becomes the last node of the sub-plan.
 *
 * @param channel the channel between the source and the target node that is being rewritten
 * @return the new channels that leave the original source node (one towards the
 *     per-partition sampler, one towards the range-index assigner)
 */
private List<Channel> rewriteRangePartitionChannel(Channel channel) {
    final List<Channel> newSourceOutputs = new ArrayList<>();
    final PlanNode producer = channel.getSource();
    final PlanNode consumer = channel.getTarget();
    final int producerParallelism = producer.getParallelism();
    final int consumerParallelism = consumer.getParallelism();
    final Costs zeroCosts = new Costs(0, 0, 0);
    final TypeComparatorFactory<?> shipComparator = Utils.getShipComparator(channel, this.plan.getOriginalPlan().getExecutionConfig());

    // Step 1: draw a fixed-size sample inside every partition of the producer.
    final int totalSampleSize = SAMPLES_PER_PARTITION * consumerParallelism;
    final SampleInPartition partitionSampler = new SampleInPartition(false, totalSampleSize, SEED);
    final TypeInformation<?> recordType = producer.getOptimizerNode().getOperator().getOperatorInfo().getOutputType();
    final TypeInformation<IntermediateSampleData> sampleType = TypeExtractor.getForClass(IntermediateSampleData.class);
    final UnaryOperatorInformation partitionSamplerInfo = new UnaryOperatorInformation(recordType, sampleType);
    final MapPartitionOperatorBase partitionSamplerOp = new MapPartitionOperatorBase(partitionSampler, partitionSamplerInfo, SIP_NAME);
    final MapPartitionNode partitionSamplerNode = new MapPartitionNode(partitionSamplerOp);
    final Channel toPartitionSampler = new Channel(producer, TempMode.NONE);
    toPartitionSampler.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
    final SingleInputPlanNode partitionSamplerPlan = new SingleInputPlanNode(partitionSamplerNode, SIP_NAME, toPartitionSampler, DriverStrategy.MAP_PARTITION);
    partitionSamplerNode.setParallelism(producerParallelism);
    partitionSamplerPlan.setParallelism(producerParallelism);
    partitionSamplerPlan.initProperties(new GlobalProperties(), new LocalProperties());
    partitionSamplerPlan.setCosts(zeroCosts);
    toPartitionSampler.setTarget(partitionSamplerPlan);
    this.plan.getAllNodes().add(partitionSamplerPlan);
    newSourceOutputs.add(toPartitionSampler);

    // Step 2: merge the per-partition samples in a single coordinator task (parallelism 1).
    final SampleInCoordinator coordinatorSampler = new SampleInCoordinator(false, totalSampleSize, SEED);
    final UnaryOperatorInformation coordinatorSamplerInfo = new UnaryOperatorInformation(sampleType, recordType);
    final GroupReduceOperatorBase coordinatorSamplerOp = new GroupReduceOperatorBase(coordinatorSampler, coordinatorSamplerInfo, SIC_NAME);
    final GroupReduceNode coordinatorSamplerNode = new GroupReduceNode(coordinatorSamplerOp);
    final Channel toCoordinatorSampler = new Channel(partitionSamplerPlan, TempMode.NONE);
    toCoordinatorSampler.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
    final SingleInputPlanNode coordinatorSamplerPlan = new SingleInputPlanNode(coordinatorSamplerNode, SIC_NAME, toCoordinatorSampler, DriverStrategy.ALL_GROUP_REDUCE);
    coordinatorSamplerNode.setParallelism(1);
    coordinatorSamplerPlan.setParallelism(1);
    coordinatorSamplerPlan.initProperties(new GlobalProperties(), new LocalProperties());
    coordinatorSamplerPlan.setCosts(zeroCosts);
    toCoordinatorSampler.setTarget(coordinatorSamplerPlan);
    partitionSamplerPlan.addOutgoingChannel(toCoordinatorSampler);
    this.plan.getAllNodes().add(coordinatorSamplerPlan);

    // Step 3: turn the merged sample into range boundaries (again with parallelism 1).
    final RangeBoundaryBuilder boundaryBuilder = new RangeBoundaryBuilder(shipComparator, consumerParallelism);
    final TypeInformation<CommonRangeBoundaries> boundariesType = TypeExtractor.getForClass(CommonRangeBoundaries.class);
    final UnaryOperatorInformation boundaryBuilderInfo = new UnaryOperatorInformation(recordType, boundariesType);
    final MapPartitionOperatorBase boundaryBuilderOp = new MapPartitionOperatorBase(boundaryBuilder, boundaryBuilderInfo, RB_NAME);
    final MapPartitionNode boundaryBuilderNode = new MapPartitionNode(boundaryBuilderOp);
    final Channel toBoundaryBuilder = new Channel(coordinatorSamplerPlan, TempMode.NONE);
    toBoundaryBuilder.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
    final SingleInputPlanNode boundaryBuilderPlan = new SingleInputPlanNode(boundaryBuilderNode, RB_NAME, toBoundaryBuilder, DriverStrategy.MAP_PARTITION);
    boundaryBuilderNode.setParallelism(1);
    boundaryBuilderPlan.setParallelism(1);
    boundaryBuilderPlan.initProperties(new GlobalProperties(), new LocalProperties());
    boundaryBuilderPlan.setCosts(zeroCosts);
    toBoundaryBuilder.setTarget(boundaryBuilderPlan);
    coordinatorSamplerPlan.addOutgoingChannel(toBoundaryBuilder);
    this.plan.getAllNodes().add(boundaryBuilderPlan);

    // Step 4: read the producer's records a second time, receive the range boundaries as a
    // broadcast input, and emit (range index, record) tuples.
    final AssignRangeIndex indexAssigner = new AssignRangeIndex(shipComparator);
    final TypeInformation<Tuple2> indexedRecordType = new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, recordType);
    final UnaryOperatorInformation indexAssignerInfo = new UnaryOperatorInformation(recordType, indexedRecordType);
    final MapPartitionOperatorBase indexAssignerOp = new MapPartitionOperatorBase(indexAssigner, indexAssignerInfo, ARI_NAME);
    final MapPartitionNode indexAssignerNode = new MapPartitionNode(indexAssignerOp);
    final Channel toIndexAssigner = new Channel(producer, TempMode.NONE);
    // This channel uses the BATCH exchange mode (not PIPELINED) to avoid a deadlock between
    // the producer and the index assigner.
    toIndexAssigner.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.BATCH);
    final SingleInputPlanNode indexAssignerPlan = new SingleInputPlanNode(indexAssignerNode, ARI_NAME, toIndexAssigner, DriverStrategy.MAP_PARTITION);
    indexAssignerNode.setParallelism(producerParallelism);
    indexAssignerPlan.setParallelism(producerParallelism);
    indexAssignerPlan.initProperties(new GlobalProperties(), new LocalProperties());
    indexAssignerPlan.setCosts(zeroCosts);
    toIndexAssigner.setTarget(indexAssignerPlan);
    this.plan.getAllNodes().add(indexAssignerPlan);
    newSourceOutputs.add(toIndexAssigner);

    final NamedChannel boundariesBroadcast = new NamedChannel("RangeBoundaries", boundaryBuilderPlan);
    boundariesBroadcast.setShipStrategy(ShipStrategyType.BROADCAST, DataExchangeMode.PIPELINED);
    boundariesBroadcast.setTarget(indexAssignerPlan);
    List<NamedChannel> broadcastInputs = new ArrayList<>(1);
    broadcastInputs.add(boundariesBroadcast);
    indexAssignerPlan.setBroadcastInputs(broadcastInputs);

    // Step 5: partition on the range index (tuple field 0) and drop the index afterwards.
    final Channel toIndexRemover = new Channel(indexAssignerPlan, TempMode.NONE);
    final FieldList indexField = new FieldList(0);
    toIndexRemover.setShipStrategy(ShipStrategyType.PARTITION_CUSTOM, indexField, idPartitioner, DataExchangeMode.PIPELINED);
    indexAssignerPlan.addOutgoingChannel(toIndexRemover);
    final RemoveRangeIndex indexRemover = new RemoveRangeIndex();
    final UnaryOperatorInformation indexRemoverInfo = new UnaryOperatorInformation(indexedRecordType, recordType);
    final MapOperatorBase indexRemoverOp = new MapOperatorBase(indexRemover, indexRemoverInfo, PR_NAME);
    final MapNode indexRemoverNode = new MapNode(indexRemoverOp);
    final SingleInputPlanNode indexRemoverPlan = new SingleInputPlanNode(indexRemoverNode, PR_NAME, toIndexRemover, DriverStrategy.MAP);
    toIndexRemover.setTarget(indexRemoverPlan);
    indexRemoverNode.setParallelism(consumerParallelism);
    indexRemoverPlan.setParallelism(consumerParallelism);
    GlobalProperties rangePartitioned = new GlobalProperties();
    rangePartitioned.setRangePartitioned(new Ordering(0, null, Order.ASCENDING));
    indexRemoverPlan.initProperties(rangePartitioned, new LocalProperties());
    indexRemoverPlan.setCosts(zeroCosts);
    this.plan.getAllNodes().add(indexRemoverPlan);

    // Step 6: hook the original channel up behind the index remover; the data is already
    // partitioned at this point, so a plain forward connection is used.
    channel.setSource(indexRemoverPlan);
    channel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
    indexRemoverPlan.addOutgoingChannel(channel);

    return newSourceOutputs;
}
use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.
the class PlanJSONDumpGenerator method visit.
private boolean visit(DumpableNode<?> node, PrintWriter writer, boolean first) {
// check for duplicate traversal
if (this.nodeIds.containsKey(node)) {
return false;
}
// assign an id first
this.nodeIds.put(node, this.nodeCnt++);
// then recurse
for (DumpableNode<?> child : node.getPredecessors()) {
//to set first to false!
if (visit(child, writer, first)) {
first = false;
}
}
// check if this node should be skipped from the dump
final OptimizerNode n = node.getOptimizerNode();
// start a new node and output node id
if (!first) {
writer.print(",\n");
}
// open the node
writer.print("\t{\n");
// recurse, it is is an iteration node
if (node instanceof BulkIterationNode || node instanceof BulkIterationPlanNode) {
DumpableNode<?> innerChild = node instanceof BulkIterationNode ? ((BulkIterationNode) node).getNextPartialSolution() : ((BulkIterationPlanNode) node).getRootOfStepFunction();
DumpableNode<?> begin = node instanceof BulkIterationNode ? ((BulkIterationNode) node).getPartialSolution() : ((BulkIterationPlanNode) node).getPartialSolutionPlanNode();
writer.print("\t\t\"step_function\": [\n");
visit(innerChild, writer, true);
writer.print("\n\t\t],\n");
writer.print("\t\t\"partial_solution\": " + this.nodeIds.get(begin) + ",\n");
writer.print("\t\t\"next_partial_solution\": " + this.nodeIds.get(innerChild) + ",\n");
} else if (node instanceof WorksetIterationNode || node instanceof WorksetIterationPlanNode) {
DumpableNode<?> worksetRoot = node instanceof WorksetIterationNode ? ((WorksetIterationNode) node).getNextWorkset() : ((WorksetIterationPlanNode) node).getNextWorkSetPlanNode();
DumpableNode<?> solutionDelta = node instanceof WorksetIterationNode ? ((WorksetIterationNode) node).getSolutionSetDelta() : ((WorksetIterationPlanNode) node).getSolutionSetDeltaPlanNode();
DumpableNode<?> workset = node instanceof WorksetIterationNode ? ((WorksetIterationNode) node).getWorksetNode() : ((WorksetIterationPlanNode) node).getWorksetPlanNode();
DumpableNode<?> solutionSet = node instanceof WorksetIterationNode ? ((WorksetIterationNode) node).getSolutionSetNode() : ((WorksetIterationPlanNode) node).getSolutionSetPlanNode();
writer.print("\t\t\"step_function\": [\n");
visit(worksetRoot, writer, true);
visit(solutionDelta, writer, false);
writer.print("\n\t\t],\n");
writer.print("\t\t\"workset\": " + this.nodeIds.get(workset) + ",\n");
writer.print("\t\t\"solution_set\": " + this.nodeIds.get(solutionSet) + ",\n");
writer.print("\t\t\"next_workset\": " + this.nodeIds.get(worksetRoot) + ",\n");
writer.print("\t\t\"solution_delta\": " + this.nodeIds.get(solutionDelta) + ",\n");
}
// print the id
writer.print("\t\t\"id\": " + this.nodeIds.get(node));
final String type;
String contents;
if (n instanceof DataSinkNode) {
type = "sink";
contents = n.getOperator().toString();
} else if (n instanceof DataSourceNode) {
type = "source";
contents = n.getOperator().toString();
} else if (n instanceof BulkIterationNode) {
type = "bulk_iteration";
contents = n.getOperator().getName();
} else if (n instanceof WorksetIterationNode) {
type = "workset_iteration";
contents = n.getOperator().getName();
} else if (n instanceof BinaryUnionNode) {
type = "pact";
contents = "";
} else {
type = "pact";
contents = n.getOperator().getName();
}
contents = StringUtils.showControlCharacters(contents);
if (encodeForHTML) {
contents = StringEscapeUtils.escapeHtml4(contents);
contents = contents.replace("\\", "\");
}
String name = n.getOperatorName();
if (name.equals("Reduce") && (node instanceof SingleInputPlanNode) && ((SingleInputPlanNode) node).getDriverStrategy() == DriverStrategy.SORTED_GROUP_COMBINE) {
name = "Combine";
}
// output the type identifier
writer.print(",\n\t\t\"type\": \"" + type + "\"");
// output node name
writer.print(",\n\t\t\"pact\": \"" + name + "\"");
// output node contents
writer.print(",\n\t\t\"contents\": \"" + contents + "\"");
// parallelism
writer.print(",\n\t\t\"parallelism\": \"" + (n.getParallelism() >= 1 ? n.getParallelism() : "default") + "\"");
// output node predecessors
Iterator<? extends DumpableConnection<?>> inConns = node.getDumpableInputs().iterator();
String child1name = "", child2name = "";
if (inConns != null && inConns.hasNext()) {
// start predecessor list
writer.print(",\n\t\t\"predecessors\": [");
int inputNum = 0;
while (inConns.hasNext()) {
final DumpableConnection<?> inConn = inConns.next();
final DumpableNode<?> source = inConn.getSource();
writer.print(inputNum == 0 ? "\n" : ",\n");
if (inputNum == 0) {
child1name += child1name.length() > 0 ? ", " : "";
child1name += source.getOptimizerNode().getOperator().getName() + " (id: " + this.nodeIds.get(source) + ")";
} else if (inputNum == 1) {
child2name += child2name.length() > 0 ? ", " : "";
child2name += source.getOptimizerNode().getOperator().getName() + " (id: " + this.nodeIds.get(source) + ")";
}
// output predecessor id
writer.print("\t\t\t{\"id\": " + this.nodeIds.get(source));
// output connection side
if (inConns.hasNext() || inputNum > 0) {
writer.print(", \"side\": \"" + (inputNum == 0 ? "first" : "second") + "\"");
}
// output shipping strategy and channel type
final Channel channel = (inConn instanceof Channel) ? (Channel) inConn : null;
final ShipStrategyType shipType = channel != null ? channel.getShipStrategy() : inConn.getShipStrategy();
String shipStrategy = null;
if (shipType != null) {
switch(shipType) {
case NONE:
// nothing
break;
case FORWARD:
shipStrategy = "Forward";
break;
case BROADCAST:
shipStrategy = "Broadcast";
break;
case PARTITION_HASH:
shipStrategy = "Hash Partition";
break;
case PARTITION_RANGE:
shipStrategy = "Range Partition";
break;
case PARTITION_RANDOM:
shipStrategy = "Redistribute";
break;
case PARTITION_FORCED_REBALANCE:
shipStrategy = "Rebalance";
break;
case PARTITION_CUSTOM:
shipStrategy = "Custom Partition";
break;
default:
throw new CompilerException("Unknown ship strategy '" + inConn.getShipStrategy().name() + "' in JSON generator.");
}
}
if (channel != null && channel.getShipStrategyKeys() != null && channel.getShipStrategyKeys().size() > 0) {
shipStrategy += " on " + (channel.getShipStrategySortOrder() == null ? channel.getShipStrategyKeys().toString() : Utils.createOrdering(channel.getShipStrategyKeys(), channel.getShipStrategySortOrder()).toString());
}
if (shipStrategy != null) {
writer.print(", \"ship_strategy\": \"" + shipStrategy + "\"");
}
if (channel != null) {
String localStrategy = null;
switch(channel.getLocalStrategy()) {
case NONE:
break;
case SORT:
localStrategy = "Sort";
break;
case COMBININGSORT:
localStrategy = "Sort (combining)";
break;
default:
throw new CompilerException("Unknown local strategy " + channel.getLocalStrategy().name());
}
if (channel != null && channel.getLocalStrategyKeys() != null && channel.getLocalStrategyKeys().size() > 0) {
localStrategy += " on " + (channel.getLocalStrategySortOrder() == null ? channel.getLocalStrategyKeys().toString() : Utils.createOrdering(channel.getLocalStrategyKeys(), channel.getLocalStrategySortOrder()).toString());
}
if (localStrategy != null) {
writer.print(", \"local_strategy\": \"" + localStrategy + "\"");
}
if (channel != null && channel.getTempMode() != TempMode.NONE) {
String tempMode = channel.getTempMode().toString();
writer.print(", \"temp_mode\": \"" + tempMode + "\"");
}
if (channel != null) {
String exchangeMode = channel.getDataExchangeMode().toString();
writer.print(", \"exchange_mode\": \"" + exchangeMode + "\"");
}
}
writer.print('}');
inputNum++;
}
// finish predecessors
writer.print("\n\t\t]");
}
//---------------------------------------------------------------------------------------
// the part below here is relevant only to plan nodes with concrete strategies, etc
//---------------------------------------------------------------------------------------
final PlanNode p = node.getPlanNode();
if (p == null) {
// finish node
writer.print("\n\t}");
return true;
}
// local strategy
String locString = null;
if (p.getDriverStrategy() != null) {
switch(p.getDriverStrategy()) {
case NONE:
case BINARY_NO_OP:
break;
case UNARY_NO_OP:
locString = "No-Op";
break;
case MAP:
locString = "Map";
break;
case FLAT_MAP:
locString = "FlatMap";
break;
case MAP_PARTITION:
locString = "Map Partition";
break;
case ALL_REDUCE:
locString = "Reduce All";
break;
case ALL_GROUP_REDUCE:
case ALL_GROUP_REDUCE_COMBINE:
locString = "Group Reduce All";
break;
case SORTED_REDUCE:
locString = "Sorted Reduce";
break;
case SORTED_PARTIAL_REDUCE:
locString = "Sorted Combine/Reduce";
break;
case SORTED_GROUP_REDUCE:
locString = "Sorted Group Reduce";
break;
case SORTED_GROUP_COMBINE:
locString = "Sorted Combine";
break;
case HYBRIDHASH_BUILD_FIRST:
locString = "Hybrid Hash (build: " + child1name + ")";
break;
case HYBRIDHASH_BUILD_SECOND:
locString = "Hybrid Hash (build: " + child2name + ")";
break;
case HYBRIDHASH_BUILD_FIRST_CACHED:
locString = "Hybrid Hash (CACHED) (build: " + child1name + ")";
break;
case HYBRIDHASH_BUILD_SECOND_CACHED:
locString = "Hybrid Hash (CACHED) (build: " + child2name + ")";
break;
case NESTEDLOOP_BLOCKED_OUTER_FIRST:
locString = "Nested Loops (Blocked Outer: " + child1name + ")";
break;
case NESTEDLOOP_BLOCKED_OUTER_SECOND:
locString = "Nested Loops (Blocked Outer: " + child2name + ")";
break;
case NESTEDLOOP_STREAMED_OUTER_FIRST:
locString = "Nested Loops (Streamed Outer: " + child1name + ")";
break;
case NESTEDLOOP_STREAMED_OUTER_SECOND:
locString = "Nested Loops (Streamed Outer: " + child2name + ")";
break;
case INNER_MERGE:
locString = "Merge";
break;
case CO_GROUP:
locString = "Co-Group";
break;
default:
locString = p.getDriverStrategy().name();
break;
}
if (locString != null) {
writer.print(",\n\t\t\"driver_strategy\": \"");
writer.print(locString);
writer.print("\"");
}
}
{
// output node global properties
final GlobalProperties gp = p.getGlobalProperties();
writer.print(",\n\t\t\"global_properties\": [\n");
addProperty(writer, "Partitioning", gp.getPartitioning().name(), true);
if (gp.getPartitioningFields() != null) {
addProperty(writer, "Partitioned on", gp.getPartitioningFields().toString(), false);
}
if (gp.getPartitioningOrdering() != null) {
addProperty(writer, "Partitioning Order", gp.getPartitioningOrdering().toString(), false);
} else {
addProperty(writer, "Partitioning Order", "(none)", false);
}
if (n.getUniqueFields() == null || n.getUniqueFields().size() == 0) {
addProperty(writer, "Uniqueness", "not unique", false);
} else {
addProperty(writer, "Uniqueness", n.getUniqueFields().toString(), false);
}
writer.print("\n\t\t]");
}
{
// output node local properties
LocalProperties lp = p.getLocalProperties();
writer.print(",\n\t\t\"local_properties\": [\n");
if (lp.getOrdering() != null) {
addProperty(writer, "Order", lp.getOrdering().toString(), true);
} else {
addProperty(writer, "Order", "(none)", true);
}
if (lp.getGroupedFields() != null && lp.getGroupedFields().size() > 0) {
addProperty(writer, "Grouped on", lp.getGroupedFields().toString(), false);
} else {
addProperty(writer, "Grouping", "not grouped", false);
}
if (n.getUniqueFields() == null || n.getUniqueFields().size() == 0) {
addProperty(writer, "Uniqueness", "not unique", false);
} else {
addProperty(writer, "Uniqueness", n.getUniqueFields().toString(), false);
}
writer.print("\n\t\t]");
}
// output node size estimates
writer.print(",\n\t\t\"estimates\": [\n");
addProperty(writer, "Est. Output Size", n.getEstimatedOutputSize() == -1 ? "(unknown)" : formatNumber(n.getEstimatedOutputSize(), "B"), true);
addProperty(writer, "Est. Cardinality", n.getEstimatedNumRecords() == -1 ? "(unknown)" : formatNumber(n.getEstimatedNumRecords()), false);
writer.print("\t\t]");
// output node cost
if (p.getNodeCosts() != null) {
writer.print(",\n\t\t\"costs\": [\n");
addProperty(writer, "Network", p.getNodeCosts().getNetworkCost() == -1 ? "(unknown)" : formatNumber(p.getNodeCosts().getNetworkCost(), "B"), true);
addProperty(writer, "Disk I/O", p.getNodeCosts().getDiskCost() == -1 ? "(unknown)" : formatNumber(p.getNodeCosts().getDiskCost(), "B"), false);
addProperty(writer, "CPU", p.getNodeCosts().getCpuCost() == -1 ? "(unknown)" : formatNumber(p.getNodeCosts().getCpuCost(), ""), false);
addProperty(writer, "Cumulative Network", p.getCumulativeCosts().getNetworkCost() == -1 ? "(unknown)" : formatNumber(p.getCumulativeCosts().getNetworkCost(), "B"), false);
addProperty(writer, "Cumulative Disk I/O", p.getCumulativeCosts().getDiskCost() == -1 ? "(unknown)" : formatNumber(p.getCumulativeCosts().getDiskCost(), "B"), false);
addProperty(writer, "Cumulative CPU", p.getCumulativeCosts().getCpuCost() == -1 ? "(unknown)" : formatNumber(p.getCumulativeCosts().getCpuCost(), ""), false);
writer.print("\n\t\t]");
}
// output the node compiler hints
if (n.getOperator().getCompilerHints() != null) {
CompilerHints hints = n.getOperator().getCompilerHints();
CompilerHints defaults = new CompilerHints();
String size = hints.getOutputSize() == defaults.getOutputSize() ? "(none)" : String.valueOf(hints.getOutputSize());
String card = hints.getOutputCardinality() == defaults.getOutputCardinality() ? "(none)" : String.valueOf(hints.getOutputCardinality());
String width = hints.getAvgOutputRecordSize() == defaults.getAvgOutputRecordSize() ? "(none)" : String.valueOf(hints.getAvgOutputRecordSize());
String filter = hints.getFilterFactor() == defaults.getFilterFactor() ? "(none)" : String.valueOf(hints.getFilterFactor());
writer.print(",\n\t\t\"compiler_hints\": [\n");
addProperty(writer, "Output Size (bytes)", size, true);
addProperty(writer, "Output Cardinality", card, false);
addProperty(writer, "Avg. Output Record Size (bytes)", width, false);
addProperty(writer, "Filter Factor", filter, false);
writer.print("\t\t]");
}
// finish node
writer.print("\n\t}");
return true;
}
use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.
the class JobGraphGenerator method createSingleInputVertex.
// ------------------------------------------------------------------------
// Methods for creating individual vertices
// ------------------------------------------------------------------------
/**
 * Sets up the task for a single-input plan node.
 *
 * <p>If the node can be chained to its predecessor, no vertex is created; instead a
 * {@code TaskInChain} is registered in {@code chainedTasks}. Otherwise a new {@link JobVertex}
 * is created. In both cases the task configuration receives the user code, the driver
 * strategy, the driver comparators and the driver resources.
 *
 * @param node the plan node to translate
 * @return the newly created vertex, or {@code null} if the node was registered as a chained task
 * @throws CompilerException declared by the signature; not thrown directly in this method
 */
private JobVertex createSingleInputVertex(SingleInputPlanNode node) throws CompilerException {
    final String taskName = node.getNodeName();
    final DriverStrategy ds = node.getDriverStrategy();

    final Channel input = node.getInput();
    final PlanNode predecessor = input.getSource();

    // check, whether chaining is possible
    boolean canChain = ds.getPushChainDriverClass() != null
            // first op after union is stand-alone, because union is merged
            && !(predecessor instanceof NAryUnionPlanNode)
            // partial solution merges anyways
            && !(predecessor instanceof BulkPartialSolutionPlanNode)
            // workset merges anyways
            && !(predecessor instanceof WorksetPlanNode)
            // cannot chain with iteration heads currently
            && !(predecessor instanceof IterationPlanNode)
            && input.getShipStrategy() == ShipStrategyType.FORWARD
            && input.getLocalStrategy() == LocalStrategy.NONE
            && predecessor.getOutgoingChannels().size() == 1
            && node.getParallelism() == predecessor.getParallelism()
            && node.getBroadcastInputs().isEmpty();

    // inside a workset iteration: a node that has outgoing channels must not be chained to
    // the node producing the solution set delta or the next workset
    if (this.currentIteration instanceof WorksetIterationPlanNode && !node.getOutgoingChannels().isEmpty()) {
        final WorksetIterationPlanNode worksetIteration = (WorksetIterationPlanNode) this.currentIteration;
        if (worksetIteration.getSolutionSetDeltaPlanNode() == predecessor || worksetIteration.getNextWorkSetPlanNode() == predecessor) {
            canChain = false;
        }
    }

    // cannot chain the nodes that produce the next workset in a bulk iteration if a termination criterion follows
    if (this.currentIteration instanceof BulkIterationPlanNode) {
        final BulkIterationPlanNode bulkIteration = (BulkIterationPlanNode) this.currentIteration;
        if ((node == bulkIteration.getRootOfTerminationCriterion() && bulkIteration.getRootOfStepFunction() == predecessor)
                || (node.getOutgoingChannels().size() > 0
                        && (bulkIteration.getRootOfStepFunction() == predecessor || bulkIteration.getRootOfTerminationCriterion() == predecessor))) {
            canChain = false;
        }
    }

    final JobVertex vertex;
    final TaskConfig config;

    if (!canChain) {
        // stand-alone task: create task vertex
        vertex = new JobVertex(taskName);
        vertex.setResources(node.getMinResources(), node.getPreferredResources());
        final boolean onDynamicPathOfIteration = this.currentIteration != null && node.isOnDynamicPath();
        vertex.setInvokableClass(onDynamicPathOfIteration ? IterationIntermediateTask.class : BatchTask.class);
        config = new TaskConfig(vertex.getConfiguration());
        config.setDriver(ds.getDriverClass());
    } else {
        // chained task: no vertex of its own, only a configuration registered for later use
        vertex = null;
        config = new TaskConfig(new Configuration());
        this.chainedTasks.put(node, new TaskInChain(node, ds.getPushChainDriverClass(), config, taskName));
    }

    // set user code
    config.setStubWrapper(node.getProgramOperator().getUserCodeWrapper());
    config.setStubParameters(node.getProgramOperator().getParameters());

    // set the driver strategy and one comparator per comparator the strategy requires
    config.setDriverStrategy(ds);
    for (int idx = 0; idx < ds.getNumRequiredComparators(); idx++) {
        config.setDriverComparator(node.getComparator(idx), idx);
    }

    // assign memory, file-handles, etc.
    assignDriverResources(node, config);
    return vertex;
}
use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.
the class GenericFlatTypePostPass method traverse.
@SuppressWarnings("unchecked")
protected void traverse(PlanNode node, T parentSchema, boolean createUtilities) {
// distinguish the node types
if (node instanceof SinkPlanNode) {
SinkPlanNode sn = (SinkPlanNode) node;
Channel inchannel = sn.getInput();
T schema = createEmptySchema();
sn.postPassHelper = schema;
// add the sinks information to the schema
try {
getSinkSchema(sn, schema);
} catch (ConflictingFieldTypeInfoException e) {
throw new CompilerPostPassException("Conflicting type infomation for the data sink '" + sn.getSinkNode().getOperator().getName() + "'.");
}
// descend to the input channel
try {
propagateToChannel(schema, inchannel, createUtilities);
} catch (MissingFieldTypeInfoException ex) {
throw new CompilerPostPassException("Missing type infomation for the channel that inputs to the data sink '" + sn.getSinkNode().getOperator().getName() + "'.");
}
} else if (node instanceof SourcePlanNode) {
if (createUtilities) {
((SourcePlanNode) node).setSerializer(createSerializer(parentSchema, node));
// nothing else to be done here. the source has no input and no strategy itself
}
} else if (node instanceof BulkIterationPlanNode) {
BulkIterationPlanNode iterationNode = (BulkIterationPlanNode) node;
// get the nodes current schema
T schema;
if (iterationNode.postPassHelper == null) {
schema = createEmptySchema();
iterationNode.postPassHelper = schema;
} else {
schema = (T) iterationNode.postPassHelper;
}
schema.increaseNumConnectionsThatContributed();
// add the parent schema to the schema
if (propagateParentSchemaDown) {
addSchemaToSchema(parentSchema, schema, iterationNode.getProgramOperator().getName());
}
// check whether all outgoing channels have not yet contributed. come back later if not.
if (schema.getNumConnectionsThatContributed() < iterationNode.getOutgoingChannels().size()) {
return;
}
if (iterationNode.getRootOfStepFunction() instanceof NAryUnionPlanNode) {
throw new CompilerException("Optimizer cannot compile an iteration step function where next partial solution is created by a Union node.");
}
// traverse the termination criterion for the first time. create schema only, no utilities. Needed in case of intermediate termination criterion
if (iterationNode.getRootOfTerminationCriterion() != null) {
SingleInputPlanNode addMapper = (SingleInputPlanNode) iterationNode.getRootOfTerminationCriterion();
traverse(addMapper.getInput().getSource(), createEmptySchema(), false);
try {
addMapper.getInput().setSerializer(createSerializer(createEmptySchema()));
} catch (MissingFieldTypeInfoException e) {
throw new RuntimeException(e);
}
}
// traverse the step function for the first time. create schema only, no utilities
traverse(iterationNode.getRootOfStepFunction(), schema, false);
T pss = (T) iterationNode.getPartialSolutionPlanNode().postPassHelper;
if (pss == null) {
throw new CompilerException("Error in Optimizer Post Pass: Partial solution schema is null after first traversal of the step function.");
}
// traverse the step function for the second time, taking the schema of the partial solution
traverse(iterationNode.getRootOfStepFunction(), pss, createUtilities);
if (iterationNode.getRootOfTerminationCriterion() != null) {
SingleInputPlanNode addMapper = (SingleInputPlanNode) iterationNode.getRootOfTerminationCriterion();
traverse(addMapper.getInput().getSource(), createEmptySchema(), createUtilities);
try {
addMapper.getInput().setSerializer(createSerializer(createEmptySchema()));
} catch (MissingFieldTypeInfoException e) {
throw new RuntimeException(e);
}
}
// take the schema from the partial solution node and add its fields to the iteration result schema.
// input and output schema need to be identical, so this is essentially a sanity check
addSchemaToSchema(pss, schema, iterationNode.getProgramOperator().getName());
// set the serializer
if (createUtilities) {
iterationNode.setSerializerForIterationChannel(createSerializer(pss, iterationNode.getPartialSolutionPlanNode()));
}
// done, we can now propagate our info down
try {
propagateToChannel(schema, iterationNode.getInput(), createUtilities);
} catch (MissingFieldTypeInfoException e) {
throw new CompilerPostPassException("Could not set up runtime strategy for input channel to node '" + iterationNode.getProgramOperator().getName() + "'. Missing type information for key field " + e.getFieldNumber());
}
} else if (node instanceof WorksetIterationPlanNode) {
WorksetIterationPlanNode iterationNode = (WorksetIterationPlanNode) node;
// get the node's current schema
T schema;
if (iterationNode.postPassHelper == null) {
schema = createEmptySchema();
iterationNode.postPassHelper = schema;
} else {
schema = (T) iterationNode.postPassHelper;
}
schema.increaseNumConnectionsThatContributed();
// add the parent schema to the schema (which refers to the solution set schema)
if (propagateParentSchemaDown) {
addSchemaToSchema(parentSchema, schema, iterationNode.getProgramOperator().getName());
}
// check whether all outgoing channels have contributed yet. if some have not, return and come back later.
if (schema.getNumConnectionsThatContributed() < iterationNode.getOutgoingChannels().size()) {
return;
}
if (iterationNode.getNextWorkSetPlanNode() instanceof NAryUnionPlanNode) {
throw new CompilerException("Optimizer cannot compile a workset iteration step function where the next workset is produced by a Union node.");
}
if (iterationNode.getSolutionSetDeltaPlanNode() instanceof NAryUnionPlanNode) {
throw new CompilerException("Optimizer cannot compile a workset iteration step function where the solution set delta is produced by a Union node.");
}
// traverse the step function
// pass an empty schema to the next workset and the parent schema to the solution set delta
// these first traversals are schema only
traverse(iterationNode.getNextWorkSetPlanNode(), createEmptySchema(), false);
traverse(iterationNode.getSolutionSetDeltaPlanNode(), schema, false);
T wss = (T) iterationNode.getWorksetPlanNode().postPassHelper;
T sss = (T) iterationNode.getSolutionSetPlanNode().postPassHelper;
if (wss == null) {
throw new CompilerException("Error in Optimizer Post Pass: Workset schema is null after first traversal of the step function.");
}
if (sss == null) {
throw new CompilerException("Error in Optimizer Post Pass: Solution set schema is null after first traversal of the step function.");
}
// make the second pass and instantiate the utilities
traverse(iterationNode.getNextWorkSetPlanNode(), wss, createUtilities);
traverse(iterationNode.getSolutionSetDeltaPlanNode(), sss, createUtilities);
// the solution set input and the result must have the same schema, this acts as a sanity check.
try {
for (Map.Entry<Integer, X> entry : sss) {
Integer pos = entry.getKey();
schema.addType(pos, entry.getValue());
}
} catch (ConflictingFieldTypeInfoException e) {
throw new CompilerPostPassException("Conflicting type information for field " + e.getFieldNumber() + " in node '" + iterationNode.getProgramOperator().getName() + "'. Contradicting types between the " + "result of the iteration and the solution set schema: " + e.getPreviousType() + " and " + e.getNewType() + ". Most probable cause: Invalid constant field annotations.");
}
// set the serializers and comparators
if (createUtilities) {
WorksetIterationNode optNode = iterationNode.getIterationNode();
iterationNode.setWorksetSerializer(createSerializer(wss, iterationNode.getWorksetPlanNode()));
iterationNode.setSolutionSetSerializer(createSerializer(sss, iterationNode.getSolutionSetPlanNode()));
try {
iterationNode.setSolutionSetComparator(createComparator(optNode.getSolutionSetKeyFields(), null, sss));
} catch (MissingFieldTypeInfoException ex) {
throw new CompilerPostPassException("Could not set up the solution set for workset iteration '" + optNode.getOperator().getName() + "'. Missing type information for key field " + ex.getFieldNumber() + '.');
}
}
// done, we can now propagate our info down
try {
propagateToChannel(schema, iterationNode.getInitialSolutionSetInput(), createUtilities);
propagateToChannel(wss, iterationNode.getInitialWorksetInput(), createUtilities);
} catch (MissingFieldTypeInfoException ex) {
throw new CompilerPostPassException("Could not set up runtime strategy for input channel to node '" + iterationNode.getProgramOperator().getName() + "'. Missing type information for key field " + ex.getFieldNumber());
}
} else if (node instanceof SingleInputPlanNode) {
SingleInputPlanNode sn = (SingleInputPlanNode) node;
// get the node's current schema
T schema;
if (sn.postPassHelper == null) {
schema = createEmptySchema();
sn.postPassHelper = schema;
} else {
schema = (T) sn.postPassHelper;
}
schema.increaseNumConnectionsThatContributed();
SingleInputNode optNode = sn.getSingleInputNode();
// add the parent schema to the schema
if (propagateParentSchemaDown) {
addSchemaToSchema(parentSchema, schema, optNode, 0);
}
// check whether all outgoing channels have contributed yet. if some have not, return and come back later.
if (schema.getNumConnectionsThatContributed() < sn.getOutgoingChannels().size()) {
return;
}
// add the node's local information
try {
getSingleInputNodeSchema(sn, schema);
} catch (ConflictingFieldTypeInfoException e) {
throw new CompilerPostPassException(getConflictingTypeErrorMessage(e, optNode.getOperator().getName()));
}
if (createUtilities) {
// parameterize the node's driver strategy
for (int i = 0; i < sn.getDriverStrategy().getNumRequiredComparators(); i++) {
try {
sn.setComparator(createComparator(sn.getKeys(i), sn.getSortOrders(i), schema), i);
} catch (MissingFieldTypeInfoException e) {
throw new CompilerPostPassException("Could not set up runtime strategy for node '" + optNode.getOperator().getName() + "'. Missing type information for key field " + e.getFieldNumber());
}
}
}
// done, we can now propagate our info down
try {
propagateToChannel(schema, sn.getInput(), createUtilities);
} catch (MissingFieldTypeInfoException e) {
throw new CompilerPostPassException("Could not set up runtime strategy for input channel to node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
}
// don't forget the broadcast inputs
for (Channel c : sn.getBroadcastInputs()) {
try {
propagateToChannel(createEmptySchema(), c, createUtilities);
} catch (MissingFieldTypeInfoException e) {
throw new CompilerPostPassException("Could not set up runtime strategy for broadcast channel in node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
}
}
} else if (node instanceof DualInputPlanNode) {
DualInputPlanNode dn = (DualInputPlanNode) node;
// get the node's current schemas (one per input)
T schema1;
T schema2;
if (dn.postPassHelper1 == null) {
schema1 = createEmptySchema();
schema2 = createEmptySchema();
dn.postPassHelper1 = schema1;
dn.postPassHelper2 = schema2;
} else {
schema1 = (T) dn.postPassHelper1;
schema2 = (T) dn.postPassHelper2;
}
schema1.increaseNumConnectionsThatContributed();
schema2.increaseNumConnectionsThatContributed();
TwoInputNode optNode = dn.getTwoInputNode();
// add the parent schema to the schema
if (propagateParentSchemaDown) {
addSchemaToSchema(parentSchema, schema1, optNode, 0);
addSchemaToSchema(parentSchema, schema2, optNode, 1);
}
// check whether all outgoing channels have contributed yet. if some have not, return and come back later.
if (schema1.getNumConnectionsThatContributed() < dn.getOutgoingChannels().size()) {
return;
}
// add the node's local information
try {
getDualInputNodeSchema(dn, schema1, schema2);
} catch (ConflictingFieldTypeInfoException e) {
throw new CompilerPostPassException(getConflictingTypeErrorMessage(e, optNode.getOperator().getName()));
}
// parameterize the node's driver strategy
if (createUtilities) {
if (dn.getDriverStrategy().getNumRequiredComparators() > 0) {
// set the individual comparators
try {
dn.setComparator1(createComparator(dn.getKeysForInput1(), dn.getSortOrders(), schema1));
dn.setComparator2(createComparator(dn.getKeysForInput2(), dn.getSortOrders(), schema2));
} catch (MissingFieldTypeInfoException e) {
throw new CompilerPostPassException("Could not set up runtime strategy for node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
}
// set the pair comparator
try {
dn.setPairComparator(createPairComparator(dn.getKeysForInput1(), dn.getKeysForInput2(), dn.getSortOrders(), schema1, schema2));
} catch (MissingFieldTypeInfoException e) {
throw new CompilerPostPassException("Could not set up runtime strategy for node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
}
}
}
// done, we can now propagate our info down
try {
propagateToChannel(schema1, dn.getInput1(), createUtilities);
} catch (MissingFieldTypeInfoException e) {
throw new CompilerPostPassException("Could not set up runtime strategy for the first input channel to node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
}
try {
propagateToChannel(schema2, dn.getInput2(), createUtilities);
} catch (MissingFieldTypeInfoException e) {
throw new CompilerPostPassException("Could not set up runtime strategy for the second input channel to node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
}
// don't forget the broadcast inputs
for (Channel c : dn.getBroadcastInputs()) {
try {
propagateToChannel(createEmptySchema(), c, createUtilities);
} catch (MissingFieldTypeInfoException e) {
throw new CompilerPostPassException("Could not set up runtime strategy for broadcast channel in node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
}
}
} else if (node instanceof NAryUnionPlanNode) {
// only propagate the info down
try {
for (Channel channel : node.getInputs()) {
propagateToChannel(parentSchema, channel, createUtilities);
}
} catch (MissingFieldTypeInfoException ex) {
throw new CompilerPostPassException("Could not set up runtime strategy for the input channel to " + " a union node. Missing type information for field " + ex.getFieldNumber());
}
} else // catch the sources of the iterative step functions
if (node instanceof BulkPartialSolutionPlanNode || node instanceof SolutionSetPlanNode || node instanceof WorksetPlanNode) {
// get the node's current schema
T schema;
String name;
if (node instanceof BulkPartialSolutionPlanNode) {
BulkPartialSolutionPlanNode psn = (BulkPartialSolutionPlanNode) node;
if (psn.postPassHelper == null) {
schema = createEmptySchema();
psn.postPassHelper = schema;
} else {
schema = (T) psn.postPassHelper;
}
name = "partial solution of bulk iteration '" + psn.getPartialSolutionNode().getIterationNode().getOperator().getName() + "'";
} else if (node instanceof SolutionSetPlanNode) {
SolutionSetPlanNode ssn = (SolutionSetPlanNode) node;
if (ssn.postPassHelper == null) {
schema = createEmptySchema();
ssn.postPassHelper = schema;
} else {
schema = (T) ssn.postPassHelper;
}
name = "solution set of workset iteration '" + ssn.getSolutionSetNode().getIterationNode().getOperator().getName() + "'";
} else if (node instanceof WorksetPlanNode) {
WorksetPlanNode wsn = (WorksetPlanNode) node;
if (wsn.postPassHelper == null) {
schema = createEmptySchema();
wsn.postPassHelper = schema;
} else {
schema = (T) wsn.postPassHelper;
}
name = "workset of workset iteration '" + wsn.getWorksetNode().getIterationNode().getOperator().getName() + "'";
} else {
throw new CompilerException();
}
schema.increaseNumConnectionsThatContributed();
// add the parent schema to the schema
addSchemaToSchema(parentSchema, schema, name);
} else {
throw new CompilerPostPassException("Unknown node type encountered: " + node.getClass().getName());
}
}
Aggregations