use of org.apache.flink.optimizer.costs.Costs in project flink by apache.
the class DataSourceNode method getAlternativePlans.
@Override
public List<PlanNode> getAlternativePlans(CostEstimator estimator) {
if (this.cachedPlans != null) {
return this.cachedPlans;
}
SourcePlanNode candidate = new SourcePlanNode(this, "DataSource (" + this.getOperator().getName() + ")", this.gprops, this.lprops);
if (!replicatedInput) {
candidate.updatePropertiesWithUniqueSets(getUniqueFields());
final Costs costs = new Costs();
if (FileInputFormat.class.isAssignableFrom(getOperator().getFormatWrapper().getUserCodeClass()) && this.estimatedOutputSize >= 0) {
estimator.addFileInputCost(this.estimatedOutputSize, costs);
}
candidate.setCosts(costs);
} else {
// replicated input
final Costs costs = new Costs();
InputFormat<?, ?> inputFormat = ((ReplicatingInputFormat<?, ?>) getOperator().getFormatWrapper().getUserCodeObject()).getReplicatedInputFormat();
if (FileInputFormat.class.isAssignableFrom(inputFormat.getClass()) && this.estimatedOutputSize >= 0) {
estimator.addFileInputCost(this.estimatedOutputSize * this.getParallelism(), costs);
}
candidate.setCosts(costs);
}
// since there is only a single plan for the data-source, return a list with that element only
List<PlanNode> plans = new ArrayList<PlanNode>(1);
plans.add(candidate);
this.cachedPlans = plans;
return plans;
}
use of org.apache.flink.optimizer.costs.Costs in project flink by apache.
the class SinkJoinerPlanNode method setCosts.
// --------------------------------------------------------------------------------------------
public void setCosts(Costs nodeCosts) {
// the plan enumeration logic works as for regular two-input-operators, which is important
// because of the branch handling logic. it does pick redistributing network channels
// between the sink and the sink joiner, because sinks joiner has a different parallelism than the sink.
// we discard any cost and simply use the sum of the costs from the two children.
Costs totalCosts = getInput1().getSource().getCumulativeCosts().clone();
totalCosts.addCosts(getInput2().getSource().getCumulativeCosts());
super.setCosts(totalCosts);
}
use of org.apache.flink.optimizer.costs.Costs in project flink by apache.
the class AllReduceProperties method instantiate.
@Override
public SingleInputPlanNode instantiate(Channel in, SingleInputNode node) {
if (in.getShipStrategy() == ShipStrategyType.FORWARD) {
// locally connected, directly instantiate
return new SingleInputPlanNode(node, "Reduce (" + node.getOperator().getName() + ")", in, DriverStrategy.ALL_REDUCE);
} else {
// non forward case.plug in a combiner
Channel toCombiner = new Channel(in.getSource());
toCombiner.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
// create an input node for combine with same parallelism as input node
ReduceNode combinerNode = ((ReduceNode) node).getCombinerUtilityNode();
combinerNode.setParallelism(in.getSource().getParallelism());
SingleInputPlanNode combiner = new SingleInputPlanNode(combinerNode, "Combine (" + node.getOperator().getName() + ")", toCombiner, DriverStrategy.ALL_REDUCE);
combiner.setCosts(new Costs(0, 0));
combiner.initProperties(toCombiner.getGlobalProperties(), toCombiner.getLocalProperties());
Channel toReducer = new Channel(combiner);
toReducer.setShipStrategy(in.getShipStrategy(), in.getShipStrategyKeys(), in.getShipStrategySortOrder(), in.getDataExchangeMode());
toReducer.setLocalStrategy(in.getLocalStrategy(), in.getLocalStrategyKeys(), in.getLocalStrategySortOrder());
return new SingleInputPlanNode(node, "Reduce (" + node.getOperator().getName() + ")", toReducer, DriverStrategy.ALL_REDUCE);
}
}
use of org.apache.flink.optimizer.costs.Costs in project flink by apache.
the class RangePartitionRewriter method rewriteRangePartitionChannel.
private List<Channel> rewriteRangePartitionChannel(Channel channel) {
final List<Channel> sourceNewOutputChannels = new ArrayList<>();
final PlanNode sourceNode = channel.getSource();
final PlanNode targetNode = channel.getTarget();
final int sourceParallelism = sourceNode.getParallelism();
final int targetParallelism = targetNode.getParallelism();
final Costs defaultZeroCosts = new Costs(0, 0, 0);
final TypeComparatorFactory<?> comparator = Utils.getShipComparator(channel, this.plan.getOriginalPlan().getExecutionConfig());
// 1. Fixed size sample in each partitions.
final int sampleSize = SAMPLES_PER_PARTITION * targetParallelism;
final SampleInPartition sampleInPartition = new SampleInPartition(false, sampleSize, SEED);
final TypeInformation<?> sourceOutputType = sourceNode.getOptimizerNode().getOperator().getOperatorInfo().getOutputType();
final TypeInformation<IntermediateSampleData> isdTypeInformation = TypeExtractor.getForClass(IntermediateSampleData.class);
final UnaryOperatorInformation sipOperatorInformation = new UnaryOperatorInformation(sourceOutputType, isdTypeInformation);
final MapPartitionOperatorBase sipOperatorBase = new MapPartitionOperatorBase(sampleInPartition, sipOperatorInformation, SIP_NAME);
final MapPartitionNode sipNode = new MapPartitionNode(sipOperatorBase);
final Channel sipChannel = new Channel(sourceNode, TempMode.NONE);
sipChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
final SingleInputPlanNode sipPlanNode = new SingleInputPlanNode(sipNode, SIP_NAME, sipChannel, DriverStrategy.MAP_PARTITION);
sipNode.setParallelism(sourceParallelism);
sipPlanNode.setParallelism(sourceParallelism);
sipPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
sipPlanNode.setCosts(defaultZeroCosts);
sipChannel.setTarget(sipPlanNode);
this.plan.getAllNodes().add(sipPlanNode);
sourceNewOutputChannels.add(sipChannel);
// 2. Fixed size sample in a single coordinator.
final SampleInCoordinator sampleInCoordinator = new SampleInCoordinator(false, sampleSize, SEED);
final UnaryOperatorInformation sicOperatorInformation = new UnaryOperatorInformation(isdTypeInformation, sourceOutputType);
final GroupReduceOperatorBase sicOperatorBase = new GroupReduceOperatorBase(sampleInCoordinator, sicOperatorInformation, SIC_NAME);
final GroupReduceNode sicNode = new GroupReduceNode(sicOperatorBase);
final Channel sicChannel = new Channel(sipPlanNode, TempMode.NONE);
sicChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
final SingleInputPlanNode sicPlanNode = new SingleInputPlanNode(sicNode, SIC_NAME, sicChannel, DriverStrategy.ALL_GROUP_REDUCE);
sicNode.setParallelism(1);
sicPlanNode.setParallelism(1);
sicPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
sicPlanNode.setCosts(defaultZeroCosts);
sicChannel.setTarget(sicPlanNode);
sipPlanNode.addOutgoingChannel(sicChannel);
this.plan.getAllNodes().add(sicPlanNode);
// 3. Use sampled data to build range boundaries.
final RangeBoundaryBuilder rangeBoundaryBuilder = new RangeBoundaryBuilder(comparator, targetParallelism);
final TypeInformation<CommonRangeBoundaries> rbTypeInformation = TypeExtractor.getForClass(CommonRangeBoundaries.class);
final UnaryOperatorInformation rbOperatorInformation = new UnaryOperatorInformation(sourceOutputType, rbTypeInformation);
final MapPartitionOperatorBase rbOperatorBase = new MapPartitionOperatorBase(rangeBoundaryBuilder, rbOperatorInformation, RB_NAME);
final MapPartitionNode rbNode = new MapPartitionNode(rbOperatorBase);
final Channel rbChannel = new Channel(sicPlanNode, TempMode.NONE);
rbChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
final SingleInputPlanNode rbPlanNode = new SingleInputPlanNode(rbNode, RB_NAME, rbChannel, DriverStrategy.MAP_PARTITION);
rbNode.setParallelism(1);
rbPlanNode.setParallelism(1);
rbPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
rbPlanNode.setCosts(defaultZeroCosts);
rbChannel.setTarget(rbPlanNode);
sicPlanNode.addOutgoingChannel(rbChannel);
this.plan.getAllNodes().add(rbPlanNode);
// 4. Take range boundaries as broadcast input and take the tuple of partition id and record as output.
final AssignRangeIndex assignRangeIndex = new AssignRangeIndex(comparator);
final TypeInformation<Tuple2> ariOutputTypeInformation = new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, sourceOutputType);
final UnaryOperatorInformation ariOperatorInformation = new UnaryOperatorInformation(sourceOutputType, ariOutputTypeInformation);
final MapPartitionOperatorBase ariOperatorBase = new MapPartitionOperatorBase(assignRangeIndex, ariOperatorInformation, ARI_NAME);
final MapPartitionNode ariNode = new MapPartitionNode(ariOperatorBase);
final Channel ariChannel = new Channel(sourceNode, TempMode.NONE);
// To avoid deadlock, set the DataExchangeMode of channel between source node and this to Batch.
ariChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.BATCH);
final SingleInputPlanNode ariPlanNode = new SingleInputPlanNode(ariNode, ARI_NAME, ariChannel, DriverStrategy.MAP_PARTITION);
ariNode.setParallelism(sourceParallelism);
ariPlanNode.setParallelism(sourceParallelism);
ariPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
ariPlanNode.setCosts(defaultZeroCosts);
ariChannel.setTarget(ariPlanNode);
this.plan.getAllNodes().add(ariPlanNode);
sourceNewOutputChannels.add(ariChannel);
final NamedChannel broadcastChannel = new NamedChannel("RangeBoundaries", rbPlanNode);
broadcastChannel.setShipStrategy(ShipStrategyType.BROADCAST, DataExchangeMode.PIPELINED);
broadcastChannel.setTarget(ariPlanNode);
List<NamedChannel> broadcastChannels = new ArrayList<>(1);
broadcastChannels.add(broadcastChannel);
ariPlanNode.setBroadcastInputs(broadcastChannels);
// 5. Remove the partition id.
final Channel partChannel = new Channel(ariPlanNode, TempMode.NONE);
final FieldList keys = new FieldList(0);
partChannel.setShipStrategy(ShipStrategyType.PARTITION_CUSTOM, keys, idPartitioner, DataExchangeMode.PIPELINED);
ariPlanNode.addOutgoingChannel(partChannel);
final RemoveRangeIndex partitionIDRemoveWrapper = new RemoveRangeIndex();
final UnaryOperatorInformation prOperatorInformation = new UnaryOperatorInformation(ariOutputTypeInformation, sourceOutputType);
final MapOperatorBase prOperatorBase = new MapOperatorBase(partitionIDRemoveWrapper, prOperatorInformation, PR_NAME);
final MapNode prRemoverNode = new MapNode(prOperatorBase);
final SingleInputPlanNode prPlanNode = new SingleInputPlanNode(prRemoverNode, PR_NAME, partChannel, DriverStrategy.MAP);
partChannel.setTarget(prPlanNode);
prRemoverNode.setParallelism(targetParallelism);
prPlanNode.setParallelism(targetParallelism);
GlobalProperties globalProperties = new GlobalProperties();
globalProperties.setRangePartitioned(new Ordering(0, null, Order.ASCENDING));
prPlanNode.initProperties(globalProperties, new LocalProperties());
prPlanNode.setCosts(defaultZeroCosts);
this.plan.getAllNodes().add(prPlanNode);
// 6. Connect to target node.
channel.setSource(prPlanNode);
channel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
prPlanNode.addOutgoingChannel(channel);
return sourceNewOutputChannels;
}
use of org.apache.flink.optimizer.costs.Costs in project flink by apache.
the class PlanNode method setCosts.
/**
* Sets the basic cost for this node to the given value, and sets the cumulative costs
* to those costs plus the cost shares of all inputs (regular and broadcast).
*
* @param nodeCosts The already knows costs for this node
* (this cost a produces by a concrete {@code OptimizerNode} subclass.
*/
public void setCosts(Costs nodeCosts) {
// set the node costs
this.nodeCosts = nodeCosts;
// the cumulative costs are the node costs plus the costs of all inputs
this.cumulativeCosts = nodeCosts.clone();
// add all the normal inputs
for (PlanNode pred : getPredecessors()) {
Costs parentCosts = pred.getCumulativeCostsShare();
if (parentCosts != null) {
this.cumulativeCosts.addCosts(parentCosts);
} else {
throw new CompilerException("Trying to set the costs of an operator before the predecessor costs are computed.");
}
}
// add all broadcast variable inputs
if (this.broadcastInputs != null) {
for (NamedChannel nc : this.broadcastInputs) {
Costs bcInputCost = nc.getSource().getCumulativeCostsShare();
if (bcInputCost != null) {
this.cumulativeCosts.addCosts(bcInputCost);
} else {
throw new CompilerException("Trying to set the costs of an operator before the broadcast input costs are computed.");
}
}
}
}
Aggregations