Search in sources :

Example 1 with Costs

use of org.apache.flink.optimizer.costs.Costs in project flink by apache.

the class DataSourceNode method getAlternativePlans.

/**
 * Returns the plan candidates for this data source, creating and caching them on the
 * first call. Exactly one candidate is ever produced.
 *
 * <p>A file input cost is only added when the relevant input format class is assignable
 * to {@code FileInputFormat} and the estimated output size is non-negative. For a
 * replicated input the estimated size is multiplied by this node's parallelism.
 *
 * @param estimator the estimator that adds the file input cost to the candidate's costs
 * @return the cached list holding the single source plan candidate
 */
@Override
public List<PlanNode> getAlternativePlans(CostEstimator estimator) {
    if (this.cachedPlans == null) {
        final String planName = "DataSource (" + this.getOperator().getName() + ")";
        final SourcePlanNode sourcePlan = new SourcePlanNode(this, planName, this.gprops, this.lprops);
        final Costs sourceCosts = new Costs();

        if (replicatedInput) {
            // replicated input: inspect the wrapped format, not the replicating wrapper
            final ReplicatingInputFormat<?, ?> replicating = (ReplicatingInputFormat<?, ?>) getOperator().getFormatWrapper().getUserCodeObject();
            final InputFormat<?, ?> wrapped = replicating.getReplicatedInputFormat();
            if (FileInputFormat.class.isAssignableFrom(wrapped.getClass()) && this.estimatedOutputSize >= 0) {
                estimator.addFileInputCost(this.estimatedOutputSize * this.getParallelism(), sourceCosts);
            }
        } else {
            sourcePlan.updatePropertiesWithUniqueSets(getUniqueFields());
            final Class<?> formatClass = getOperator().getFormatWrapper().getUserCodeClass();
            if (FileInputFormat.class.isAssignableFrom(formatClass) && this.estimatedOutputSize >= 0) {
                estimator.addFileInputCost(this.estimatedOutputSize, sourceCosts);
            }
        }
        sourcePlan.setCosts(sourceCosts);

        // a data source has only a single plan, so the list holds just that element
        final List<PlanNode> singlePlan = new ArrayList<PlanNode>(1);
        singlePlan.add(sourcePlan);
        this.cachedPlans = singlePlan;
    }
    return this.cachedPlans;
}
Also used : ReplicatingInputFormat(org.apache.flink.api.common.io.ReplicatingInputFormat) Costs(org.apache.flink.optimizer.costs.Costs) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) ArrayList(java.util.ArrayList) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) FileInputFormat(org.apache.flink.api.common.io.FileInputFormat)

Example 2 with Costs

use of org.apache.flink.optimizer.costs.Costs in project flink by apache.

the class SinkJoinerPlanNode method setCosts.

// --------------------------------------------------------------------------------------------
/**
 * Sets this node's costs to the sum of the cumulative costs of its two inputs' sources.
 *
 * @param nodeCosts ignored; the costs handed to the superclass are derived from the inputs
 */
public void setCosts(Costs nodeCosts) {
    // Plan enumeration works as for regular two-input operators, which matters for the
    // branch handling logic. It does pick redistributing network channels between the
    // sink and the sink joiner, because the sink joiner has a different parallelism
    // than the sink. Any such cost is discarded here: only the children's costs are summed.
    final Costs firstChildCosts = getInput1().getSource().getCumulativeCosts();
    final Costs summedCosts = firstChildCosts.clone();
    final Costs secondChildCosts = getInput2().getSource().getCumulativeCosts();
    summedCosts.addCosts(secondChildCosts);
    super.setCosts(summedCosts);
}
Also used : Costs(org.apache.flink.optimizer.costs.Costs)

Example 3 with Costs

use of org.apache.flink.optimizer.costs.Costs in project flink by apache.

the class AllReduceProperties method instantiate.

/**
 * Creates the plan node for an all-reduce on the given input channel.
 *
 * <p>With a {@code FORWARD} ship strategy the reducer is attached directly to the input.
 * Otherwise a zero-cost combiner is placed on a forward channel from the input's source,
 * and the reducer reads from the combiner using the ship and local strategies of the
 * original channel.
 *
 * @param in the input channel of the reduce
 * @param node the optimizer node the plan node is created for
 * @return the reducer plan node
 */
@Override
public SingleInputPlanNode instantiate(Channel in, SingleInputNode node) {
    // locally connected inputs feed the reducer directly
    Channel reducerInput = in;

    if (in.getShipStrategy() != ShipStrategyType.FORWARD) {
        // non-forward case: plug in a combiner in front of the reducer
        final Channel combinerInput = new Channel(in.getSource());
        combinerInput.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);

        // the combiner's optimizer node gets the same parallelism as the input's source
        final ReduceNode combinerUtilityNode = ((ReduceNode) node).getCombinerUtilityNode();
        combinerUtilityNode.setParallelism(in.getSource().getParallelism());

        final String combinerName = "Combine (" + node.getOperator().getName() + ")";
        final SingleInputPlanNode combinerPlan = new SingleInputPlanNode(combinerUtilityNode, combinerName, combinerInput, DriverStrategy.ALL_REDUCE);
        combinerPlan.setCosts(new Costs(0, 0));
        combinerPlan.initProperties(combinerInput.getGlobalProperties(), combinerInput.getLocalProperties());

        // the channel from combiner to reducer takes over the original channel's strategies
        final Channel combinerOutput = new Channel(combinerPlan);
        combinerOutput.setShipStrategy(in.getShipStrategy(), in.getShipStrategyKeys(), in.getShipStrategySortOrder(), in.getDataExchangeMode());
        combinerOutput.setLocalStrategy(in.getLocalStrategy(), in.getLocalStrategyKeys(), in.getLocalStrategySortOrder());
        reducerInput = combinerOutput;
    }

    return new SingleInputPlanNode(node, "Reduce (" + node.getOperator().getName() + ")", reducerInput, DriverStrategy.ALL_REDUCE);
}
Also used : SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) ReduceNode(org.apache.flink.optimizer.dag.ReduceNode) Costs(org.apache.flink.optimizer.costs.Costs) Channel(org.apache.flink.optimizer.plan.Channel)

Example 4 with Costs

use of org.apache.flink.optimizer.costs.Costs in project flink by apache.

the class RangePartitionRewriter method rewriteRangePartitionChannel.

/**
 * Rewrites the given channel by inserting a chain of helper plan nodes between the
 * channel's source and its target.
 *
 * <p>Five new plan nodes are created, each given zero costs and added to the plan's node
 * collection: a per-partition sampler (SIP), a sample coordinator (SIC), a range boundary
 * builder (RB), a range index assigner (ARI) and a range index remover (PR). The given
 * channel itself is modified: its source becomes the PR node and its ship strategy
 * becomes {@code FORWARD} / {@code PIPELINED}.
 *
 * <p>This method does not register the two new channels that leave the original source
 * node on that node; it only returns them. Presumably the caller installs them as the
 * source's outgoing channels - verify against the call site.
 *
 * @param channel the channel to rewrite; its source and ship strategy are changed
 * @return the two new channels whose source is the original source node
 *         (source to SIP, and source to ARI)
 */
private List<Channel> rewriteRangePartitionChannel(Channel channel) {
    final List<Channel> sourceNewOutputChannels = new ArrayList<>();
    final PlanNode sourceNode = channel.getSource();
    final PlanNode targetNode = channel.getTarget();
    final int sourceParallelism = sourceNode.getParallelism();
    final int targetParallelism = targetNode.getParallelism();
    // one zero-cost instance is shared by every helper node created below
    final Costs defaultZeroCosts = new Costs(0, 0, 0);
    final TypeComparatorFactory<?> comparator = Utils.getShipComparator(channel, this.plan.getOriginalPlan().getExecutionConfig());
    // 1. Fixed size sample in each partitions.
    // the sample size scales with the parallelism of the target node
    final int sampleSize = SAMPLES_PER_PARTITION * targetParallelism;
    final SampleInPartition sampleInPartition = new SampleInPartition(false, sampleSize, SEED);
    final TypeInformation<?> sourceOutputType = sourceNode.getOptimizerNode().getOperator().getOperatorInfo().getOutputType();
    final TypeInformation<IntermediateSampleData> isdTypeInformation = TypeExtractor.getForClass(IntermediateSampleData.class);
    final UnaryOperatorInformation sipOperatorInformation = new UnaryOperatorInformation(sourceOutputType, isdTypeInformation);
    final MapPartitionOperatorBase sipOperatorBase = new MapPartitionOperatorBase(sampleInPartition, sipOperatorInformation, SIP_NAME);
    final MapPartitionNode sipNode = new MapPartitionNode(sipOperatorBase);
    final Channel sipChannel = new Channel(sourceNode, TempMode.NONE);
    sipChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
    final SingleInputPlanNode sipPlanNode = new SingleInputPlanNode(sipNode, SIP_NAME, sipChannel, DriverStrategy.MAP_PARTITION);
    // the sampler runs with the parallelism of the source node
    sipNode.setParallelism(sourceParallelism);
    sipPlanNode.setParallelism(sourceParallelism);
    sipPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
    sipPlanNode.setCosts(defaultZeroCosts);
    sipChannel.setTarget(sipPlanNode);
    this.plan.getAllNodes().add(sipPlanNode);
    // first of the two new channels leaving the original source node
    sourceNewOutputChannels.add(sipChannel);
    // 2. Fixed size sample in a single coordinator.
    final SampleInCoordinator sampleInCoordinator = new SampleInCoordinator(false, sampleSize, SEED);
    final UnaryOperatorInformation sicOperatorInformation = new UnaryOperatorInformation(isdTypeInformation, sourceOutputType);
    final GroupReduceOperatorBase sicOperatorBase = new GroupReduceOperatorBase(sampleInCoordinator, sicOperatorInformation, SIC_NAME);
    final GroupReduceNode sicNode = new GroupReduceNode(sicOperatorBase);
    final Channel sicChannel = new Channel(sipPlanNode, TempMode.NONE);
    sicChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
    final SingleInputPlanNode sicPlanNode = new SingleInputPlanNode(sicNode, SIC_NAME, sicChannel, DriverStrategy.ALL_GROUP_REDUCE);
    // the coordinator is a single instance
    sicNode.setParallelism(1);
    sicPlanNode.setParallelism(1);
    sicPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
    sicPlanNode.setCosts(defaultZeroCosts);
    sicChannel.setTarget(sicPlanNode);
    sipPlanNode.addOutgoingChannel(sicChannel);
    this.plan.getAllNodes().add(sicPlanNode);
    // 3. Use sampled data to build range boundaries.
    final RangeBoundaryBuilder rangeBoundaryBuilder = new RangeBoundaryBuilder(comparator, targetParallelism);
    final TypeInformation<CommonRangeBoundaries> rbTypeInformation = TypeExtractor.getForClass(CommonRangeBoundaries.class);
    final UnaryOperatorInformation rbOperatorInformation = new UnaryOperatorInformation(sourceOutputType, rbTypeInformation);
    final MapPartitionOperatorBase rbOperatorBase = new MapPartitionOperatorBase(rangeBoundaryBuilder, rbOperatorInformation, RB_NAME);
    final MapPartitionNode rbNode = new MapPartitionNode(rbOperatorBase);
    final Channel rbChannel = new Channel(sicPlanNode, TempMode.NONE);
    rbChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
    final SingleInputPlanNode rbPlanNode = new SingleInputPlanNode(rbNode, RB_NAME, rbChannel, DriverStrategy.MAP_PARTITION);
    // the boundary builder is a single instance as well
    rbNode.setParallelism(1);
    rbPlanNode.setParallelism(1);
    rbPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
    rbPlanNode.setCosts(defaultZeroCosts);
    rbChannel.setTarget(rbPlanNode);
    sicPlanNode.addOutgoingChannel(rbChannel);
    this.plan.getAllNodes().add(rbPlanNode);
    // 4. Take range boundaries as broadcast input and take the tuple of partition id and record as output.
    final AssignRangeIndex assignRangeIndex = new AssignRangeIndex(comparator);
    final TypeInformation<Tuple2> ariOutputTypeInformation = new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, sourceOutputType);
    final UnaryOperatorInformation ariOperatorInformation = new UnaryOperatorInformation(sourceOutputType, ariOutputTypeInformation);
    final MapPartitionOperatorBase ariOperatorBase = new MapPartitionOperatorBase(assignRangeIndex, ariOperatorInformation, ARI_NAME);
    final MapPartitionNode ariNode = new MapPartitionNode(ariOperatorBase);
    final Channel ariChannel = new Channel(sourceNode, TempMode.NONE);
    // To avoid deadlock, set the DataExchangeMode of channel between source node and this to Batch.
    ariChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.BATCH);
    final SingleInputPlanNode ariPlanNode = new SingleInputPlanNode(ariNode, ARI_NAME, ariChannel, DriverStrategy.MAP_PARTITION);
    ariNode.setParallelism(sourceParallelism);
    ariPlanNode.setParallelism(sourceParallelism);
    ariPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
    ariPlanNode.setCosts(defaultZeroCosts);
    ariChannel.setTarget(ariPlanNode);
    this.plan.getAllNodes().add(ariPlanNode);
    // second of the two new channels leaving the original source node
    sourceNewOutputChannels.add(ariChannel);
    // the boundaries built in step 3 reach the index assigner as its only broadcast input
    final NamedChannel broadcastChannel = new NamedChannel("RangeBoundaries", rbPlanNode);
    broadcastChannel.setShipStrategy(ShipStrategyType.BROADCAST, DataExchangeMode.PIPELINED);
    broadcastChannel.setTarget(ariPlanNode);
    List<NamedChannel> broadcastChannels = new ArrayList<>(1);
    broadcastChannels.add(broadcastChannel);
    ariPlanNode.setBroadcastInputs(broadcastChannels);
    // 5. Remove the partition id.
    final Channel partChannel = new Channel(ariPlanNode, TempMode.NONE);
    // field 0 of the (partition id, record) tuple is the custom partitioning key
    final FieldList keys = new FieldList(0);
    partChannel.setShipStrategy(ShipStrategyType.PARTITION_CUSTOM, keys, idPartitioner, DataExchangeMode.PIPELINED);
    ariPlanNode.addOutgoingChannel(partChannel);
    final RemoveRangeIndex partitionIDRemoveWrapper = new RemoveRangeIndex();
    final UnaryOperatorInformation prOperatorInformation = new UnaryOperatorInformation(ariOutputTypeInformation, sourceOutputType);
    final MapOperatorBase prOperatorBase = new MapOperatorBase(partitionIDRemoveWrapper, prOperatorInformation, PR_NAME);
    final MapNode prRemoverNode = new MapNode(prOperatorBase);
    final SingleInputPlanNode prPlanNode = new SingleInputPlanNode(prRemoverNode, PR_NAME, partChannel, DriverStrategy.MAP);
    partChannel.setTarget(prPlanNode);
    // the remover runs with the parallelism of the target node
    prRemoverNode.setParallelism(targetParallelism);
    prPlanNode.setParallelism(targetParallelism);
    // the remover's output is declared range partitioned on field 0, ascending
    GlobalProperties globalProperties = new GlobalProperties();
    globalProperties.setRangePartitioned(new Ordering(0, null, Order.ASCENDING));
    prPlanNode.initProperties(globalProperties, new LocalProperties());
    prPlanNode.setCosts(defaultZeroCosts);
    this.plan.getAllNodes().add(prPlanNode);
    // 6. Connect to target node.
    // the original channel is reused: it now starts at the remover and simply forwards
    channel.setSource(prPlanNode);
    channel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
    prPlanNode.addOutgoingChannel(channel);
    return sourceNewOutputChannels;
}
Also used : SampleInPartition(org.apache.flink.api.java.functions.SampleInPartition) Costs(org.apache.flink.optimizer.costs.Costs) GroupReduceNode(org.apache.flink.optimizer.dag.GroupReduceNode) ArrayList(java.util.ArrayList) SampleInCoordinator(org.apache.flink.api.java.functions.SampleInCoordinator) MapNode(org.apache.flink.optimizer.dag.MapNode) RangeBoundaryBuilder(org.apache.flink.runtime.operators.udf.RangeBoundaryBuilder) FieldList(org.apache.flink.api.common.operators.util.FieldList) MapOperatorBase(org.apache.flink.api.common.operators.base.MapOperatorBase) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) UnaryOperatorInformation(org.apache.flink.api.common.operators.UnaryOperatorInformation) GlobalProperties(org.apache.flink.optimizer.dataproperties.GlobalProperties) RemoveRangeIndex(org.apache.flink.runtime.operators.udf.RemoveRangeIndex) Ordering(org.apache.flink.api.common.operators.Ordering) MapPartitionNode(org.apache.flink.optimizer.dag.MapPartitionNode) MapPartitionOperatorBase(org.apache.flink.api.common.operators.base.MapPartitionOperatorBase) AssignRangeIndex(org.apache.flink.runtime.operators.udf.AssignRangeIndex) Channel(org.apache.flink.optimizer.plan.Channel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) IntermediateSampleData(org.apache.flink.api.java.sampling.IntermediateSampleData) Tuple2(org.apache.flink.api.java.tuple.Tuple2) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) LocalProperties(org.apache.flink.optimizer.dataproperties.LocalProperties) CommonRangeBoundaries(org.apache.flink.api.common.distributions.CommonRangeBoundaries)

Example 5 with Costs

use of org.apache.flink.optimizer.costs.Costs in project flink by apache.

the class PlanNode method setCosts.

/**
 * Sets the basic costs of this node to the given value and recomputes the cumulative
 * costs as those costs plus the cost shares of all inputs (regular and broadcast).
 *
 * @param nodeCosts the already known costs of this node
 * @throws CompilerException if the cumulative cost share of a predecessor or of a
 *         broadcast input's source is {@code null}
 */
public void setCosts(Costs nodeCosts) {
    // remember the node's own costs and start the cumulative costs from a copy of them
    this.nodeCosts = nodeCosts;
    this.cumulativeCosts = nodeCosts.clone();

    // add the cost share of every regular input
    for (PlanNode predecessor : getPredecessors()) {
        final Costs predecessorShare = predecessor.getCumulativeCostsShare();
        if (predecessorShare == null) {
            throw new CompilerException("Trying to set the costs of an operator before the predecessor costs are computed.");
        }
        this.cumulativeCosts.addCosts(predecessorShare);
    }

    // nothing more to add when there are no broadcast variable inputs
    if (this.broadcastInputs == null) {
        return;
    }

    // add the cost share of the source of every broadcast input
    for (NamedChannel broadcastInput : this.broadcastInputs) {
        final Costs broadcastShare = broadcastInput.getSource().getCumulativeCostsShare();
        if (broadcastShare == null) {
            throw new CompilerException("Trying to set the costs of an operator before the broadcast input costs are computed.");
        }
        this.cumulativeCosts.addCosts(broadcastShare);
    }
}
Also used : Costs(org.apache.flink.optimizer.costs.Costs) CompilerException(org.apache.flink.optimizer.CompilerException)

Aggregations

Costs (org.apache.flink.optimizer.costs.Costs)8 Channel (org.apache.flink.optimizer.plan.Channel)5 SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)5 GroupReduceNode (org.apache.flink.optimizer.dag.GroupReduceNode)3 ArrayList (java.util.ArrayList)2 PartitionNode (org.apache.flink.optimizer.dag.PartitionNode)2 ReduceNode (org.apache.flink.optimizer.dag.ReduceNode)2 PlanNode (org.apache.flink.optimizer.plan.PlanNode)2 CommonRangeBoundaries (org.apache.flink.api.common.distributions.CommonRangeBoundaries)1 FileInputFormat (org.apache.flink.api.common.io.FileInputFormat)1 ReplicatingInputFormat (org.apache.flink.api.common.io.ReplicatingInputFormat)1 Ordering (org.apache.flink.api.common.operators.Ordering)1 UnaryOperatorInformation (org.apache.flink.api.common.operators.UnaryOperatorInformation)1 GroupReduceOperatorBase (org.apache.flink.api.common.operators.base.GroupReduceOperatorBase)1 MapOperatorBase (org.apache.flink.api.common.operators.base.MapOperatorBase)1 MapPartitionOperatorBase (org.apache.flink.api.common.operators.base.MapPartitionOperatorBase)1 FieldList (org.apache.flink.api.common.operators.util.FieldList)1 SampleInCoordinator (org.apache.flink.api.java.functions.SampleInCoordinator)1 SampleInPartition (org.apache.flink.api.java.functions.SampleInPartition)1 IntermediateSampleData (org.apache.flink.api.java.sampling.IntermediateSampleData)1