Use of org.apache.drill.exec.planner.cost.DrillCostBase.DrillCostFactory in project drill by apache.
The class DrillJoinRelBase, method computeHashJoinCostWithKeySize.
/**
 * Computes the cost of a hash join for the given number of join keys.
 *
 * @param planner the optimization planner
 * @param keySize the number of join keys in the join condition; the left key
 *                count must equal the right key count
 * @param mq      the metadata query used to obtain row-count estimates
 * @return the estimated {@link RelOptCost}
 */
private RelOptCost computeHashJoinCostWithKeySize(RelOptPlanner planner, int keySize, RelMetadataQuery mq) {
  double probeRowCount = mq.getRowCount(this.getLeft());
  double buildRowCount = mq.getRowCount(this.getRight());

  // CPU cost of hashing the join keys for the build side
  double cpuCostBuild = DrillCostBase.HASH_CPU_COST * keySize * buildRowCount;
  // CPU cost of hashing the join keys for the probe side
  double cpuCostProbe = DrillCostBase.HASH_CPU_COST * keySize * probeRowCount;

  // CPU cost of evaluating one leftKey = rightKey join condition
  double joinConditionCost = DrillCostBase.COMPARE_CPU_COST * keySize;

  double factor = PrelUtil.getPlannerSettings(planner).getOptions()
      .getOption(ExecConstants.HASH_JOIN_TABLE_FACTOR_KEY).float_val;
  long fieldWidth = PrelUtil.getPlannerSettings(planner).getOptions()
      .getOption(ExecConstants.AVERAGE_FIELD_WIDTH_KEY).num_val;

  // memory for the hash table: key columns + hash values + links
  double memCost = ((fieldWidth * keySize) + IntHolder.WIDTH + IntHolder.WIDTH)
      * buildRowCount * factor;

  // the probe-side row count determines the join condition comparison cost
  double cpuCost = joinConditionCost * probeRowCount + cpuCostBuild + cpuCostProbe;

  DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory();
  return costFactory.makeCost(buildRowCount + probeRowCount, cpuCost, 0, 0, memCost);
}
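For intuition, here is a minimal standalone sketch of the same arithmetic. All constants and input sizes below are illustrative assumptions for the example, not values Drill actually reads from DrillCostBase or its options:

// Standalone sketch of the hash join cost arithmetic above.
// All constants here are illustrative assumptions, not Drill's actual settings.
public class HashJoinCostSketch {
  static final double HASH_CPU_COST = 8.0;     // assumed per-key hashing cost
  static final double COMPARE_CPU_COST = 4.0;  // assumed per-key comparison cost
  static final int INT_HOLDER_WIDTH = 4;       // assumed bytes per stored int

  public static void main(String[] args) {
    double probeRowCount = 1_000_000;
    double buildRowCount = 100_000;
    int keySize = 2;       // two equi-join keys
    long fieldWidth = 8;   // assumed average key width in bytes
    double factor = 1.1;   // assumed hash table overhead factor

    double cpuCostBuild = HASH_CPU_COST * keySize * buildRowCount;
    double cpuCostProbe = HASH_CPU_COST * keySize * probeRowCount;
    double joinConditionCost = COMPARE_CPU_COST * keySize;

    // key columns + hash values + links, scaled by the table factor
    double memCost = ((fieldWidth * keySize) + INT_HOLDER_WIDTH + INT_HOLDER_WIDTH)
        * buildRowCount * factor;
    double cpuCost = joinConditionCost * probeRowCount + cpuCostBuild + cpuCostProbe;

    System.out.printf("cpu=%.0f mem=%.0f%n", cpuCost, memCost);
    // cpu = 8*2*100000 + 8*2*1000000 + 4*2*1000000 = 25,600,000
    // mem = (16 + 4 + 4) * 100000 * 1.1 = 2,640,000 bytes
  }
}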
Use of org.apache.drill.exec.planner.cost.DrillCostBase.DrillCostFactory in project drill by apache.
The class DrillProjectRelBase, method computeSelfCost.
@Override
public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
  if (PrelUtil.getSettings(getCluster()).useDefaultCosting()) {
    return super.computeSelfCost(planner, mq).multiplyBy(.1);
  }

  // Cost is proportional to the number of rows and the number of
  // non-trivial expressions being projected.
  double rowCount = nonSimpleFieldCount > 0 ? mq.getRowCount(this) : 0;
  double cpuCost = DrillCostBase.PROJECT_CPU_COST * rowCount * nonSimpleFieldCount;

  DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory();
  return costFactory.makeCost(rowCount, cpuCost, 0, 0);
}
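Note the guard on rowCount: when every projected expression is a plain column reference (nonSimpleFieldCount == 0), the row count used for costing drops to zero and the project is effectively free. A hypothetical sketch of that gating, with an assumed constant:

// Illustrative only: shows how nonSimpleFieldCount gates the project cost.
// PROJECT_CPU_COST here is an assumed placeholder, not Drill's actual constant.
public class ProjectCostSketch {
  static final double PROJECT_CPU_COST = 4.0;

  static double projectCpuCost(double rowCount, int nonSimpleFieldCount) {
    // pure column references (nonSimpleFieldCount == 0) are costed as zero rows
    double costedRows = nonSimpleFieldCount > 0 ? rowCount : 0;
    return PROJECT_CPU_COST * costedRows * nonSimpleFieldCount;
  }

  public static void main(String[] args) {
    System.out.println(projectCpuCost(1_000_000, 0)); // 0.0, plain column references
    System.out.println(projectCpuCost(1_000_000, 1)); // 4000000.0, one computed expression
  }
}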
Use of org.apache.drill.exec.planner.cost.DrillCostBase.DrillCostFactory in project drill by apache.
The class DrillScreenRelBase, method computeSelfCost.
@Override
public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
  if (PrelUtil.getSettings(getCluster()).useDefaultCosting()) {
    return super.computeSelfCost(planner, mq).multiplyBy(.1);
  }

  // By default, assume cost is proportional to the number of rows.
  double rowCount = mq.getRowCount(this);
  DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory();
  return costFactory.makeCost(rowCount, rowCount, 0, 0).multiplyBy(0.1);
}
Use of org.apache.drill.exec.planner.cost.DrillCostBase.DrillCostFactory in project drill by apache.
The class TopNPrel, method computeSelfCost.
/**
 * The cost of doing Top-N is proportional to M log N, where M is the total
 * number of input rows and N is the limit for Top-N. This makes Top-N
 * preferable to Sort, since the cost of a full Sort is proportional to M log M.
 */
@Override
public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
  if (PrelUtil.getSettings(getCluster()).useDefaultCosting()) {
    // We use multiplier 0.05 for the TopN operator, and 0.1 for Sort, to make
    // TopN the preferred choice.
    return super.computeSelfCost(planner, mq).multiplyBy(0.05);
  }

  RelNode child = this.getInput();
  double inputRows = mq.getRowCount(child);
  int numSortFields = this.collation.getFieldCollations().size();
  double cpuCost = DrillCostBase.COMPARE_CPU_COST * numSortFields * inputRows
      * (Math.log(limit) / Math.log(2));
  // assume in-memory for now until we enforce operator-level memory constraints
  double diskIOCost = 0;

  DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory();
  return costFactory.makeCost(inputRows, cpuCost, diskIOCost, 0);
}
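To see why the M log N shape matters, here is a small standalone sketch comparing the two costs; the Sort formula simply replaces the limit with the input size, and the constant is an assumed placeholder, not Drill's actual value:

// Illustrative comparison of Top-N vs full Sort CPU cost for one sort field.
public class TopNVsSortSketch {
  static final double COMPARE_CPU_COST = 4.0;  // assumed constant

  static double cost(double inputRows, double n, int numSortFields) {
    return COMPARE_CPU_COST * numSortFields * inputRows * (Math.log(n) / Math.log(2));
  }

  public static void main(String[] args) {
    double m = 1_000_000;  // input rows
    double limit = 100;    // Top-N limit
    System.out.printf("TopN: %.3e, Sort: %.3e%n", cost(m, limit, 1), cost(m, m, 1));
    // log2(100) is about 6.6 versus log2(1e6) about 19.9,
    // so Top-N is roughly 3x cheaper here
  }
}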
Use of org.apache.drill.exec.planner.cost.DrillCostBase.DrillCostFactory in project drill by apache.
The class ScanPrel, method computeSelfCost.
@Override
public RelOptCost computeSelfCost(final RelOptPlanner planner, RelMetadataQuery mq) {
  final PlannerSettings settings = PrelUtil.getPlannerSettings(planner);
  final ScanStats stats = this.groupScan.getScanStats(settings);
  final int columnCount = this.getRowType().getFieldCount();

  if (PrelUtil.getSettings(getCluster()).useDefaultCosting()) {
    return planner.getCostFactory().makeCost(stats.getRecordCount() * columnCount,
        stats.getCpuCost(), stats.getDiskCost());
  }

  // double rowCount = RelMetadataQuery.getRowCount(this);
  double rowCount = stats.getRecordCount();

  // As DRILL-4083 points out, when columnCount == 0 the CPU cost becomes zero,
  // which makes the costs of HiveScan and HiveDrillNativeParquetScan the same.
  // For now, assume CPU cost is proportional to row count.
  double cpuCost = rowCount * Math.max(columnCount, 1);

  // If a positive value for CPU cost is given, multiply the default CPU cost by it.
  if (stats.getCpuCost() > 0) {
    cpuCost *= stats.getCpuCost();
  }

  // Even though the scan reads from disk, all currently generated plans need to
  // read the same amount of data, so keeping the disk IO cost at 0 is OK for now.
  // In the future we might consider alternative scans that go against projections,
  // or different compression schemes, etc. that affect the amount of data read;
  // such alternatives would affect both CPU and IO cost.
  double ioCost = 0;

  DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory();
  return costFactory.makeCost(rowCount, cpuCost, ioCost, 0);
}
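The Math.max(columnCount, 1) guard is the DRILL-4083 fix: even a scan that projects no columns keeps a row-count-proportional CPU cost, so two scans whose ScanStats report different CPU costs still rank differently. A hypothetical sketch of just that guard:

// Illustrative only: the columnCount == 0 guard from the method above.
public class ScanCostSketch {
  static double scanCpuCost(double rowCount, int columnCount, double statsCpuCost) {
    double cpuCost = rowCount * Math.max(columnCount, 1); // never collapses to zero
    if (statsCpuCost > 0) {
      cpuCost *= statsCpuCost; // scale by the group scan's own CPU estimate
    }
    return cpuCost;
  }

  public static void main(String[] args) {
    // With zero projected columns, the two scans still rank differently:
    System.out.println(scanCpuCost(1_000_000, 0, 1.5)); // 1500000.0
    System.out.println(scanCpuCost(1_000_000, 0, 3.0)); // 3000000.0
  }
}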