use of org.apache.drill.exec.physical.base.ScanStats in project drill by apache.
the class ScanPrel method computeSelfCost.
/**
 * Computes the planner cost of this scan from the group scan's statistics.
 *
 * <p>With default costing enabled, the cost is built directly from the
 * {@link ScanStats} values (record count scaled by the number of fields, CPU
 * cost, disk cost). Otherwise the row count comes from the metadata query and
 * the CPU cost is derived from it.
 *
 * @param planner planner that supplies the settings and the cost factory
 * @param mq metadata query used to obtain this node's row count
 * @return the cost produced by the planner's cost factory
 */
@Override
public RelOptCost computeSelfCost(final RelOptPlanner planner, RelMetadataQuery mq) {
  final PlannerSettings plannerSettings = PrelUtil.getPlannerSettings(planner);
  final ScanStats scanStats = getGroupScan().getScanStats(plannerSettings);
  final int fieldCount = getRowType().getFieldCount();

  if (PrelUtil.getSettings(getCluster()).useDefaultCosting()) {
    return planner.getCostFactory().makeCost(
        scanStats.getRecordCount() * fieldCount,
        scanStats.getCpuCost(),
        scanStats.getDiskCost());
  }

  final double rows = mq.getRowCount(this);

  // DRILL-4083: with zero fields the CPU cost would collapse to zero, making
  // HiveScan and HiveDrillNativeParquetScan cost the same. Clamp the field
  // count to at least one so the CPU cost stays proportional to the row count.
  // This deliberately ignores the disk cost estimate (which should be a proxy
  // for row count * row width).
  final double baseCpu = rows * Math.max(fieldCount, 1);

  // A positive CPU cost reported by the scan acts as a multiplier on the
  // default CPU cost; zero or negative values leave the default untouched.
  final double cpuFactor = scanStats.getCpuCost();
  final double cpu = cpuFactor > 0 ? baseCpu * cpuFactor : baseCpu;

  final double io = scanStats.getDiskCost();
  final DrillCostFactory drillCostFactory = (DrillCostFactory) planner.getCostFactory();
  // NOTE(review): the trailing 0 is presumably the network cost - confirm against DrillCostFactory.
  return drillCostFactory.makeCost(rows, cpu, io, 0);
}
use of org.apache.drill.exec.physical.base.ScanStats in project drill by apache.
the class MongoGroupScan method getScanStats.
/**
 * Estimates scan statistics for the collection named by {@code scanSpec}.
 *
 * <p>The record count is the collection's estimated document count. The disk
 * cost is approximated as the UTF-8 byte length of the first document's JSON
 * form multiplied by that count; it stays 0 when the count is 0 or when no
 * sample document can be read.
 *
 * @return stats flagged {@code ESTIMATED_TOTAL_COST} with a CPU cost of 1
 * @throws DrillRuntimeException wrapping any failure raised while talking to Mongo
 */
@Override
public ScanStats getScanStats() {
  try {
    MongoClient client = storagePlugin.getClient();
    MongoDatabase db = client.getDatabase(scanSpec.getDbName());
    MongoCollection<Document> collection = db.getCollection(scanSpec.getCollectionName());
    long recordCount = collection.estimatedDocumentCount();
    float approxDiskCost = 0;
    if (recordCount != 0) {
      // The count is only an estimate, so it can be non-zero while the query
      // yields nothing; first() returns null in that case. Guard against it
      // instead of failing planning with a NullPointerException.
      Document sample = collection.find().first();
      if (sample != null) {
        // toJson should use client's codec, otherwise toJson could fail on
        // some types not known to DocumentCodec, e.g. DBRef.
        DocumentCodec codec = new DocumentCodec(db.getCodecRegistry(), new BsonTypeClassMap());
        String json = sample.toJson(codec);
        // Use an explicit charset so the size estimate does not depend on the
        // JVM's platform default encoding.
        approxDiskCost = json.getBytes(java.nio.charset.StandardCharsets.UTF_8).length * recordCount;
      }
    }
    return new ScanStats(GroupScanProperty.ESTIMATED_TOTAL_COST, recordCount, 1, approxDiskCost);
  } catch (Exception e) {
    throw new DrillRuntimeException(e.getMessage(), e);
  }
}
use of org.apache.drill.exec.physical.base.ScanStats in project drill by apache.
the class ConvertCountToDirectScanPrule method onMatch.
/**
 * Replaces a count aggregate over a scan (optionally with a project in
 * between) by a project over a direct scan that returns the pre-computed
 * counts as a single record.
 *
 * <p>The call is left untouched when the preconditions below are not met or
 * when no counts could be collected.
 *
 * @param call rule call whose first operand is the aggregate and whose last
 *     operand is the scan; a three-operand call carries the project at index 1
 */
@Override
public void onMatch(RelOptRuleCall call) {
  final DrillAggregateRel aggregate = call.rel(0);
  final DrillScanRel scanRel = call.rel(call.rels.length - 1);
  final DrillProjectRel projectRel = call.rels.length == 3 ? call.rel(1) : null;
  final GroupScan sourceGroupScan = scanRel.getGroupScan();
  final PlannerSettings plannerSettings = PrelUtil.getPlannerSettings(call.getPlanner());

  // Only apply the rule when all of the following hold.
  // 1) The scan statistics report an exact row count.
  if (!sourceGroupScan.getScanStats(plannerSettings).getGroupScanProperty().hasExactRowCount()) {
    return;
  }
  // 2) The aggregate has no group-by key.
  if (aggregate.getGroupCount() != 0) {
    return;
  }
  // 3) The aggregate has no distinct call.
  if (aggregate.containsDistinctCall()) {
    return;
  }

  Map<String, Long> counts = collectCounts(plannerSettings, aggregate, scanRel, projectRel);
  logger.trace("Calculated the following aggregate counts: {}", counts);

  // Without any determined counts there is nothing to substitute.
  if (counts.isEmpty()) {
    return;
  }

  final RelDataType directRowType = CountToDirectScanUtils.constructDataType(aggregate, counts.keySet());

  // The reader serves exactly one record holding all collected count values.
  final DynamicPojoRecordReader<Long> countsReader = new DynamicPojoRecordReader<>(
      CountToDirectScanUtils.buildSchema(directRowType.getFieldNames()),
      Collections.singletonList(new ArrayList<>(counts.values())));

  final ScanStats directStats = new ScanStats(
      ScanStats.GroupScanProperty.EXACT_ROW_COUNT, 1, 1, directRowType.getFieldCount());

  // -1 marks a source scan that has no files.
  final int fileCount;
  if (sourceGroupScan.hasFiles()) {
    fileCount = sourceGroupScan.getFiles().size();
  } else {
    fileCount = -1;
  }

  final GroupScan directGroupScan = new MetadataDirectGroupScan(
      countsReader,
      sourceGroupScan.getSelectionRoot(),
      fileCount,
      directStats,
      false,
      sourceGroupScan.usedMetastore());

  final DirectScanPrel directScanPrel = DirectScanPrel.create(
      scanRel,
      scanRel.getTraitSet().plus(Prel.DRILL_PHYSICAL).plus(DrillDistributionTrait.SINGLETON),
      directGroupScan,
      directRowType);

  final ProjectPrel replacement = new ProjectPrel(
      aggregate.getCluster(),
      aggregate.getTraitSet().plus(Prel.DRILL_PHYSICAL).plus(DrillDistributionTrait.SINGLETON),
      directScanPrel,
      CountToDirectScanUtils.prepareFieldExpressions(directRowType),
      aggregate.getRowType());

  call.transformTo(replacement);
}
Aggregations