Search in sources :

Example 21 with ScanStats

use of org.apache.drill.exec.physical.base.ScanStats in project drill by apache.

the class ScanPrel method computeSelfCost.

/**
 * Computes the planner cost of this scan.
 *
 * <p>With default costing enabled, the cost is built directly from the group
 * scan's statistics (record count times column count, CPU cost, disk cost).
 * Otherwise the row count comes from the metadata query, the CPU cost is
 * derived from row count and column count, and the disk cost reported by the
 * group scan is used as the IO cost.
 *
 * @param planner planner supplying the settings and the cost factory
 * @param mq      metadata query used to obtain this node's row count
 * @return the estimated cost of this scan
 */
@Override
public RelOptCost computeSelfCost(final RelOptPlanner planner, RelMetadataQuery mq) {
    final PlannerSettings plannerSettings = PrelUtil.getPlannerSettings(planner);
    final ScanStats scanStats = getGroupScan().getScanStats(plannerSettings);
    final int fieldCount = getRowType().getFieldCount();

    if (PrelUtil.getSettings(getCluster()).useDefaultCosting()) {
        return planner.getCostFactory().makeCost(
            scanStats.getRecordCount() * fieldCount,
            scanStats.getCpuCost(),
            scanStats.getDiskCost());
    }

    final double rows = mq.getRowCount(this);

    // As DRILL-4083 points out, a column count of zero would drive the CPU cost
    // to zero, making HiveScan and HiveDrillNativeParquetScan cost the same.
    // The column count is therefore clamped to at least 1, i.e. CPU cost is
    // assumed to be proportional to row count. Note that this ignores the disk
    // cost estimate (which should be a proxy for row count * row width).
    final double baseCpuCost = rows * Math.max(fieldCount, 1);

    // A positive CPU cost reported by the group scan acts as a multiplier on
    // the default CPU cost; a non-positive value leaves the default untouched.
    final double cpuFactor = scanStats.getCpuCost();
    final double cpuCost = cpuFactor > 0 ? baseCpuCost * cpuFactor : baseCpuCost;

    final double ioCost = scanStats.getDiskCost();
    final DrillCostFactory drillCostFactory = (DrillCostFactory) planner.getCostFactory();
    return drillCostFactory.makeCost(rows, cpuCost, ioCost, 0);
}
Also used : DrillCostFactory(org.apache.drill.exec.planner.cost.DrillCostBase.DrillCostFactory) ScanStats(org.apache.drill.exec.physical.base.ScanStats)

Example 22 with ScanStats

use of org.apache.drill.exec.physical.base.ScanStats in project drill by apache.

the class MongoGroupScan method getScanStats.

/**
 * Estimates scan statistics for the Mongo collection described by
 * {@code scanSpec}.
 *
 * <p>The record count is the collection's estimated document count. The disk
 * cost is approximated as the UTF-8 size of the first document's JSON form
 * multiplied by the record count; it stays 0 when the collection reports no
 * documents or when no sample document can be fetched.
 *
 * @return scan statistics with the estimated record count, a CPU cost of 1 and
 *         the approximated disk cost
 * @throws DrillRuntimeException if any step of talking to MongoDB fails; the
 *         original exception is kept as the cause
 */
@Override
public ScanStats getScanStats() {
    try {
        MongoClient client = storagePlugin.getClient();
        MongoDatabase db = client.getDatabase(scanSpec.getDbName());
        MongoCollection<Document> collection = db.getCollection(scanSpec.getCollectionName());
        long recordCount = collection.estimatedDocumentCount();
        float approxDiskCost = 0;
        if (recordCount != 0) {
            // The count above is only an estimate, so the collection may still
            // turn out to be empty; first() yields null in that case and must
            // not be dereferenced.
            Document sample = collection.find().first();
            if (sample != null) {
                // toJson should use client's codec, otherwise toJson could fail on
                // some types not known to DocumentCodec, e.g. DBRef.
                DocumentCodec codec = new DocumentCodec(db.getCodecRegistry(), new BsonTypeClassMap());
                String json = sample.toJson(codec);
                // Use an explicit charset so the size estimate does not depend on
                // the JVM's platform default encoding.
                approxDiskCost = json.getBytes(java.nio.charset.StandardCharsets.UTF_8).length * recordCount;
            }
        }
        return new ScanStats(GroupScanProperty.ESTIMATED_TOTAL_COST, recordCount, 1, approxDiskCost);
    } catch (Exception e) {
        throw new DrillRuntimeException(e.getMessage(), e);
    }
}
Also used : MongoClient(com.mongodb.client.MongoClient) DocumentCodec(org.bson.codecs.DocumentCodec) BsonTypeClassMap(org.bson.codecs.BsonTypeClassMap) Document(org.bson.Document) DrillRuntimeException(org.apache.drill.common.exceptions.DrillRuntimeException) MongoDatabase(com.mongodb.client.MongoDatabase) ScanStats(org.apache.drill.exec.physical.base.ScanStats)

Example 23 with ScanStats

use of org.apache.drill.exec.physical.base.ScanStats in project drill by apache.

the class ConvertCountToDirectScanPrule method onMatch.

/**
 * Replaces a count aggregate over a scan with a direct scan that returns the
 * pre-computed counts, when those counts can be determined up front.
 *
 * <p>The matched operands are an aggregate at position 0 and a scan at the last
 * position; when three operands are matched, the middle one is a project.
 * The rule does nothing when its preconditions are not met or when
 * {@code collectCounts} returns an empty map.
 *
 * @param call the rule call holding the matched relational nodes
 */
@Override
public void onMatch(RelOptRuleCall call) {
    final DrillAggregateRel aggregate = call.rel(0);
    final DrillScanRel scanRel = call.rel(call.rels.length - 1);
    final DrillProjectRel projectRel;
    if (call.rels.length == 3) {
        projectRel = call.rel(1);
    } else {
        projectRel = null;
    }
    final GroupScan sourceGroupScan = scanRel.getGroupScan();
    final PlannerSettings plannerSettings = PrelUtil.getPlannerSettings(call.getPlanner());

    // Only apply the rule when:
    //   1) the scan reports an exact row count,
    //   2) there is no group-by key,
    //   3) there is no distinct aggregate call.
    final boolean exactRowCount =
        sourceGroupScan.getScanStats(plannerSettings).getGroupScanProperty().hasExactRowCount();
    if (!exactRowCount || aggregate.getGroupCount() != 0 || aggregate.containsDistinctCall()) {
        return;
    }

    final Map<String, Long> countsByName = collectCounts(plannerSettings, aggregate, scanRel, projectRel);
    logger.trace("Calculated the following aggregate counts: {}", countsByName);
    // An empty map means the counts could not be determined, so bail out.
    if (countsByName.isEmpty()) {
        return;
    }

    final RelDataType directRowType = CountToDirectScanUtils.constructDataType(aggregate, countsByName.keySet());

    // Reader serving a single record made of the collected count values.
    final DynamicPojoRecordReader<Long> countsReader = new DynamicPojoRecordReader<>(
        CountToDirectScanUtils.buildSchema(directRowType.getFieldNames()),
        Collections.singletonList(new ArrayList<>(countsByName.values())));

    final ScanStats directScanStats = new ScanStats(
        ScanStats.GroupScanProperty.EXACT_ROW_COUNT, 1, 1, directRowType.getFieldCount());

    // -1 is passed as the file count when the original group scan has no files.
    final int fileCount;
    if (sourceGroupScan.hasFiles()) {
        fileCount = sourceGroupScan.getFiles().size();
    } else {
        fileCount = -1;
    }

    final GroupScan metadataScan = new MetadataDirectGroupScan(
        countsReader,
        sourceGroupScan.getSelectionRoot(),
        fileCount,
        directScanStats,
        false,
        sourceGroupScan.usedMetastore());

    final DirectScanPrel directScanPrel = DirectScanPrel.create(
        scanRel,
        scanRel.getTraitSet().plus(Prel.DRILL_PHYSICAL).plus(DrillDistributionTrait.SINGLETON),
        metadataScan,
        directRowType);

    // Project on top of the direct scan, using the aggregate's row type as its output type.
    final ProjectPrel replacement = new ProjectPrel(
        aggregate.getCluster(),
        aggregate.getTraitSet().plus(Prel.DRILL_PHYSICAL).plus(DrillDistributionTrait.SINGLETON),
        directScanPrel,
        CountToDirectScanUtils.prepareFieldExpressions(directRowType),
        aggregate.getRowType());

    call.transformTo(replacement);
}
Also used : DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) DrillProjectRel(org.apache.drill.exec.planner.logical.DrillProjectRel) DynamicPojoRecordReader(org.apache.drill.exec.store.pojo.DynamicPojoRecordReader) ArrayList(java.util.ArrayList) DrillAggregateRel(org.apache.drill.exec.planner.logical.DrillAggregateRel) RelDataType(org.apache.calcite.rel.type.RelDataType) MetadataDirectGroupScan(org.apache.drill.exec.store.direct.MetadataDirectGroupScan) GroupScan(org.apache.drill.exec.physical.base.GroupScan) ScanStats(org.apache.drill.exec.physical.base.ScanStats)

Aggregations

ScanStats (org.apache.drill.exec.physical.base.ScanStats)23 IOException (java.io.IOException)4 DrillRuntimeException (org.apache.drill.common.exceptions.DrillRuntimeException)4 DrillCostFactory (org.apache.drill.exec.planner.cost.DrillCostBase.DrillCostFactory)4 DynamicPojoRecordReader (org.apache.drill.exec.store.pojo.DynamicPojoRecordReader)4 CompleteFileWork (org.apache.drill.exec.store.schedule.CompleteFileWork)4 ArrayList (java.util.ArrayList)3 RelDataType (org.apache.calcite.rel.type.RelDataType)3 GroupScan (org.apache.drill.exec.physical.base.GroupScan)3 PluginCost (org.apache.drill.exec.planner.cost.PluginCost)3 MetadataDirectGroupScan (org.apache.drill.exec.store.direct.MetadataDirectGroupScan)3 MongoDatabase (com.mongodb.client.MongoDatabase)2 List (java.util.List)2 SchemaPath (org.apache.drill.common.expression.SchemaPath)2 DrillAggregateRel (org.apache.drill.exec.planner.logical.DrillAggregateRel)2 DrillProjectRel (org.apache.drill.exec.planner.logical.DrillProjectRel)2 DrillScanRel (org.apache.drill.exec.planner.logical.DrillScanRel)2 PlannerSettings (org.apache.drill.exec.planner.physical.PlannerSettings)2 FormatSelection (org.apache.drill.exec.store.dfs.FormatSelection)2 HiveStats (org.apache.drill.exec.store.hive.HiveMetadataProvider.HiveStats)2