use of org.apache.drill.exec.physical.base.ScanStats in project drill by axbaretto.
the class ConvertCountToDirectScan method onMatch.
@Override
public void onMatch(RelOptRuleCall call) {
final DrillAggregateRel agg = (DrillAggregateRel) call.rel(0);
final DrillScanRel scan = (DrillScanRel) call.rel(call.rels.length - 1);
final DrillProjectRel project = call.rels.length == 3 ? (DrillProjectRel) call.rel(1) : null;
final GroupScan oldGrpScan = scan.getGroupScan();
final PlannerSettings settings = PrelUtil.getPlannerSettings(call.getPlanner());
// 3) No distinct agg call.
if (!(oldGrpScan.getScanStats(settings).getGroupScanProperty().hasExactRowCount() && agg.getGroupCount() == 0 && !agg.containsDistinctCall())) {
return;
}
Map<String, Long> result = collectCounts(settings, agg, scan, project);
logger.trace("Calculated the following aggregate counts: ", result);
// if could not determine the counts, rule won't be applied
if (result.isEmpty()) {
return;
}
final RelDataType scanRowType = constructDataType(agg, result.keySet());
final DynamicPojoRecordReader<Long> reader = new DynamicPojoRecordReader<>(buildSchema(scanRowType.getFieldNames()), Collections.singletonList((List<Long>) new ArrayList<>(result.values())));
final ScanStats scanStats = new ScanStats(ScanStats.GroupScanProperty.EXACT_ROW_COUNT, 1, 1, scanRowType.getFieldCount());
final GroupScan directScan = new MetadataDirectGroupScan(reader, oldGrpScan.getFiles(), scanStats);
final ScanPrel newScan = ScanPrel.create(scan, scan.getTraitSet().plus(Prel.DRILL_PHYSICAL).plus(DrillDistributionTrait.SINGLETON), directScan, scanRowType);
final ProjectPrel newProject = new ProjectPrel(agg.getCluster(), agg.getTraitSet().plus(Prel.DRILL_PHYSICAL).plus(DrillDistributionTrait.SINGLETON), newScan, prepareFieldExpressions(scanRowType), agg.getRowType());
call.transformTo(newProject);
}
use of org.apache.drill.exec.physical.base.ScanStats in project drill by axbaretto.
the class DrillScanRel method computeSelfCost.
// / TODO: this method is same as the one for ScanPrel...eventually we should consolidate
// / this and few other methods in a common base class which would be extended
// / by both logical and physical rels.
@Override
public RelOptCost computeSelfCost(final RelOptPlanner planner, RelMetadataQuery mq) {
final ScanStats stats = groupScan.getScanStats(settings);
int columnCount = getRowType().getFieldCount();
double ioCost = 0;
boolean isStarQuery = Utilities.isStarQuery(columns);
if (isStarQuery) {
columnCount = STAR_COLUMN_COST;
}
// double rowCount = RelMetadataQuery.getRowCount(this);
double rowCount = stats.getRecordCount();
if (rowCount < 1) {
rowCount = 1;
}
if (PrelUtil.getSettings(getCluster()).useDefaultCosting()) {
return planner.getCostFactory().makeCost(rowCount * columnCount, stats.getCpuCost(), stats.getDiskCost());
}
// for now, assume cpu cost is proportional to row count.
double cpuCost = rowCount * columnCount;
// Even though scan is reading from disk, in the currently generated plans all plans will
// need to read the same amount of data, so keeping the disk io cost 0 is ok for now.
// In the future we might consider alternative scans that go against projections or
// different compression schemes etc that affect the amount of data read. Such alternatives
// would affect both cpu and io cost.
DrillCostFactory costFactory = (DrillCostFactory) planner.getCostFactory();
return costFactory.makeCost(rowCount, cpuCost, ioCost, 0);
}
use of org.apache.drill.exec.physical.base.ScanStats in project drill by axbaretto.
the class MongoGroupScan method getScanStats.
@Override
public ScanStats getScanStats() {
try {
MongoClient client = storagePlugin.getClient();
MongoDatabase db = client.getDatabase(scanSpec.getDbName());
MongoCollection<Document> collection = db.getCollection(scanSpec.getCollectionName());
long numDocs = collection.count();
float approxDiskCost = 0;
if (numDocs != 0) {
// toJson should use client's codec, otherwise toJson could fail on
// some types not known to DocumentCodec, e.g. DBRef.
final DocumentCodec codec = new DocumentCodec(client.getMongoClientOptions().getCodecRegistry(), new BsonTypeClassMap());
String json = collection.find().first().toJson(codec);
approxDiskCost = json.getBytes().length * numDocs;
}
return new ScanStats(GroupScanProperty.EXACT_ROW_COUNT, numDocs, 1, approxDiskCost);
} catch (Exception e) {
throw new DrillRuntimeException(e.getMessage(), e);
}
}
use of org.apache.drill.exec.physical.base.ScanStats in project drill by axbaretto.
the class TextFormatPlugin method getScanStats.
@Override
protected ScanStats getScanStats(final PlannerSettings settings, final EasyGroupScan scan) {
long data = 0;
for (final CompleteFileWork work : scan.getWorkIterable()) {
data += work.getTotalBytes();
}
final double estimatedRowSize = settings.getOptions().getOption(ExecConstants.TEXT_ESTIMATED_ROW_SIZE);
final double estRowCount = data / estimatedRowSize;
return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, (long) estRowCount, 1, data);
}
use of org.apache.drill.exec.physical.base.ScanStats in project drill by axbaretto.
the class EasyFormatPlugin method getScanStats.
protected ScanStats getScanStats(final PlannerSettings settings, final EasyGroupScan scan) {
long data = 0;
for (final CompleteFileWork work : scan.getWorkIterable()) {
data += work.getTotalBytes();
}
final long estRowCount = data / 1024;
return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, estRowCount, 1, data);
}
Aggregations