Use of org.apache.drill.exec.planner.cost.PluginCost in the Apache Drill project.
Class RestrictedJsonTableGroupScan, method getScanStats.
@Override
public ScanStats getScanStats() {
  // TODO: ideally here we should use the rowcount from index scan, and multiply a factor of restricted scan
  final PluginCost costModel = formatPlugin.getPluginCostModel();
  final int avgColumnSize = costModel.getAverageColumnSize(this);

  // With no explicit projection, fall back to the STAR_COLS column count.
  final int columnCount;
  if (columns != null && !columns.isEmpty()) {
    columnCount = columns.size();
  } else {
    columnCount = STAR_COLS;
  }

  // Row count of the restricted group scan - same as the right side index rows.
  final double rowCount = computeRestrictedScanRowcount();

  // Average row size of the primary table; when the statistics report it as unknown or zero,
  // estimate it from the per-column average instead.
  double avgRowSize = stats.getAvgRowSize(null, true);
  final boolean rowSizeMissing = avgRowSize == Statistics.AVG_ROWSIZE_UNKNOWN || avgRowSize == 0;
  if (rowSizeMissing) {
    avgRowSize = avgColumnSize * columnCount;
  }

  // A restricted scan performs random lookups, so every row may land in a different block.
  // The block count is therefore capped by the number of blocks in the primary table.
  final double primaryTableBytes = avgRowSize * fullTableRowCount;
  final double totalBlocksPrimary = Math.ceil(primaryTableBytes / costModel.getBlockSize(this));
  final double numBlocks = Math.min(totalBlocksPrimary, rowCount);

  // For non-covering plans the join back dominates the cost; scale the random-read cost by the
  // join-back I/O factor, which biases the planner towards non-covering plans.
  double diskCost = numBlocks * costModel.getRandomBlockReadCost(this);
  diskCost = diskCost * stats.getRowKeyJoinBackIOFactor();

  logger.debug("RestrictedJsonGroupScan:{} rowCount:{}, avgRowSize:{}, blocks:{}, totalBlocks:{}, diskCost:{}",
      System.identityHashCode(this), rowCount, avgRowSize, numBlocks, totalBlocksPrimary, diskCost);
  return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, rowCount, 1, diskCost);
}
Use of org.apache.drill.exec.planner.cost.PluginCost in the Apache Drill project.
Class JsonTableGroupScan, method fullTableScanStats.
/**
 * Builds the scan statistics for a full table scan (no index is passed to the statistics lookups).
 *
 * <p>The row count comes from the statistics for the scan condition, falling back to a fraction
 * of {@code fullTableRowCount} when it is unknown or zero, and is capped by
 * {@code maxRecordsToRead} when that is positive. The disk cost is the number of blocks to read
 * times the sequential block read cost of the plugin cost model. When a full table row count of
 * {@code ROWCOUNT_HUGE} has been forced (keyed by the {@code null} condition), both the row count
 * and the disk cost are set to {@code ROWCOUNT_HUGE}.
 *
 * @return scan statistics flagged {@code NO_EXACT_ROW_COUNT} carrying the estimated row count
 *         and disk cost
 */
private ScanStats fullTableScanStats() {
  PluginCost pluginCostModel = formatPlugin.getPluginCostModel();
  final int avgColumnSize = pluginCostModel.getAverageColumnSize(this);
  final int numColumns = (columns == null || columns.isEmpty()) ? STAR_COLS : columns.size();
  // index will be NULL for FTS
  double rowCount = stats.getRowCount(scanSpec.getCondition(), null);
  // rowcount based on _id predicate. If NO _id predicate present in condition, then the
  // rowcount should be same as totalRowCount. Equality b/w the two rowcounts should not be
  // construed as NO _id predicate since stats are approximate.
  double leadingRowCount = stats.getLeadingRowCount(scanSpec.getCondition(), null);
  double avgRowSize = stats.getAvgRowSize(null, true);
  double totalRowCount = stats.getRowCount(null, null);
  logger.debug("GroupScan {} with stats {}: rowCount={}, condition={}, totalRowCount={}, fullTableRowCount={}",
      System.identityHashCode(this), System.identityHashCode(stats), rowCount,
      scanSpec.getCondition() == null ? "null" : scanSpec.getCondition(), totalRowCount, fullTableRowCount);
  // If UNKNOWN, or DB stats sync issues(manifests as 0 rows) use defaults.
  if (rowCount == ROWCOUNT_UNKNOWN || rowCount == 0) {
    rowCount = (scanSpec.getSerializedFilter() != null ? .5 : 1) * fullTableRowCount;
  }
  // If limit pushdown has occurred - factor it in the rowcount
  if (this.maxRecordsToRead > 0) {
    rowCount = Math.min(rowCount, this.maxRecordsToRead);
  }
  if (totalRowCount == ROWCOUNT_UNKNOWN || totalRowCount == 0) {
    logger.debug("did not get valid totalRowCount, will take this: {}", fullTableRowCount);
    totalRowCount = fullTableRowCount;
  }
  if (avgRowSize == AVG_ROWSIZE_UNKNOWN || avgRowSize == 0) {
    if (fullTableRowCount > 0) {
      avgRowSize = fullTableEstimatedSize / fullTableRowCount;
    } else {
      // The table row count is not positive (e.g. an empty table), so dividing the estimated
      // size by it would divide by zero. Estimate the row size from the column count and the
      // average column size instead, the same fallback the other scan-stats methods use.
      avgRowSize = avgColumnSize * numColumns;
    }
  }
  double totalBlocks = getNumOfBlocks(totalRowCount, fullTableEstimatedSize, avgRowSize, pluginCostModel);
  double numBlocks = Math.min(totalBlocks,
      getNumOfBlocks(leadingRowCount, fullTableEstimatedSize, avgRowSize, pluginCostModel));
  double diskCost = numBlocks * pluginCostModel.getSequentialBlockReadCost(this);
  /*
   * Table scan cost made INFINITE in order to pick index plans. Use the MAX possible rowCount for
   * costing purposes.
   * NOTE: Full table rowCounts are specified with the NULL condition.
   * e.g. forcedRowCountMap<NULL, 1000>
   */
  // Forced full table rowcount and it is HUGE
  if (forcedRowCountMap.get(null) != null && forcedRowCountMap.get(null) == ROWCOUNT_HUGE) {
    rowCount = ROWCOUNT_HUGE;
    diskCost = ROWCOUNT_HUGE;
  }
  logger.debug("JsonGroupScan:{} rowCount:{}, avgRowSize:{}, blocks:{}, totalBlocks:{}, diskCost:{}",
      this.getOperatorId(), rowCount, avgRowSize, numBlocks, totalBlocks, diskCost);
  return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, rowCount, 1, diskCost);
}
Use of org.apache.drill.exec.planner.cost.PluginCost in the Apache Drill project.
Class JsonTableGroupScan, method indexScanStats.
/**
 * Builds the scan statistics for an index scan.
 *
 * <p>When the index hint is non-empty and names this scan's index, a near-zero cost is returned
 * so that the index is forced. Otherwise the row count, leading row count and average row size
 * are looked up in the statistics for this index, with defaults substituted for unknown or zero
 * values, and the disk cost is derived from the number of blocks read sequentially.
 */
private ScanStats indexScanStats() {
  final boolean indexForced = !this.getIndexHint().equals("")
      && this.getIndexHint().equals(getIndexDesc().getIndexName());
  if (indexForced) {
    logger.debug("JsonIndexGroupScan:{} forcing index {} by making tiny cost", this, this.getIndexHint());
    return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, 1, 1, 0);
  }

  final PluginCost costModel = formatPlugin.getPluginCostModel();
  final int avgColumnSize = costModel.getAverageColumnSize(this);
  final boolean filterPushed = scanSpec.getSerializedFilter() != null;

  // Column count of the index: included fields + indexed fields + 1, or STAR_COLS when no
  // index descriptor is available.
  // NOTE(review): scanSpec has already been dereferenced just above and its index descriptor is
  // dereferenced unconditionally below, so this null test cannot prevent an NPE - confirm intent.
  final int indexColumnCount;
  if (scanSpec != null && scanSpec.getIndexDesc() != null) {
    indexColumnCount = scanSpec.getIndexDesc().getIncludedFields().size()
        + scanSpec.getIndexDesc().getIndexedFields().size() + 1;
  } else {
    indexColumnCount = STAR_COLS;
  }

  final int columnCount;
  if (columns != null && !columns.isEmpty()) {
    columnCount = columns.size();
  } else {
    columnCount = indexColumnCount;
  }

  final String idxIdentifier = stats.buildUniqueIndexIdentifier(
      scanSpec.getIndexDesc().getPrimaryTablePath(), scanSpec.getIndexDesc().getIndexName());
  double rowCount = stats.getRowCount(scanSpec.getCondition(), idxIdentifier);
  // Row count based on the predicate over the index leading columns.
  double leadingRowCount = stats.getLeadingRowCount(scanSpec.getCondition(), idxIdentifier);
  double avgRowSize = stats.getAvgRowSize(idxIdentifier, false);

  // Unknown (or zero) row count: use a default fraction of the table, split over the indexed fields.
  if (rowCount == ROWCOUNT_UNKNOWN || rowCount == 0) {
    final float defaultFraction = filterPushed ? 0.0005f : 0.001f;
    rowCount = defaultFraction * fullTableRowCount / scanSpec.getIndexDesc().getIndexedFields().size();
  }
  // A pushed-down limit caps the row count.
  if (this.maxRecordsToRead > 0) {
    rowCount = Math.min(rowCount, this.maxRecordsToRead);
  }
  if (leadingRowCount == ROWCOUNT_UNKNOWN || leadingRowCount == 0) {
    leadingRowCount = rowCount;
  }
  if (avgRowSize == AVG_ROWSIZE_UNKNOWN || avgRowSize == 0) {
    avgRowSize = avgColumnSize * columnCount;
  }

  // Without a pushed filter both start and stop rows are empty, i.e. this is a full scan,
  // so the total row count drives the disk i/o; otherwise the leading row count does.
  final double rowsFromDisk = filterPushed ? leadingRowCount : fullTableRowCount;

  final double totalBlocks = Math.ceil((avgRowSize * fullTableRowCount) / costModel.getBlockSize(this));
  final double numBlocks = Math.min(totalBlocks,
      Math.ceil(((avgRowSize * rowsFromDisk) / costModel.getBlockSize(this))));
  final double diskCost = numBlocks * costModel.getSequentialBlockReadCost(this);

  logger.debug("index_plan_info: JsonIndexGroupScan:{} - indexName:{}: rowCount:{}, avgRowSize:{}, blocks:{}, totalBlocks:{}, rowsFromDisk {}, diskCost:{}",
      System.identityHashCode(this), scanSpec.getIndexDesc().getIndexName(), rowCount, avgRowSize,
      numBlocks, totalBlocks, rowsFromDisk, diskCost);
  return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, rowCount, 1, diskCost);
}
Use of org.apache.drill.exec.planner.cost.PluginCost in the Apache Drill project.
Class JsonTableGroupScan, method init.
/**
 * Populates {@code fullTableRowCount} and {@code fullTableEstimatedSize} if the row count has
 * not been obtained yet (i.e. it is still 0).
 *
 * <p>The values are first read from the table's meta-table scan stats. If that reports 0 rows,
 * the row count is fetched through {@code MapRDBTableStats} and the size is estimated as
 * rows * columns * average column size.
 *
 * @throws DrillRuntimeException wrapping any exception raised while fetching the stats
 */
private void init() {
  try {
    // Fetch the fullTableRowCount only once, i.e. skip if it was already obtained.
    if (fullTableRowCount != 0) {
      return;
    }

    // Scan stats requested without a condition describe the full table.
    final com.mapr.db.scan.ScanStats tableScanStats = this.formatPlugin.getJsonTableCache()
        .getTable(scanSpec.getTableName(), scanSpec.getIndexDesc(), getUserName())
        .getMetaTable()
        .getScanStats();
    fullTableRowCount = tableScanStats.getEstimatedNumRows();
    fullTableEstimatedSize = tableScanStats.getEstimatedSize();

    if (fullTableRowCount != 0) {
      return;
    }

    // A row count of 0 was reported: fall back to the old admin API for the row count and
    // derive the size estimate from the column count and the average column size.
    final PluginCost costModel = formatPlugin.getPluginCostModel();
    final int avgColumnSize = costModel.getAverageColumnSize(this);
    final int columnCount;
    if (columns == null || columns.isEmpty() || Utilities.isStarQuery(columns)) {
      columnCount = STAR_COLS;
    } else {
      columnCount = columns.size();
    }
    fullTableRowCount = new MapRDBTableStats(formatPlugin.getFsConf(), scanSpec.getTableName()).getNumRows();
    fullTableEstimatedSize = fullTableRowCount * columnCount * avgColumnSize;
  } catch (Exception e) {
    final String tablePart = "Error getting region info for table: " + scanSpec.getTableName();
    String indexPart = "";
    if (scanSpec.getIndexDesc() != null) {
      indexPart = ", index: " + scanSpec.getIndexName();
    }
    throw new DrillRuntimeException(tablePart + indexPart, e);
  }
}
Aggregations