
Example 1 with PluginCost

Use of org.apache.drill.exec.planner.cost.PluginCost in project drill by apache.

From class RestrictedJsonTableGroupScan, method getScanStats:

@Override
public ScanStats getScanStats() {
    // TODO: ideally here we should use the rowcount from index scan, and multiply a factor of restricted scan
    double rowCount;
    PluginCost pluginCostModel = formatPlugin.getPluginCostModel();
    final int avgColumnSize = pluginCostModel.getAverageColumnSize(this);
    int numColumns = (columns == null || columns.isEmpty()) ? STAR_COLS : columns.size();
    // Get the restricted group scan row count - same as the right side index rows
    rowCount = computeRestrictedScanRowcount();
    // Get the average row size of the primary table
    double avgRowSize = stats.getAvgRowSize(null, true);
    if (avgRowSize == Statistics.AVG_ROWSIZE_UNKNOWN || avgRowSize == 0) {
        avgRowSize = avgColumnSize * numColumns;
    }
    // restricted scan does random lookups and each row may belong to a different block, with the number
    // of blocks upper bounded by the total num blocks in the primary table
    double totalBlocksPrimary = Math.ceil((avgRowSize * fullTableRowCount) / pluginCostModel.getBlockSize(this));
    double numBlocks = Math.min(totalBlocksPrimary, rowCount);
    double diskCost = numBlocks * pluginCostModel.getRandomBlockReadCost(this);
    // For non-covering plans, the dominating cost would be of the join back. Reduce it using the factor
    // for biasing towards non-covering plans.
    diskCost *= stats.getRowKeyJoinBackIOFactor();
    logger.debug("RestrictedJsonGroupScan:{} rowCount:{}, avgRowSize:{}, blocks:{}, totalBlocks:{}, diskCost:{}", System.identityHashCode(this), rowCount, avgRowSize, numBlocks, totalBlocksPrimary, diskCost);
    return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, rowCount, 1, diskCost);
}
Also used : PluginCost(org.apache.drill.exec.planner.cost.PluginCost) ScanStats(org.apache.drill.exec.physical.base.ScanStats)
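
Every constant used in getScanStats() above comes from formatPlugin.getPluginCostModel(). As a minimal sketch of what such a cost model needs to supply, the class below defines the four getters the scan code calls. The parameter and return types are assumptions inferred from the call sites, and the numbers are purely illustrative, not Drill defaults.

import org.apache.drill.exec.physical.base.GroupScan;

// Illustrative, constants-based cost model. The method names mirror the calls
// made by the group scans in these examples; the values below are made up.
public class ConstantCostModelSketch {

    // Assumed average width of a single column, in bytes.
    public int getAverageColumnSize(GroupScan scan) {
        return 10;
    }

    // Assumed storage block size, in bytes.
    public int getBlockSize(GroupScan scan) {
        return 8192;
    }

    // Relative cost of reading one block sequentially.
    public int getSequentialBlockReadCost(GroupScan scan) {
        return 1;
    }

    // Relative cost of reading one block at a random offset.
    public int getRandomBlockReadCost(GroupScan scan) {
        return 4;
    }
}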

Example 2 with PluginCost

Use of org.apache.drill.exec.planner.cost.PluginCost in project drill by apache.

From class JsonTableGroupScan, method fullTableScanStats:

private ScanStats fullTableScanStats() {
    PluginCost pluginCostModel = formatPlugin.getPluginCostModel();
    final int avgColumnSize = pluginCostModel.getAverageColumnSize(this);
    final int numColumns = (columns == null || columns.isEmpty()) ? STAR_COLS : columns.size();
    // index will be NULL for FTS
    double rowCount = stats.getRowCount(scanSpec.getCondition(), null);
    // rowcount based on _id predicate. If NO _id predicate present in condition, then the
    // rowcount should be same as totalRowCount. Equality b/w the two rowcounts should not be
    // construed as NO _id predicate since stats are approximate.
    double leadingRowCount = stats.getLeadingRowCount(scanSpec.getCondition(), null);
    double avgRowSize = stats.getAvgRowSize(null, true);
    double totalRowCount = stats.getRowCount(null, null);
    logger.debug("GroupScan {} with stats {}: rowCount={}, condition={}, totalRowCount={}, fullTableRowCount={}", System.identityHashCode(this), System.identityHashCode(stats), rowCount, scanSpec.getCondition() == null ? "null" : scanSpec.getCondition(), totalRowCount, fullTableRowCount);
    // If UNKNOWN, or DB stats sync issues(manifests as 0 rows) use defaults.
    if (rowCount == ROWCOUNT_UNKNOWN || rowCount == 0) {
        rowCount = (scanSpec.getSerializedFilter() != null ? .5 : 1) * fullTableRowCount;
    }
    // If limit pushdown has occurred - factor it in the rowcount
    if (this.maxRecordsToRead > 0) {
        rowCount = Math.min(rowCount, this.maxRecordsToRead);
    }
    if (totalRowCount == ROWCOUNT_UNKNOWN || totalRowCount == 0) {
        logger.debug("did not get valid totalRowCount, will take this: {}", fullTableRowCount);
        totalRowCount = fullTableRowCount;
    }
    if (avgRowSize == AVG_ROWSIZE_UNKNOWN || avgRowSize == 0) {
        avgRowSize = fullTableEstimatedSize / fullTableRowCount;
    }
    double totalBlocks = getNumOfBlocks(totalRowCount, fullTableEstimatedSize, avgRowSize, pluginCostModel);
    double numBlocks = Math.min(totalBlocks, getNumOfBlocks(leadingRowCount, fullTableEstimatedSize, avgRowSize, pluginCostModel));
    double diskCost = numBlocks * pluginCostModel.getSequentialBlockReadCost(this);
    /*
     * Table scan cost made INFINITE in order to pick index plans. Use the MAX possible rowCount for
     * costing purposes.
     * NOTE: Full table rowCounts are specified with the NULL condition.
     * e.g. forcedRowCountMap<NULL, 1000>
     */
    // Forced full table rowcount and it is HUGE
    if (forcedRowCountMap.get(null) != null && forcedRowCountMap.get(null) == ROWCOUNT_HUGE) {
        rowCount = ROWCOUNT_HUGE;
        diskCost = ROWCOUNT_HUGE;
    }
    logger.debug("JsonGroupScan:{} rowCount:{}, avgRowSize:{}, blocks:{}, totalBlocks:{}, diskCost:{}", this.getOperatorId(), rowCount, avgRowSize, numBlocks, totalBlocks, diskCost);
    return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, rowCount, 1, diskCost);
}
Also used : PluginCost(org.apache.drill.exec.planner.cost.PluginCost) ScanStats(org.apache.drill.exec.physical.base.ScanStats)
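
To make the block arithmetic concrete, here is a standalone sketch of the same computation with assumed inputs. The getNumOfBlocks() helper is not shown in the excerpt, so the sketch approximates it as rows times average row size divided by block size, which matches the formula used in the other examples.

// Standalone sketch of the sequential-scan cost arithmetic in fullTableScanStats().
// All inputs are assumed values for illustration only.
public class FullTableScanCostSketch {
    public static void main(String[] args) {
        double totalRowCount = 1_000_000;  // estimated rows in the table (assumed)
        double leadingRowCount = 250_000;  // rows matching the _id predicate (assumed)
        double avgRowSize = 200;           // bytes per row (assumed)
        int blockSize = 8192;              // bytes per block (assumed)
        int sequentialBlockReadCost = 1;   // cost units per sequential block read (assumed)

        // Blocks needed for the whole table vs. blocks needed for the leading-key range;
        // the scan never reads more blocks than the table actually has.
        double totalBlocks = Math.ceil(avgRowSize * totalRowCount / blockSize);
        double numBlocks = Math.min(totalBlocks, Math.ceil(avgRowSize * leadingRowCount / blockSize));
        double diskCost = numBlocks * sequentialBlockReadCost;

        System.out.printf("blocks=%.0f of %.0f, diskCost=%.0f%n", numBlocks, totalBlocks, diskCost);
    }
}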

Example 3 with PluginCost

Use of org.apache.drill.exec.planner.cost.PluginCost in project drill by apache.

From class JsonTableGroupScan, method indexScanStats:

private ScanStats indexScanStats() {
    if (!this.getIndexHint().equals("") && this.getIndexHint().equals(getIndexDesc().getIndexName())) {
        logger.debug("JsonIndexGroupScan:{} forcing index {} by making tiny cost", this, this.getIndexHint());
        return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, 1, 1, 0);
    }
    int totalColNum = STAR_COLS;
    PluginCost pluginCostModel = formatPlugin.getPluginCostModel();
    final int avgColumnSize = pluginCostModel.getAverageColumnSize(this);
    boolean filterPushed = (scanSpec.getSerializedFilter() != null);
    if (scanSpec != null && scanSpec.getIndexDesc() != null) {
        totalColNum = scanSpec.getIndexDesc().getIncludedFields().size() + scanSpec.getIndexDesc().getIndexedFields().size() + 1;
    }
    int numColumns = (columns == null || columns.isEmpty()) ? totalColNum : columns.size();
    String idxIdentifier = stats.buildUniqueIndexIdentifier(scanSpec.getIndexDesc().getPrimaryTablePath(), scanSpec.getIndexDesc().getIndexName());
    double rowCount = stats.getRowCount(scanSpec.getCondition(), idxIdentifier);
    // rowcount based on index leading columns predicate.
    double leadingRowCount = stats.getLeadingRowCount(scanSpec.getCondition(), idxIdentifier);
    double avgRowSize = stats.getAvgRowSize(idxIdentifier, false);
    // If UNKNOWN, use defaults
    if (rowCount == ROWCOUNT_UNKNOWN || rowCount == 0) {
        rowCount = (filterPushed ? 0.0005f : 0.001f) * fullTableRowCount / scanSpec.getIndexDesc().getIndexedFields().size();
    }
    // If limit pushdown has occurred - factor it in the rowcount
    if (this.maxRecordsToRead > 0) {
        rowCount = Math.min(rowCount, this.maxRecordsToRead);
    }
    if (leadingRowCount == ROWCOUNT_UNKNOWN || leadingRowCount == 0) {
        leadingRowCount = rowCount;
    }
    if (avgRowSize == AVG_ROWSIZE_UNKNOWN || avgRowSize == 0) {
        avgRowSize = avgColumnSize * numColumns;
    }
    double rowsFromDisk = leadingRowCount;
    if (!filterPushed) {
        // both start and stop rows are empty, indicating this is a full scan so
        // use the total rows for calculating disk i/o
        rowsFromDisk = fullTableRowCount;
    }
    double totalBlocks = Math.ceil((avgRowSize * fullTableRowCount) / pluginCostModel.getBlockSize(this));
    double numBlocks = Math.ceil(((avgRowSize * rowsFromDisk) / pluginCostModel.getBlockSize(this)));
    numBlocks = Math.min(totalBlocks, numBlocks);
    double diskCost = numBlocks * pluginCostModel.getSequentialBlockReadCost(this);
    logger.debug("index_plan_info: JsonIndexGroupScan:{} - indexName:{}: rowCount:{}, avgRowSize:{}, blocks:{}, totalBlocks:{}, rowsFromDisk {}, diskCost:{}", System.identityHashCode(this), scanSpec.getIndexDesc().getIndexName(), rowCount, avgRowSize, numBlocks, totalBlocks, rowsFromDisk, diskCost);
    return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, rowCount, 1, diskCost);
}
Also used : PluginCost(org.apache.drill.exec.planner.cost.PluginCost) ScanStats(org.apache.drill.exec.physical.base.ScanStats)
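
When statistics are unknown, the index scan falls back to a fixed selectivity: 0.0005 of the full table row count if a filter was pushed, 0.001 otherwise, divided across the indexed fields, then capped by any pushed-down limit. A small sketch with assumed inputs:

// Sketch of the default row-count fallback in indexScanStats(); inputs are assumed.
public class IndexRowCountFallbackSketch {
    public static void main(String[] args) {
        double fullTableRowCount = 1_000_000;  // rows in the primary table (assumed)
        int numIndexedFields = 2;              // indexed fields in the chosen index (assumed)
        boolean filterPushed = true;           // a filter was pushed into the scan (assumed)
        long maxRecordsToRead = 100;           // pushed-down LIMIT, 0 means none (assumed)

        // Same shape as the fallback above: fixed selectivity divided across indexed fields.
        double rowCount = (filterPushed ? 0.0005f : 0.001f) * fullTableRowCount / numIndexedFields;
        if (maxRecordsToRead > 0) {
            rowCount = Math.min(rowCount, maxRecordsToRead);
        }
        System.out.println("fallback rowCount = " + rowCount);  // 100.0, capped by the limit
    }
}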

Example 4 with PluginCost

Use of org.apache.drill.exec.planner.cost.PluginCost in project drill by apache.

From class JsonTableGroupScan, method init:

private void init() {
    try {
        // Get the fullTableRowCount only once i.e. if not already obtained before.
        if (fullTableRowCount == 0) {
            final Table t = this.formatPlugin.getJsonTableCache().getTable(scanSpec.getTableName(), scanSpec.getIndexDesc(), getUserName());
            final MetaTable metaTable = t.getMetaTable();
            // For condition null, we get full table stats.
            com.mapr.db.scan.ScanStats stats = metaTable.getScanStats();
            fullTableRowCount = stats.getEstimatedNumRows();
            fullTableEstimatedSize = stats.getEstimatedSize();
            // If we get 0 rowCount, fallback to getting rowCount using old admin API.
            if (fullTableRowCount == 0) {
                PluginCost pluginCostModel = formatPlugin.getPluginCostModel();
                final int avgColumnSize = pluginCostModel.getAverageColumnSize(this);
                final int numColumns = (columns == null || columns.isEmpty() || Utilities.isStarQuery(columns)) ? STAR_COLS : columns.size();
                MapRDBTableStats tableStats = new MapRDBTableStats(formatPlugin.getFsConf(), scanSpec.getTableName());
                fullTableRowCount = tableStats.getNumRows();
                fullTableEstimatedSize = fullTableRowCount * numColumns * avgColumnSize;
            }
        }
    } catch (Exception e) {
        throw new DrillRuntimeException("Error getting region info for table: " + scanSpec.getTableName() + (scanSpec.getIndexDesc() == null ? "" : (", index: " + scanSpec.getIndexName())), e);
    }
}
Also used : MapRDBTableStats(org.apache.drill.exec.store.mapr.db.MapRDBTableStats) Table(com.mapr.db.Table) MetaTable(com.mapr.db.MetaTable) PluginCost(org.apache.drill.exec.planner.cost.PluginCost) DrillRuntimeException(org.apache.drill.common.exceptions.DrillRuntimeException) ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException)
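
When the MetaTable reports zero rows, init() falls back to the older MapRDBTableStats admin API for the row count and estimates the table size as rows times columns times average column size. That estimate reduces to a simple product, sketched below with assumed values:

// Sketch of the fallback size estimate in init(); all inputs are assumed.
public class FallbackSizeEstimateSketch {
    public static void main(String[] args) {
        long fullTableRowCount = 500_000;  // row count from the admin API (assumed)
        int numColumns = 20;               // projected columns, or STAR_COLS for SELECT * (assumed)
        int avgColumnSize = 10;            // bytes per column from the cost model (assumed)

        long fullTableEstimatedSize = fullTableRowCount * (long) numColumns * avgColumnSize;
        System.out.println("estimated table size = " + fullTableEstimatedSize + " bytes");  // 100000000
    }
}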

Aggregations

PluginCost (org.apache.drill.exec.planner.cost.PluginCost): 4 uses
ScanStats (org.apache.drill.exec.physical.base.ScanStats): 3 uses
MetaTable (com.mapr.db.MetaTable): 1 use
Table (com.mapr.db.Table): 1 use
DrillRuntimeException (org.apache.drill.common.exceptions.DrillRuntimeException): 1 use
ExecutionSetupException (org.apache.drill.common.exceptions.ExecutionSetupException): 1 use
MapRDBTableStats (org.apache.drill.exec.store.mapr.db.MapRDBTableStats): 1 use