Search in sources :

Example 1 with MetaTable

use of com.mapr.db.MetaTable in project drill by apache.

the class JsonTableGroupScan method getRegionsToScan.

/**
 * Returns the tablet fragments to scan, each mapped to one host location, computing them on the
 * first call only.
 *
 * <p>When {@code doNotAccessRegionsToScan} is still {@code null}, the scan ranges are obtained from
 * the table's {@link MetaTable} (restricted by the scan spec's condition when it has one), the scan
 * spec's start row and stop row are taken from the first and last scan range, and the resulting map
 * is handed to {@code setRegionsToScan}.
 *
 * @param scanRangeSizeMB scan range size in MB, forwarded to {@code MetaTable.getScanRanges}
 * @return the value of {@code doNotAccessRegionsToScan} once the computation (if any) has run
 */
protected NavigableMap<TabletFragmentInfo, String> getRegionsToScan(int scanRangeSizeMB) {
    // Already computed on an earlier call: nothing to do.
    if (doNotAccessRegionsToScan != null) {
        return doNotAccessRegionsToScan;
    }

    final Table table = this.formatPlugin.getJsonTableCache()
        .getTable(scanSpec.getTableName(), scanSpec.getIndexDesc(), getUserName());
    final MetaTable meta = table.getMetaTable();
    final QueryCondition specCondition = scanSpec.getCondition();
    final List<ScanRange> ranges;
    if (specCondition == null) {
        ranges = meta.getScanRanges(scanRangeSizeMB);
    } else {
        ranges = meta.getScanRanges(specCondition, scanRangeSizeMB);
    }
    logger.debug("getRegionsToScan() with scanSpec {}: table={}, index={}, condition={}, sizeMB={}, #ScanRanges={}",
        System.identityHashCode(scanSpec),
        scanSpec.getTableName(),
        scanSpec.getIndexName(),
        scanSpec.getCondition() == null ? "null" : scanSpec.getCondition(),
        scanRangeSizeMB,
        ranges == null ? "null" : ranges.size());
    final TreeMap<TabletFragmentInfo, String> fragments = new TreeMap<>();

    // Index scans look up statistics under a table/index identifier; primary-table scans pass null.
    String statsKey = null;
    if (isIndexScan()) {
        statsKey = stats.buildUniqueIndexIdentifier(
            scanSpec.getIndexDesc().getPrimaryTablePath(), scanSpec.getIndexDesc().getIndexName());
    }
    double estimatedRowCount = ROWCOUNT_UNKNOWN;
    if (stats.isStatsAvailable()) {
        estimatedRowCount = stats.getRowCount(scanSpec.getCondition(), statsKey);
    }
    // If limit pushdown has occurred - factor it in the rowcount
    if (this.maxRecordsToRead > 0) {
        estimatedRowCount = Math.min(estimatedRowCount, this.maxRecordsToRead);
    }

    // A known, non-zero estimated row count requires at least one scan range.
    final boolean hasRanges = ranges != null && ranges.size() > 0;
    final boolean noRowsExpected = estimatedRowCount == ROWCOUNT_UNKNOWN || estimatedRowCount == 0;
    Preconditions.checkState(noRowsExpected || hasRanges,
        String.format("#Scan ranges should be greater than 0 since estimated rowcount=[%f]", estimatedRowCount));

    if (hasRanges) {
        // The scan spec starts where the first scan range starts ...
        final ConditionImpl headCondition = (ConditionImpl) ranges.get(0).getCondition();
        scanSpec.setStartRow(headCondition.getRowkeyRanges().get(0).getStartRow());

        // ... and stops where the last rowkey range of the last scan range stops.
        final ConditionImpl tailCondition = (ConditionImpl) ranges.get(ranges.size() - 1).getCondition();
        final List<RowkeyRange> tailRowkeys = tailCondition.getRowkeyRanges();
        scanSpec.setStopRow(tailRowkeys.get(tailRowkeys.size() - 1).getStopRow());

        // One fragment per scan range, mapped to the first location the range reports.
        for (ScanRange scanRange : ranges) {
            fragments.put(new TabletFragmentInfo((TabletInfoImpl) scanRange), scanRange.getLocations()[0]);
        }
    }
    setRegionsToScan(fragments);
    return doNotAccessRegionsToScan;
}
Also used : TabletInfoImpl(com.mapr.db.impl.TabletInfoImpl) Table(com.mapr.db.Table) MetaTable(com.mapr.db.MetaTable) ScanRange(com.mapr.db.scan.ScanRange) TreeMap(java.util.TreeMap) ConditionImpl(com.mapr.db.impl.ConditionImpl) MetaTable(com.mapr.db.MetaTable) QueryCondition(org.ojai.store.QueryCondition) TabletFragmentInfo(org.apache.drill.exec.store.mapr.db.TabletFragmentInfo) RowkeyRange(com.mapr.db.impl.ConditionNode.RowkeyRange)

Example 2 with MetaTable

use of com.mapr.db.MetaTable in project drill by apache.

the class JsonTableGroupScan method getFirstKeyEstimatedStatsInternal.

/**
 * Get the estimated statistics after applying the {@link QueryCondition} condition.
 *
 * <p>The row count reported by the DB client is multiplied by a scaling factor that expresses
 * confidence in the tablet-level estimate. The factor is only applied when a condition is present
 * and that condition actually prunes rows on the primary table.
 *
 * @param condition filter to apply; {@code null} requests full-table statistics
 * @param index to use for generating the estimate; {@code null} means the primary table
 * @param scanRel the current scan rel, used to read the planner settings
 * @return {@link MapRDBStatisticsPayload} statistics; all values are the UNKNOWN constants when the
 *         table cannot be obtained from the table cache
 * @throws UnsupportedOperationException if no index is given but the scan spec is a secondary index
 */
private MapRDBStatisticsPayload getFirstKeyEstimatedStatsInternal(QueryCondition condition, IndexDesc index, RelNode scanRel) {
    // If no index is specified, the estimate must come from the primary table
    if (index == null && scanSpec.isSecondaryIndex()) {
        throw new UnsupportedOperationException("getFirstKeyEstimatedStats should be invoked on primary table");
    }
    // Get the index table or primary table and use the DB API to get the estimated number of rows. For size estimates,
    // we assume that all the columns would be read from the disk.
    final Table table = this.formatPlugin.getJsonTableCache().getTable(scanSpec.getTableName(), index, getUserName());
    if (table == null) {
        logger.info("index_plan_info: getEstimatedRowCount: {} indexName: {}, indexInfo: {}, " + "condition: {} rowCount: UNKNOWN, avgRowSize: UNKNOWN", this, (index == null ? "null" : index.getIndexName()), (index == null ? "null" : index.getIndexInfo()), (condition == null ? "null" : condition.toString()));
        return new MapRDBStatisticsPayload(ROWCOUNT_UNKNOWN, ROWCOUNT_UNKNOWN, AVG_ROWSIZE_UNKNOWN);
    }

    final MetaTable metaTable = table.getMetaTable();
    // The unconditioned stats are needed for the full-scan check, the small-table check and the log line
    // below, so fetch them once instead of asking the DB client again each time.
    final com.mapr.db.scan.ScanStats fullTableStats = metaTable.getScanStats();
    final com.mapr.db.scan.ScanStats stats = (condition == null) ? fullTableStats : metaTable.getScanStats(condition);

    boolean isFullScan = false;
    if (index == null && condition != null) {
        // Given table condition might not be on leading column. Check if the rowcount matches full table rows.
        // In that case no leading key present or does not prune enough. Treat it like so.
        if (stats.getEstimatedNumRows() == fullTableStats.getEstimatedNumRows()) {
            isFullScan = true;
        }
    }

    // Factor reflecting confidence in the DB estimates. If a table has few tablets, the tablet-level stats
    // might be off. The decay scalingFactor will reduce estimates when one tablet represents a significant percentage
    // of the entire table.
    double scalingFactor = 1.0;
    // Use the scalingFactor only when a condition filters out rows from the table. If no condition is present, all rows
    // should be selected. So the scalingFactor should not reduce the returned rows
    if (condition != null && !isFullScan) {
        double forcedScalingFactor = PrelUtil.getSettings(scanRel.getCluster()).getIndexStatsRowCountScalingFactor();
        // For 2 or less matching tablets, the error is assumed to be 50%. The Sqrt gives the decaying scalingFactor
        if (stats.getTabletCount() > 2) {
            double accuracy = 1.0 - (2.0 / stats.getTabletCount());
            scalingFactor = Math.min(1.0, 1.0 / Math.sqrt(1.0 / accuracy));
        } else {
            scalingFactor = 0.5;
        }
        if (forcedScalingFactor < 1.0 && fullTableStats.getTabletCount() < PluginConstants.JSON_TABLE_NUM_TABLETS_PER_INDEX_DEFAULT) {
            // User forced confidence scalingFactor for small tables (assumed as less than 32 tablets (~512 MB))
            scalingFactor = forcedScalingFactor;
        }
    }

    // Compute the average row size once, in floating point, so the logged value is the returned value.
    final double avgRowSize = stats.getEstimatedNumRows() == 0 ? 0 : (double) stats.getEstimatedSize() / stats.getEstimatedNumRows();
    final double scaledRowCount = scalingFactor * stats.getEstimatedNumRows();
    logger.info("index_plan_info: getEstimatedRowCount obtained from DB Client for {}: indexName: {}, indexInfo: {}, " + "condition: {} rowCount: {}, avgRowSize: {}, estimatedSize {}, tabletCount {}, totalTabletCount {}, " + "scalingFactor {}", this, (index == null ? "null" : index.getIndexName()), (index == null ? "null" : index.getIndexInfo()), (condition == null ? "null" : condition.toString()), stats.getEstimatedNumRows(), avgRowSize, stats.getEstimatedSize(), stats.getTabletCount(), fullTableStats.getTabletCount(), scalingFactor);
    return new MapRDBStatisticsPayload(scaledRowCount, scaledRowCount, avgRowSize);
}
Also used : Table(com.mapr.db.Table) MetaTable(com.mapr.db.MetaTable) MapRDBStatisticsPayload(org.apache.drill.exec.planner.index.MapRDBStatisticsPayload) MetaTable(com.mapr.db.MetaTable)

Example 3 with MetaTable

use of com.mapr.db.MetaTable in project drill by apache.

the class JsonTableGroupScan method getAverageRowSizeStats.

/**
 * Get the average row size reported by the DB client for the primary table or the given index.
 * DO NOT call this API directly.
 * Call the stats API instead which modifies the counts based on preference options.
 * @param index to use for generating the estimate; {@code null} means the primary table
 * @return {@link MapRDBStatisticsPayload} whose row counts are both ROWCOUNT_UNKNOWN and whose
 *         average row size is AVG_ROWSIZE_UNKNOWN when no table or meta table is available
 * @throws UnsupportedOperationException if no index is given but the scan spec is a secondary index
 */
public MapRDBStatisticsPayload getAverageRowSizeStats(IndexDescriptor index) {
    // Unwrap the planner-side descriptor into the DB-side one, if an index was supplied at all.
    final IndexDesc indexDesc = (index == null)
        ? null
        : (IndexDesc) ((MapRDBIndexDescriptor) index).getOriginalDesc();

    // Without an index the estimate has to come from the primary table
    if (indexDesc == null && scanSpec.isSecondaryIndex()) {
        throw new UnsupportedOperationException("getAverageRowSizeStats should be invoked on primary table");
    }

    // Get the index table or primary table and ask the DB API for its average row size.
    final Table table = this.formatPlugin.getJsonTableCache()
        .getTable(scanSpec.getTableName(), indexDesc, getUserName());
    final MetaTable metaTable = (table == null) ? null : table.getMetaTable();

    double avgRowSize = AVG_ROWSIZE_UNKNOWN;
    if (metaTable != null) {
        avgRowSize = metaTable.getAverageRowSize();
    }

    logger.debug("index_plan_info: getEstimatedRowCount obtained from DB Client for {}: indexName: {}, indexInfo: {}, " + "avgRowSize: {}, estimatedSize {}",
        this,
        (indexDesc == null ? "null" : indexDesc.getIndexName()),
        (indexDesc == null ? "null" : indexDesc.getIndexInfo()),
        avgRowSize,
        fullTableEstimatedSize);
    return new MapRDBStatisticsPayload(ROWCOUNT_UNKNOWN, ROWCOUNT_UNKNOWN, avgRowSize);
}
Also used : Table(com.mapr.db.Table) MetaTable(com.mapr.db.MetaTable) MapRDBStatisticsPayload(org.apache.drill.exec.planner.index.MapRDBStatisticsPayload) IndexDesc(com.mapr.db.index.IndexDesc) MetaTable(com.mapr.db.MetaTable) MapRDBIndexDescriptor(org.apache.drill.exec.planner.index.MapRDBIndexDescriptor)

Example 4 with MetaTable

use of com.mapr.db.MetaTable in project drill by apache.

the class JsonTableGroupScan method init.

/**
 * Populates {@code fullTableRowCount} and {@code fullTableEstimatedSize} if the row count is still 0.
 *
 * <p>The values come from the unconditioned scan stats of the table's {@link MetaTable}. If those
 * report 0 rows, the row count is read through {@link MapRDBTableStats} instead and the size is
 * estimated as rows x columns x average column size.
 *
 * @throws DrillRuntimeException wrapping any exception raised while gathering the statistics
 */
private void init() {
    try {
        // Get the fullTableRowCount only once i.e. skip everything if it was obtained before.
        if (fullTableRowCount != 0) {
            return;
        }

        final Table table = this.formatPlugin.getJsonTableCache()
            .getTable(scanSpec.getTableName(), scanSpec.getIndexDesc(), getUserName());
        final MetaTable meta = table.getMetaTable();
        // With no condition supplied, these are the full table stats.
        final com.mapr.db.scan.ScanStats fullStats = meta.getScanStats();
        fullTableRowCount = fullStats.getEstimatedNumRows();
        fullTableEstimatedSize = fullStats.getEstimatedSize();
        if (fullTableRowCount != 0) {
            return;
        }

        // The DB client reported 0 rows: fall back to getting rowCount using old admin API.
        final PluginCost costModel = formatPlugin.getPluginCostModel();
        final int avgColumnSize = costModel.getAverageColumnSize(this);
        final int numColumns;
        if (columns == null || columns.isEmpty() || Utilities.isStarQuery(columns)) {
            numColumns = STAR_COLS;
        } else {
            numColumns = columns.size();
        }
        final MapRDBTableStats adminStats = new MapRDBTableStats(formatPlugin.getFsConf(), scanSpec.getTableName());
        fullTableRowCount = adminStats.getNumRows();
        fullTableEstimatedSize = fullTableRowCount * numColumns * avgColumnSize;
    } catch (Exception e) {
        final String indexSuffix = scanSpec.getIndexDesc() == null ? "" : (", index: " + scanSpec.getIndexName());
        throw new DrillRuntimeException("Error getting region info for table: " + scanSpec.getTableName() + indexSuffix, e);
    }
}
Also used : MapRDBTableStats(org.apache.drill.exec.store.mapr.db.MapRDBTableStats) Table(com.mapr.db.Table) MetaTable(com.mapr.db.MetaTable) PluginCost(org.apache.drill.exec.planner.cost.PluginCost) MetaTable(com.mapr.db.MetaTable) DrillRuntimeException(org.apache.drill.common.exceptions.DrillRuntimeException) DrillRuntimeException(org.apache.drill.common.exceptions.DrillRuntimeException) ExecutionSetupException(org.apache.drill.common.exceptions.ExecutionSetupException)

Aggregations

MetaTable (com.mapr.db.MetaTable)4 Table (com.mapr.db.Table)4 MapRDBStatisticsPayload (org.apache.drill.exec.planner.index.MapRDBStatisticsPayload)2 ConditionImpl (com.mapr.db.impl.ConditionImpl)1 RowkeyRange (com.mapr.db.impl.ConditionNode.RowkeyRange)1 TabletInfoImpl (com.mapr.db.impl.TabletInfoImpl)1 IndexDesc (com.mapr.db.index.IndexDesc)1 ScanRange (com.mapr.db.scan.ScanRange)1 TreeMap (java.util.TreeMap)1 DrillRuntimeException (org.apache.drill.common.exceptions.DrillRuntimeException)1 ExecutionSetupException (org.apache.drill.common.exceptions.ExecutionSetupException)1 PluginCost (org.apache.drill.exec.planner.cost.PluginCost)1 MapRDBIndexDescriptor (org.apache.drill.exec.planner.index.MapRDBIndexDescriptor)1 MapRDBTableStats (org.apache.drill.exec.store.mapr.db.MapRDBTableStats)1 TabletFragmentInfo (org.apache.drill.exec.store.mapr.db.TabletFragmentInfo)1 QueryCondition (org.ojai.store.QueryCondition)1