Search in sources :

Example 26 with GroupScan

use of org.apache.drill.exec.physical.base.GroupScan in project drill by apache.

the class MapRDBPushLimitIntoScan method doPushLimitIntoGroupScan.

/**
 * Rewrites the matched scan (and the optional project sitting on top of it) so that the scan
 * uses a group scan obtained from {@code getGroupScanWithLimit(groupScan, limit)}.
 *
 * <p>If no such group scan is available ({@code null} is returned), the call is left untouched.
 * Any exception raised while building or registering the new subtree is logged at WARN level and
 * swallowed, so a failed pushdown attempt never propagates out of this method.
 *
 * @param call      the rule call on which the transformed subtree is registered
 * @param limit     the limit to push into the group scan
 * @param project   the project between the limit and the scan, or {@code null} if there is none
 * @param scan      the physical scan being replaced
 * @param groupScan the group scan currently backing {@code scan}
 */
protected void doPushLimitIntoGroupScan(RelOptRuleCall call, LimitPrel limit, final ProjectPrel project, ScanPrel scan, GroupScan groupScan) {
    try {
        final GroupScan newGroupScan = getGroupScanWithLimit(groupScan, limit);
        if (newGroupScan == null) {
            // No limit-aware group scan could be produced; nothing to transform.
            return;
        }
        final ScanPrel newScan = new ScanPrel(scan.getCluster(), scan.getTraitSet(), newGroupScan, scan.getRowType(), scan.getTable());
        // Re-create the project (when present) on top of the new scan so the subtree shape is preserved.
        final RelNode newChild = (project == null)
            ? newScan
            : new ProjectPrel(project.getCluster(), project.getTraitSet(), newScan, project.getProjects(), project.getRowType());
        call.transformTo(newChild);
        // Parameterized message: the group scan is only rendered to a string when DEBUG is enabled.
        logger.debug("pushLimitIntoGroupScan: Converted to a new ScanPrel {}", newScan.getGroupScan());
    } catch (Exception e) {
        // Deliberate best-effort: a failed pushdown leaves the original plan in place.
        logger.warn("pushLimitIntoGroupScan: Exception while trying limit pushdown!", e);
    }
}
Also used : RestrictedJsonTableGroupScan(org.apache.drill.exec.store.mapr.db.json.RestrictedJsonTableGroupScan) BinaryTableGroupScan(org.apache.drill.exec.store.mapr.db.binary.BinaryTableGroupScan) GroupScan(org.apache.drill.exec.physical.base.GroupScan) JsonTableGroupScan(org.apache.drill.exec.store.mapr.db.json.JsonTableGroupScan) ProjectPrel(org.apache.drill.exec.planner.physical.ProjectPrel) ScanPrel(org.apache.drill.exec.planner.physical.ScanPrel) RelNode(org.apache.calcite.rel.RelNode)

Example 27 with GroupScan

use of org.apache.drill.exec.physical.base.GroupScan in project drill by apache.

the class MapRDBStatistics method populateStats.

/**
 * Core routine that populates the statistics caches for a given query condition.
 *
 * <p>Different plan types need statistics for different combinations of predicates. There is
 * currently no tree-walker for {@link QueryCondition}, so instead of composing the statistics of
 * the overall predicate from the statistics of its individual predicates, this method relies on the
 * final predicates. As a consequence, the cached statistics are susceptible to predicates being
 * modified after the statistics were generated.
 *
 * <p>The statistics computed/stored are row counts, leading row counts and average row size.
 * Row counts and leading row counts (i.e. those corresponding to predicates on the leading index
 * columns) are stored in the statsCache. Average row sizes are stored in the fiStatsCache
 * (FI stands for Filter Independent).
 *
 * <p>Exit paths visible in this method:
 * <ul>
 *   <li>the group scan is not a {@code JsonTableGroupScan} (or the rel is neither a
 *       {@code DrillScanRel} nor a {@code ScanPrel}): returns without touching
 *       {@code statsAvailable};</li>
 *   <li>{@code condition} is {@code null}: delegates to {@code populateStatsForNoFilter} and sets
 *       {@code statsAvailable} to {@code true};</li>
 *   <li>the full-table payload is {@code null} or reports a row count of 0: returns without
 *       setting {@code statsAvailable};</li>
 *   <li>otherwise all caches are populated and {@code statsAvailable} is set to {@code true}.</li>
 * </ul>
 *
 * @param condition - The condition for which to obtain statistics; may be {@code null} (no filter)
 * @param indexes - The collection of indexes to use for getting statistics
 * @param scanRel - The current scanRel
 * @param context - The index plan call context
 */
private void populateStats(RexNode condition, IndexCollection indexes, DrillScanRelBase scanRel, IndexCallContext context) {
    JsonTableGroupScan jTabGrpScan;
    Map<IndexDescriptor, IndexConditionInfo> firstKeyIdxConditionMap;
    Map<IndexDescriptor, IndexConditionInfo> idxConditionMap;
    /* Map containing the individual base conditions of an ANDed/ORed condition and their selectivities.
     * This is used to compute the overall selectivity of a complex ANDed/ORed condition using its base
     * conditions. Helps prevent over/under estimates and guessed selectivity for ORed predicates.
     */
    Map<String, Double> baseConditionMap;
    GroupScan grpScan = IndexPlanUtils.getGroupScan(scanRel);
    // Statistics are only gathered for JSON table group scans reached through a logical or physical scan rel.
    if ((scanRel instanceof DrillScanRel || scanRel instanceof ScanPrel) && grpScan instanceof JsonTableGroupScan) {
        jTabGrpScan = (JsonTableGroupScan) grpScan;
    } else {
        logger.debug("Statistics: populateStats exit early - not an instance of JsonTableGroupScan!");
        return;
    }
    // No filter: a dedicated routine handles the statistics and we are done.
    if (condition == null) {
        populateStatsForNoFilter(jTabGrpScan, indexes, scanRel, context);
        statsAvailable = true;
        return;
    }
    RexBuilder builder = scanRel.getCluster().getRexBuilder();
    PlannerSettings settings = PrelUtil.getSettings(scanRel.getCluster());
    // Get the stats payload for full table (has total rows in the table)
    StatisticsPayload ftsPayload = jTabGrpScan.getFirstKeyEstimatedStats(null, null, scanRel);
    // Get the average row size for table and all indexes
    // NOTE(review): ftsPayload is passed here before the null check below - presumably this
    // addToCache overload tolerates a null payload; confirm.
    addToCache(null, jTabGrpScan.getAverageRowSizeStats(null), ftsPayload);
    // Without full-table statistics (or with an empty table) nothing else can be derived.
    // Note that statsAvailable is NOT set on this path.
    if (ftsPayload == null || ftsPayload.getRowCount() == 0) {
        return;
    }
    for (IndexDescriptor idx : indexes) {
        StatisticsPayload idxRowSizePayload = jTabGrpScan.getAverageRowSizeStats(idx);
        addToCache(idx, idxRowSizePayload, ftsPayload);
    }
    /* Only use indexes with distinct first key */
    IndexCollection distFKeyIndexes = distinctFKeyIndexes(indexes, scanRel);
    IndexConditionInfo.Builder infoBuilder = IndexConditionInfo.newBuilder(condition, distFKeyIndexes, builder, scanRel);
    idxConditionMap = infoBuilder.getIndexConditionMap();
    firstKeyIdxConditionMap = infoBuilder.getFirstKeyIndexConditionMap();
    baseConditionMap = new HashMap<>();
    // Pass 1: fetch statistics for the first-key (leading column) condition of each index and
    // record the base-condition selectivities used by computeSelectivity() in pass 2.
    for (IndexDescriptor idx : firstKeyIdxConditionMap.keySet()) {
        // Skip indexes for which the original condition is not indexed at all.
        if (IndexPlanUtils.conditionIndexed(context.getOrigMarker(), idx) == IndexPlanUtils.ConditionIndexed.NONE) {
            continue;
        }
        RexNode idxCondition = firstKeyIdxConditionMap.get(idx).indexCondition;
        /* Use the pre-processed condition only for getting actual statistic from MapR-DB APIs. Use the
         * original condition everywhere else (cache store/lookups) since the RexNode condition and its
         * corresponding QueryCondition will be used to get statistics. e.g. we convert LIKE into RANGE
         * condition to get statistics. However, statistics are always asked for LIKE and NOT the RANGE
         */
        RexNode preProcIdxCondition = convertToStatsCondition(idxCondition, idx, context, scanRel, Arrays.asList(SqlKind.CAST, SqlKind.LIKE));
        // Functional indexes need a rewritten row type; otherwise the scan's own row type is used.
        RelDataType newRowType;
        FunctionalIndexInfo functionInfo = idx.getFunctionalInfo();
        if (functionInfo.hasFunctional()) {
            newRowType = FunctionalIndexHelper.rewriteFunctionalRowType(scanRel, context, functionInfo);
        } else {
            newRowType = scanRel.getRowType();
        }
        QueryCondition queryCondition = jTabGrpScan.convertToQueryCondition(convertToLogicalExpression(preProcIdxCondition, newRowType, settings, builder));
        // Cap rows/size at total rows in case of issues with DB APIs
        // NOTE(review): idxPayload is dereferenced without a null check, while the result of the
        // same API call for the full table is null-checked above - confirm it cannot be null here.
        StatisticsPayload idxPayload = jTabGrpScan.getFirstKeyEstimatedStats(queryCondition, idx, scanRel);
        double rowCount = Math.min(idxPayload.getRowCount(), ftsPayload.getRowCount());
        double leadingRowCount = Math.min(idxPayload.getLeadingRowCount(), rowCount);
        double avgRowSize = Math.min(idxPayload.getAvgRowSize(), ftsPayload.getAvgRowSize());
        StatisticsPayload payload = new MapRDBStatisticsPayload(rowCount, leadingRowCount, avgRowSize);
        // The cache is keyed on the ORIGINAL index condition, not the pre-processed one (see above).
        addToCache(idxCondition, idx, context, payload, jTabGrpScan, scanRel, newRowType);
        // NOTE(review): base conditions are registered with scanRel.getRowType() rather than
        // newRowType - presumably intentional; confirm for functional indexes.
        addBaseConditions(idxCondition, payload, false, baseConditionMap, scanRel.getRowType());
    }
    /* Add the row count for index conditions on all indexes. Stats are only computed for leading
     * keys but index conditions can be pushed and would be required for access path costing
     */
    // Pass 2: derive row counts for the leading, total, index and included-column conditions of
    // every index by scaling the full-table row count with the computed selectivity.
    for (IndexDescriptor idx : idxConditionMap.keySet()) {
        if (IndexPlanUtils.conditionIndexed(context.getOrigMarker(), idx) == IndexPlanUtils.ConditionIndexed.NONE) {
            continue;
        }
        Map<LogicalExpression, RexNode> leadingPrefixMap = Maps.newHashMap();
        double rowCount, leadingRowCount, avgRowSize;
        RexNode idxCondition = idxConditionMap.get(idx).indexCondition;
        // Ignore conditions which always evaluate to true
        if (idxCondition.isAlwaysTrue()) {
            continue;
        }
        RexNode idxIncColCondition = idxConditionMap.get(idx).remainderCondition;
        // getLeadingPrefixMap fills leadingPrefixMap and returns a condition that is then combined
        // with the included-column condition to form the total remainder filter.
        RexNode idxRemColCondition = IndexPlanUtils.getLeadingPrefixMap(leadingPrefixMap, idx.getIndexColumns(), infoBuilder, idxCondition);
        RexNode idxLeadColCondition = IndexPlanUtils.getLeadingColumnsFilter(IndexPlanUtils.getLeadingFilters(leadingPrefixMap, idx.getIndexColumns()), builder);
        RexNode idxTotRemColCondition = IndexPlanUtils.getTotalRemainderFilter(idxRemColCondition, idxIncColCondition, builder);
        RexNode idxTotColCondition = IndexPlanUtils.getTotalFilter(idxLeadColCondition, idxTotRemColCondition, builder);
        FunctionalIndexInfo functionInfo = idx.getFunctionalInfo();
        RelDataType newRowType = scanRel.getRowType();
        if (functionInfo.hasFunctional()) {
            newRowType = FunctionalIndexHelper.rewriteFunctionalRowType(scanRel, context, functionInfo);
        }
        /* For non-covering plans we would need the index leading condition */
        rowCount = ftsPayload.getRowCount() * computeSelectivity(idxLeadColCondition, idx, ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
        // The leading row count computed here is reused for the covering and intersect payloads below.
        leadingRowCount = rowCount;
        // NOTE(review): assumes fIStatsCache already holds an entry for this index (written by the
        // average-row-size loop above); a missing entry would throw a NullPointerException - confirm.
        avgRowSize = fIStatsCache.get(buildUniqueIndexIdentifier(idx)).getAvgRowSize();
        addToCache(idxLeadColCondition, idx, context, new MapRDBStatisticsPayload(rowCount, leadingRowCount, avgRowSize), jTabGrpScan, scanRel, newRowType);
        /* For covering plans we would need the full condition */
        rowCount = ftsPayload.getRowCount() * computeSelectivity(idxTotColCondition, idx, ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
        addToCache(idxTotColCondition, idx, context, new MapRDBStatisticsPayload(rowCount, leadingRowCount, avgRowSize), jTabGrpScan, scanRel, newRowType);
        /* For intersect plans we would need the index condition */
        rowCount = ftsPayload.getRowCount() * computeSelectivity(idxCondition, idx, ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
        addToCache(idxCondition, idx, context, new MapRDBStatisticsPayload(rowCount, leadingRowCount, avgRowSize), jTabGrpScan, scanRel, newRowType);
        /* Add the rowCount for condition on only included columns - no leading columns here! */
        if (idxIncColCondition != null) {
            // Selectivity is computed without an index (null), and the same value is stored as both
            // row count and leading row count.
            rowCount = ftsPayload.getRowCount() * computeSelectivity(idxIncColCondition, null, ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
            addToCache(idxIncColCondition, idx, context, new MapRDBStatisticsPayload(rowCount, rowCount, avgRowSize), jTabGrpScan, scanRel, newRowType);
        }
    }
    // Add the rowCount for the complete condition - based on table
    double rowCount = ftsPayload.getRowCount() * computeSelectivity(condition, null, ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
    // Here, ftsLeadingKey rowcount is based on _id predicates
    // NOTE(review): ftsLeadingKeyPayload is dereferenced on the next statement without a null check - confirm.
    StatisticsPayload ftsLeadingKeyPayload = jTabGrpScan.getFirstKeyEstimatedStats(jTabGrpScan.convertToQueryCondition(convertToLogicalExpression(condition, scanRel.getRowType(), settings, builder)), null, scanRel);
    addToCache(condition, null, null, new MapRDBStatisticsPayload(rowCount, ftsLeadingKeyPayload.getRowCount(), ftsPayload.getAvgRowSize()), jTabGrpScan, scanRel, scanRel.getRowType());
    // Add the full table rows while we are at it - represented by <NULL> RexNode, <NULL> QueryCondition.
    // No ftsLeadingKey so leadingKeyRowcount = totalRowCount
    addToCache(null, null, null, new MapRDBStatisticsPayload(ftsPayload.getRowCount(), ftsPayload.getRowCount(), ftsPayload.getAvgRowSize()), jTabGrpScan, scanRel, scanRel.getRowType());
    // Mark the statistics as available now that all caches have been populated
    statsAvailable = true;
}
Also used : DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) ScanPrel(org.apache.drill.exec.planner.physical.ScanPrel) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) RelDataType(org.apache.calcite.rel.type.RelDataType) DbGroupScan(org.apache.drill.exec.physical.base.DbGroupScan) GroupScan(org.apache.drill.exec.physical.base.GroupScan) JsonTableGroupScan(org.apache.drill.exec.store.mapr.db.json.JsonTableGroupScan) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) JsonTableGroupScan(org.apache.drill.exec.store.mapr.db.json.JsonTableGroupScan) RexBuilder(org.apache.calcite.rex.RexBuilder) QueryCondition(org.ojai.store.QueryCondition) RexNode(org.apache.calcite.rex.RexNode)

Example 28 with GroupScan

use of org.apache.drill.exec.physical.base.GroupScan in project drill by apache.

the class MapRDBStatistics method initialize.

/**
 * Populates the statistics caches for {@code condition} unless they already hold an entry for it.
 *
 * <p>The row-key join cost factor is refreshed from the planner settings on every call,
 * regardless of whether statistics end up being populated.
 *
 * @param condition the condition for which statistics are requested
 * @param scanRel   the scan rel whose group scan supplies the statistics
 * @param context   the index plan call context
 * @return {@code true} if the group scan is a {@code DbGroupScan} and statistics were populated by
 *         this call; {@code false} if the group scan is of another kind or the statsCache already
 *         has an entry for the condition
 */
public boolean initialize(RexNode condition, DrillScanRelBase scanRel, IndexCallContext context) {
    final GroupScan groupScan = IndexPlanUtils.getGroupScan(scanRel);
    final PlannerSettings plannerSettings = PrelUtil.getPlannerSettings(scanRel.getCluster().getPlanner());
    rowKeyJoinBackIOFactor = plannerSettings.getIndexRowKeyJoinCostFactor();

    // Only DB-backed group scans expose secondary indexes to gather statistics from.
    if (!(groupScan instanceof DbGroupScan)) {
        return false;
    }

    // The statsCache is keyed on the string form of the condition; an existing entry means
    // statistics for this condition were already gathered.
    final String cacheKey = convertRexToString(condition, scanRel.getRowType());
    if (statsCache.get(cacheKey) != null) {
        return false;
    }

    final DbGroupScan dbGroupScan = (DbGroupScan) groupScan;
    final IndexCollection secondaryIndexes = dbGroupScan.getSecondaryIndexCollection(scanRel);
    populateStats(condition, secondaryIndexes, scanRel, context);
    logger.info("index_plan_info: initialize: scanRel #{} and groupScan {} got fulltable {}, statsCache: {}, fiStatsCache: {}", scanRel.getId(), System.identityHashCode(groupScan), fullTableScanPayload, statsCache, fIStatsCache);
    return true;
}
Also used : DbGroupScan(org.apache.drill.exec.physical.base.DbGroupScan) GroupScan(org.apache.drill.exec.physical.base.GroupScan) JsonTableGroupScan(org.apache.drill.exec.store.mapr.db.json.JsonTableGroupScan) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) DbGroupScan(org.apache.drill.exec.physical.base.DbGroupScan)

Example 29 with GroupScan

use of org.apache.drill.exec.physical.base.GroupScan in project drill by apache.

the class IcebergPluginImplementor method implement.

/**
 * Visits the project's input, then narrows the current group scan to the projected columns.
 *
 * <p>Each projected expression is converted to a Drill expression and cast to
 * {@link SchemaPath}; the resulting list is passed to {@code groupScan.clone(...)} and the
 * returned scan replaces the {@code groupScan} field.
 *
 * @param project the project rel being implemented
 * @throws IOException declared by the overridden method
 */
@Override
public void implement(PluginProjectRel project) throws IOException {
    // The input subtree is processed before the projection is applied.
    visitChild(project.getInput());

    DrillParseContext parseContext = new DrillParseContext(PrelUtil.getPlannerSettings(project.getCluster().getPlanner()));
    RelNode projectInput = project.getInput();

    List<SchemaPath> projectedColumns = project.getProjects().stream()
        .map(rexExpr -> DrillOptiq.toDrill(parseContext, projectInput, rexExpr))
        .map(SchemaPath.class::cast)
        .collect(Collectors.toList());

    groupScan = groupScan.clone(projectedColumns);
}
Also used : Project(org.apache.calcite.rel.core.Project) IcebergGroupScan(org.apache.drill.exec.store.iceberg.IcebergGroupScan) RelShuttleImpl(org.apache.calcite.rel.RelShuttleImpl) Filter(org.apache.calcite.rel.core.Filter) RelSubset(org.apache.calcite.plan.volcano.RelSubset) AbstractPluginImplementor(org.apache.drill.exec.store.plan.AbstractPluginImplementor) BigDecimal(java.math.BigDecimal) DrillOptiq(org.apache.drill.exec.planner.logical.DrillOptiq) Expression(org.apache.iceberg.expressions.Expression) RexNode(org.apache.calcite.rex.RexNode) PluginProjectRel(org.apache.drill.exec.store.plan.rel.PluginProjectRel) DrillParseContext(org.apache.drill.exec.planner.logical.DrillParseContext) PrelUtil(org.apache.drill.exec.planner.physical.PrelUtil) PluginLimitRel(org.apache.drill.exec.store.plan.rel.PluginLimitRel) PluginFilterRel(org.apache.drill.exec.store.plan.rel.PluginFilterRel) Binder(org.apache.iceberg.expressions.Binder) RexLiteral(org.apache.calcite.rex.RexLiteral) SchemaPath(org.apache.drill.common.expression.SchemaPath) IOException(java.io.IOException) RelNode(org.apache.calcite.rel.RelNode) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) Collectors(java.util.stream.Collectors) ValidationException(org.apache.iceberg.exceptions.ValidationException) List(java.util.List) DrillLimitRelBase(org.apache.drill.exec.planner.common.DrillLimitRelBase) GroupScan(org.apache.drill.exec.physical.base.GroupScan) Util(org.apache.calcite.util.Util) StoragePluginTableScan(org.apache.drill.exec.store.plan.rel.StoragePluginTableScan) RelNode(org.apache.calcite.rel.RelNode) SchemaPath(org.apache.drill.common.expression.SchemaPath) DrillParseContext(org.apache.drill.exec.planner.logical.DrillParseContext)

Example 30 with GroupScan

use of org.apache.drill.exec.physical.base.GroupScan in project drill by apache.

the class IcebergPluginImplementor method canImplement.

/**
 * Decides whether the given filter can be implemented by this implementor.
 *
 * <p>The filter condition is converted to a Drill expression and translated with
 * {@code DrillExprToIcebergTranslator}. If the translation yields an expression, it is bound
 * against the schema of the table scan found under the filter.
 *
 * @param filter the filter to check
 * @return {@code true} only if the condition translates to a non-null expression, the group scan
 *         located by {@code findGroupScan} is an {@link IcebergGroupScan}, and binding succeeds
 *         with a non-null result; {@code false} otherwise, including when binding throws a
 *         {@link ValidationException}
 */
@Override
public boolean canImplement(Filter filter) {
    RexNode filterCondition = filter.getCondition();
    DrillParseContext parseContext = new DrillParseContext(PrelUtil.getPlannerSettings(filter.getCluster().getPlanner()));
    LogicalExpression drillExpr = DrillOptiq.toDrill(parseContext, filter.getInput(), filterCondition);
    Expression icebergExpr = drillExpr.accept(DrillExprToIcebergTranslator.INSTANCE, null);
    if (icebergExpr == null) {
        // The condition has no translation.
        return false;
    }
    try {
        GroupScan scan = findGroupScan(filter);
        if (!(scan instanceof IcebergGroupScan)) {
            return false;
        }
        IcebergGroupScan icebergScan = (IcebergGroupScan) scan;
        // ensures that expression compatible with table schema
        return Binder.bind(icebergScan.getTableScan().schema().asStruct(), icebergExpr, true) != null;
    } catch (ValidationException e) {
        // Binding failed: the expression does not fit the table schema.
        return false;
    }
}
Also used : IcebergGroupScan(org.apache.drill.exec.store.iceberg.IcebergGroupScan) GroupScan(org.apache.drill.exec.physical.base.GroupScan) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) ValidationException(org.apache.iceberg.exceptions.ValidationException) Expression(org.apache.iceberg.expressions.Expression) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) DrillParseContext(org.apache.drill.exec.planner.logical.DrillParseContext) IcebergGroupScan(org.apache.drill.exec.store.iceberg.IcebergGroupScan) RexNode(org.apache.calcite.rex.RexNode)

Aggregations

GroupScan (org.apache.drill.exec.physical.base.GroupScan)33 DrillScanRel (org.apache.drill.exec.planner.logical.DrillScanRel)19 PlannerSettings (org.apache.drill.exec.planner.physical.PlannerSettings)12 TableScan (org.apache.calcite.rel.core.TableScan)10 RexNode (org.apache.calcite.rex.RexNode)9 RelOptRuleCall (org.apache.calcite.plan.RelOptRuleCall)8 RelNode (org.apache.calcite.rel.RelNode)8 DrillFilterRel (org.apache.drill.exec.planner.logical.DrillFilterRel)8 LogicalExpression (org.apache.drill.common.expression.LogicalExpression)7 SchemaPath (org.apache.drill.common.expression.SchemaPath)6 DrillProjectRel (org.apache.drill.exec.planner.logical.DrillProjectRel)6 IOException (java.io.IOException)5 ArrayList (java.util.ArrayList)5 DrillParseContext (org.apache.drill.exec.planner.logical.DrillParseContext)5 ScanPrel (org.apache.drill.exec.planner.physical.ScanPrel)5 AggregateCall (org.apache.calcite.rel.core.AggregateCall)4 RelDataType (org.apache.calcite.rel.type.RelDataType)4 DbGroupScan (org.apache.drill.exec.physical.base.DbGroupScan)4 FileGroupScan (org.apache.drill.exec.physical.base.FileGroupScan)4 ParquetPartitionDescriptor (org.apache.drill.exec.planner.ParquetPartitionDescriptor)4