Examples with GroupScan - org.apache.drill.exec.physical.base.GroupScan

Example 26 with GroupScan

Use of org.apache.drill.exec.physical.base.GroupScan in the Apache Drill project.

In class MapRDBPushLimitIntoScan, method doPushLimitIntoGroupScan.

/**
 * Tries to push a limit into the group scan and, on success, hands the rewritten plan fragment to the planner.
 *
 * <p>A limit-aware group scan is requested via {@code getGroupScanWithLimit}. If that returns {@code null} the
 * method returns without transforming anything. Otherwise a new {@link ScanPrel} is built around the new group
 * scan (reusing the cluster, trait set, row type and table of {@code scan}); when {@code project} is non-null the
 * new scan is wrapped in a new {@link ProjectPrel} carrying the same projections and row type. The result is
 * registered with {@code call.transformTo}.
 *
 * <p>This is a best-effort rewrite: any {@link Exception} raised along the way is logged at WARN level and not
 * propagated to the caller.
 *
 * @param call      the rule call that receives the transformed node
 * @param limit     the limit to push down
 * @param project   optional project sitting between the limit and the scan; may be {@code null}
 * @param scan      the scan whose group scan is being replaced
 * @param groupScan the current group scan of {@code scan}
 */
protected void doPushLimitIntoGroupScan(RelOptRuleCall call, LimitPrel limit, final ProjectPrel project, ScanPrel scan, GroupScan groupScan) {
    try {
        final GroupScan newGroupScan = getGroupScanWithLimit(groupScan, limit);
        if (newGroupScan == null) {
            // No limit-aware group scan was produced; leave the plan untouched.
            return;
        }
        final ScanPrel newScan = new ScanPrel(scan.getCluster(), scan.getTraitSet(), newGroupScan, scan.getRowType(), scan.getTable());
        // Re-create the project on top of the new scan when one was matched; otherwise the scan itself is the result.
        final RelNode newChild = (project == null)
            ? newScan
            : new ProjectPrel(project.getCluster(), project.getTraitSet(), newScan, project.getProjects(), project.getRowType());
        call.transformTo(newChild);
        // Parameterized logging: the group scan is only rendered to a string when DEBUG is enabled.
        logger.debug("pushLimitIntoGroupScan: Converted to a new ScanPrel {}", newScan.getGroupScan());
    } catch (Exception e) {
        // Deliberately broad: a failed pushdown must not fail planning.
        logger.warn("pushLimitIntoGroupScan: Exception while trying limit pushdown!", e);
    }
}

Also used : RestrictedJsonTableGroupScan(org.apache.drill.exec.store.mapr.db.json.RestrictedJsonTableGroupScan) BinaryTableGroupScan(org.apache.drill.exec.store.mapr.db.binary.BinaryTableGroupScan) GroupScan(org.apache.drill.exec.physical.base.GroupScan) JsonTableGroupScan(org.apache.drill.exec.store.mapr.db.json.JsonTableGroupScan) ProjectPrel(org.apache.drill.exec.planner.physical.ProjectPrel) ScanPrel(org.apache.drill.exec.planner.physical.ScanPrel) RelNode(org.apache.calcite.rel.RelNode)

Example 27 with GroupScan

Use of org.apache.drill.exec.physical.base.GroupScan in the Apache Drill project.

In class MapRDBStatistics, method populateStats.

/**
 * Core routine that populates the statistics caches for a given query condition.
 *
 * <p>Different types of plans need statistics for different combinations of predicates. There is currently no
 * tree-walker for {@link QueryCondition}, so instead of composing the statistics of the overall predicate from the
 * statistics of its individual predicates, this method relies on the final predicates. As a consequence the cached
 * statistics are susceptible to predicates being modified after the statistics were generated.
 *
 * <p>The statistics computed/stored are row counts, leading row counts and average row sizes. Row counts and
 * leading row counts (i.e. those corresponding to predicates on the leading index columns) are stored in the
 * statsCache. Average row sizes are stored in the fIStatsCache (FI stands for Filter Independent).
 *
 * <p>The method returns early when the group scan is not a {@code JsonTableGroupScan}, and when the full-table
 * payload is {@code null} or reports zero rows. {@code statsAvailable} is set to {@code true} on the no-filter path
 * and at the end of the full path only.
 *
 * @param condition - The condition for which to obtain statistics; {@code null} selects the no-filter path
 * @param indexes - The collection of indexes to use for getting statistics
 * @param scanRel - The current scanRel
 * @param context - The index plan call context
 */
private void populateStats(RexNode condition, IndexCollection indexes, DrillScanRelBase scanRel, IndexCallContext context) {
    JsonTableGroupScan jTabGrpScan;
    Map<IndexDescriptor, IndexConditionInfo> firstKeyIdxConditionMap;
    Map<IndexDescriptor, IndexConditionInfo> idxConditionMap;
    /* Map containing the individual base conditions of an ANDed/ORed condition and their selectivities.
     * This is used to compute the overall selectivity of a complex ANDed/ORed condition using its base
     * conditions. Helps prevent over/under estimates and guessed selectivity for ORed predicates.
     */
    Map<String, Double> baseConditionMap;
    GroupScan grpScan = IndexPlanUtils.getGroupScan(scanRel);
    // Statistics are only gathered for a DrillScanRel/ScanPrel that is backed by a JsonTableGroupScan.
    if ((scanRel instanceof DrillScanRel || scanRel instanceof ScanPrel) && grpScan instanceof JsonTableGroupScan) {
        jTabGrpScan = (JsonTableGroupScan) grpScan;
    } else {
        logger.debug("Statistics: populateStats exit early - not an instance of JsonTableGroupScan!");
        return;
    }
    // No filter condition: delegate to the no-filter variant and mark the statistics as available.
    if (condition == null) {
        populateStatsForNoFilter(jTabGrpScan, indexes, scanRel, context);
        statsAvailable = true;
        return;
    }
    RexBuilder builder = scanRel.getCluster().getRexBuilder();
    PlannerSettings settings = PrelUtil.getSettings(scanRel.getCluster());
    // Get the stats payload for full table (has total rows in the table)
    StatisticsPayload ftsPayload = jTabGrpScan.getFirstKeyEstimatedStats(null, null, scanRel);
    // Get the average row size for the table (null index descriptor).
    // NOTE(review): this runs before the null check on ftsPayload below - confirm addToCache tolerates a null ftsPayload.
    addToCache(null, jTabGrpScan.getAverageRowSizeStats(null), ftsPayload);
    // Without a full-table payload, or with zero rows, nothing else is computed.
    // Note that statsAvailable is left unchanged on this path.
    if (ftsPayload == null || ftsPayload.getRowCount() == 0) {
        return;
    }
    // Get the average row size for every index
    for (IndexDescriptor idx : indexes) {
        StatisticsPayload idxRowSizePayload = jTabGrpScan.getAverageRowSizeStats(idx);
        addToCache(idx, idxRowSizePayload, ftsPayload);
    }
    /* Only use indexes with distinct first key */
    IndexCollection distFKeyIndexes = distinctFKeyIndexes(indexes, scanRel);
    IndexConditionInfo.Builder infoBuilder = IndexConditionInfo.newBuilder(condition, distFKeyIndexes, builder, scanRel);
    idxConditionMap = infoBuilder.getIndexConditionMap();
    firstKeyIdxConditionMap = infoBuilder.getFirstKeyIndexConditionMap();
    baseConditionMap = new HashMap<>();
    // First pass: fetch statistics for the first-key index conditions and record their base conditions.
    for (IndexDescriptor idx : firstKeyIdxConditionMap.keySet()) {
        // Skip indexes for which conditionIndexed reports NONE.
        if (IndexPlanUtils.conditionIndexed(context.getOrigMarker(), idx) == IndexPlanUtils.ConditionIndexed.NONE) {
            continue;
        }
        RexNode idxCondition = firstKeyIdxConditionMap.get(idx).indexCondition;
        /* Use the pre-processed condition only for getting actual statistic from MapR-DB APIs. Use the
         * original condition everywhere else (cache store/lookups) since the RexNode condition and its
         * corresponding QueryCondition will be used to get statistics. e.g. we convert LIKE into RANGE
         * condition to get statistics. However, statistics are always asked for LIKE and NOT the RANGE
         */
        RexNode preProcIdxCondition = convertToStatsCondition(idxCondition, idx, context, scanRel, Arrays.asList(SqlKind.CAST, SqlKind.LIKE));
        // Functional indexes use a rewritten row type; all others use the scan's own row type.
        RelDataType newRowType;
        FunctionalIndexInfo functionInfo = idx.getFunctionalInfo();
        if (functionInfo.hasFunctional()) {
            newRowType = FunctionalIndexHelper.rewriteFunctionalRowType(scanRel, context, functionInfo);
        } else {
            newRowType = scanRel.getRowType();
        }
        QueryCondition queryCondition = jTabGrpScan.convertToQueryCondition(convertToLogicalExpression(preProcIdxCondition, newRowType, settings, builder));
        // Cap rows/size at total rows in case of issues with DB APIs
        // NOTE(review): idxPayload is dereferenced without a null check, although the result of the same
        // method is null-checked for ftsPayload above - confirm it cannot be null here.
        StatisticsPayload idxPayload = jTabGrpScan.getFirstKeyEstimatedStats(queryCondition, idx, scanRel);
        double rowCount = Math.min(idxPayload.getRowCount(), ftsPayload.getRowCount());
        double leadingRowCount = Math.min(idxPayload.getLeadingRowCount(), rowCount);
        double avgRowSize = Math.min(idxPayload.getAvgRowSize(), ftsPayload.getAvgRowSize());
        StatisticsPayload payload = new MapRDBStatisticsPayload(rowCount, leadingRowCount, avgRowSize);
        // The cache entry is stored under the original (not the pre-processed) condition.
        addToCache(idxCondition, idx, context, payload, jTabGrpScan, scanRel, newRowType);
        addBaseConditions(idxCondition, payload, false, baseConditionMap, scanRel.getRowType());
    }
    /* Add the row count for index conditions on all indexes. Stats are only computed for leading
     * keys but index conditions can be pushed and would be required for access path costing
     */
    for (IndexDescriptor idx : idxConditionMap.keySet()) {
        // Skip indexes for which conditionIndexed reports NONE.
        if (IndexPlanUtils.conditionIndexed(context.getOrigMarker(), idx) == IndexPlanUtils.ConditionIndexed.NONE) {
            continue;
        }
        Map<LogicalExpression, RexNode> leadingPrefixMap = Maps.newHashMap();
        double rowCount, leadingRowCount, avgRowSize;
        RexNode idxCondition = idxConditionMap.get(idx).indexCondition;
        // Ignore conditions which always evaluate to true
        if (idxCondition.isAlwaysTrue()) {
            continue;
        }
        // Derive the leading-column, total-remainder and total filters for this index from its index/remainder conditions.
        RexNode idxIncColCondition = idxConditionMap.get(idx).remainderCondition;
        RexNode idxRemColCondition = IndexPlanUtils.getLeadingPrefixMap(leadingPrefixMap, idx.getIndexColumns(), infoBuilder, idxCondition);
        RexNode idxLeadColCondition = IndexPlanUtils.getLeadingColumnsFilter(IndexPlanUtils.getLeadingFilters(leadingPrefixMap, idx.getIndexColumns()), builder);
        RexNode idxTotRemColCondition = IndexPlanUtils.getTotalRemainderFilter(idxRemColCondition, idxIncColCondition, builder);
        RexNode idxTotColCondition = IndexPlanUtils.getTotalFilter(idxLeadColCondition, idxTotRemColCondition, builder);
        // Functional indexes use a rewritten row type; all others use the scan's own row type.
        FunctionalIndexInfo functionInfo = idx.getFunctionalInfo();
        RelDataType newRowType = scanRel.getRowType();
        if (functionInfo.hasFunctional()) {
            newRowType = FunctionalIndexHelper.rewriteFunctionalRowType(scanRel, context, functionInfo);
        }
        /* For non-covering plans we would need the index leading condition */
        // Row count = full-table row count scaled by the left element of computeSelectivity's result.
        rowCount = ftsPayload.getRowCount() * computeSelectivity(idxLeadColCondition, idx, ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
        // The leading row count computed here is reused for the covering and intersect entries below.
        leadingRowCount = rowCount;
        // NOTE(review): the fIStatsCache lookup result is dereferenced without a null check - confirm an
        // entry always exists for this index at this point.
        avgRowSize = fIStatsCache.get(buildUniqueIndexIdentifier(idx)).getAvgRowSize();
        addToCache(idxLeadColCondition, idx, context, new MapRDBStatisticsPayload(rowCount, leadingRowCount, avgRowSize), jTabGrpScan, scanRel, newRowType);
        /* For covering plans we would need the full condition */
        rowCount = ftsPayload.getRowCount() * computeSelectivity(idxTotColCondition, idx, ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
        addToCache(idxTotColCondition, idx, context, new MapRDBStatisticsPayload(rowCount, leadingRowCount, avgRowSize), jTabGrpScan, scanRel, newRowType);
        /* For intersect plans we would need the index condition */
        rowCount = ftsPayload.getRowCount() * computeSelectivity(idxCondition, idx, ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
        addToCache(idxCondition, idx, context, new MapRDBStatisticsPayload(rowCount, leadingRowCount, avgRowSize), jTabGrpScan, scanRel, newRowType);
        /* Add the rowCount for condition on only included columns - no leading columns here! */
        if (idxIncColCondition != null) {
            // Selectivity is computed without an index (null), and the same value is stored as row count and leading row count.
            rowCount = ftsPayload.getRowCount() * computeSelectivity(idxIncColCondition, null, ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
            addToCache(idxIncColCondition, idx, context, new MapRDBStatisticsPayload(rowCount, rowCount, avgRowSize), jTabGrpScan, scanRel, newRowType);
        }
    }
    // Add the rowCount for the complete condition - based on table
    double rowCount = ftsPayload.getRowCount() * computeSelectivity(condition, null, ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
    // Here, ftsLeadingKey rowcount is based on _id predicates
    // NOTE(review): ftsLeadingKeyPayload is dereferenced without a null check - confirm it cannot be null.
    StatisticsPayload ftsLeadingKeyPayload = jTabGrpScan.getFirstKeyEstimatedStats(jTabGrpScan.convertToQueryCondition(convertToLogicalExpression(condition, scanRel.getRowType(), settings, builder)), null, scanRel);
    addToCache(condition, null, null, new MapRDBStatisticsPayload(rowCount, ftsLeadingKeyPayload.getRowCount(), ftsPayload.getAvgRowSize()), jTabGrpScan, scanRel, scanRel.getRowType());
    // Add the full table rows while we are at it - represented by <NULL> RexNode, <NULL> QueryCondition.
    // No ftsLeadingKey so leadingKeyRowcount = totalRowCount
    addToCache(null, null, null, new MapRDBStatisticsPayload(ftsPayload.getRowCount(), ftsPayload.getRowCount(), ftsPayload.getAvgRowSize()), jTabGrpScan, scanRel, scanRel.getRowType());
    // Mark the statistics as available
    statsAvailable = true;
}

Also used : DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) ScanPrel(org.apache.drill.exec.planner.physical.ScanPrel) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) RelDataType(org.apache.calcite.rel.type.RelDataType) DbGroupScan(org.apache.drill.exec.physical.base.DbGroupScan) GroupScan(org.apache.drill.exec.physical.base.GroupScan) JsonTableGroupScan(org.apache.drill.exec.store.mapr.db.json.JsonTableGroupScan) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) JsonTableGroupScan(org.apache.drill.exec.store.mapr.db.json.JsonTableGroupScan) RexBuilder(org.apache.calcite.rex.RexBuilder) QueryCondition(org.ojai.store.QueryCondition) RexNode(org.apache.calcite.rex.RexNode)

Example 28 with GroupScan

Use of org.apache.drill.exec.physical.base.GroupScan in the Apache Drill project.

In class MapRDBStatistics, method initialize.

/**
 * Populates the statistics for {@code condition} unless an entry for it is already cached.
 *
 * <p>The row-key join cost factor is always refreshed from the planner settings. Statistics are populated only
 * when the group scan behind {@code scanRel} is a {@link DbGroupScan} and the statsCache holds no entry for the
 * string form of the condition.
 *
 * @param condition the filter condition for which statistics are requested
 * @param scanRel   the scan relational node
 * @param context   the index plan call context, passed through to {@code populateStats}
 * @return {@code true} if {@code populateStats} was invoked by this call, {@code false} otherwise
 */
public boolean initialize(RexNode condition, DrillScanRelBase scanRel, IndexCallContext context) {
    final GroupScan scan = IndexPlanUtils.getGroupScan(scanRel);
    rowKeyJoinBackIOFactor = PrelUtil.getPlannerSettings(scanRel.getCluster().getPlanner()).getIndexRowKeyJoinCostFactor();

    // Only database-backed group scans carry secondary indexes to gather statistics for.
    if (!(scan instanceof DbGroupScan)) {
        return false;
    }

    // The cache is keyed by the string form of the condition; an existing entry means nothing to do.
    final String cacheKey = convertRexToString(condition, scanRel.getRowType());
    if (statsCache.get(cacheKey) != null) {
        return false;
    }

    final IndexCollection secondaryIndexes = ((DbGroupScan) scan).getSecondaryIndexCollection(scanRel);
    populateStats(condition, secondaryIndexes, scanRel, context);
    logger.info("index_plan_info: initialize: scanRel #{} and groupScan {} got fulltable {}, statsCache: {}, fiStatsCache: {}", scanRel.getId(), System.identityHashCode(scan), fullTableScanPayload, statsCache, fIStatsCache);
    return true;
}

Also used : DbGroupScan(org.apache.drill.exec.physical.base.DbGroupScan) GroupScan(org.apache.drill.exec.physical.base.GroupScan) JsonTableGroupScan(org.apache.drill.exec.store.mapr.db.json.JsonTableGroupScan) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) DbGroupScan(org.apache.drill.exec.physical.base.DbGroupScan)

Example 29 with GroupScan

Use of org.apache.drill.exec.physical.base.GroupScan in the Apache Drill project.

In class IcebergPluginImplementor, method implement.

/**
 * Applies a project to the group scan held by this implementor.
 *
 * <p>The project's input is visited first. Each projection is then converted with {@code DrillOptiq.toDrill} and
 * cast to {@link SchemaPath}, and {@code groupScan} is replaced by a clone restricted to those columns.
 *
 * @param project the project node to implement
 * @throws IOException declared by the overridden method
 */
@Override
public void implement(PluginProjectRel project) throws IOException {
    visitChild(project.getInput());

    final DrillParseContext parseContext = new DrillParseContext(PrelUtil.getPlannerSettings(project.getCluster().getPlanner()));
    final RelNode projectInput = project.getInput();

    // Every projection is expected to translate to a plain column reference (SchemaPath).
    final List<SchemaPath> projectedColumns = project.getProjects()
        .stream()
        .map(rexExpr -> (SchemaPath) DrillOptiq.toDrill(parseContext, projectInput, rexExpr))
        .collect(Collectors.toList());

    groupScan = groupScan.clone(projectedColumns);
}

Also used : Project(org.apache.calcite.rel.core.Project) IcebergGroupScan(org.apache.drill.exec.store.iceberg.IcebergGroupScan) RelShuttleImpl(org.apache.calcite.rel.RelShuttleImpl) Filter(org.apache.calcite.rel.core.Filter) RelSubset(org.apache.calcite.plan.volcano.RelSubset) AbstractPluginImplementor(org.apache.drill.exec.store.plan.AbstractPluginImplementor) BigDecimal(java.math.BigDecimal) DrillOptiq(org.apache.drill.exec.planner.logical.DrillOptiq) Expression(org.apache.iceberg.expressions.Expression) RexNode(org.apache.calcite.rex.RexNode) PluginProjectRel(org.apache.drill.exec.store.plan.rel.PluginProjectRel) DrillParseContext(org.apache.drill.exec.planner.logical.DrillParseContext) PrelUtil(org.apache.drill.exec.planner.physical.PrelUtil) PluginLimitRel(org.apache.drill.exec.store.plan.rel.PluginLimitRel) PluginFilterRel(org.apache.drill.exec.store.plan.rel.PluginFilterRel) Binder(org.apache.iceberg.expressions.Binder) RexLiteral(org.apache.calcite.rex.RexLiteral) SchemaPath(org.apache.drill.common.expression.SchemaPath) IOException(java.io.IOException) RelNode(org.apache.calcite.rel.RelNode) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) Collectors(java.util.stream.Collectors) ValidationException(org.apache.iceberg.exceptions.ValidationException) List(java.util.List) DrillLimitRelBase(org.apache.drill.exec.planner.common.DrillLimitRelBase) GroupScan(org.apache.drill.exec.physical.base.GroupScan) Util(org.apache.calcite.util.Util) StoragePluginTableScan(org.apache.drill.exec.store.plan.rel.StoragePluginTableScan) RelNode(org.apache.calcite.rel.RelNode) SchemaPath(org.apache.drill.common.expression.SchemaPath) DrillParseContext(org.apache.drill.exec.planner.logical.DrillParseContext)

Example 30 with GroupScan

Use of org.apache.drill.exec.physical.base.GroupScan in the Apache Drill project.

In class IcebergPluginImplementor, method canImplement.

/**
 * Decides whether the given filter can be implemented by this plugin.
 *
 * <p>The filter condition is converted to a Drill {@link LogicalExpression} and then translated with
 * {@code DrillExprToIcebergTranslator}. A {@code null} translation means the filter is not supported. Otherwise the
 * translated expression must bind against the schema of the {@link IcebergGroupScan} found for the filter; a
 * non-Iceberg group scan or a {@link ValidationException} during binding yields {@code false}.
 *
 * @param filter the filter to check
 * @return {@code true} if the condition translates to an Iceberg expression that binds to the table schema
 */
@Override
public boolean canImplement(Filter filter) {
    final RexNode condition = filter.getCondition();
    final DrillParseContext parseContext = new DrillParseContext(PrelUtil.getPlannerSettings(filter.getCluster().getPlanner()));
    final LogicalExpression drillExpr = DrillOptiq.toDrill(parseContext, filter.getInput(), condition);

    Expression icebergExpr = drillExpr.accept(DrillExprToIcebergTranslator.INSTANCE, null);
    if (icebergExpr == null) {
        // The translator produced nothing for this condition.
        return false;
    }

    try {
        final GroupScan scan = findGroupScan(filter);
        if (!(scan instanceof IcebergGroupScan)) {
            return false;
        }
        final IcebergGroupScan icebergScan = (IcebergGroupScan) scan;
        // ensures that expression compatible with table schema
        icebergExpr = Binder.bind(icebergScan.getTableScan().schema().asStruct(), icebergExpr, true);
    } catch (ValidationException e) {
        // Binding failed: the expression does not fit the table schema.
        return false;
    }
    return icebergExpr != null;
}

Also used : IcebergGroupScan(org.apache.drill.exec.store.iceberg.IcebergGroupScan) GroupScan(org.apache.drill.exec.physical.base.GroupScan) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) ValidationException(org.apache.iceberg.exceptions.ValidationException) Expression(org.apache.iceberg.expressions.Expression) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) DrillParseContext(org.apache.drill.exec.planner.logical.DrillParseContext) IcebergGroupScan(org.apache.drill.exec.store.iceberg.IcebergGroupScan) RexNode(org.apache.calcite.rex.RexNode)

Aggregations

GroupScan (org.apache.drill.exec.physical.base.GroupScan): 33, DrillScanRel (org.apache.drill.exec.planner.logical.DrillScanRel): 19, PlannerSettings (org.apache.drill.exec.planner.physical.PlannerSettings): 12, TableScan (org.apache.calcite.rel.core.TableScan): 10, RexNode (org.apache.calcite.rex.RexNode): 9, RelOptRuleCall (org.apache.calcite.plan.RelOptRuleCall): 8, RelNode (org.apache.calcite.rel.RelNode): 8, DrillFilterRel (org.apache.drill.exec.planner.logical.DrillFilterRel): 8, LogicalExpression (org.apache.drill.common.expression.LogicalExpression): 7, SchemaPath (org.apache.drill.common.expression.SchemaPath): 6, DrillProjectRel (org.apache.drill.exec.planner.logical.DrillProjectRel): 6, IOException (java.io.IOException): 5, ArrayList (java.util.ArrayList): 5, DrillParseContext (org.apache.drill.exec.planner.logical.DrillParseContext): 5, ScanPrel (org.apache.drill.exec.planner.physical.ScanPrel): 5, AggregateCall (org.apache.calcite.rel.core.AggregateCall): 4, RelDataType (org.apache.calcite.rel.type.RelDataType): 4, DbGroupScan (org.apache.drill.exec.physical.base.DbGroupScan): 4, FileGroupScan (org.apache.drill.exec.physical.base.FileGroupScan): 4, ParquetPartitionDescriptor (org.apache.drill.exec.planner.ParquetPartitionDescriptor): 4