Search in sources :

Example 11 with GroupScan

Use of org.apache.drill.exec.physical.base.GroupScan in project drill by apache.

In the class ParquetPartitionDescriptor, method createTableScan.

/**
 * Builds a replacement scan over the supplied partition locations.
 *
 * <p>The entire location of every partition is collected into a list, a new group scan is
 * created from that list via {@code createNewGroupScan}, and the result is wrapped in a
 * {@link DrillScanRel} that reuses the cluster, table, row type and columns of the current
 * {@code scanRel}.
 *
 * @param newPartitionLocation partitions whose entire locations are handed to the new group scan
 * @param cacheFileRoot forwarded unchanged to {@code createNewGroupScan}
 * @param wasAllPartitionsPruned forwarded unchanged to {@code createNewGroupScan}
 * @param metaContext forwarded unchanged to {@code createNewGroupScan}
 * @return the newly created scan relational expression
 * @throws Exception declared by this method; presumably propagated from {@code createNewGroupScan}
 */
@Override
public TableScan createTableScan(List<PartitionLocation> newPartitionLocation, String cacheFileRoot, boolean wasAllPartitionsPruned, MetadataContext metaContext) throws Exception {
    final List<String> partitionPaths = Lists.newArrayList();
    for (PartitionLocation partition : newPartitionLocation) {
        partitionPaths.add(partition.getEntirePartitionLocation());
    }

    final GroupScan prunedGroupScan = createNewGroupScan(partitionPaths, cacheFileRoot, wasAllPartitionsPruned, metaContext);

    // Everything except the group scan is taken over from the existing scan; the trait set
    // additionally gets the DRILL_LOGICAL convention.
    return new DrillScanRel(
        scanRel.getCluster(),
        scanRel.getTraitSet().plus(DrillRel.DRILL_LOGICAL),
        scanRel.getTable(),
        prunedGroupScan,
        scanRel.getRowType(),
        scanRel.getColumns(),
        true);
}
Also used : FileGroupScan(org.apache.drill.exec.physical.base.FileGroupScan) GroupScan(org.apache.drill.exec.physical.base.GroupScan) ParquetGroupScan(org.apache.drill.exec.store.parquet.ParquetGroupScan) DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel)

Example 12 with GroupScan

Use of org.apache.drill.exec.physical.base.GroupScan in project drill by axbaretto.

In the class HivePushPartitionFilterIntoScan, method getFilterOnProject.

/**
 * Creates the partition pruning rule for the pattern filter -> project -> scan,
 * restricted to scans whose group scan is a {@link HiveScan}.
 *
 * @param optimizerRulesContext context handed to the {@link PruneScanRule} constructor
 * @param defaultPartitionValue passed to every {@link HivePartitionDescriptor} created by the rule
 * @return the rule instance
 */
public static final StoragePluginOptimizerRule getFilterOnProject(OptimizerRulesContext optimizerRulesContext, final String defaultPartitionValue) {
    return new PruneScanRule(
        RelOptHelper.some(DrillFilterRel.class, RelOptHelper.some(DrillProjectRel.class, RelOptHelper.any(DrillScanRel.class))),
        "HivePushPartitionFilterIntoScan:Filter_On_Project_Hive",
        optimizerRulesContext) {

        @Override
        public PartitionDescriptor getPartitionDescriptor(PlannerSettings settings, TableScan scanRel) {
            final DrillScanRel drillScan = (DrillScanRel) scanRel;
            return new HivePartitionDescriptor(settings, drillScan, getOptimizerRulesContext().getManagedBuffer(), defaultPartitionValue);
        }

        /**
         * Matches only when the group scan is a {@link HiveScan} that supports partition filter
         * pushdown. When HEP partition pruning is enabled, the scan must additionally report
         * {@code partitionFilterPushdown()} as false.
         */
        @Override
        public boolean matches(RelOptRuleCall call) {
            // the scan is the third operand of the filter -> project -> scan pattern
            final DrillScanRel scan = call.rel(2);
            final GroupScan groupScan = scan.getGroupScan();
            final boolean hepPruningEnabled = PrelUtil.getPlannerSettings(scan.getCluster().getPlanner()).isHepPartitionPruningEnabled();

            // this rule is applicable only for Hive based partition pruning
            if (!(groupScan instanceof HiveScan) || !groupScan.supportsPartitionFilterPushdown()) {
                return false;
            }
            return !hepPruningEnabled || !scan.partitionFilterPushdown();
        }

        @Override
        public void onMatch(RelOptRuleCall call) {
            final DrillFilterRel filter = call.rel(0);
            final DrillProjectRel project = call.rel(1);
            final DrillScanRel scan = call.rel(2);
            doOnMatch(call, filter, project, scan);
        }
    };
}
Also used : GroupScan(org.apache.drill.exec.physical.base.GroupScan) TableScan(org.apache.calcite.rel.core.TableScan) DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) PruneScanRule(org.apache.drill.exec.planner.logical.partition.PruneScanRule) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) DrillProjectRel(org.apache.drill.exec.planner.logical.DrillProjectRel) HiveScan(org.apache.drill.exec.store.hive.HiveScan) DrillFilterRel(org.apache.drill.exec.planner.logical.DrillFilterRel) RelOptRuleCall(org.apache.calcite.plan.RelOptRuleCall) HivePartitionDescriptor(org.apache.drill.exec.planner.sql.HivePartitionDescriptor)

Example 13 with GroupScan

Use of org.apache.drill.exec.physical.base.GroupScan in project drill by axbaretto.

In the class HivePushPartitionFilterIntoScan, method getFilterOnScan.

/**
 * Creates the partition pruning rule for the pattern filter -> scan (no project in between),
 * restricted to scans whose group scan is a {@link HiveScan}.
 *
 * @param optimizerRulesContext context handed to the {@link PruneScanRule} constructor
 * @param defaultPartitionValue passed to every {@link HivePartitionDescriptor} created by the rule
 * @return the rule instance
 */
public static final StoragePluginOptimizerRule getFilterOnScan(OptimizerRulesContext optimizerRulesContext, final String defaultPartitionValue) {
    return new PruneScanRule(
        RelOptHelper.some(DrillFilterRel.class, RelOptHelper.any(DrillScanRel.class)),
        "HivePushPartitionFilterIntoScan:Filter_On_Scan_Hive",
        optimizerRulesContext) {

        @Override
        public PartitionDescriptor getPartitionDescriptor(PlannerSettings settings, TableScan scanRel) {
            final DrillScanRel drillScan = (DrillScanRel) scanRel;
            return new HivePartitionDescriptor(settings, drillScan, getOptimizerRulesContext().getManagedBuffer(), defaultPartitionValue);
        }

        /**
         * Matches only when the group scan is a {@link HiveScan} that supports partition filter
         * pushdown. When HEP partition pruning is enabled, the scan must additionally report
         * {@code partitionFilterPushdown()} as false.
         */
        @Override
        public boolean matches(RelOptRuleCall call) {
            // the scan is the second operand of the filter -> scan pattern
            final DrillScanRel scan = call.rel(1);
            final GroupScan groupScan = scan.getGroupScan();
            final boolean hepPruningEnabled = PrelUtil.getPlannerSettings(scan.getCluster().getPlanner()).isHepPartitionPruningEnabled();

            // this rule is applicable only for Hive based partition pruning
            if (!(groupScan instanceof HiveScan) || !groupScan.supportsPartitionFilterPushdown()) {
                return false;
            }
            return !hepPruningEnabled || !scan.partitionFilterPushdown();
        }

        @Override
        public void onMatch(RelOptRuleCall call) {
            final DrillFilterRel filter = call.rel(0);
            final DrillScanRel scan = call.rel(1);
            // there is no project between filter and scan in this pattern
            doOnMatch(call, filter, null, scan);
        }
    };
}
Also used : GroupScan(org.apache.drill.exec.physical.base.GroupScan) TableScan(org.apache.calcite.rel.core.TableScan) DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) PruneScanRule(org.apache.drill.exec.planner.logical.partition.PruneScanRule) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) HiveScan(org.apache.drill.exec.store.hive.HiveScan) DrillFilterRel(org.apache.drill.exec.planner.logical.DrillFilterRel) RelOptRuleCall(org.apache.calcite.plan.RelOptRuleCall) HivePartitionDescriptor(org.apache.drill.exec.planner.sql.HivePartitionDescriptor)

Example 14 with GroupScan

Use of org.apache.drill.exec.physical.base.GroupScan in project drill by axbaretto.

In the class ParquetPruneScanRule, method getFilterOnProjectParquet.

/**
 * Creates the partition pruning rule for the pattern filter -> project -> scan,
 * restricted to scans whose group scan is a {@link ParquetGroupScan}.
 *
 * @param optimizerRulesContext context handed to the {@link PruneScanRule} constructor
 * @return the rule instance
 */
public static final RelOptRule getFilterOnProjectParquet(OptimizerRulesContext optimizerRulesContext) {
    return new PruneScanRule(
        RelOptHelper.some(DrillFilterRel.class, RelOptHelper.some(DrillProjectRel.class, RelOptHelper.any(DrillScanRel.class))),
        "PruneScanRule:Filter_On_Project_Parquet",
        optimizerRulesContext) {

        @Override
        public PartitionDescriptor getPartitionDescriptor(PlannerSettings settings, TableScan scanRel) {
            final DrillScanRel drillScan = (DrillScanRel) scanRel;
            return new ParquetPartitionDescriptor(settings, drillScan);
        }

        /**
         * Matches only when the group scan is a {@link ParquetGroupScan} that supports partition
         * filter pushdown. When HEP partition pruning is enabled, the scan must additionally
         * report {@code partitionFilterPushdown()} as false.
         */
        @Override
        public boolean matches(RelOptRuleCall call) {
            // the scan is the third operand of the filter -> project -> scan pattern
            final DrillScanRel scan = call.rel(2);
            final GroupScan groupScan = scan.getGroupScan();
            final boolean hepPruningEnabled = PrelUtil.getPlannerSettings(scan.getCluster().getPlanner()).isHepPartitionPruningEnabled();

            // this rule is applicable only for parquet based partition pruning
            if (!(groupScan instanceof ParquetGroupScan) || !groupScan.supportsPartitionFilterPushdown()) {
                return false;
            }
            return !hepPruningEnabled || !scan.partitionFilterPushdown();
        }

        @Override
        public void onMatch(RelOptRuleCall call) {
            final DrillFilterRel filter = call.rel(0);
            final DrillProjectRel project = call.rel(1);
            final DrillScanRel scan = call.rel(2);
            doOnMatch(call, filter, project, scan);
        }
    };
}
Also used : FileGroupScan(org.apache.drill.exec.physical.base.FileGroupScan) GroupScan(org.apache.drill.exec.physical.base.GroupScan) ParquetGroupScan(org.apache.drill.exec.store.parquet.ParquetGroupScan) TableScan(org.apache.calcite.rel.core.TableScan) DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) DrillProjectRel(org.apache.drill.exec.planner.logical.DrillProjectRel) DrillFilterRel(org.apache.drill.exec.planner.logical.DrillFilterRel) ParquetGroupScan(org.apache.drill.exec.store.parquet.ParquetGroupScan) RelOptRuleCall(org.apache.calcite.plan.RelOptRuleCall) ParquetPartitionDescriptor(org.apache.drill.exec.planner.ParquetPartitionDescriptor)

Example 15 with GroupScan

Use of org.apache.drill.exec.physical.base.GroupScan in project drill by axbaretto.

In the class ConvertCountToDirectScan, method collectCounts.

/**
 * Collects counts for each aggregation call.
 * Returns an empty result map if the count could not be determined for at least one aggregation call.
 *
 * <p>For each aggregate call determines whether the count can be calculated. Counts are collected only for
 * the COUNT function. For star, not-null expressions and implicit columns the count is set to the total
 * record number. For other cases the count is obtained from the group scan operator. The count cannot be
 * calculated for partition columns.
 *
 * @param settings planner settings; supplies the options used to resolve implicit and partition columns
 *                 and is passed to the group scan when asking for scan statistics
 * @param agg aggregate relational expression
 * @param scan scan relational expression
 * @param project project relational expression, may be {@code null}
 * @return result map where key is count column name, value is count value
 */
private Map<String, Long> collectCounts(PlannerSettings settings, DrillAggregateRel agg, DrillScanRel scan, DrillProjectRel project) {
    final Set<String> implicitColumnsNames = ColumnExplorer.initImplicitFileColumns(settings.getOptions()).keySet();
    final GroupScan oldGrpScan = scan.getGroupScan();
    final long totalRecordCount = oldGrpScan.getScanStats(settings).getRecordCount();
    final LinkedHashMap<String, Long> result = new LinkedHashMap<>();
    // the index is needed below to build a unique key per aggregate call
    for (int i = 0; i < agg.getAggCallList().size(); i++) {
        AggregateCall aggCall = agg.getAggCallList().get(i);
        long cnt;
        // rule can be applied only for count function, return empty counts
        if (!"count".equalsIgnoreCase(aggCall.getAggregation().getName())) {
            return ImmutableMap.of();
        }
        if (containsStarOrNotNullInput(aggCall, agg)) {
            cnt = totalRecordCount;
        } else if (aggCall.getArgList().size() == 1) {
            // count(columnName) ==> Agg ( Scan )) ==> columnValueCount
            int index = aggCall.getArgList().get(0);
            if (project != null) {
                // return count of "col2" in Scan's metadata, if found.
                if (!(project.getProjects().get(index) instanceof RexInputRef)) {
                    // do not apply for all other cases.
                    return ImmutableMap.of();
                }
                // translate the project output index into the index of the underlying scan field
                index = ((RexInputRef) project.getProjects().get(index)).getIndex();
            }
            // Lower-case with Locale.ROOT so the result does not depend on the JVM default locale
            // (e.g. under a Turkish locale "I" would otherwise become a dotless "ı").
            String columnName = scan.getRowType().getFieldNames().get(index).toLowerCase(java.util.Locale.ROOT);
            // for implicit columns the count is the same as the total record count
            if (implicitColumnsNames.contains(columnName)) {
                cnt = totalRecordCount;
            } else {
                SchemaPath simplePath = SchemaPath.getSimplePath(columnName);
                // count can not be calculated for partition columns
                if (ColumnExplorer.isPartitionColumn(settings.getOptions(), simplePath)) {
                    return ImmutableMap.of();
                }
                cnt = oldGrpScan.getColumnValueCount(simplePath);
                if (cnt == GroupScan.NO_COLUMN_STATS) {
                    // if column stats is not available don't apply this rule, return empty counts
                    return ImmutableMap.of();
                }
            }
        } else {
            // count with zero or several arguments that is neither star nor not-null input is not handled
            return ImmutableMap.of();
        }
        String name = "count" + i + "$" + (aggCall.getName() == null ? aggCall.toString() : aggCall.getName());
        result.put(name, cnt);
    }
    return ImmutableMap.copyOf(result);
}
Also used : MetadataDirectGroupScan(org.apache.drill.exec.store.direct.MetadataDirectGroupScan) GroupScan(org.apache.drill.exec.physical.base.GroupScan) AggregateCall(org.apache.calcite.rel.core.AggregateCall) SchemaPath(org.apache.drill.common.expression.SchemaPath) RexInputRef(org.apache.calcite.rex.RexInputRef) LinkedHashMap(java.util.LinkedHashMap)

Aggregations

GroupScan (org.apache.drill.exec.physical.base.GroupScan)33 DrillScanRel (org.apache.drill.exec.planner.logical.DrillScanRel)19 PlannerSettings (org.apache.drill.exec.planner.physical.PlannerSettings)12 TableScan (org.apache.calcite.rel.core.TableScan)10 RexNode (org.apache.calcite.rex.RexNode)9 RelOptRuleCall (org.apache.calcite.plan.RelOptRuleCall)8 RelNode (org.apache.calcite.rel.RelNode)8 DrillFilterRel (org.apache.drill.exec.planner.logical.DrillFilterRel)8 LogicalExpression (org.apache.drill.common.expression.LogicalExpression)7 SchemaPath (org.apache.drill.common.expression.SchemaPath)6 DrillProjectRel (org.apache.drill.exec.planner.logical.DrillProjectRel)6 IOException (java.io.IOException)5 ArrayList (java.util.ArrayList)5 DrillParseContext (org.apache.drill.exec.planner.logical.DrillParseContext)5 ScanPrel (org.apache.drill.exec.planner.physical.ScanPrel)5 AggregateCall (org.apache.calcite.rel.core.AggregateCall)4 RelDataType (org.apache.calcite.rel.type.RelDataType)4 DbGroupScan (org.apache.drill.exec.physical.base.DbGroupScan)4 FileGroupScan (org.apache.drill.exec.physical.base.FileGroupScan)4 ParquetPartitionDescriptor (org.apache.drill.exec.planner.ParquetPartitionDescriptor)4