Search in sources :

Example 6 with DrillScanRel

use of org.apache.drill.exec.planner.logical.DrillScanRel in project drill by apache.

the class ParquetPruneScanRule method getFilterOnProjectParquet.

/**
 * Builds the prune-scan rule that fires on a Filter sitting on a Project sitting on a Scan,
 * restricted to scans whose group scan is a {@link ParquetGroupScan}.
 *
 * @param optimizerRulesContext context handed through to the {@link PruneScanRule} constructor
 * @return the rule instance named "PruneScanRule:Filter_On_Project_Parquet"
 */
public static final RelOptRule getFilterOnProjectParquet(OptimizerRulesContext optimizerRulesContext) {
    return new PruneScanRule(
            RelOptHelper.some(DrillFilterRel.class, RelOptHelper.some(DrillProjectRel.class, RelOptHelper.any(DrillScanRel.class))),
            "PruneScanRule:Filter_On_Project_Parquet",
            optimizerRulesContext) {

        @Override
        public PartitionDescriptor getPartitionDescriptor(PlannerSettings settings, TableScan scanRel) {
            final DrillScanRel drillScan = (DrillScanRel) scanRel;
            return new ParquetPartitionDescriptor(settings, drillScan);
        }

        @Override
        public boolean matches(RelOptRuleCall call) {
            // Operand 2 is the scan (0 = filter, 1 = project).
            final DrillScanRel scanRel = call.rel(2);
            final GroupScan source = scanRel.getGroupScan();
            final boolean hepPruningEnabled =
                    PrelUtil.getPlannerSettings(scanRel.getCluster().getPlanner()).isHepPartitionPruningEnabled();

            // This rule only applies to parquet-based partition pruning.
            if (!(source instanceof ParquetGroupScan)) {
                return false;
            }
            if (!source.supportsPartitionFilterPushdown()) {
                return false;
            }
            // With HEP pruning enabled, additionally require that the scan's
            // partitionFilterPushdown() flag is not set.
            return !hepPruningEnabled || !scanRel.partitionFilterPushdown();
        }

        @Override
        public void onMatch(RelOptRuleCall call) {
            final DrillFilterRel filter = call.rel(0);
            final DrillProjectRel project = call.rel(1);
            final DrillScanRel scan = call.rel(2);
            doOnMatch(call, filter, project, scan);
        }
    };
}
Also used : FileGroupScan(org.apache.drill.exec.physical.base.FileGroupScan) GroupScan(org.apache.drill.exec.physical.base.GroupScan) ParquetGroupScan(org.apache.drill.exec.store.parquet.ParquetGroupScan) TableScan(org.apache.calcite.rel.core.TableScan) DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) DrillProjectRel(org.apache.drill.exec.planner.logical.DrillProjectRel) DrillFilterRel(org.apache.drill.exec.planner.logical.DrillFilterRel) ParquetGroupScan(org.apache.drill.exec.store.parquet.ParquetGroupScan) RelOptRuleCall(org.apache.calcite.plan.RelOptRuleCall) ParquetPartitionDescriptor(org.apache.drill.exec.planner.ParquetPartitionDescriptor)

Example 7 with DrillScanRel

use of org.apache.drill.exec.planner.logical.DrillScanRel in project drill by apache.

the class ConvertCountToDirectScan method onMatch.

/**
 * Rewrites a lone COUNT aggregate over a scan (optionally with a project in between) into a
 * project over a direct scan that carries the pre-computed count.
 *
 * The rule bails out without transforming anything when the scan statistics are not exact,
 * the aggregate has group keys, more than one call or a distinct call, the call is not COUNT,
 * the projected expression is not a plain input reference, or column statistics are missing.
 *
 * @param call the matched rule call; rel(0) is the aggregate and the last rel is the scan
 */
@Override
public void onMatch(RelOptRuleCall call) {
    final DrillAggregateRel aggregate = (DrillAggregateRel) call.rel(0);
    final DrillScanRel scanRel = (DrillScanRel) call.rel(call.rels.length - 1);
    // A three-operand match means a project sits between the aggregate and the scan.
    final DrillProjectRel project;
    if (call.rels.length == 3) {
        project = (DrillProjectRel) call.rel(1);
    } else {
        project = null;
    }
    final GroupScan sourceScan = scanRel.getGroupScan();
    final PlannerSettings plannerSettings = PrelUtil.getPlannerSettings(call.getPlanner());

    // Preconditions checked here:
    //   1) the scan reports an exact row count,
    //   2) no group-by keys,
    //   3) exactly one aggregate call,
    //   4) no distinct aggregate call.
    if (!sourceScan.getScanStats(plannerSettings).getGroupScanProperty().hasExactRowCount()
            || aggregate.getGroupCount() != 0
            || aggregate.getAggCallList().size() != 1
            || aggregate.containsDistinctCall()) {
        return;
    }

    final AggregateCall countCall = aggregate.getAggCallList().get(0);
    if (!countCall.getAggregation().getName().equals("COUNT")) {
        return;
    }

    final List<Integer> args = countCall.getArgList();
    final long rowCount;
    if (args.isEmpty()
            || (args.size() == 1
                && !aggregate.getInput().getRowType().getFieldList().get(args.get(0).intValue()).getType().isNullable())) {
        // count(*) / count(non-nullable input) ==> total record count of the scan
        rowCount = (long) sourceScan.getScanStats(plannerSettings).getRecordCount();
    } else if (args.size() != 1) {
        // Anything other than a single-argument count is left untouched.
        return;
    } else {
        // count(columnName) ==> number of values recorded for that column
        int columnIndex = args.get(0);
        if (project != null) {
            final RexNode projected = project.getProjects().get(columnIndex);
            if (!(projected instanceof RexInputRef)) {
                // Only a direct column reference can be mapped back to a scan column.
                return;
            }
            columnIndex = ((RexInputRef) projected).getIndex();
        }
        // NOTE(review): toLowerCase() uses the default locale here — confirm this matches
        // how the group scan keys its column statistics.
        final String columnName = scanRel.getRowType().getFieldNames().get(columnIndex).toLowerCase();
        rowCount = sourceScan.getColumnValueCount(SchemaPath.getSimplePath(columnName));
        if (rowCount == GroupScan.NO_COLUMN_STATS) {
            // Without column statistics the count cannot be answered from metadata.
            return;
        }
    }

    final RelDataType directScanRowType = getCountDirectScanRowType(aggregate.getCluster().getTypeFactory());
    final ScanPrel directScan = ScanPrel.create(
            scanRel,
            scanRel.getTraitSet().plus(Prel.DRILL_PHYSICAL).plus(DrillDistributionTrait.SINGLETON),
            getCountDirectScan(rowCount),
            directScanRowType);
    final List<RexNode> projections = Lists.newArrayList();
    projections.add(RexInputRef.of(0, directScanRowType));
    final ProjectPrel directProject = new ProjectPrel(
            aggregate.getCluster(),
            aggregate.getTraitSet().plus(Prel.DRILL_PHYSICAL).plus(DrillDistributionTrait.SINGLETON),
            directScan,
            projections,
            aggregate.getRowType());
    call.transformTo(directProject);
}
Also used : DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) DrillProjectRel(org.apache.drill.exec.planner.logical.DrillProjectRel) DrillAggregateRel(org.apache.drill.exec.planner.logical.DrillAggregateRel) RelDataType(org.apache.calcite.rel.type.RelDataType) DirectGroupScan(org.apache.drill.exec.store.direct.DirectGroupScan) GroupScan(org.apache.drill.exec.physical.base.GroupScan) AggregateCall(org.apache.calcite.rel.core.AggregateCall) RexInputRef(org.apache.calcite.rex.RexInputRef) RexNode(org.apache.calcite.rex.RexNode)

Example 8 with DrillScanRel

use of org.apache.drill.exec.planner.logical.DrillScanRel in project drill by apache.

the class ParquetPartitionDescriptor method createTableScan.

/**
 * Creates a new logical scan over the files of the given partition locations.
 *
 * @param newPartitionLocation partition locations whose entire-partition paths become the new file list
 * @param cacheFileRoot passed through to {@code createNewGroupScan}
 * @param wasAllPartitionsPruned passed through to {@code createNewGroupScan}
 * @param metaContext passed through to {@code createNewGroupScan}
 * @return a {@link DrillScanRel} that reuses the current scan's cluster, table, row type and
 *         columns, wraps the new group scan, and is created with its final constructor flag set to true
 * @throws Exception declared by the overridden method; presumably propagated from
 *         {@code createNewGroupScan} — verify against that helper
 */
@Override
public TableScan createTableScan(List<PartitionLocation> newPartitionLocation, String cacheFileRoot, boolean wasAllPartitionsPruned, MetadataContext metaContext) throws Exception {
    final List<String> prunedFiles = Lists.newArrayList();
    for (final PartitionLocation partition : newPartitionLocation) {
        final String path = partition.getEntirePartitionLocation();
        prunedFiles.add(path);
    }

    final GroupScan prunedGroupScan = createNewGroupScan(prunedFiles, cacheFileRoot, wasAllPartitionsPruned, metaContext);

    return new DrillScanRel(
            scanRel.getCluster(),
            scanRel.getTraitSet().plus(DrillRel.DRILL_LOGICAL),
            scanRel.getTable(),
            prunedGroupScan,
            scanRel.getRowType(),
            scanRel.getColumns(),
            true);
}
Also used : FileGroupScan(org.apache.drill.exec.physical.base.FileGroupScan) GroupScan(org.apache.drill.exec.physical.base.GroupScan) ParquetGroupScan(org.apache.drill.exec.store.parquet.ParquetGroupScan) DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel)

Example 9 with DrillScanRel

use of org.apache.drill.exec.planner.logical.DrillScanRel in project drill by apache.

the class FileSystemPartitionDescriptor method getFileLocationsAndStatus.

/**
 * Collects the file locations for the current scan together with the selection's
 * "expanded partial" flag.
 *
 * For a {@link DrillScanRel} whose group scan has files, the group scan's file list is used
 * (it holds the updated selection) and the flag is false. Otherwise, for a
 * {@link DrillScanRel} or an {@link EnumerableTableScan}, both values come from the table's
 * {@link FileSelection}. For any other scan type the locations are null and the flag is false.
 *
 * @return pair of (file locations, isExpandedPartial)
 */
protected Pair<Collection<String>, Boolean> getFileLocationsAndStatus() {
    Collection<String> locations = null;
    boolean expandedPartial = false;

    final boolean groupScanHasFiles =
            scanRel instanceof DrillScanRel && ((DrillScanRel) scanRel).getGroupScan().hasFiles();

    if (groupScanHasFiles) {
        // Prefer the GroupScan's files over the DrillTable's selection: the GroupScan
        // carries the up-to-date selection.
        locations = ((DrillScanRel) scanRel).getGroupScan().getFiles();
    } else if (scanRel instanceof DrillScanRel || scanRel instanceof EnumerableTableScan) {
        final FileSelection tableSelection = ((FormatSelection) table.getSelection()).getSelection();
        locations = tableSelection.getFiles();
        expandedPartial = tableSelection.isExpandedPartial();
    }

    return Pair.of(locations, expandedPartial);
}
Also used : FileSelection(org.apache.drill.exec.store.dfs.FileSelection) DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) EnumerableTableScan(org.apache.calcite.adapter.enumerable.EnumerableTableScan) DirPrunedEnumerableTableScan(org.apache.drill.exec.planner.logical.DirPrunedEnumerableTableScan) Collection(java.util.Collection) FormatSelection(org.apache.drill.exec.store.dfs.FormatSelection)

Example 10 with DrillScanRel

use of org.apache.drill.exec.planner.logical.DrillScanRel in project drill by apache.

the class ConvertHiveParquetScanToDrillParquetScan method createNativeScanRel.

/**
 * Helper method which creates a {@link DrillScanRel} backed by a
 * {@link HiveDrillNativeParquetScan} built from the given Hive scan.
 *
 * Row-type fields and projected columns that appear as keys in {@code partitionColMapping}
 * are replaced by their mapped names; mapped row-type fields are typed as VARCHAR.
 *
 * @param partitionColMapping maps a Hive partition column name to its replacement column name
 * @param hiveScanRel the scan rel whose group scan is a {@link HiveScan}
 * @return a new scan rel sharing the cluster, trait set and table of {@code hiveScanRel}
 * @throws Exception declared by the method; presumably raised while building the native
 *         scan — verify against {@link HiveDrillNativeParquetScan}
 */
private DrillScanRel createNativeScanRel(final Map<String, String> partitionColMapping, final DrillScanRel hiveScanRel) throws Exception {
    final RelDataTypeFactory factory = hiveScanRel.getCluster().getTypeFactory();
    final RelDataType partitionColType = factory.createSqlType(SqlTypeName.VARCHAR);

    // Build the native scan's row type field by field, in the Hive row type's order.
    final List<String> fieldNames = Lists.newArrayList();
    final List<RelDataType> fieldTypes = Lists.newArrayList();
    for (RelDataTypeField hiveField : hiveScanRel.getRowType().getFieldList()) {
        final String mappedName = partitionColMapping.get(hiveField.getName());
        final boolean isPartitionColumn = mappedName != null;
        fieldNames.add(isPartitionColumn ? mappedName : hiveField.getName());
        fieldTypes.add(isPartitionColumn ? partitionColType : hiveField.getType());
    }
    final RelDataType nativeRowType = factory.createStructType(fieldTypes, fieldNames);

    // Build the projected column list as set on the HiveScan. Its order may differ from the
    // row type's column order. A '*' entry in the HiveScan columns is passed through as-is
    // rather than expanded; HiveScan and its sub-scans can handle '*'.
    final List<SchemaPath> projectedColumns = Lists.newArrayList();
    for (SchemaPath hiveColumn : hiveScanRel.getColumns()) {
        final String mappedName = partitionColMapping.get(hiveColumn.getAsUnescapedPath());
        projectedColumns.add(mappedName == null ? hiveColumn : SchemaPath.getSimplePath(mappedName));
    }

    final HiveScan sourceScan = (HiveScan) hiveScanRel.getGroupScan();
    final HiveDrillNativeParquetScan nativeScan = new HiveDrillNativeParquetScan(
            sourceScan.getUserName(),
            sourceScan.hiveReadEntry,
            sourceScan.storagePlugin,
            projectedColumns,
            null);

    return new DrillScanRel(
            hiveScanRel.getCluster(),
            hiveScanRel.getTraitSet(),
            hiveScanRel.getTable(),
            nativeScan,
            nativeRowType,
            projectedColumns);
}
Also used : RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) SchemaPath(org.apache.drill.common.expression.SchemaPath) RelDataTypeFactory(org.apache.calcite.rel.type.RelDataTypeFactory) HiveScan(org.apache.drill.exec.store.hive.HiveScan) RelDataType(org.apache.calcite.rel.type.RelDataType) HiveDrillNativeParquetScan(org.apache.drill.exec.store.hive.HiveDrillNativeParquetScan)

Aggregations

DrillScanRel (org.apache.drill.exec.planner.logical.DrillScanRel)12 GroupScan (org.apache.drill.exec.physical.base.GroupScan)7 PlannerSettings (org.apache.drill.exec.planner.physical.PlannerSettings)5 HiveScan (org.apache.drill.exec.store.hive.HiveScan)5 RelOptRuleCall (org.apache.calcite.plan.RelOptRuleCall)4 TableScan (org.apache.calcite.rel.core.TableScan)4 FileGroupScan (org.apache.drill.exec.physical.base.FileGroupScan)4 DrillFilterRel (org.apache.drill.exec.planner.logical.DrillFilterRel)4 DrillProjectRel (org.apache.drill.exec.planner.logical.DrillProjectRel)4 ParquetGroupScan (org.apache.drill.exec.store.parquet.ParquetGroupScan)3 EnumerableTableScan (org.apache.calcite.adapter.enumerable.EnumerableTableScan)2 RelDataType (org.apache.calcite.rel.type.RelDataType)2 ParquetPartitionDescriptor (org.apache.drill.exec.planner.ParquetPartitionDescriptor)2 DirPrunedEnumerableTableScan (org.apache.drill.exec.planner.logical.DirPrunedEnumerableTableScan)2 PruneScanRule (org.apache.drill.exec.planner.logical.partition.PruneScanRule)2 HivePartitionDescriptor (org.apache.drill.exec.planner.sql.HivePartitionDescriptor)2 FileSelection (org.apache.drill.exec.store.dfs.FileSelection)2 FormatSelection (org.apache.drill.exec.store.dfs.FormatSelection)2 Collection (java.util.Collection)1 RelTraitSet (org.apache.calcite.plan.RelTraitSet)1