Search in sources :

Example 51 with TableScan

use of org.apache.calcite.rel.core.TableScan in project drill by apache.

the class FileMetadataInfoCollector method getTableScan.

private TableScan getTableScan(PlannerSettings settings, TableScan scanRel, List<String> scanFiles) {
    FileSystemPartitionDescriptor descriptor = new FileSystemPartitionDescriptor(settings, scanRel);
    List<PartitionLocation> newPartitions = Lists.newArrayList(descriptor.iterator()).stream().flatMap(Collection::stream).flatMap(p -> p.getPartitionLocationRecursive().stream()).filter(p -> scanFiles.contains(p.getEntirePartitionLocation().toUri().getPath())).collect(Collectors.toList());
    try {
        if (!newPartitions.isEmpty()) {
            return descriptor.createTableScan(newPartitions, false);
        } else {
            DrillTable drillTable = descriptor.getTable();
            SchemalessScan scan = new SchemalessScan(drillTable.getUserName(), ((FormatSelection) descriptor.getTable().getSelection()).getSelection().getSelectionRoot());
            return new DrillScanRel(scanRel.getCluster(), scanRel.getTraitSet().plus(DrillRel.DRILL_LOGICAL), scanRel.getTable(), scan, scanRel.getRowType(), DrillScanRel.getProjectedColumns(scanRel.getTable(), true), true);
        }
    } catch (Exception e) {
        throw new RuntimeException("Error happened during recreation of pruned scan", e);
    }
}
Also used : SchemalessScan(org.apache.drill.exec.physical.base.SchemalessScan) MetadataType(org.apache.drill.metastore.metadata.MetadataType) TableScan(org.apache.calcite.rel.core.TableScan) Arrays(java.util.Arrays) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreColumn(org.apache.drill.metastore.MetastoreColumn) FileSystem(org.apache.hadoop.fs.FileSystem) DrillRel(org.apache.drill.exec.planner.logical.DrillRel) Streams(org.apache.drill.shaded.guava.com.google.common.collect.Streams) DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) DrillFileSystem(org.apache.drill.exec.store.dfs.DrillFileSystem) ArrayListMultimap(org.apache.drill.shaded.guava.com.google.common.collect.ArrayListMultimap) FileStatus(org.apache.hadoop.fs.FileStatus) DrillTable(org.apache.drill.exec.planner.logical.DrillTable) ColumnExplorer(org.apache.drill.exec.store.ColumnExplorer) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) FormatSelection(org.apache.drill.exec.store.dfs.FormatSelection) ImpersonationUtil(org.apache.drill.exec.util.ImpersonationUtil) Path(org.apache.hadoop.fs.Path) FileSelection(org.apache.drill.exec.store.dfs.FileSelection) Multimap(org.apache.drill.shaded.guava.com.google.common.collect.Multimap) TableStatisticsKind(org.apache.drill.metastore.statistics.TableStatisticsKind) PartitionLocation(org.apache.drill.exec.planner.PartitionLocation) BasicTablesRequests(org.apache.drill.metastore.components.tables.BasicTablesRequests) Collection(java.util.Collection) SchemaPath(org.apache.drill.common.expression.SchemaPath) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) DrillFileSystemUtil(org.apache.drill.exec.util.DrillFileSystemUtil) List(java.util.List) Lists(org.apache.drill.shaded.guava.com.google.common.collect.Lists) FileSystemPartitionDescriptor(org.apache.drill.exec.planner.FileSystemPartitionDescriptor) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) Optional(java.util.Optional) Collections(java.util.Collections) FileSystemPartitionDescriptor(org.apache.drill.exec.planner.FileSystemPartitionDescriptor) DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) DrillTable(org.apache.drill.exec.planner.logical.DrillTable) FormatSelection(org.apache.drill.exec.store.dfs.FormatSelection) PartitionLocation(org.apache.drill.exec.planner.PartitionLocation) SchemalessScan(org.apache.drill.exec.physical.base.SchemalessScan) IOException(java.io.IOException)

Example 52 with TableScan

use of org.apache.calcite.rel.core.TableScan in project drill by apache.

the class ParquetPruneScanRule method getFilterOnProjectParquet.

public static RelOptRule getFilterOnProjectParquet(OptimizerRulesContext optimizerRulesContext) {
    return new PruneScanRule(RelOptHelper.some(DrillFilterRel.class, RelOptHelper.some(DrillProjectRel.class, RelOptHelper.any(DrillScanRel.class))), "PruneScanRule:Filter_On_Project_Parquet", optimizerRulesContext) {

        @Override
        public PartitionDescriptor getPartitionDescriptor(PlannerSettings settings, TableScan scanRel) {
            return new ParquetPartitionDescriptor(settings, (DrillScanRel) scanRel);
        }

        @Override
        public boolean matches(RelOptRuleCall call) {
            final DrillScanRel scan = call.rel(2);
            GroupScan groupScan = scan.getGroupScan();
            // this rule is applicable only for parquet based partition pruning
            if (PrelUtil.getPlannerSettings(scan.getCluster().getPlanner()).isHepPartitionPruningEnabled()) {
                return groupScan instanceof AbstractParquetGroupScan && groupScan.supportsPartitionFilterPushdown() && !scan.partitionFilterPushdown();
            } else {
                return groupScan instanceof AbstractParquetGroupScan && groupScan.supportsPartitionFilterPushdown();
            }
        }

        @Override
        public void onMatch(RelOptRuleCall call) {
            final DrillFilterRel filterRel = call.rel(0);
            final DrillProjectRel projectRel = call.rel(1);
            final DrillScanRel scanRel = call.rel(2);
            doOnMatch(call, filterRel, projectRel, scanRel);
        }
    };
}
Also used : AbstractParquetGroupScan(org.apache.drill.exec.store.parquet.AbstractParquetGroupScan) GroupScan(org.apache.drill.exec.physical.base.GroupScan) TableScan(org.apache.calcite.rel.core.TableScan) DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) AbstractParquetGroupScan(org.apache.drill.exec.store.parquet.AbstractParquetGroupScan) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) DrillProjectRel(org.apache.drill.exec.planner.logical.DrillProjectRel) DrillFilterRel(org.apache.drill.exec.planner.logical.DrillFilterRel) RelOptRuleCall(org.apache.calcite.plan.RelOptRuleCall) ParquetPartitionDescriptor(org.apache.drill.exec.planner.ParquetPartitionDescriptor)

Aggregations

TableScan (org.apache.calcite.rel.core.TableScan)51 RelNode (org.apache.calcite.rel.RelNode)19 ArrayList (java.util.ArrayList)13 PlannerSettings (org.apache.drill.exec.planner.physical.PlannerSettings)13 Project (org.apache.calcite.rel.core.Project)12 DrillScanRel (org.apache.drill.exec.planner.logical.DrillScanRel)11 Filter (org.apache.calcite.rel.core.Filter)10 IOException (java.io.IOException)9 RexNode (org.apache.calcite.rex.RexNode)9 GroupScan (org.apache.drill.exec.physical.base.GroupScan)9 RelOptRuleCall (org.apache.calcite.plan.RelOptRuleCall)8 DrillFilterRel (org.apache.drill.exec.planner.logical.DrillFilterRel)8 Aggregate (org.apache.calcite.rel.core.Aggregate)7 LogicalProject (org.apache.calcite.rel.logical.LogicalProject)7 RexBuilder (org.apache.calcite.rex.RexBuilder)6 RelOptCluster (org.apache.calcite.plan.RelOptCluster)5 RelShuttleImpl (org.apache.calcite.rel.RelShuttleImpl)5 LogicalJoin (org.apache.calcite.rel.logical.LogicalJoin)5 RelDataType (org.apache.calcite.rel.type.RelDataType)5 SchemaPath (org.apache.drill.common.expression.SchemaPath)5