
Example 31 with PlannerSettings

use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by apache.

the class MapRDBStatistics method populateStats.

/**
 * This is the core statistics function for populating the statistics. The statistics populated correspond to the query
 * condition. Different plan types need statistics for different combinations of predicates. Since we currently do not
 * have a tree-walker for {@link QueryCondition}, we construct the stats for the overall predicates from the final
 * predicates rather than from the stats of the individual predicates. This makes the approach susceptible to predicate
 * modification after stats generation. The statistics computed/stored are rowcounts, leading rowcounts, and average row
 * sizes. Rowcounts and leading rowcounts (i.e. those corresponding to predicates on the leading index columns) are
 * stored in the statsCache. Average row sizes are stored in the fIStatsCache (FI stands for Filter Independent).
 *
 * @param condition - The condition for which to obtain statistics
 * @param indexes - The collection of indexes to use for getting statistics
 * @param scanRel - The current scanRel
 * @param context - The index plan call context
 */
private void populateStats(RexNode condition, IndexCollection indexes, DrillScanRelBase scanRel, IndexCallContext context) {
    JsonTableGroupScan jTabGrpScan;
    Map<IndexDescriptor, IndexConditionInfo> firstKeyIdxConditionMap;
    Map<IndexDescriptor, IndexConditionInfo> idxConditionMap;
    /* Map containing the individual base conditions of an ANDed/ORed condition and their selectivities.
     * This is used to compute the overall selectivity of a complex ANDed/ORed condition from its base
     * conditions, which helps prevent over/under-estimates and guessed selectivities for ORed predicates.
     */
    Map<String, Double> baseConditionMap;
    GroupScan grpScan = IndexPlanUtils.getGroupScan(scanRel);
    if ((scanRel instanceof DrillScanRel || scanRel instanceof ScanPrel) && grpScan instanceof JsonTableGroupScan) {
        jTabGrpScan = (JsonTableGroupScan) grpScan;
    } else {
        logger.debug("Statistics: populateStats exit early - not an instance of JsonTableGroupScan!");
        return;
    }
    if (condition == null) {
        populateStatsForNoFilter(jTabGrpScan, indexes, scanRel, context);
        statsAvailable = true;
        return;
    }
    RexBuilder builder = scanRel.getCluster().getRexBuilder();
    PlannerSettings settings = PrelUtil.getSettings(scanRel.getCluster());
    // Get the stats payload for full table (has total rows in the table)
    StatisticsPayload ftsPayload = jTabGrpScan.getFirstKeyEstimatedStats(null, null, scanRel);
    // Get the average row size for table and all indexes
    addToCache(null, jTabGrpScan.getAverageRowSizeStats(null), ftsPayload);
    if (ftsPayload == null || ftsPayload.getRowCount() == 0) {
        return;
    }
    for (IndexDescriptor idx : indexes) {
        StatisticsPayload idxRowSizePayload = jTabGrpScan.getAverageRowSizeStats(idx);
        addToCache(idx, idxRowSizePayload, ftsPayload);
    }
    /* Only use indexes with distinct first key */
    IndexCollection distFKeyIndexes = distinctFKeyIndexes(indexes, scanRel);
    IndexConditionInfo.Builder infoBuilder = IndexConditionInfo.newBuilder(condition, distFKeyIndexes, builder, scanRel);
    idxConditionMap = infoBuilder.getIndexConditionMap();
    firstKeyIdxConditionMap = infoBuilder.getFirstKeyIndexConditionMap();
    baseConditionMap = new HashMap<>();
    for (IndexDescriptor idx : firstKeyIdxConditionMap.keySet()) {
        if (IndexPlanUtils.conditionIndexed(context.getOrigMarker(), idx) == IndexPlanUtils.ConditionIndexed.NONE) {
            continue;
        }
        RexNode idxCondition = firstKeyIdxConditionMap.get(idx).indexCondition;
        /* Use the pre-processed condition only for getting the actual statistics from the MapR-DB APIs. Use the
         * original condition everywhere else (cache store/lookups) since the RexNode condition and its
         * corresponding QueryCondition will be used to get statistics. e.g. we convert LIKE into a RANGE
         * condition to get statistics; however, statistics are always asked for with the LIKE and NOT the RANGE.
         */
        RexNode preProcIdxCondition = convertToStatsCondition(idxCondition, idx, context, scanRel, Arrays.asList(SqlKind.CAST, SqlKind.LIKE));
        RelDataType newRowType;
        FunctionalIndexInfo functionInfo = idx.getFunctionalInfo();
        if (functionInfo.hasFunctional()) {
            newRowType = FunctionalIndexHelper.rewriteFunctionalRowType(scanRel, context, functionInfo);
        } else {
            newRowType = scanRel.getRowType();
        }
        QueryCondition queryCondition = jTabGrpScan.convertToQueryCondition(convertToLogicalExpression(preProcIdxCondition, newRowType, settings, builder));
        // Cap rows/size at total rows in case of issues with DB APIs
        StatisticsPayload idxPayload = jTabGrpScan.getFirstKeyEstimatedStats(queryCondition, idx, scanRel);
        double rowCount = Math.min(idxPayload.getRowCount(), ftsPayload.getRowCount());
        double leadingRowCount = Math.min(idxPayload.getLeadingRowCount(), rowCount);
        double avgRowSize = Math.min(idxPayload.getAvgRowSize(), ftsPayload.getAvgRowSize());
        StatisticsPayload payload = new MapRDBStatisticsPayload(rowCount, leadingRowCount, avgRowSize);
        addToCache(idxCondition, idx, context, payload, jTabGrpScan, scanRel, newRowType);
        addBaseConditions(idxCondition, payload, false, baseConditionMap, scanRel.getRowType());
    }
    /* Add the row count for index conditions on all indexes. Stats are only computed for leading
     * keys, but index conditions can be pushed down and are required for access path costing.
     */
    for (IndexDescriptor idx : idxConditionMap.keySet()) {
        if (IndexPlanUtils.conditionIndexed(context.getOrigMarker(), idx) == IndexPlanUtils.ConditionIndexed.NONE) {
            continue;
        }
        Map<LogicalExpression, RexNode> leadingPrefixMap = Maps.newHashMap();
        double rowCount, leadingRowCount, avgRowSize;
        RexNode idxCondition = idxConditionMap.get(idx).indexCondition;
        // Ignore conditions which always evaluate to true
        if (idxCondition.isAlwaysTrue()) {
            continue;
        }
        RexNode idxIncColCondition = idxConditionMap.get(idx).remainderCondition;
        RexNode idxRemColCondition = IndexPlanUtils.getLeadingPrefixMap(leadingPrefixMap, idx.getIndexColumns(), infoBuilder, idxCondition);
        RexNode idxLeadColCondition = IndexPlanUtils.getLeadingColumnsFilter(IndexPlanUtils.getLeadingFilters(leadingPrefixMap, idx.getIndexColumns()), builder);
        RexNode idxTotRemColCondition = IndexPlanUtils.getTotalRemainderFilter(idxRemColCondition, idxIncColCondition, builder);
        RexNode idxTotColCondition = IndexPlanUtils.getTotalFilter(idxLeadColCondition, idxTotRemColCondition, builder);
        FunctionalIndexInfo functionInfo = idx.getFunctionalInfo();
        RelDataType newRowType = scanRel.getRowType();
        if (functionInfo.hasFunctional()) {
            newRowType = FunctionalIndexHelper.rewriteFunctionalRowType(scanRel, context, functionInfo);
        }
        /* For non-covering plans we would need the index leading condition */
        rowCount = ftsPayload.getRowCount() * computeSelectivity(idxLeadColCondition, idx, ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
        leadingRowCount = rowCount;
        avgRowSize = fIStatsCache.get(buildUniqueIndexIdentifier(idx)).getAvgRowSize();
        addToCache(idxLeadColCondition, idx, context, new MapRDBStatisticsPayload(rowCount, leadingRowCount, avgRowSize), jTabGrpScan, scanRel, newRowType);
        /* For covering plans we would need the full condition */
        rowCount = ftsPayload.getRowCount() * computeSelectivity(idxTotColCondition, idx, ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
        addToCache(idxTotColCondition, idx, context, new MapRDBStatisticsPayload(rowCount, leadingRowCount, avgRowSize), jTabGrpScan, scanRel, newRowType);
        /* For intersect plans we would need the index condition */
        rowCount = ftsPayload.getRowCount() * computeSelectivity(idxCondition, idx, ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
        addToCache(idxCondition, idx, context, new MapRDBStatisticsPayload(rowCount, leadingRowCount, avgRowSize), jTabGrpScan, scanRel, newRowType);
        /* Add the rowCount for condition on only included columns - no leading columns here! */
        if (idxIncColCondition != null) {
            rowCount = ftsPayload.getRowCount() * computeSelectivity(idxIncColCondition, null, ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
            addToCache(idxIncColCondition, idx, context, new MapRDBStatisticsPayload(rowCount, rowCount, avgRowSize), jTabGrpScan, scanRel, newRowType);
        }
    }
    // Add the rowCount for the complete condition - based on table
    double rowCount = ftsPayload.getRowCount() * computeSelectivity(condition, null, ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
    // Here, ftsLeadingKey rowcount is based on _id predicates
    StatisticsPayload ftsLeadingKeyPayload = jTabGrpScan.getFirstKeyEstimatedStats(jTabGrpScan.convertToQueryCondition(convertToLogicalExpression(condition, scanRel.getRowType(), settings, builder)), null, scanRel);
    addToCache(condition, null, null, new MapRDBStatisticsPayload(rowCount, ftsLeadingKeyPayload.getRowCount(), ftsPayload.getAvgRowSize()), jTabGrpScan, scanRel, scanRel.getRowType());
    // Add the full table rows while we are at it - represented by <NULL> RexNode, <NULL> QueryCondition.
    // No ftsLeadingKey so leadingKeyRowcount = totalRowCount
    addToCache(null, null, null, new MapRDBStatisticsPayload(ftsPayload.getRowCount(), ftsPayload.getRowCount(), ftsPayload.getAvgRowSize()), jTabGrpScan, scanRel, scanRel.getRowType());
    // mark that statistics are now available
    statsAvailable = true;
}
Also used : DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) ScanPrel(org.apache.drill.exec.planner.physical.ScanPrel) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) RelDataType(org.apache.calcite.rel.type.RelDataType) DbGroupScan(org.apache.drill.exec.physical.base.DbGroupScan) GroupScan(org.apache.drill.exec.physical.base.GroupScan) JsonTableGroupScan(org.apache.drill.exec.store.mapr.db.json.JsonTableGroupScan) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) RexBuilder(org.apache.calcite.rex.RexBuilder) QueryCondition(org.ojai.store.QueryCondition) RexNode(org.apache.calcite.rex.RexNode)
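
The two caches named in the javadoc above can be pictured with a minimal standalone sketch. The class, field, and method names below are illustrative assumptions rather than Drill's actual members; the point is only the keying scheme: per-condition row counts keyed by the condition's string form plus an index identifier, and filter-independent average row sizes keyed by the index identifier alone.

import java.util.HashMap;
import java.util.Map;

class StatsCacheSketch {

    // condition string -> (index identifier -> {rowcount, leading rowcount})
    private final Map<String, Map<String, double[]>> statsCache = new HashMap<>();

    // index identifier -> average row size; filter independent, hence the "fI" prefix
    private final Map<String, Double> fiStatsCache = new HashMap<>();

    void putConditionStats(String conditionAsStr, String idxIdentifier,
                           double rowCount, double leadingRowCount) {
        statsCache.computeIfAbsent(conditionAsStr, k -> new HashMap<>())
                  .put(idxIdentifier, new double[] {rowCount, leadingRowCount});
    }

    void putAvgRowSize(String idxIdentifier, double avgRowSize) {
        fiStatsCache.put(idxIdentifier, avgRowSize);
    }

    // null means no stats were cached for this condition/index pair
    Double getRowCount(String conditionAsStr, String idxIdentifier) {
        Map<String, double[]> byIndex = statsCache.get(conditionAsStr);
        double[] payload = byIndex == null ? null : byIndex.get(idxIdentifier);
        return payload == null ? null : payload[0];
    }
}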

Example 32 with PlannerSettings

use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by apache.

the class MapRDBStatistics method initialize.

public boolean initialize(RexNode condition, DrillScanRelBase scanRel, IndexCallContext context) {
    GroupScan scan = IndexPlanUtils.getGroupScan(scanRel);
    PlannerSettings settings = PrelUtil.getPlannerSettings(scanRel.getCluster().getPlanner());
    rowKeyJoinBackIOFactor = settings.getIndexRowKeyJoinCostFactor();
    if (scan instanceof DbGroupScan) {
        String conditionAsStr = convertRexToString(condition, scanRel.getRowType());
        if (statsCache.get(conditionAsStr) == null) {
            IndexCollection indexes = ((DbGroupScan) scan).getSecondaryIndexCollection(scanRel);
            populateStats(condition, indexes, scanRel, context);
            logger.info("index_plan_info: initialize: scanRel #{} and groupScan {} got fulltable {}, statsCache: {}, fiStatsCache: {}", scanRel.getId(), System.identityHashCode(scan), fullTableScanPayload, statsCache, fIStatsCache);
            return true;
        }
    }
    return false;
}
Also used : DbGroupScan(org.apache.drill.exec.physical.base.DbGroupScan) GroupScan(org.apache.drill.exec.physical.base.GroupScan) JsonTableGroupScan(org.apache.drill.exec.store.mapr.db.json.JsonTableGroupScan) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings)
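
For context, here is a hypothetical caller-side sketch of the contract visible in initialize: an index-planning rule would invoke it once per condition before costing candidate plans. The method below is an assumption for illustration, not Drill's actual rule code.

// Hypothetical caller; the parameter types are Drill's, but this method itself is
// illustrative and not part of Drill's rule code.
private void costIndexPlansSketch(RexNode condition, DrillScanRelBase scanRel,
                                  IndexCallContext context, MapRDBStatistics stats) {
    // true: populateStats() just ran and filled the caches for this condition;
    // false: stats for this condition were already cached, or the scan is not a DbGroupScan
    if (stats.initialize(condition, scanRel, context)) {
        // candidate index plans can now be costed against the freshly cached row counts
    }
}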

Example 33 with PlannerSettings

use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by apache.

the class FileMetadataInfoCollector method init.

private void init(FormatSelection selection, PlannerSettings settings, Supplier<TableScan> tableScanSupplier, List<SchemaPath> interestingColumns, int segmentColumnsCount) throws IOException {
    List<SchemaPath> metastoreInterestingColumns = Optional.ofNullable(basicRequests.interestingColumnsAndPartitionKeys(tableInfo).interestingColumns()).map(metastoreInterestingColumnNames -> metastoreInterestingColumnNames.stream().map(SchemaPath::parseFromString).collect(Collectors.toList())).orElse(null);
    Map<String, Long> filesNamesLastModifiedTime = basicRequests.filesLastModifiedTime(tableInfo, null, null);
    List<String> newFiles = new ArrayList<>();
    List<String> updatedFiles = new ArrayList<>();
    List<String> removedFiles = new ArrayList<>(filesNamesLastModifiedTime.keySet());
    List<String> allFiles = new ArrayList<>();
    for (FileStatus fileStatus : getFileStatuses(selection)) {
        String path = Path.getPathWithoutSchemeAndAuthority(fileStatus.getPath()).toUri().getPath();
        Long lastModificationTime = filesNamesLastModifiedTime.get(path);
        if (lastModificationTime == null) {
            newFiles.add(path);
        } else if (lastModificationTime < fileStatus.getModificationTime()) {
            updatedFiles.add(path);
        }
        removedFiles.remove(path);
        allFiles.add(path);
    }
    String selectionRoot = selection.getSelection().getSelectionRoot().toUri().getPath();
    if (!Objects.equals(metastoreInterestingColumns, interestingColumns) && metastoreInterestingColumns != null && (interestingColumns == null || !metastoreInterestingColumns.containsAll(interestingColumns)) || TableStatisticsKind.ANALYZE_METADATA_LEVEL.getValue(basicRequests.tableMetadata(tableInfo)).compareTo(metadataLevel) != 0) {
        // do not update table scan and lists of segments / files / row groups,
        // metadata should be recalculated
        tableScan = tableScanSupplier.get();
        metadataToRemove.addAll(getMetadataInfoList(selectionRoot, removedFiles, MetadataType.SEGMENT, 0));
        return;
    }
    // checks whether there are any new, updated or removed files
    if (!newFiles.isEmpty() || !updatedFiles.isEmpty() || !removedFiles.isEmpty()) {
        List<String> scanFiles = new ArrayList<>(newFiles);
        scanFiles.addAll(updatedFiles);
        // updates scan to read updated / new files
        tableScan = getTableScan(settings, tableScanSupplier.get(), scanFiles);
        // iterates from the end;
        // takes deepest updated segments;
        // finds their parents:
        // - fetches all segments for parent level;
        // - filters segments to leave parents only;
        // obtains all child segments;
        // filters child segments for filtered parent segments
        int lastSegmentIndex = segmentColumnsCount - 1;
        List<String> scanAndRemovedFiles = new ArrayList<>(scanFiles);
        scanAndRemovedFiles.addAll(removedFiles);
        // 1. Obtain files info for files from the same folder without removed files
        // 2. Get segments for obtained files + segments for removed files
        // 3. Get parent segments
        // 4. Get other segments for the same parent segment
        // 5. Remove segments which contain only removed files (i.e. match removedFilesMetadata and do not match filesInfo)
        // 6. Do the same for parent segments
        List<MetadataInfo> allFilesInfo = getMetadataInfoList(selectionRoot, allFiles, MetadataType.FILE, 0);
        // first pass: collect updated segments even without files, they will be removed later
        List<MetadataInfo> leafSegments = getMetadataInfoList(selectionRoot, scanAndRemovedFiles, MetadataType.SEGMENT, lastSegmentIndex);
        List<MetadataInfo> removedFilesMetadata = getMetadataInfoList(selectionRoot, removedFiles, MetadataType.FILE, 0);
        List<MetadataInfo> scanFilesInfo = getMetadataInfoList(selectionRoot, scanAndRemovedFiles, MetadataType.FILE, 0);
        // files from scan + files from the same folder without removed files
        filesInfo = leafSegments.stream().filter(parent -> scanFilesInfo.stream().anyMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier()))).flatMap(parent -> allFilesInfo.stream().filter(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier()))).collect(Collectors.toList());
        Multimap<Integer, MetadataInfo> allSegments = populateSegments(removedFiles, allFiles, selectionRoot, lastSegmentIndex, leafSegments, removedFilesMetadata);
        List<MetadataInfo> allRowGroupsInfo = getAllRowGroupsMetadataInfos(allFiles);
        rowGroupsInfo = allRowGroupsInfo.stream().filter(child -> filesInfo.stream().map(MetadataInfo::identifier).anyMatch(parent -> MetadataIdentifierUtils.isMetadataKeyParent(parent, child.identifier()))).collect(Collectors.toList());
        List<MetadataInfo> segmentsToUpdate = getMetadataInfoList(selectionRoot, scanAndRemovedFiles, MetadataType.SEGMENT, 0);
        allMetaToHandle = Streams.concat(allSegments.values().stream(), allFilesInfo.stream(), allRowGroupsInfo.stream()).filter(child -> segmentsToUpdate.stream().anyMatch(parent -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier()))).filter(parent -> removedFilesMetadata.stream().noneMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier())) || filesInfo.stream().anyMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier()))).collect(Collectors.toList());
        // removed top-level segments are handled separately since their metadata is not overridden when writing to the Metastore
        List<MetadataInfo> removedTopSegments = getMetadataInfoList(selectionRoot, removedFiles, MetadataType.SEGMENT, 0).stream().filter(parent -> removedFilesMetadata.stream().anyMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier())) && allFilesInfo.stream().noneMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier()))).collect(Collectors.toList());
        metadataToRemove.addAll(removedTopSegments);
        segmentsToUpdate.stream().filter(segment -> !removedTopSegments.contains(segment)).forEach(allMetaToHandle::add);
    } else {
        // table metadata may still be up-to-date
        outdated = false;
    }
}
Also used : SchemalessScan(org.apache.drill.exec.physical.base.SchemalessScan) MetadataType(org.apache.drill.metastore.metadata.MetadataType) TableScan(org.apache.calcite.rel.core.TableScan) Arrays(java.util.Arrays) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreColumn(org.apache.drill.metastore.MetastoreColumn) FileSystem(org.apache.hadoop.fs.FileSystem) DrillRel(org.apache.drill.exec.planner.logical.DrillRel) Streams(org.apache.drill.shaded.guava.com.google.common.collect.Streams) DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) DrillFileSystem(org.apache.drill.exec.store.dfs.DrillFileSystem) ArrayListMultimap(org.apache.drill.shaded.guava.com.google.common.collect.ArrayListMultimap) FileStatus(org.apache.hadoop.fs.FileStatus) DrillTable(org.apache.drill.exec.planner.logical.DrillTable) ColumnExplorer(org.apache.drill.exec.store.ColumnExplorer) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) FormatSelection(org.apache.drill.exec.store.dfs.FormatSelection) ImpersonationUtil(org.apache.drill.exec.util.ImpersonationUtil) Path(org.apache.hadoop.fs.Path) FileSelection(org.apache.drill.exec.store.dfs.FileSelection) Multimap(org.apache.drill.shaded.guava.com.google.common.collect.Multimap) TableStatisticsKind(org.apache.drill.metastore.statistics.TableStatisticsKind) PartitionLocation(org.apache.drill.exec.planner.PartitionLocation) BasicTablesRequests(org.apache.drill.metastore.components.tables.BasicTablesRequests) Collection(java.util.Collection) SchemaPath(org.apache.drill.common.expression.SchemaPath) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) DrillFileSystemUtil(org.apache.drill.exec.util.DrillFileSystemUtil) List(java.util.List) Lists(org.apache.drill.shaded.guava.com.google.common.collect.Lists) FileSystemPartitionDescriptor(org.apache.drill.exec.planner.FileSystemPartitionDescriptor) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) Optional(java.util.Optional) Collections(java.util.Collections) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) FileStatus(org.apache.hadoop.fs.FileStatus) ArrayList(java.util.ArrayList) SchemaPath(org.apache.drill.common.expression.SchemaPath)
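
The opening loop of init, which classifies files as new, updated, or removed by comparing the Metastore's last-modified times against the current file listing, can be distilled into a self-contained sketch. The maps below stand in for the Metastore response and the FileStatus listing; all names are illustrative.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

class FileDiffSketch {

    static void classify(Map<String, Long> metastoreTimes, Map<String, Long> currentTimes) {
        List<String> newFiles = new ArrayList<>();
        List<String> updatedFiles = new ArrayList<>();
        // anything the Metastore knows about but is no longer present is "removed"
        List<String> removedFiles = new ArrayList<>(metastoreTimes.keySet());
        for (Map.Entry<String, Long> file : currentTimes.entrySet()) {
            Long knownTime = metastoreTimes.get(file.getKey());
            if (knownTime == null) {
                newFiles.add(file.getKey());        // never seen before
            } else if (knownTime < file.getValue()) {
                updatedFiles.add(file.getKey());    // modified since last ANALYZE
            }
            removedFiles.remove(file.getKey());     // still present, so not removed
        }
        System.out.printf("new=%s updated=%s removed=%s%n", newFiles, updatedFiles, removedFiles);
    }

    public static void main(String[] args) {
        Map<String, Long> metastore = new HashMap<>();
        metastore.put("/t/a.parquet", 100L);
        metastore.put("/t/b.parquet", 100L);
        Map<String, Long> current = new HashMap<>();
        current.put("/t/a.parquet", 200L);   // updated
        current.put("/t/c.parquet", 150L);   // new; b.parquet is removed
        classify(metastore, current);
    }
}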

Example 34 with PlannerSettings

use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by apache.

the class FileMetadataInfoCollector method getTableScan.

private TableScan getTableScan(PlannerSettings settings, TableScan scanRel, List<String> scanFiles) {
    FileSystemPartitionDescriptor descriptor = new FileSystemPartitionDescriptor(settings, scanRel);
    List<PartitionLocation> newPartitions = Lists.newArrayList(descriptor.iterator()).stream().flatMap(Collection::stream).flatMap(p -> p.getPartitionLocationRecursive().stream()).filter(p -> scanFiles.contains(p.getEntirePartitionLocation().toUri().getPath())).collect(Collectors.toList());
    try {
        if (!newPartitions.isEmpty()) {
            return descriptor.createTableScan(newPartitions, false);
        } else {
            DrillTable drillTable = descriptor.getTable();
            SchemalessScan scan = new SchemalessScan(drillTable.getUserName(), ((FormatSelection) descriptor.getTable().getSelection()).getSelection().getSelectionRoot());
            return new DrillScanRel(scanRel.getCluster(), scanRel.getTraitSet().plus(DrillRel.DRILL_LOGICAL), scanRel.getTable(), scan, scanRel.getRowType(), DrillScanRel.getProjectedColumns(scanRel.getTable(), true), true);
        }
    } catch (Exception e) {
        throw new RuntimeException("Error happened during recreation of pruned scan", e);
    }
}
Also used : SchemalessScan(org.apache.drill.exec.physical.base.SchemalessScan) MetadataType(org.apache.drill.metastore.metadata.MetadataType) TableScan(org.apache.calcite.rel.core.TableScan) Arrays(java.util.Arrays) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreColumn(org.apache.drill.metastore.MetastoreColumn) FileSystem(org.apache.hadoop.fs.FileSystem) DrillRel(org.apache.drill.exec.planner.logical.DrillRel) Streams(org.apache.drill.shaded.guava.com.google.common.collect.Streams) DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) DrillFileSystem(org.apache.drill.exec.store.dfs.DrillFileSystem) ArrayListMultimap(org.apache.drill.shaded.guava.com.google.common.collect.ArrayListMultimap) FileStatus(org.apache.hadoop.fs.FileStatus) DrillTable(org.apache.drill.exec.planner.logical.DrillTable) ColumnExplorer(org.apache.drill.exec.store.ColumnExplorer) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) FormatSelection(org.apache.drill.exec.store.dfs.FormatSelection) ImpersonationUtil(org.apache.drill.exec.util.ImpersonationUtil) Path(org.apache.hadoop.fs.Path) FileSelection(org.apache.drill.exec.store.dfs.FileSelection) Multimap(org.apache.drill.shaded.guava.com.google.common.collect.Multimap) TableStatisticsKind(org.apache.drill.metastore.statistics.TableStatisticsKind) PartitionLocation(org.apache.drill.exec.planner.PartitionLocation) BasicTablesRequests(org.apache.drill.metastore.components.tables.BasicTablesRequests) Collection(java.util.Collection) SchemaPath(org.apache.drill.common.expression.SchemaPath) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) DrillFileSystemUtil(org.apache.drill.exec.util.DrillFileSystemUtil) List(java.util.List) Lists(org.apache.drill.shaded.guava.com.google.common.collect.Lists) FileSystemPartitionDescriptor(org.apache.drill.exec.planner.FileSystemPartitionDescriptor) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) Optional(java.util.Optional) Collections(java.util.Collections) FileSystemPartitionDescriptor(org.apache.drill.exec.planner.FileSystemPartitionDescriptor) DrillScanRel(org.apache.drill.exec.planner.logical.DrillScanRel) DrillTable(org.apache.drill.exec.planner.logical.DrillTable) FormatSelection(org.apache.drill.exec.store.dfs.FormatSelection) PartitionLocation(org.apache.drill.exec.planner.PartitionLocation) SchemalessScan(org.apache.drill.exec.physical.base.SchemalessScan) IOException(java.io.IOException)
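
The stream pipeline that prunes partitions in getTableScan reduces to a flatten-and-filter. In the sketch below each inner list stands in for one PartitionLocation's recursive file paths; the real code additionally strips the scheme and authority from each path before comparing.

import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

class PartitionFilterSketch {

    // keep only the leaf locations whose path appears among the files chosen for rescanning
    static List<String> prune(List<List<String>> partitionGroups, Set<String> scanFiles) {
        return partitionGroups.stream()
            .flatMap(List::stream)
            .filter(scanFiles::contains)
            .collect(Collectors.toList());
    }
}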

Example 35 with PlannerSettings

use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by apache.

the class DrillRelMdRowCount method getRowCount.

@Override
public Double getRowCount(TableScan rel, RelMetadataQuery mq) {
    DrillTable table = Utilities.getDrillTable(rel.getTable());
    PlannerSettings settings = PrelUtil.getSettings(rel.getCluster());
    // If guessing, fall back to the default row count estimate from RelMdRowCount
    if (DrillRelOptUtil.guessRows(rel)) {
        return super.getRowCount(rel, mq);
    }
    // Return rowcount from statistics, if available. Otherwise, delegate to parent.
    try {
        if (table != null && table.getGroupScan().getTableMetadata() != null && TableStatisticsKind.HAS_DESCRIPTIVE_STATISTICS.getValue(table.getGroupScan().getTableMetadata())) {
            /* For GroupScan rely on the accurate count from the scan, if available, instead of
             * statistics, since partition pruning/filter pushdown might have occurred.
             * e.g. ParquetGroupScan returns an accurate rowcount. The other way would be to
             * iterate over the rowgroups present in the GroupScan to compute the rowcount.
             */
            if (!table.getGroupScan().getScanStats(settings).getGroupScanProperty().hasExactRowCount()) {
                return TableStatisticsKind.EST_ROW_COUNT.getValue(table.getGroupScan().getTableMetadata());
            } else {
                if (!(rel instanceof DrillScanRelBase)) {
                    return table.getGroupScan().getScanStats(settings).getRecordCount();
                }
            }
        }
    } catch (IOException ex) {
        return super.getRowCount(rel, mq);
    }
    return super.getRowCount(rel, mq);
}
Also used : DrillTable(org.apache.drill.exec.planner.logical.DrillTable) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) DrillScanRelBase(org.apache.drill.exec.planner.common.DrillScanRelBase) IOException(java.io.IOException)
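
The control flow of getRowCount reduces to a small decision ladder. The sketch below is a simplification under stated assumptions: the booleans stand in for the Drill calls named in the comments, and the DrillScanRelBase special case is folded into the final fallback.

// Simplified decision ladder; the booleans stand in for the Drill calls in the comments.
static double rowCountSketch(boolean guessing, boolean hasDescriptiveStats,
                             boolean hasExactScanCount, double estRowCount,
                             double scanRecordCount, double parentEstimate) {
    if (guessing) {                      // DrillRelOptUtil.guessRows(rel)
        return parentEstimate;           // super.getRowCount(rel, mq)
    }
    if (hasDescriptiveStats) {           // TableStatisticsKind.HAS_DESCRIPTIVE_STATISTICS
        if (!hasExactScanCount) {        // GroupScanProperty without an exact row count
            return estRowCount;          // TableStatisticsKind.EST_ROW_COUNT
        }
        return scanRecordCount;          // accurate count straight from the scan stats
    }
    return parentEstimate;               // no stats: delegate to RelMdRowCount
}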

Aggregations

PlannerSettings (org.apache.drill.exec.planner.physical.PlannerSettings) 38
DrillScanRel (org.apache.drill.exec.planner.logical.DrillScanRel) 16
RexNode (org.apache.calcite.rex.RexNode) 13
GroupScan (org.apache.drill.exec.physical.base.GroupScan) 13
TableScan (org.apache.calcite.rel.core.TableScan) 12
RexBuilder (org.apache.calcite.rex.RexBuilder) 12
RelDataType (org.apache.calcite.rel.type.RelDataType) 11
RelOptRuleCall (org.apache.calcite.plan.RelOptRuleCall) 10
IOException (java.io.IOException) 9
RelDataTypeFactory (org.apache.calcite.rel.type.RelDataTypeFactory) 8
SchemaPath (org.apache.drill.common.expression.SchemaPath) 8
DrillFilterRel (org.apache.drill.exec.planner.logical.DrillFilterRel) 8
HiveScan (org.apache.drill.exec.store.hive.HiveScan) 8
RelNode (org.apache.calcite.rel.RelNode) 7
AggregateCall (org.apache.calcite.rel.core.AggregateCall) 6
SqlCountAggFunction (org.apache.calcite.sql.fun.SqlCountAggFunction) 6
DrillProjectRel (org.apache.drill.exec.planner.logical.DrillProjectRel) 6
List (java.util.List) 5
FormatSelection (org.apache.drill.exec.store.dfs.FormatSelection) 5
ArrayList (java.util.ArrayList) 4