
Example 26 with PlannerSettings

Use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by apache.

From class PlanningBase, method testSqlPlan:

protected void testSqlPlan(String sqlCommands) throws Exception {
    final DrillbitContext dbContext = mock(DrillbitContext.class);
    final QueryContext context = mock(QueryContext.class);
    final String[] sqlStrings = sqlCommands.split(";");
    final LocalPersistentStoreProvider provider = new LocalPersistentStoreProvider(config);
    provider.start();
    final ScanResult scanResult = ClassPathScanner.fromPrescan(config);
    final LogicalPlanPersistence logicalPlanPersistence = new LogicalPlanPersistence(config, scanResult);
    final SystemOptionManager systemOptions = new SystemOptionManager(logicalPlanPersistence, provider, config);
    systemOptions.init();
    final UserSession userSession = UserSession.Builder.newBuilder().withOptionManager(systemOptions).build();
    final SessionOptionManager sessionOptions = userSession.getOptions();
    final QueryOptionManager queryOptions = new QueryOptionManager(sessionOptions);
    final ExecutionControls executionControls = new ExecutionControls(queryOptions, DrillbitEndpoint.getDefaultInstance());
    when(dbContext.getMetrics()).thenReturn(new MetricRegistry());
    when(dbContext.getAllocator()).thenReturn(allocator);
    when(dbContext.getConfig()).thenReturn(config);
    when(dbContext.getOptionManager()).thenReturn(systemOptions);
    when(dbContext.getStoreProvider()).thenReturn(provider);
    when(dbContext.getClasspathScan()).thenReturn(scanResult);
    when(dbContext.getLpPersistence()).thenReturn(logicalPlanPersistence);
    final StoragePluginRegistry registry = new StoragePluginRegistryImpl(dbContext);
    registry.init();
    final FunctionImplementationRegistry functionRegistry = new FunctionImplementationRegistry(config);
    final DrillOperatorTable table = new DrillOperatorTable(functionRegistry, systemOptions);
    SchemaConfig schemaConfig = SchemaConfig.newBuilder("foo", context).build();
    SchemaPlus root = DynamicSchema.createRootSchema(registry, schemaConfig, new AliasRegistryProvider(dbContext));
    when(context.getNewDefaultSchema()).thenReturn(root);
    when(context.getLpPersistence()).thenReturn(new LogicalPlanPersistence(config, ClassPathScanner.fromPrescan(config)));
    when(context.getStorage()).thenReturn(registry);
    when(context.getFunctionRegistry()).thenReturn(functionRegistry);
    when(context.getSession()).thenReturn(UserSession.Builder.newBuilder().withOptionManager(sessionOptions).setSupportComplexTypes(true).build());
    when(context.getCurrentEndpoint()).thenReturn(DrillbitEndpoint.getDefaultInstance());
    when(context.getActiveEndpoints()).thenReturn(ImmutableList.of(DrillbitEndpoint.getDefaultInstance()));
    when(context.getPlannerSettings()).thenReturn(new PlannerSettings(queryOptions, functionRegistry));
    when(context.getOptions()).thenReturn(queryOptions);
    when(context.getConfig()).thenReturn(config);
    when(context.getDrillOperatorTable()).thenReturn(table);
    when(context.getAllocator()).thenReturn(allocator);
    when(context.getExecutionControls()).thenReturn(executionControls);
    when(context.getLpPersistence()).thenReturn(logicalPlanPersistence);
    // mocks for org.apache.drill.TestTpchPlanning#tpch06 test.
    // With changes for decimal types, subtract udf for decimals is used.
    when(context.getManagedBuffer()).thenReturn(allocator.buffer(4));
    when(context.getConstantValueHolder(eq("0.03"), eq(TypeProtos.MinorType.VARDECIMAL), Matchers.<Function<DrillBuf, ValueHolder>>any())).thenReturn(ValueHolderHelper.getVarDecimalHolder(allocator.buffer(4), "0.03"));
    when(context.getConstantValueHolder(eq("0.01"), eq(TypeProtos.MinorType.VARDECIMAL), Matchers.<Function<DrillBuf, ValueHolder>>any())).thenReturn(ValueHolderHelper.getVarDecimalHolder(allocator.buffer(4), "0.01"));
    when(context.getOption(anyString())).thenCallRealMethod();
    for (final String sql : sqlStrings) {
        if (sql.trim().isEmpty()) {
            continue;
        }
        @SuppressWarnings("unused") final PhysicalPlan p = DrillSqlWorker.getPlan(context, sql);
    }
}
Also used:
org.apache.drill.exec.server.DrillbitContext
org.apache.drill.exec.server.options.SessionOptionManager
org.apache.drill.exec.store.SchemaConfig
org.apache.drill.exec.planner.physical.PlannerSettings
org.apache.drill.exec.store.sys.store.provider.LocalPersistentStoreProvider
org.apache.drill.exec.server.options.SystemOptionManager
org.apache.drill.exec.server.options.QueryOptionManager
org.mockito.ArgumentMatchers.anyString
org.apache.drill.common.config.LogicalPlanPersistence
org.apache.drill.exec.alias.AliasRegistryProvider
org.apache.drill.exec.rpc.user.UserSession
org.apache.drill.exec.expr.fn.FunctionImplementationRegistry
org.apache.drill.exec.planner.sql.DrillOperatorTable
io.netty.buffer.DrillBuf
org.apache.drill.exec.store.StoragePluginRegistry
org.apache.drill.common.scanner.persistence.ScanResult
org.apache.drill.exec.physical.PhysicalPlan
com.codahale.metrics.MetricRegistry
org.apache.calcite.schema.SchemaPlus
org.apache.drill.exec.ops.QueryContext
org.apache.drill.exec.expr.holders.ValueHolder
org.apache.drill.exec.testing.ExecutionControls
org.apache.drill.exec.store.StoragePluginRegistryImpl
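Because testSqlPlan() splits its argument on semicolons and skips blank fragments, a subclass test can plan several statements in one call. A minimal sketch of such a subclass (the class name and queries are illustrative; TestTpchPlanning, referenced in the mocks above, is a real subclass of PlanningBase):

import org.junit.Test;

// Hypothetical subclass for illustration only.
public class ExamplePlanningTest extends PlanningBase {

  @Test
  public void planTwoStatements() throws Exception {
    // Both statements are planned in order; blank fragments between
    // semicolons are skipped by testSqlPlan().
    testSqlPlan("SELECT 1 FROM cp.`employee.json`; SELECT full_name FROM cp.`employee.json`");
  }
}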

Example 27 with PlannerSettings

Use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by apache.

From class ConvertHiveParquetScanToDrillParquetScan, method onMatch:

@Override
public void onMatch(RelOptRuleCall call) {
    try {
        final DrillScanRel hiveScanRel = call.rel(0);
        final HiveScan hiveScan = (HiveScan) hiveScanRel.getGroupScan();
        final PlannerSettings settings = PrelUtil.getPlannerSettings(call.getPlanner());
        final String partitionColumnLabel = settings.getFsPartitionColumnLabel();
        final Table hiveTable = hiveScan.getHiveReadEntry().getTable();
        final HiveReadEntry hiveReadEntry = hiveScan.getHiveReadEntry();
        final HiveMetadataProvider hiveMetadataProvider = new HiveMetadataProvider(hiveScan.getUserName(), hiveReadEntry, hiveScan.getHiveConf());
        final List<HiveMetadataProvider.LogicalInputSplit> logicalInputSplits = hiveMetadataProvider.getInputSplits(hiveReadEntry);
        if (logicalInputSplits.isEmpty()) {
            // table is empty, use original scan
            return;
        }
        final Map<String, String> partitionColMapping = getPartitionColMapping(hiveTable, partitionColumnLabel);
        final DrillScanRel nativeScanRel = createNativeScanRel(partitionColMapping, hiveScanRel, logicalInputSplits, settings.getOptions());
        if (hiveScanRel.getRowType().getFieldCount() == 0) {
            call.transformTo(nativeScanRel);
        } else {
            final DrillProjectRel projectRel = createProjectRel(hiveScanRel, partitionColMapping, nativeScanRel);
            call.transformTo(projectRel);
        }
        /*
        The Drill native scan should take precedence over the Hive scan since it is more efficient.
        Hive does not always produce correct costing (e.g. for external tables Hive does not know the
        number of rows and we estimate it approximately). Drill, on the contrary, calculates the number
        of rows exactly, so the Hive scan may be chosen over the Drill native scan because its cost
        appears lower. To ensure the Drill native scan is chosen, reduce the Hive scan's importance to 0.
        */
        call.getPlanner().setImportance(hiveScanRel, 0.0);
    } catch (final Exception e) {
        logger.warn("Failed to convert HiveScan to HiveDrillNativeParquetScan", e);
    }
}
Also used:
org.apache.drill.exec.planner.logical.DrillScanRel
org.apache.drill.exec.store.hive.HiveReadEntry
org.apache.hadoop.hive.metastore.api.Table
org.apache.drill.exec.planner.physical.PlannerSettings
org.apache.drill.exec.planner.logical.DrillProjectRel
org.apache.drill.exec.store.hive.HiveScan
org.apache.drill.exec.store.hive.HiveMetadataProvider
java.io.IOException
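The first lines of onMatch() show the retrieval idiom used throughout these examples: pull the matched rel off the call, then obtain PlannerSettings via PrelUtil.getPlannerSettings(call.getPlanner()). A minimal skeleton of a rule built around that idiom (the rule class itself is hypothetical, not from the Drill source):

import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.drill.exec.planner.logical.DrillScanRel;
import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.apache.drill.exec.planner.physical.PrelUtil;

// Hypothetical rule skeleton for illustration only.
public class ExampleSettingsRule extends RelOptRule {

  public ExampleSettingsRule() {
    super(operand(DrillScanRel.class, any()), "ExampleSettingsRule");
  }

  @Override
  public void onMatch(RelOptRuleCall call) {
    DrillScanRel scan = call.rel(0);
    PlannerSettings settings = PrelUtil.getPlannerSettings(call.getPlanner());
    // Consult whichever options the rewrite depends on, e.g. the filesystem
    // partition column label read in Example 27 above.
    String partitionLabel = settings.getFsPartitionColumnLabel();
    // ... decide whether to fire, then call.transformTo(newRel) ...
  }
}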

Example 28 with PlannerSettings

Use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by apache.

From class ConvertMetadataAggregateToDirectScanRule, method onMatch:

@Override
public void onMatch(RelOptRuleCall call) {
    MetadataAggRel agg = call.rel(0);
    DrillScanRel scan = call.rel(1);
    GroupScan oldGrpScan = scan.getGroupScan();
    PlannerSettings settings = PrelUtil.getPlannerSettings(call.getPlanner());
    // Only apply the rule for parquet group scan and for the case when required column metadata is present
    if (!(oldGrpScan instanceof ParquetGroupScan) || (oldGrpScan.getTableMetadata().getInterestingColumns() != null && !oldGrpScan.getTableMetadata().getInterestingColumns().containsAll(agg.getContext().interestingColumns()))) {
        return;
    }
    try {
        DirectGroupScan directScan = buildDirectScan(agg.getContext().interestingColumns(), scan, settings);
        if (directScan == null) {
            logger.warn("Unable to use parquet metadata for ANALYZE since some required metadata is absent within parquet metadata");
            return;
        }
        RelNode converted = new DrillDirectScanRel(scan.getCluster(), scan.getTraitSet().plus(DrillRel.DRILL_LOGICAL), directScan, scan.getRowType());
        if (agg.getContext().metadataLevel() != MetadataType.ROW_GROUP) {
            MetadataAggregateContext updatedContext = agg.getContext().toBuilder().createNewAggregations(false).build();
            converted = new MetadataAggRel(agg.getCluster(), agg.getTraitSet(), converted, updatedContext);
        }
        call.transformTo(converted);
    } catch (Exception e) {
        logger.warn("Unable to use parquet metadata for ANALYZE: {}", e.getMessage(), e);
    }
}
Also used:
org.apache.drill.exec.store.parquet.ParquetGroupScan
org.apache.drill.exec.store.direct.DirectGroupScan
org.apache.drill.exec.physical.base.GroupScan
org.apache.drill.exec.planner.physical.PlannerSettings
org.apache.calcite.rel.RelNode
org.apache.drill.exec.metastore.analyze.MetadataAggregateContext
java.io.IOException
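The guard at the top of onMatch() packs both applicability checks into one negated condition. The same logic in a more readable positive form, as a sketch (the helper class and method names are assumed; the types are inferred from the call sites above):

import java.util.List;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.exec.metastore.analyze.MetadataAggregateContext;
import org.apache.drill.exec.physical.base.GroupScan;
import org.apache.drill.exec.store.parquet.ParquetGroupScan;

final class RuleGuard {
  // The rule applies iff the scan is a Parquet group scan and its table
  // metadata covers every column the aggregation is interested in; as in
  // the original condition, a null interesting-columns list passes the check.
  static boolean ruleApplies(GroupScan scan, MetadataAggregateContext context) {
    if (!(scan instanceof ParquetGroupScan)) {
      return false;
    }
    List<SchemaPath> interesting = scan.getTableMetadata().getInterestingColumns();
    return interesting == null || interesting.containsAll(context.interestingColumns());
  }
}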

Example 29 with PlannerSettings

Use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by apache.

From class IndexSelector, method getCandidateIndexes:

/**
 * Run the index selection algorithm and return the top N indexes
 */
public void getCandidateIndexes(IndexConditionInfo.Builder infoBuilder, List<IndexGroup> coveringIndexes, List<IndexGroup> nonCoveringIndexes, List<IndexGroup> intersectIndexes) {
    RelOptPlanner planner = indexContext.getCall().getPlanner();
    PlannerSettings settings = PrelUtil.getPlannerSettings(planner);
    List<IndexGroup> candidateIndexes = Lists.newArrayList();
    logger.info("index_plan_info: Analyzing {} indexes for prefix matches: {}", indexPropList.size(), indexPropList);
    // analysis phase
    for (IndexProperties p : indexPropList) {
        analyzePrefixMatches(p);
        // only consider indexes that either have some leading prefix of the filter condition or
        // can satisfy the required collation
        if (p.numLeadingFilters() > 0 || p.satisfiesCollation()) {
            double selThreshold = p.isCovering() ? settings.getIndexCoveringSelThreshold() : settings.getIndexNonCoveringSelThreshold();
            // only consider indexes whose leading selectivity is within the threshold, or consider
            // all of them when full table scan is disabled, to avoid a CannotPlanException
            if (settings.isDisableFullTableScan() || p.getLeadingSelectivity() <= selThreshold) {
                IndexGroup index = new IndexGroup();
                index.addIndexProp(p);
                candidateIndexes.add(index);
            } else {
                if (p.getLeadingSelectivity() > selThreshold) {
                    logger.debug("Skipping index {}. The leading selectivity {} is larger than threshold {}", p.getIndexDesc().getIndexName(), p.getLeadingSelectivity(), selThreshold);
                }
            }
        }
    }
    if (candidateIndexes.size() == 0) {
        logger.info("index_plan_info: No suitable indexes found !");
        return;
    }
    int max_candidate_indexes = (int) PrelUtil.getPlannerSettings(planner).getIndexMaxChosenIndexesPerTable();
    // ranking phase: sort with the cost-based comparator; cheap enough
    // to be exercised even for few indexes
    if (candidateIndexes.size() > 1) {
        Collections.sort(candidateIndexes, new IndexComparator(planner, builder));
    }
    // Generate index intersections for ranking
    addIndexIntersections(candidateIndexes, infoBuilder, settings.getMaxIndexesToIntersect());
    // Sort again after intersect plan is added to the list
    if (candidateIndexes.size() > 1) {
        Collections.sort(candidateIndexes, new IndexComparator(planner, builder));
    }
    logger.info("index_plan_info: The top ranked indexes are: ");
    int count = 0;
    boolean foundCovering = false;
    boolean foundCoveringCollation = false;
    boolean foundNonCoveringCollation = false;
    // pick the best N indexes
    for (int i = 0; i < candidateIndexes.size(); i++) {
        IndexGroup index = candidateIndexes.get(i);
        if (index.numIndexes() == 1 && index.getIndexProps().get(0).isCovering()) {
            IndexProperties indexProps = index.getIndexProps().get(0);
            if (foundCoveringCollation) {
                // if previously we already found a higher ranked covering index that satisfies collation,
                // then skip this one (note that selectivity and cost considerations were already handled
                // by the ranking phase)
                logger.debug("index_plan_info: Skipping covering index {} because a higher ranked covering index with collation already exists.", indexProps.getIndexDesc().getIndexName());
                continue;
            }
            coveringIndexes.add(index);
            logger.info("index_plan_info: name: {}, covering, collation: {}, leadingSelectivity: {}, cost: {}", indexProps.getIndexDesc().getIndexName(), indexProps.satisfiesCollation(), indexProps.getLeadingSelectivity(), indexProps.getSelfCost(planner));
            count++;
            foundCovering = true;
            if (indexProps.satisfiesCollation()) {
                foundCoveringCollation = true;
            }
        } else if (index.numIndexes() == 1) {
            // non-covering
            IndexProperties indexProps = index.getIndexProps().get(0);
            // non-covering index does not have collation
            if (foundCoveringCollation || (foundCovering && !indexProps.satisfiesCollation())) {
                logger.debug("index_plan_info: Skipping non-covering index {} because it does not have collation and a higher ranked covering index already exists.", indexProps.getIndexDesc().getIndexName());
                continue;
            }
            if (indexProps.satisfiesCollation()) {
                foundNonCoveringCollation = true;
            }
            // all other non-covering indexes can be added to the list because 2 or more non-covering index could
            // be considered for intersection later; currently the index selector is not costing the index intersection
            nonCoveringIndexes.add(index);
            logger.info("index_plan_info: name: {}, non-covering, collation: {}, leadingSelectivity: {}, cost: {}", indexProps.getIndexDesc().getIndexName(), indexProps.satisfiesCollation(), indexProps.getLeadingSelectivity(), indexProps.getSelfCost(planner));
            count++;
        } else {
            // intersect indexes
            if (foundCoveringCollation || (foundCovering && !index.getIndexProps().get(index.numIndexes() - 1).satisfiesCollation()) || foundNonCoveringCollation) {
                continue;
            }
            IndexGroup intersectIndex = new IndexGroup();
            double isectLeadingSel = 1.0;
            String isectName = "Intersect-" + count;
            for (IndexProperties indexProps : index.getIndexProps()) {
                intersectIndex.addIndexProp(indexProps);
                isectLeadingSel *= indexProps.getLeadingSelectivity();
                logger.info("name: {}, {}, collation: {}, leadingSelectivity: {}, cost: {}", indexProps.getIndexDesc().getIndexName(), isectName, indexProps.satisfiesCollation(), indexProps.getLeadingSelectivity(), indexProps.getSelfCost(planner));
            }
            logger.info("name: {}, intersect-idx, collation: {}, leadingSelectivity: {}, cost: {}", isectName, index.getIndexProps().get(index.numIndexes() - 1).satisfiesCollation(), isectLeadingSel, index.getIndexProps().get(0).getIntersectCost(index, builder, planner));
            intersectIndexes.add(intersectIndex);
        }
        if (count == max_candidate_indexes) {
            break;
        }
    }
}
Also used:
org.apache.drill.exec.planner.physical.PlannerSettings
org.apache.calcite.plan.RelOptPlanner
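One detail worth noting in the intersect-index branch: the combined leading selectivity is the product of the members' leading selectivities, which amounts to assuming the leading filters are independent. A distilled sketch of that estimate (helper names are illustrative; IndexProperties is assumed to live in the same package as IndexSelector, org.apache.drill.exec.planner.index):

import java.util.List;

final class IntersectSelectivity {
  // Mirrors the multiplication loop in getCandidateIndexes(); e.g. members
  // with leading selectivities 0.1 and 0.2 combine to 0.02.
  static double leadingSelectivity(List<IndexProperties> members) {
    double sel = 1.0;
    for (IndexProperties p : members) {
      sel *= p.getLeadingSelectivity();
    }
    return sel;
  }
}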

Example 30 with PlannerSettings

Use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by apache.

From class ConvertCountToDirectScanRule, method onMatch:

@Override
public void onMatch(RelOptRuleCall call) {
    final Aggregate agg = call.rel(0);
    final TableScan scan = call.rel(call.rels.length - 1);
    final Project project = call.rels.length == 3 ? (Project) call.rel(1) : null;
    // Qualifying conditions for the rule:
    //    1) the aggregate has no GROUP BY keys,
    //    2) it contains no DISTINCT aggregate calls,
    //    3) additional checks are done further below.
    if (agg.getGroupCount() > 0 || agg.containsDistinctCall()) {
        return;
    }
    DrillTable drillTable = DrillRelOptUtil.getDrillTable(scan);
    if (drillTable == null) {
        logger.debug("Rule does not apply since an eligible drill table instance was not found.");
        return;
    }
    Object selection = drillTable.getSelection();
    if (!(selection instanceof FormatSelection)) {
        logger.debug("Rule does not apply since only Parquet file format is eligible.");
        return;
    }
    PlannerSettings settings = call.getPlanner().getContext().unwrap(PlannerSettings.class);
    // Rule is applicable only if the statistics for row count and null count are available from the metadata.
    FormatSelection formatSelection = (FormatSelection) selection;
    // Rule cannot be applied if the selection had a wildcard, since the total row count cannot be read from the parent directory.
    if (formatSelection.getSelection().hadWildcard()) {
        logger.debug("Rule does not apply when there is a wild card since the COUNT could not be determined from metadata.");
        return;
    }
    Pair<Boolean, Metadata_V4.MetadataSummary> status = checkMetadataForScanStats(settings, drillTable, formatSelection);
    if (!status.getLeft()) {
        logger.debug("Rule does not apply since MetadataSummary metadata was not found.");
        return;
    }
    Metadata_V4.MetadataSummary metadataSummary = status.getRight();
    Map<String, Long> result = collectCounts(settings, metadataSummary, agg, scan, project);
    logger.trace("Calculated the following aggregate counts: {}", result);
    // if counts could not be determined, rule won't be applied
    if (result.isEmpty()) {
        logger.debug("Rule does not apply since one or more COUNTs could not be determined from metadata.");
        return;
    }
    Path summaryFileName = Metadata.getSummaryFileName(formatSelection.getSelection().getSelectionRoot());
    final RelDataType scanRowType = CountToDirectScanUtils.constructDataType(agg, result.keySet());
    final DynamicPojoRecordReader<Long> reader = new DynamicPojoRecordReader<>(CountToDirectScanUtils.buildSchema(scanRowType.getFieldNames()), Collections.singletonList(new ArrayList<>(result.values())));
    final ScanStats scanStats = new ScanStats(ScanStats.GroupScanProperty.EXACT_ROW_COUNT, 1, 1, scanRowType.getFieldCount());
    final MetadataDirectGroupScan directScan = new MetadataDirectGroupScan(reader, summaryFileName, 1, scanStats, true, false);
    final DrillDirectScanRel newScan = new DrillDirectScanRel(scan.getCluster(), scan.getTraitSet().plus(DrillRel.DRILL_LOGICAL), directScan, scanRowType);
    final DrillProjectRel newProject = new DrillProjectRel(agg.getCluster(), agg.getTraitSet().plus(DrillRel.DRILL_LOGICAL), newScan, CountToDirectScanUtils.prepareFieldExpressions(scanRowType), agg.getRowType());
    call.transformTo(newProject);
}
Also used:
org.apache.hadoop.fs.Path
org.apache.drill.common.expression.SchemaPath
org.apache.calcite.rel.core.TableScan
org.apache.drill.exec.planner.physical.PlannerSettings
org.apache.drill.exec.store.pojo.DynamicPojoRecordReader
java.util.ArrayList
org.apache.drill.exec.store.dfs.FormatSelection
org.apache.calcite.rel.type.RelDataType
org.apache.calcite.rel.core.Project
org.apache.drill.exec.store.direct.MetadataDirectGroupScan
org.apache.drill.exec.store.parquet.metadata.Metadata_V4
org.apache.calcite.rel.core.Aggregate
org.apache.drill.exec.physical.base.ScanStats
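Unlike Examples 27-29, this rule reaches PlannerSettings by unwrapping the planner's context directly rather than through PrelUtil. The two idioms seen on this page should be interchangeable, assuming PrelUtil.getPlannerSettings(RelOptPlanner) is a thin wrapper over the context unwrap; a side-by-side sketch (class name illustrative):

import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.apache.drill.exec.planner.physical.PrelUtil;

final class SettingsAccess {
  // Idiom used in Examples 27-29.
  static PlannerSettings viaPrelUtil(RelOptRuleCall call) {
    return PrelUtil.getPlannerSettings(call.getPlanner());
  }

  // Idiom used in Example 30; assumed to yield the same instance.
  static PlannerSettings viaContextUnwrap(RelOptRuleCall call) {
    return call.getPlanner().getContext().unwrap(PlannerSettings.class);
  }
}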

Aggregations

org.apache.drill.exec.planner.physical.PlannerSettings: 38
org.apache.drill.exec.planner.logical.DrillScanRel: 16
org.apache.calcite.rex.RexNode: 13
org.apache.drill.exec.physical.base.GroupScan: 13
org.apache.calcite.rel.core.TableScan: 12
org.apache.calcite.rex.RexBuilder: 12
org.apache.calcite.rel.type.RelDataType: 11
org.apache.calcite.plan.RelOptRuleCall: 10
java.io.IOException: 9
org.apache.calcite.rel.type.RelDataTypeFactory: 8
org.apache.drill.common.expression.SchemaPath: 8
org.apache.drill.exec.planner.logical.DrillFilterRel: 8
org.apache.drill.exec.store.hive.HiveScan: 8
org.apache.calcite.rel.RelNode: 7
org.apache.calcite.rel.core.AggregateCall: 6
org.apache.calcite.sql.fun.SqlCountAggFunction: 6
org.apache.drill.exec.planner.logical.DrillProjectRel: 6
java.util.List: 5
org.apache.drill.exec.store.dfs.FormatSelection: 5
java.util.ArrayList: 4