
Example 1 with DrillAnalyzeRel

Use of org.apache.drill.exec.planner.logical.DrillAnalyzeRel in project drill by apache.

From the class MetastoreAnalyzeTableHandler, method convertToDrel.

/**
 * Converts to Drill logical plan
 */
private DrillRel convertToDrel(RelNode relNode, SqlMetastoreAnalyzeTable sqlAnalyzeTable, DrillTableInfo drillTableInfo) throws ForemanSetupException, IOException {
    RelBuilder relBuilder = LOGICAL_BUILDER.create(relNode.getCluster(), null);
    DrillTable table = drillTableInfo.drillTable();
    AnalyzeInfoProvider analyzeInfoProvider = table.getGroupScan().getAnalyzeInfoProvider();
    List<String> schemaPath = drillTableInfo.schemaPath();
    String pluginName = schemaPath.get(0);
    String workspaceName = Strings.join(schemaPath.subList(1, schemaPath.size()), AbstractSchema.SCHEMA_SEPARATOR);
    String tableName = drillTableInfo.tableName();
    TableInfo tableInfo = TableInfo.builder()
        .name(tableName)
        .owner(table.getUserName())
        .type(analyzeInfoProvider.getTableTypeName())
        .storagePlugin(pluginName)
        .workspace(workspaceName)
        .build();
    ColumnNamesOptions columnNamesOptions = new ColumnNamesOptions(context.getOptions());
    List<String> segmentColumns = analyzeInfoProvider.getSegmentColumns(table, columnNamesOptions).stream()
        .map(SchemaPath::getRootSegmentPath)
        .collect(Collectors.toList());
    List<NamedExpression> segmentExpressions = segmentColumns.stream()
        .map(partitionName -> new NamedExpression(SchemaPath.getSimplePath(partitionName), FieldReference.getWithQuotedRef(partitionName)))
        .collect(Collectors.toList());
    List<MetadataInfo> rowGroupsInfo = Collections.emptyList();
    List<MetadataInfo> filesInfo = Collections.emptyList();
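    // maps segment nesting depth to the metadata entries handled at that depth (inferred from its use in the SEGMENT handling below)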
    Multimap<Integer, MetadataInfo> segments = ArrayListMultimap.create();
    BasicTablesRequests basicRequests;
    try {
        basicRequests = context.getMetastoreRegistry().get().tables().basicRequests();
    } catch (MetastoreException e) {
        logger.error("Error when obtaining Metastore instance for table {}", tableName, e);
        DrillRel convertedRelNode = convertToRawDrel(relBuilder
            .values(new String[] { MetastoreAnalyzeConstants.OK_FIELD_NAME, MetastoreAnalyzeConstants.SUMMARY_FIELD_NAME }, false, e.getMessage())
            .build());
        return new DrillScreenRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertedRelNode);
    }
    MetadataType metadataLevel = getMetadataType(sqlAnalyzeTable);
    List<SchemaPath> interestingColumns = sqlAnalyzeTable.getFieldNames();
    MetastoreTableInfo metastoreTableInfo = basicRequests.metastoreTableInfo(tableInfo);
    List<MetadataInfo> allMetaToHandle = null;
    List<MetadataInfo> metadataToRemove = new ArrayList<>();
    // Step 1: checks whether an incremental analyze may be produced
    if (metastoreTableInfo.isExists()) {
        RelNode finalRelNode = relNode;
        CheckedSupplier<TableScan, SqlUnsupportedException> tableScanSupplier = () -> DrillRelOptUtil.findScan(convertToDrel(finalRelNode.getInput(0)));
        MetadataInfoCollector metadataInfoCollector = analyzeInfoProvider.getMetadataInfoCollector(basicRequests, tableInfo,
            (FormatSelection) table.getSelection(), context.getPlannerSettings(), tableScanSupplier,
            interestingColumns, metadataLevel, segmentColumns.size());
        if (!metadataInfoCollector.isOutdated()) {
            DrillRel convertedRelNode = convertToRawDrel(relBuilder
                .values(new String[] { MetastoreAnalyzeConstants.OK_FIELD_NAME, MetastoreAnalyzeConstants.SUMMARY_FIELD_NAME }, false, "Table metadata is up to date, analyze wasn't performed.")
                .build());
            return new DrillScreenRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertedRelNode);
        }
        // updates the scan to read updated / new files and passes removed files to the metadata handler
        relNode = relNode.copy(relNode.getTraitSet(), Collections.singletonList(metadataInfoCollector.getPrunedScan()));
        filesInfo = metadataInfoCollector.getFilesInfo();
        segments = metadataInfoCollector.getSegmentsInfo();
        rowGroupsInfo = metadataInfoCollector.getRowGroupsInfo();
        allMetaToHandle = metadataInfoCollector.getAllMetaToHandle();
        metadataToRemove = metadataInfoCollector.getMetadataToRemove();
    }
    // Step 2: constructs plan for producing analyze
    DrillRel convertedRelNode = convertToRawDrel(relNode);
    boolean createNewAggregations = true;
    // List of columns for which statistics should be collected: interesting columns + segment columns
    List<SchemaPath> statisticsColumns = interestingColumns == null ? null : new ArrayList<>(interestingColumns);
    if (statisticsColumns != null) {
        segmentColumns.stream().map(SchemaPath::getSimplePath).forEach(statisticsColumns::add);
    }
    SchemaPath locationField = analyzeInfoProvider.getLocationField(columnNamesOptions);
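    // Builds a chain of metadata aggregations from the most granular level the provider supports
    // up to the table level: row groups, then files, then segments (innermost first), then the table itself.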
    if (analyzeInfoProvider.supportsMetadataType(MetadataType.ROW_GROUP) && metadataLevel.includes(MetadataType.ROW_GROUP)) {
        MetadataHandlerContext handlerContext = MetadataHandlerContext.builder()
            .tableInfo(tableInfo)
            .metadataToHandle(rowGroupsInfo)
            .metadataType(MetadataType.ROW_GROUP)
            .depthLevel(segmentExpressions.size())
            .segmentColumns(segmentColumns)
            .build();
        convertedRelNode = getRowGroupAggRelNode(segmentExpressions, convertedRelNode, createNewAggregations, statisticsColumns, handlerContext);
        createNewAggregations = false;
        locationField = SchemaPath.getSimplePath(MetastoreAnalyzeConstants.LOCATION_FIELD);
    }
    if (analyzeInfoProvider.supportsMetadataType(MetadataType.FILE) && metadataLevel.includes(MetadataType.FILE)) {
        MetadataHandlerContext handlerContext = MetadataHandlerContext.builder()
            .tableInfo(tableInfo)
            .metadataToHandle(filesInfo)
            .metadataType(MetadataType.FILE)
            .depthLevel(segmentExpressions.size())
            .segmentColumns(segmentColumns)
            .build();
        convertedRelNode = getFileAggRelNode(segmentExpressions, convertedRelNode, createNewAggregations, statisticsColumns, locationField, handlerContext);
        locationField = SchemaPath.getSimplePath(MetastoreAnalyzeConstants.LOCATION_FIELD);
        createNewAggregations = false;
    }
    if (analyzeInfoProvider.supportsMetadataType(MetadataType.SEGMENT) && metadataLevel.includes(MetadataType.SEGMENT)) {
        for (int i = segmentExpressions.size(); i > 0; i--) {
            MetadataHandlerContext handlerContext = MetadataHandlerContext.builder()
                .tableInfo(tableInfo)
                .metadataToHandle(new ArrayList<>(segments.get(i - 1)))
                .metadataType(MetadataType.SEGMENT)
                .depthLevel(i)
                .segmentColumns(segmentColumns.subList(0, i))
                .build();
            convertedRelNode = getSegmentAggRelNode(segmentExpressions, convertedRelNode, createNewAggregations, statisticsColumns, locationField, i, handlerContext);
            locationField = SchemaPath.getSimplePath(MetastoreAnalyzeConstants.LOCATION_FIELD);
            createNewAggregations = false;
        }
    }
    if (analyzeInfoProvider.supportsMetadataType(MetadataType.TABLE) && metadataLevel.includes(MetadataType.TABLE)) {
        MetadataHandlerContext handlerContext = MetadataHandlerContext.builder()
            .tableInfo(tableInfo)
            .metadataToHandle(Collections.emptyList())
            .metadataType(MetadataType.TABLE)
            .depthLevel(segmentExpressions.size())
            .segmentColumns(segmentColumns)
            .build();
        convertedRelNode = getTableAggRelNode(convertedRelNode, createNewAggregations, statisticsColumns, locationField, handlerContext);
    } else {
        throw new IllegalStateException("Analyze table with NONE level");
    }
    boolean useStatistics = context.getOptions().getOption(PlannerSettings.STATISTICS_USE);
    SqlNumericLiteral samplePercentLiteral = sqlAnalyzeTable.getSamplePercent();
    double samplePercent = samplePercentLiteral == null ? 100.0 : samplePercentLiteral.intValue(true);
    // Step 3: adds rel nodes for producing statistics analyze if required
    RelNode analyzeRel = useStatistics
        ? new DrillAnalyzeRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertToRawDrel(relNode), samplePercent)
        : convertToRawDrel(relBuilder.values(new String[] { "" }, "").build());
    MetadataControllerContext metadataControllerContext = MetadataControllerContext.builder()
        .tableInfo(tableInfo)
        .metastoreTableInfo(metastoreTableInfo)
        .location(((FormatSelection) table.getSelection()).getSelection().getSelectionRoot())
        .interestingColumns(interestingColumns)
        .segmentColumns(segmentColumns)
        .metadataToHandle(allMetaToHandle)
        .metadataToRemove(metadataToRemove)
        .analyzeMetadataLevel(metadataLevel)
        .build();
    convertedRelNode = new MetadataControllerRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertedRelNode, analyzeRel, metadataControllerContext);
    return new DrillScreenRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertedRelNode);
}
Also used : MetadataType(org.apache.drill.metastore.metadata.MetadataType) Arrays(java.util.Arrays) UserException(org.apache.drill.common.exceptions.UserException) DrillRel(org.apache.drill.exec.planner.logical.DrillRel) LoggerFactory(org.slf4j.LoggerFactory) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) ArrayListMultimap(org.apache.drill.shaded.guava.com.google.common.collect.ArrayListMultimap) MetadataHandlerContext(org.apache.drill.exec.metastore.analyze.MetadataHandlerContext) PhysicalOperator(org.apache.drill.exec.physical.base.PhysicalOperator) SqlNode(org.apache.calcite.sql.SqlNode) RelBuilder(org.apache.calcite.tools.RelBuilder) FieldReference(org.apache.drill.common.expression.FieldReference) Multimap(org.apache.drill.shaded.guava.com.google.common.collect.Multimap) SqlSelect(org.apache.calcite.sql.SqlSelect) BasicTablesRequests(org.apache.drill.metastore.components.tables.BasicTablesRequests) SchemaPath(org.apache.drill.common.expression.SchemaPath) MetastoreAnalyzeConstants(org.apache.drill.exec.metastore.analyze.MetastoreAnalyzeConstants) Collectors(java.util.stream.Collectors) List(java.util.List) ValidationException(org.apache.calcite.tools.ValidationException) ForemanSetupException(org.apache.drill.exec.work.foreman.ForemanSetupException) SqlNumericLiteral(org.apache.calcite.sql.SqlNumericLiteral) MetadataInfoCollector(org.apache.drill.exec.metastore.analyze.MetadataInfoCollector) ExecConstants(org.apache.drill.exec.ExecConstants) SqlMetastoreAnalyzeTable(org.apache.drill.exec.planner.sql.parser.SqlMetastoreAnalyzeTable) MetadataAggregateContext(org.apache.drill.exec.metastore.analyze.MetadataAggregateContext) TableScan(org.apache.calcite.rel.core.TableScan) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreException(org.apache.drill.metastore.exceptions.MetastoreException) MetadataControllerContext(org.apache.drill.exec.metastore.analyze.MetadataControllerContext) Pointer(org.apache.drill.exec.util.Pointer) DrillTable(org.apache.drill.exec.planner.logical.DrillTable) ColumnNamesOptions(org.apache.drill.exec.metastore.ColumnNamesOptions) ArrayList(java.util.ArrayList) SqlLiteral(org.apache.calcite.sql.SqlLiteral) SqlUnsupportedException(org.apache.drill.exec.work.foreman.SqlUnsupportedException) NamedExpression(org.apache.drill.common.logical.data.NamedExpression) MetadataAggRel(org.apache.drill.exec.planner.logical.MetadataAggRel) DrillRelOptUtil(org.apache.drill.exec.planner.common.DrillRelOptUtil) FormatSelection(org.apache.drill.exec.store.dfs.FormatSelection) SqlIdentifier(org.apache.calcite.sql.SqlIdentifier) MetadataHandlerRel(org.apache.drill.exec.planner.logical.MetadataHandlerRel) CheckedSupplier(org.apache.drill.common.util.function.CheckedSupplier) RelDataType(org.apache.calcite.rel.type.RelDataType) SqlParserPos(org.apache.calcite.sql.parser.SqlParserPos) Logger(org.slf4j.Logger) DrillAnalyzeRel(org.apache.drill.exec.planner.logical.DrillAnalyzeRel) IOException(java.io.IOException) RelNode(org.apache.calcite.rel.RelNode) Prel(org.apache.drill.exec.planner.physical.Prel) AbstractSchema(org.apache.drill.exec.store.AbstractSchema) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) RelConversionException(org.apache.calcite.tools.RelConversionException) Strings(org.apache.parquet.Strings) DrillScreenRel(org.apache.drill.exec.planner.logical.DrillScreenRel) PhysicalPlan(org.apache.drill.exec.physical.PhysicalPlan) LOGICAL_BUILDER(org.apache.drill.exec.planner.logical.DrillRelFactories.LOGICAL_BUILDER) 
SqlNodeList(org.apache.calcite.sql.SqlNodeList) MetadataControllerRel(org.apache.drill.exec.planner.logical.MetadataControllerRel) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) Collections(java.util.Collections) AnalyzeInfoProvider(org.apache.drill.exec.metastore.analyze.AnalyzeInfoProvider)
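
For orientation, below is a minimal self-contained sketch of the plugin/workspace split performed at the top of convertToDrel. The class name TableInfoSketch, the concrete path values, and the literal "." separator (standing in for AbstractSchema.SCHEMA_SEPARATOR) are illustrative assumptions, not taken from the Drill sources.

import java.util.Arrays;
import java.util.List;
import org.apache.drill.metastore.metadata.TableInfo;

public class TableInfoSketch {
    public static void main(String[] args) {
        // Hypothetical schema path for a table referenced as dfs.tmp.lineitem
        List<String> schemaPath = Arrays.asList("dfs", "tmp");
        String pluginName = schemaPath.get(0); // "dfs" - the storage plugin
        // "." stands in for AbstractSchema.SCHEMA_SEPARATOR used in convertToDrel()
        String workspaceName = String.join(".", schemaPath.subList(1, schemaPath.size())); // "tmp"
        TableInfo tableInfo = TableInfo.builder()
            .name("lineitem") // illustrative table name
            .storagePlugin(pluginName)
            .workspace(workspaceName)
            .build();
        System.out.println(pluginName + " / " + workspaceName + " -> " + tableInfo);
    }
}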

Example 2 with DrillAnalyzeRel

Use of org.apache.drill.exec.planner.logical.DrillAnalyzeRel in project drill by apache.

From the class AnalyzeTableHandler, method convertToDrel.

/* Converts to Drill logical plan */
protected DrillRel convertToDrel(RelNode relNode, AbstractSchema schema, String analyzeTableName, double samplePercent) throws SqlUnsupportedException {
    DrillRel convertedRelNode = convertToRawDrel(relNode);
    final RelNode analyzeRel = new DrillAnalyzeRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertedRelNode, samplePercent);
    final RelNode writerRel = new DrillWriterRel(analyzeRel.getCluster(), analyzeRel.getTraitSet(), analyzeRel, schema.appendToStatsTable(analyzeTableName));
    return new DrillScreenRel(writerRel.getCluster(), writerRel.getTraitSet(), writerRel);
}
Also used : DrillAnalyzeRel(org.apache.drill.exec.planner.logical.DrillAnalyzeRel) RelNode(org.apache.calcite.rel.RelNode) DrillRel(org.apache.drill.exec.planner.logical.DrillRel) DrillWriterRel(org.apache.drill.exec.planner.logical.DrillWriterRel) DrillScreenRel(org.apache.drill.exec.planner.logical.DrillScreenRel)
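
Both convertToDrel variants finish by wrapping the last logical node in a DrillScreenRel so results can be returned to the client. The helper below is a minimal sketch of that pattern together with a plan dump via Calcite's RelOptUtil.toString; the class AnalyzePlanDebugUtil itself is hypothetical and not part of Drill.

import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelNode;
import org.apache.drill.exec.planner.logical.DrillRel;
import org.apache.drill.exec.planner.logical.DrillScreenRel;

final class AnalyzePlanDebugUtil {

    // Mirrors how both convertToDrel() methods above terminate their plans:
    // the final logical node is wrapped in a DrillScreenRel.
    static DrillScreenRel wrapInScreen(DrillRel root) {
        return new DrillScreenRel(root.getCluster(), root.getTraitSet(), root);
    }

    // Renders the logical tree (e.g. Screen <- Writer <- Analyze <- input) for inspection.
    static String describe(RelNode root) {
        return RelOptUtil.toString(root);
    }
}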

Example 3 with DrillAnalyzeRel

Use of org.apache.drill.exec.planner.logical.DrillAnalyzeRel in project drill by apache.

From the class AnalyzePrule, method onMatch.

@Override
public void onMatch(RelOptRuleCall call) {
    final DrillAnalyzeRel analyze = call.rel(0);
    final RelNode input = call.rel(1);
    final SingleRel newAnalyze;
    final RelTraitSet singleDistTrait = call.getPlanner().emptyTraitSet().plus(Prel.DRILL_PHYSICAL).plus(DrillDistributionTrait.SINGLETON);
    // Generate parallel ANALYZE plan:
    // Writer<-Unpivot<-StatsAgg(Phase2)<-Exchange<-StatsAgg(Phase1)<-Scan
    final RelTraitSet traits = input.getTraitSet().plus(Prel.DRILL_PHYSICAL).plus(DrillDistributionTrait.DEFAULT);
    RelNode convertedInput = convert(input, traits);
    final List<String> mapFields1 = Lists.newArrayList(PHASE_1_FUNCTIONS);
    final Map<String, String> mapFields2 = Maps.newHashMap(PHASE_2_FUNCTIONS);
    final List<String> mapFields3 = Lists.newArrayList(UNPIVOT_FUNCTIONS);
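    // Thread the column name and type fields through all three phases
    // alongside the per-column statistics functions.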
    mapFields1.add(0, Statistic.COLNAME);
    mapFields1.add(1, Statistic.COLTYPE);
    mapFields2.put(Statistic.COLNAME, Statistic.COLNAME);
    mapFields2.put(Statistic.COLTYPE, Statistic.COLTYPE);
    mapFields3.add(0, Statistic.COLNAME);
    mapFields3.add(1, Statistic.COLTYPE);
    // STATSAGG->EXCHANGE->STATSMERGE->UNPIVOT
    if (analyze.getSamplePercent() < 100.0) {
        // If a sample percent is specified, add a filter for Bernoulli sampling
        RexBuilder builder = convertedInput.getCluster().getRexBuilder();
        RexNode sampleCondition;
        if (PrelUtil.getSettings(convertedInput.getCluster()).getOptions().getOption(ExecConstants.DETERMINISTIC_SAMPLING_VALIDATOR)) {
            sampleCondition = builder.makeCall(SqlStdOperatorTable.LESS_THAN_OR_EQUAL,
                builder.makeCall(SqlStdOperatorTable.RAND, builder.makeExactLiteral(BigDecimal.valueOf(1))),
                builder.makeExactLiteral(BigDecimal.valueOf(analyze.getSamplePercent() / 100.0)));
        } else {
            sampleCondition = builder.makeCall(SqlStdOperatorTable.LESS_THAN_OR_EQUAL,
                builder.makeCall(SqlStdOperatorTable.RAND),
                builder.makeExactLiteral(BigDecimal.valueOf(analyze.getSamplePercent() / 100.0)));
        }
        convertedInput = new FilterPrel(convertedInput.getCluster(), convertedInput.getTraitSet(), convertedInput, sampleCondition);
    }
    final StatsAggPrel statsAggPrel = new StatsAggPrel(analyze.getCluster(), traits, convertedInput, PHASE_1_FUNCTIONS);
    UnionExchangePrel exch = new UnionExchangePrel(statsAggPrel.getCluster(), singleDistTrait, statsAggPrel);
    final StatsMergePrel statsMergePrel = new StatsMergePrel(exch.getCluster(), singleDistTrait, exch, mapFields2, analyze.getSamplePercent());
    newAnalyze = new UnpivotMapsPrel(statsMergePrel.getCluster(), singleDistTrait, statsMergePrel, mapFields3);
    call.transformTo(newAnalyze);
}
Also used : RelTraitSet(org.apache.calcite.plan.RelTraitSet) SingleRel(org.apache.calcite.rel.SingleRel) DrillAnalyzeRel(org.apache.drill.exec.planner.logical.DrillAnalyzeRel) RelNode(org.apache.calcite.rel.RelNode) RexBuilder(org.apache.calcite.rex.RexBuilder) RexNode(org.apache.calcite.rex.RexNode)
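
The sampling branch above keeps a row whenever RAND() <= samplePercent / 100.0. The standalone sketch below illustrates the same Bernoulli sampling idea in plain Java; the class name, fixed seed, and row values are illustrative only.

import java.util.List;
import java.util.Random;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

public class BernoulliSampleSketch {
    public static void main(String[] args) {
        double samplePercent = 20.0; // illustrative, plays the role of analyze.getSamplePercent()
        Random rand = new Random(1); // fixed seed, loosely analogous to the deterministic sampling option
        List<Integer> rows = IntStream.rangeClosed(1, 1000).boxed().collect(Collectors.toList());
        // Keep each row independently with probability samplePercent / 100.0,
        // just as the FilterPrel keeps rows where RAND() <= samplePercent / 100.0.
        List<Integer> sampled = rows.stream()
            .filter(r -> rand.nextDouble() <= samplePercent / 100.0)
            .collect(Collectors.toList());
        System.out.println("kept " + sampled.size() + " of " + rows.size() + " rows");
    }
}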

Aggregations

RelNode (org.apache.calcite.rel.RelNode)3
DrillAnalyzeRel (org.apache.drill.exec.planner.logical.DrillAnalyzeRel)3
DrillRel (org.apache.drill.exec.planner.logical.DrillRel)2
DrillScreenRel (org.apache.drill.exec.planner.logical.DrillScreenRel)2
IOException (java.io.IOException)1
ArrayList (java.util.ArrayList)1
Arrays (java.util.Arrays)1
Collections (java.util.Collections)1
List (java.util.List)1
Collectors (java.util.stream.Collectors)1
RelTraitSet (org.apache.calcite.plan.RelTraitSet)1
SingleRel (org.apache.calcite.rel.SingleRel)1
TableScan (org.apache.calcite.rel.core.TableScan)1
RelDataType (org.apache.calcite.rel.type.RelDataType)1
RexBuilder (org.apache.calcite.rex.RexBuilder)1
RexNode (org.apache.calcite.rex.RexNode)1
SqlIdentifier (org.apache.calcite.sql.SqlIdentifier)1
SqlLiteral (org.apache.calcite.sql.SqlLiteral)1
SqlNode (org.apache.calcite.sql.SqlNode)1
SqlNodeList (org.apache.calcite.sql.SqlNodeList)1