Search in sources:

Example 1 with MetadataAggregateContext

Use of org.apache.drill.exec.metastore.analyze.MetadataAggregateContext in the Apache Drill project.

Class MetastoreAnalyzeTableHandler, method getTableAggRelNode.

/**
 * Builds the table-level metadata aggregation on top of the given rel node.
 *
 * <p>The input is wrapped in a {@code MetadataAggRel} whose context uses
 * {@code MetadataType.TABLE} and an empty list of group-by expressions; that node is then
 * wrapped in a {@code MetadataHandlerRel}.
 *
 * @param convertedRelNode      input rel node to aggregate
 * @param createNewAggregations flag passed through to the aggregate context
 * @param statisticsColumns     columns passed to the aggregate context as interesting columns
 * @param locationField         location column; placed first in the metadata column list
 * @param handlerContext        context handed to the resulting {@code MetadataHandlerRel}
 * @return the {@code MetadataHandlerRel} wrapping the table-level aggregation
 */
private DrillRel getTableAggRelNode(DrillRel convertedRelNode, boolean createNewAggregations, List<SchemaPath> statisticsColumns, SchemaPath locationField, MetadataHandlerContext handlerContext) {
    String lastModifiedTimeLabel = config.getContext().getOptions().getString(ExecConstants.IMPLICIT_LAST_MODIFIED_TIME_COLUMN_LABEL);
    SchemaPath lastModifiedTimeField = SchemaPath.getSimplePath(lastModifiedTimeLabel);

    // Column order is location first, then last-modified time.
    List<SchemaPath> tableMetadataColumns = Arrays.asList(locationField, lastModifiedTimeField);

    MetadataAggregateContext tableContext = MetadataAggregateContext.builder()
        .groupByExpressions(Collections.emptyList())
        .interestingColumns(statisticsColumns)
        .createNewAggregations(createNewAggregations)
        .metadataColumns(tableMetadataColumns)
        .metadataLevel(MetadataType.TABLE)
        .build();

    DrillRel aggregated = new MetadataAggRel(
        convertedRelNode.getCluster(),
        convertedRelNode.getTraitSet(),
        convertedRelNode,
        tableContext);

    return new MetadataHandlerRel(
        aggregated.getCluster(),
        aggregated.getTraitSet(),
        aggregated,
        handlerContext);
}
Also used : MetadataAggRel(org.apache.drill.exec.planner.logical.MetadataAggRel) SchemaPath(org.apache.drill.common.expression.SchemaPath) MetadataHandlerRel(org.apache.drill.exec.planner.logical.MetadataHandlerRel) MetadataAggregateContext(org.apache.drill.exec.metastore.analyze.MetadataAggregateContext)

Example 2 with MetadataAggregateContext

Use of org.apache.drill.exec.metastore.analyze.MetadataAggregateContext in the Apache Drill project.

Class MetastoreAnalyzeTableHandler, method getSegmentAggRelNode.

/**
 * Builds the segment-level metadata aggregation on top of the given rel node.
 *
 * <p>The aggregation groups by the first {@code segmentLevel} entries of
 * {@code segmentExpressions} and uses {@code MetadataType.SEGMENT}. The resulting
 * {@code MetadataAggRel} is wrapped in a {@code MetadataHandlerRel}.
 *
 * @param segmentExpressions    segment expressions; only the leading {@code segmentLevel} are used
 * @param convertedRelNode      input rel node to aggregate
 * @param createNewAggregations flag passed through to the aggregate context
 * @param statisticsColumns     columns passed to the aggregate context as interesting columns
 * @param locationField         location column; placed after the last-modified-time column
 * @param segmentLevel          number of leading segment expressions to group by
 * @param handlerContext        context handed to the resulting {@code MetadataHandlerRel}
 * @return the {@code MetadataHandlerRel} wrapping the segment-level aggregation
 */
private DrillRel getSegmentAggRelNode(List<NamedExpression> segmentExpressions, DrillRel convertedRelNode, boolean createNewAggregations, List<SchemaPath> statisticsColumns, SchemaPath locationField, int segmentLevel, MetadataHandlerContext handlerContext) {
    String lastModifiedTimeLabel = config.getContext().getOptions().getString(ExecConstants.IMPLICIT_LAST_MODIFIED_TIME_COLUMN_LABEL);
    SchemaPath lastModifiedTimeField = SchemaPath.getSimplePath(lastModifiedTimeLabel);
    List<SchemaPath> segmentMetadataColumns = Arrays.asList(lastModifiedTimeField, locationField);

    // Work on a private copy so the sub-list view is not backed by the caller's list.
    List<NamedExpression> segmentsCopy = new ArrayList<>(segmentExpressions);
    List<NamedExpression> leadingSegments = segmentsCopy.subList(0, segmentLevel);

    MetadataAggregateContext segmentContext = MetadataAggregateContext.builder()
        .groupByExpressions(leadingSegments)
        .interestingColumns(statisticsColumns)
        .createNewAggregations(createNewAggregations)
        .metadataColumns(segmentMetadataColumns)
        .metadataLevel(MetadataType.SEGMENT)
        .build();

    DrillRel aggregated = new MetadataAggRel(
        convertedRelNode.getCluster(),
        convertedRelNode.getTraitSet(),
        convertedRelNode,
        segmentContext);

    return new MetadataHandlerRel(
        aggregated.getCluster(),
        aggregated.getTraitSet(),
        aggregated,
        handlerContext);
}
Also used : MetadataAggRel(org.apache.drill.exec.planner.logical.MetadataAggRel) SchemaPath(org.apache.drill.common.expression.SchemaPath) NamedExpression(org.apache.drill.common.logical.data.NamedExpression) ArrayList(java.util.ArrayList) MetadataHandlerRel(org.apache.drill.exec.planner.logical.MetadataHandlerRel) MetadataAggregateContext(org.apache.drill.exec.metastore.analyze.MetadataAggregateContext)

Example 3 with MetadataAggregateContext

Use of org.apache.drill.exec.metastore.analyze.MetadataAggregateContext in the Apache Drill project.

Class MetastoreAnalyzeTableHandler, method getFileAggRelNode.

/**
 * Builds the file-level metadata aggregation on top of the given rel node.
 *
 * <p>The aggregation groups by all {@code segmentExpressions} followed by a location
 * expression built from {@code locationField}, and uses {@code MetadataType.FILE}. The
 * resulting {@code MetadataAggRel} is wrapped in a {@code MetadataHandlerRel}.
 *
 * @param segmentExpressions    segment expressions that prefix the group-by list
 * @param convertedRelNode      input rel node to aggregate
 * @param createNewAggregations flag passed through to the aggregate context
 * @param statisticsColumns     columns passed to the aggregate context as interesting columns
 * @param locationField         location column; used both as a metadata column and as the
 *                              source of the trailing group-by expression
 * @param handlerContext        context handed to the resulting {@code MetadataHandlerRel}
 * @return the {@code MetadataHandlerRel} wrapping the file-level aggregation
 */
private DrillRel getFileAggRelNode(List<NamedExpression> segmentExpressions, DrillRel convertedRelNode, boolean createNewAggregations, List<SchemaPath> statisticsColumns, SchemaPath locationField, MetadataHandlerContext handlerContext) {
    String lastModifiedTimeLabel = config.getContext().getOptions().getString(ExecConstants.IMPLICIT_LAST_MODIFIED_TIME_COLUMN_LABEL);
    SchemaPath lastModifiedTimeField = SchemaPath.getSimplePath(lastModifiedTimeLabel);
    List<SchemaPath> fileMetadataColumns = Arrays.asList(lastModifiedTimeField, locationField);

    // Group by every segment expression plus the file location, on a copy of the caller's list.
    List<NamedExpression> groupBy = new ArrayList<>(segmentExpressions);
    groupBy.add(new NamedExpression(locationField, FieldReference.getWithQuotedRef(MetastoreAnalyzeConstants.LOCATION_FIELD)));

    MetadataAggregateContext fileContext = MetadataAggregateContext.builder()
        .groupByExpressions(groupBy)
        .interestingColumns(statisticsColumns)
        .createNewAggregations(createNewAggregations)
        .metadataColumns(fileMetadataColumns)
        .metadataLevel(MetadataType.FILE)
        .build();

    DrillRel aggregated = new MetadataAggRel(
        convertedRelNode.getCluster(),
        convertedRelNode.getTraitSet(),
        convertedRelNode,
        fileContext);

    return new MetadataHandlerRel(
        aggregated.getCluster(),
        aggregated.getTraitSet(),
        aggregated,
        handlerContext);
}
Also used : MetadataAggRel(org.apache.drill.exec.planner.logical.MetadataAggRel) SchemaPath(org.apache.drill.common.expression.SchemaPath) NamedExpression(org.apache.drill.common.logical.data.NamedExpression) ArrayList(java.util.ArrayList) MetadataHandlerRel(org.apache.drill.exec.planner.logical.MetadataHandlerRel) MetadataAggregateContext(org.apache.drill.exec.metastore.analyze.MetadataAggregateContext)

Example 4 with MetadataAggregateContext

Use of org.apache.drill.exec.metastore.analyze.MetadataAggregateContext in the Apache Drill project.

Class ConvertMetadataAggregateToDirectScanRule, method onMatch.

/**
 * Attempts to replace the scan beneath a {@code MetadataAggRel} with a direct scan.
 *
 * <p>The rule does nothing unless the group scan is a {@code ParquetGroupScan} and the required
 * column metadata is present. When the direct scan can be built, the matched aggregate is
 * transformed either to the direct scan alone (for {@code MetadataType.ROW_GROUP}) or to a
 * {@code MetadataAggRel} over the direct scan with {@code createNewAggregations} set to
 * {@code false} (for every other level). Any exception raised while building the replacement
 * is logged as a warning and the plan is left unchanged.
 *
 * @param call rule call whose first rel is the metadata aggregate and second rel is the scan
 */
@Override
public void onMatch(RelOptRuleCall call) {
    MetadataAggRel aggregate = call.rel(0);
    DrillScanRel scanRel = call.rel(1);
    GroupScan groupScan = scanRel.getGroupScan();
    PlannerSettings plannerSettings = PrelUtil.getPlannerSettings(call.getPlanner());

    // Only apply the rule for parquet group scan ...
    if (!(groupScan instanceof ParquetGroupScan)) {
        return;
    }
    // ... and for the case when required column metadata is present
    if (groupScan.getTableMetadata().getInterestingColumns() != null
            && !groupScan.getTableMetadata().getInterestingColumns().containsAll(aggregate.getContext().interestingColumns())) {
        return;
    }

    try {
        DirectGroupScan directScan = buildDirectScan(aggregate.getContext().interestingColumns(), scanRel, plannerSettings);
        if (directScan == null) {
            logger.warn("Unable to use parquet metadata for ANALYZE since some required metadata is absent within parquet metadata");
            return;
        }

        RelNode directScanRel = new DrillDirectScanRel(
            scanRel.getCluster(),
            scanRel.getTraitSet().plus(DrillRel.DRILL_LOGICAL),
            directScan,
            scanRel.getRowType());

        RelNode replacement;
        if (aggregate.getContext().metadataLevel() == MetadataType.ROW_GROUP) {
            // Row-group level: the direct scan alone replaces the aggregate.
            replacement = directScanRel;
        } else {
            // Other levels keep the aggregate on top of the direct scan, with new aggregations disabled.
            MetadataAggregateContext reusedAggregationsContext = aggregate.getContext().toBuilder()
                .createNewAggregations(false)
                .build();
            replacement = new MetadataAggRel(aggregate.getCluster(), aggregate.getTraitSet(), directScanRel, reusedAggregationsContext);
        }
        call.transformTo(replacement);
    } catch (Exception e) {
        logger.warn("Unable to use parquet metadata for ANALYZE: {}", e.getMessage(), e);
    }
}
Also used : ParquetGroupScan(org.apache.drill.exec.store.parquet.ParquetGroupScan) DirectGroupScan(org.apache.drill.exec.store.direct.DirectGroupScan) GroupScan(org.apache.drill.exec.physical.base.GroupScan) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) RelNode(org.apache.calcite.rel.RelNode) MetadataAggregateContext(org.apache.drill.exec.metastore.analyze.MetadataAggregateContext) IOException(java.io.IOException)

Example 5 with MetadataAggregateContext

Use of org.apache.drill.exec.metastore.analyze.MetadataAggregateContext in the Apache Drill project.

Class MetastoreAnalyzeTableHandler, method getRowGroupAggRelNode.

/**
 * Builds the row-group-level metadata aggregation on top of the given rel node.
 *
 * <p>The implicit column names (fully qualified name, last modified time, row group index,
 * row group start and row group length) are read from the session options. The group-by
 * expressions come from {@code getRowGroupExpressions}, and the aggregation uses
 * {@code MetadataType.ROW_GROUP}. The resulting {@code MetadataAggRel} is wrapped in a
 * {@code MetadataHandlerRel}.
 *
 * @param segmentExpressions    segment expressions forwarded to {@code getRowGroupExpressions}
 * @param convertedRelNode      input rel node to aggregate
 * @param createNewAggregations flag passed through to the aggregate context
 * @param statisticsColumns     columns passed to the aggregate context as interesting columns
 * @param handlerContext        context handed to the resulting {@code MetadataHandlerRel}
 * @return the {@code MetadataHandlerRel} wrapping the row-group-level aggregation
 */
private DrillRel getRowGroupAggRelNode(List<NamedExpression> segmentExpressions, DrillRel convertedRelNode, boolean createNewAggregations, List<SchemaPath> statisticsColumns, MetadataHandlerContext handlerContext) {
    String fqnLabel = config.getContext().getOptions().getString(ExecConstants.IMPLICIT_FQN_COLUMN_LABEL);
    SchemaPath locationField = SchemaPath.getSimplePath(fqnLabel);

    String lastModifiedTimeLabel = config.getContext().getOptions().getString(ExecConstants.IMPLICIT_LAST_MODIFIED_TIME_COLUMN_LABEL);
    SchemaPath lastModifiedTimeField = SchemaPath.getSimplePath(lastModifiedTimeLabel);

    String rowGroupIndexLabel = config.getContext().getOptions().getString(ExecConstants.IMPLICIT_ROW_GROUP_INDEX_COLUMN_LABEL);
    SchemaPath rowGroupIndexField = SchemaPath.getSimplePath(rowGroupIndexLabel);

    List<NamedExpression> groupBy = getRowGroupExpressions(segmentExpressions, locationField, rowGroupIndexLabel, rowGroupIndexField);

    String rowGroupStartLabel = config.getContext().getOptions().getString(ExecConstants.IMPLICIT_ROW_GROUP_START_COLUMN_LABEL);
    SchemaPath rowGroupStartField = SchemaPath.getSimplePath(rowGroupStartLabel);

    String rowGroupLengthLabel = config.getContext().getOptions().getString(ExecConstants.IMPLICIT_ROW_GROUP_LENGTH_COLUMN_LABEL);
    SchemaPath rowGroupLengthField = SchemaPath.getSimplePath(rowGroupLengthLabel);

    List<SchemaPath> rowGroupMetadataColumns = Arrays.asList(
        lastModifiedTimeField,
        locationField,
        rowGroupIndexField,
        rowGroupStartField,
        rowGroupLengthField);

    MetadataAggregateContext rowGroupContext = MetadataAggregateContext.builder()
        .groupByExpressions(groupBy)
        .interestingColumns(statisticsColumns)
        .createNewAggregations(createNewAggregations)
        .metadataColumns(rowGroupMetadataColumns)
        .metadataLevel(MetadataType.ROW_GROUP)
        .build();

    DrillRel aggregated = new MetadataAggRel(
        convertedRelNode.getCluster(),
        convertedRelNode.getTraitSet(),
        convertedRelNode,
        rowGroupContext);

    return new MetadataHandlerRel(
        aggregated.getCluster(),
        aggregated.getTraitSet(),
        aggregated,
        handlerContext);
}
Also used : MetadataAggRel(org.apache.drill.exec.planner.logical.MetadataAggRel) SchemaPath(org.apache.drill.common.expression.SchemaPath) NamedExpression(org.apache.drill.common.logical.data.NamedExpression) MetadataHandlerRel(org.apache.drill.exec.planner.logical.MetadataHandlerRel) MetadataAggregateContext(org.apache.drill.exec.metastore.analyze.MetadataAggregateContext)

Aggregations

MetadataAggregateContext (org.apache.drill.exec.metastore.analyze.MetadataAggregateContext): 5, SchemaPath (org.apache.drill.common.expression.SchemaPath): 4, MetadataAggRel (org.apache.drill.exec.planner.logical.MetadataAggRel): 4, MetadataHandlerRel (org.apache.drill.exec.planner.logical.MetadataHandlerRel): 4, NamedExpression (org.apache.drill.common.logical.data.NamedExpression): 3, ArrayList (java.util.ArrayList): 2, IOException (java.io.IOException): 1, RelNode (org.apache.calcite.rel.RelNode): 1, GroupScan (org.apache.drill.exec.physical.base.GroupScan): 1, PlannerSettings (org.apache.drill.exec.planner.physical.PlannerSettings): 1, DirectGroupScan (org.apache.drill.exec.store.direct.DirectGroupScan): 1, ParquetGroupScan (org.apache.drill.exec.store.parquet.ParquetGroupScan): 1