use of org.apache.drill.exec.planner.logical.DrillAnalyzeRel in project drill by apache.
the class MetastoreAnalyzeTableHandler method convertToDrel.
/**
* Converts to Drill logical plan
*/
private DrillRel convertToDrel(RelNode relNode, SqlMetastoreAnalyzeTable sqlAnalyzeTable,
    DrillTableInfo drillTableInfo) throws ForemanSetupException, IOException {
  RelBuilder relBuilder = LOGICAL_BUILDER.create(relNode.getCluster(), null);

  DrillTable table = drillTableInfo.drillTable();
  AnalyzeInfoProvider analyzeInfoProvider = table.getGroupScan().getAnalyzeInfoProvider();

  List<String> schemaPath = drillTableInfo.schemaPath();
  String pluginName = schemaPath.get(0);
  String workspaceName = Strings.join(schemaPath.subList(1, schemaPath.size()),
      AbstractSchema.SCHEMA_SEPARATOR);
  String tableName = drillTableInfo.tableName();

  TableInfo tableInfo = TableInfo.builder()
      .name(tableName)
      .owner(table.getUserName())
      .type(analyzeInfoProvider.getTableTypeName())
      .storagePlugin(pluginName)
      .workspace(workspaceName)
      .build();

  ColumnNamesOptions columnNamesOptions = new ColumnNamesOptions(context.getOptions());

  List<String> segmentColumns = analyzeInfoProvider.getSegmentColumns(table, columnNamesOptions).stream()
      .map(SchemaPath::getRootSegmentPath)
      .collect(Collectors.toList());
  List<NamedExpression> segmentExpressions = segmentColumns.stream()
      .map(partitionName -> new NamedExpression(SchemaPath.getSimplePath(partitionName),
          FieldReference.getWithQuotedRef(partitionName)))
      .collect(Collectors.toList());

  List<MetadataInfo> rowGroupsInfo = Collections.emptyList();
  List<MetadataInfo> filesInfo = Collections.emptyList();
  Multimap<Integer, MetadataInfo> segments = ArrayListMultimap.create();

  BasicTablesRequests basicRequests;
  try {
    basicRequests = context.getMetastoreRegistry().get()
        .tables()
        .basicRequests();
  } catch (MetastoreException e) {
    logger.error("Error when obtaining Metastore instance for table {}", tableName, e);
    DrillRel convertedRelNode = convertToRawDrel(
        relBuilder.values(
                new String[]{MetastoreAnalyzeConstants.OK_FIELD_NAME, MetastoreAnalyzeConstants.SUMMARY_FIELD_NAME},
                false, e.getMessage())
            .build());
    return new DrillScreenRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertedRelNode);
  }
  MetadataType metadataLevel = getMetadataType(sqlAnalyzeTable);

  List<SchemaPath> interestingColumns = sqlAnalyzeTable.getFieldNames();

  MetastoreTableInfo metastoreTableInfo = basicRequests.metastoreTableInfo(tableInfo);

  List<MetadataInfo> allMetaToHandle = null;
  List<MetadataInfo> metadataToRemove = new ArrayList<>();

  // Step 1: checks whether an incremental analyze may be produced
  if (metastoreTableInfo.isExists()) {
    RelNode finalRelNode = relNode;
    CheckedSupplier<TableScan, SqlUnsupportedException> tableScanSupplier =
        () -> DrillRelOptUtil.findScan(convertToDrel(finalRelNode.getInput(0)));

    MetadataInfoCollector metadataInfoCollector = analyzeInfoProvider.getMetadataInfoCollector(basicRequests,
        tableInfo, (FormatSelection) table.getSelection(), context.getPlannerSettings(), tableScanSupplier,
        interestingColumns, metadataLevel, segmentColumns.size());

    if (!metadataInfoCollector.isOutdated()) {
      DrillRel convertedRelNode = convertToRawDrel(
          relBuilder.values(
                  new String[]{MetastoreAnalyzeConstants.OK_FIELD_NAME, MetastoreAnalyzeConstants.SUMMARY_FIELD_NAME},
                  false, "Table metadata is up to date, analyze wasn't performed.")
              .build());
      return new DrillScreenRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertedRelNode);
    }

    // updates the scan to read updated / new files only and passes removed files to the metadata handler
    relNode = relNode.copy(relNode.getTraitSet(), Collections.singletonList(metadataInfoCollector.getPrunedScan()));

    filesInfo = metadataInfoCollector.getFilesInfo();
    segments = metadataInfoCollector.getSegmentsInfo();
    rowGroupsInfo = metadataInfoCollector.getRowGroupsInfo();

    allMetaToHandle = metadataInfoCollector.getAllMetaToHandle();
    metadataToRemove = metadataInfoCollector.getMetadataToRemove();
  }
  // Step 2: constructs the plan for producing the analyze
  DrillRel convertedRelNode = convertToRawDrel(relNode);

  boolean createNewAggregations = true;

  // list of columns for which statistics should be collected: interesting columns + segment columns
  List<SchemaPath> statisticsColumns = interestingColumns == null ? null : new ArrayList<>(interestingColumns);
  if (statisticsColumns != null) {
    segmentColumns.stream()
        .map(SchemaPath::getSimplePath)
        .forEach(statisticsColumns::add);
  }
  SchemaPath locationField = analyzeInfoProvider.getLocationField(columnNamesOptions);

  if (analyzeInfoProvider.supportsMetadataType(MetadataType.ROW_GROUP)
      && metadataLevel.includes(MetadataType.ROW_GROUP)) {
    MetadataHandlerContext handlerContext = MetadataHandlerContext.builder()
        .tableInfo(tableInfo)
        .metadataToHandle(rowGroupsInfo)
        .metadataType(MetadataType.ROW_GROUP)
        .depthLevel(segmentExpressions.size())
        .segmentColumns(segmentColumns)
        .build();

    convertedRelNode = getRowGroupAggRelNode(segmentExpressions, convertedRelNode, createNewAggregations,
        statisticsColumns, handlerContext);

    createNewAggregations = false;
    locationField = SchemaPath.getSimplePath(MetastoreAnalyzeConstants.LOCATION_FIELD);
  }

  if (analyzeInfoProvider.supportsMetadataType(MetadataType.FILE)
      && metadataLevel.includes(MetadataType.FILE)) {
    MetadataHandlerContext handlerContext = MetadataHandlerContext.builder()
        .tableInfo(tableInfo)
        .metadataToHandle(filesInfo)
        .metadataType(MetadataType.FILE)
        .depthLevel(segmentExpressions.size())
        .segmentColumns(segmentColumns)
        .build();

    convertedRelNode = getFileAggRelNode(segmentExpressions, convertedRelNode, createNewAggregations,
        statisticsColumns, locationField, handlerContext);

    locationField = SchemaPath.getSimplePath(MetastoreAnalyzeConstants.LOCATION_FIELD);
    createNewAggregations = false;
  }

  if (analyzeInfoProvider.supportsMetadataType(MetadataType.SEGMENT)
      && metadataLevel.includes(MetadataType.SEGMENT)) {
    for (int i = segmentExpressions.size(); i > 0; i--) {
      MetadataHandlerContext handlerContext = MetadataHandlerContext.builder()
          .tableInfo(tableInfo)
          .metadataToHandle(new ArrayList<>(segments.get(i - 1)))
          .metadataType(MetadataType.SEGMENT)
          .depthLevel(i)
          .segmentColumns(segmentColumns.subList(0, i))
          .build();

      convertedRelNode = getSegmentAggRelNode(segmentExpressions, convertedRelNode, createNewAggregations,
          statisticsColumns, locationField, i, handlerContext);

      locationField = SchemaPath.getSimplePath(MetastoreAnalyzeConstants.LOCATION_FIELD);
      createNewAggregations = false;
    }
  }

  if (analyzeInfoProvider.supportsMetadataType(MetadataType.TABLE)
      && metadataLevel.includes(MetadataType.TABLE)) {
    MetadataHandlerContext handlerContext = MetadataHandlerContext.builder()
        .tableInfo(tableInfo)
        .metadataToHandle(Collections.emptyList())
        .metadataType(MetadataType.TABLE)
        .depthLevel(segmentExpressions.size())
        .segmentColumns(segmentColumns)
        .build();

    convertedRelNode = getTableAggRelNode(convertedRelNode, createNewAggregations,
        statisticsColumns, locationField, handlerContext);
  } else {
    throw new IllegalStateException("Analyze table with NONE level");
  }
  boolean useStatistics = context.getOptions().getOption(PlannerSettings.STATISTICS_USE);

  SqlNumericLiteral samplePercentLiteral = sqlAnalyzeTable.getSamplePercent();
  double samplePercent = samplePercentLiteral == null ? 100.0 : samplePercentLiteral.intValue(true);

  // Step 3: adds rel nodes for producing the statistics analyze if required
  RelNode analyzeRel = useStatistics
      ? new DrillAnalyzeRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(),
          convertToRawDrel(relNode), samplePercent)
      : convertToRawDrel(relBuilder.values(new String[]{""}, "").build());

  MetadataControllerContext metadataControllerContext = MetadataControllerContext.builder()
      .tableInfo(tableInfo)
      .metastoreTableInfo(metastoreTableInfo)
      .location(((FormatSelection) table.getSelection()).getSelection().getSelectionRoot())
      .interestingColumns(interestingColumns)
      .segmentColumns(segmentColumns)
      .metadataToHandle(allMetaToHandle)
      .metadataToRemove(metadataToRemove)
      .analyzeMetadataLevel(metadataLevel)
      .build();

  convertedRelNode = new MetadataControllerRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(),
      convertedRelNode, analyzeRel, metadataControllerContext);

  return new DrillScreenRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertedRelNode);
}
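The four metadata blocks above are chained bottom-up, and each one checks metadataLevel.includes(...) to decide whether it participates. A minimal sketch of that cascade, assuming that a finer requested level implies every coarser aggregation step above it (this Level enum is a hypothetical stand-in, not Drill's MetadataType):

enum Level {
  ROW_GROUP, FILE, SEGMENT, TABLE;

  // Assumed semantics: a requested level "includes" every type that is at
  // least as coarse, so ROW_GROUP triggers all four aggregation steps while
  // TABLE triggers only the table-level one.
  boolean includes(Level type) {
    return ordinal() <= type.ordinal();
  }
}

This also explains the createNewAggregations flag: only the first block that fires creates fresh aggregations; every later block aggregates the output of the previous one, switching locationField to the MetastoreAnalyzeConstants.LOCATION_FIELD produced by that earlier step.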
use of org.apache.drill.exec.planner.logical.DrillAnalyzeRel in project drill by apache.
the class AnalyzeTableHandler method convertToDrel.
/**
 * Converts to Drill logical plan
 */
protected DrillRel convertToDrel(RelNode relNode, AbstractSchema schema, String analyzeTableName,
    double samplePercent) throws SqlUnsupportedException {
  DrillRel convertedRelNode = convertToRawDrel(relNode);

  final RelNode analyzeRel = new DrillAnalyzeRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(),
      convertedRelNode, samplePercent);

  final RelNode writerRel = new DrillWriterRel(analyzeRel.getCluster(), analyzeRel.getTraitSet(), analyzeRel,
      schema.appendToStatsTable(analyzeTableName));

  return new DrillScreenRel(writerRel.getCluster(), writerRel.getTraitSet(), writerRel);
}
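Read off the constructor nesting above, the operator tree this handler returns is:

// DrillScreenRel
//   DrillWriterRel      <- writes via schema.appendToStatsTable(analyzeTableName)
//     DrillAnalyzeRel   <- computes the statistics, honoring samplePercent
//       convertedRelNode (the raw Drill logical plan of the input)

The DrillAnalyzeRel node is what AnalyzePrule (below) later expands into the parallel physical statistics plan.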
use of org.apache.drill.exec.planner.logical.DrillAnalyzeRel in project drill by apache.
the class AnalyzePrule method onMatch.
@Override
public void onMatch(RelOptRuleCall call) {
  final DrillAnalyzeRel analyze = call.rel(0);
  final RelNode input = call.rel(1);
  final SingleRel newAnalyze;
  final RelTraitSet singleDistTrait = call.getPlanner().emptyTraitSet()
      .plus(Prel.DRILL_PHYSICAL)
      .plus(DrillDistributionTrait.SINGLETON);

  // Generate the parallel ANALYZE plan:
  // Writer <- Unpivot <- StatsAgg(Phase2) <- Exchange <- StatsAgg(Phase1) <- Scan
  final RelTraitSet traits = input.getTraitSet()
      .plus(Prel.DRILL_PHYSICAL)
      .plus(DrillDistributionTrait.DEFAULT);
  RelNode convertedInput = convert(input, traits);

  final List<String> mapFields1 = Lists.newArrayList(PHASE_1_FUNCTIONS);
  final Map<String, String> mapFields2 = Maps.newHashMap(PHASE_2_FUNCTIONS);
  final List<String> mapFields3 = Lists.newArrayList(UNPIVOT_FUNCTIONS);
  mapFields1.add(0, Statistic.COLNAME);
  mapFields1.add(1, Statistic.COLTYPE);
  mapFields2.put(Statistic.COLNAME, Statistic.COLNAME);
  mapFields2.put(Statistic.COLTYPE, Statistic.COLTYPE);
  mapFields3.add(0, Statistic.COLNAME);
  mapFields3.add(1, Statistic.COLTYPE);
  // STATSAGG -> EXCHANGE -> STATSMERGE -> UNPIVOT
  if (analyze.getSamplePercent() < 100.0) {
    // If a sample percent is specified, add a filter for Bernoulli sampling
    RexBuilder builder = convertedInput.getCluster().getRexBuilder();
    RexNode sampleCondition;
    if (PrelUtil.getSettings(convertedInput.getCluster()).getOptions()
        .getOption(ExecConstants.DETERMINISTIC_SAMPLING_VALIDATOR)) {
      sampleCondition = builder.makeCall(SqlStdOperatorTable.LESS_THAN_OR_EQUAL,
          builder.makeCall(SqlStdOperatorTable.RAND, builder.makeExactLiteral(BigDecimal.valueOf(1))),
          builder.makeExactLiteral(BigDecimal.valueOf(analyze.getSamplePercent() / 100.0)));
    } else {
      sampleCondition = builder.makeCall(SqlStdOperatorTable.LESS_THAN_OR_EQUAL,
          builder.makeCall(SqlStdOperatorTable.RAND),
          builder.makeExactLiteral(BigDecimal.valueOf(analyze.getSamplePercent() / 100.0)));
    }
    convertedInput = new FilterPrel(convertedInput.getCluster(), convertedInput.getTraitSet(),
        convertedInput, sampleCondition);
  }
  final StatsAggPrel statsAggPrel = new StatsAggPrel(analyze.getCluster(), traits, convertedInput,
      PHASE_1_FUNCTIONS);
  UnionExchangePrel exch = new UnionExchangePrel(statsAggPrel.getCluster(), singleDistTrait, statsAggPrel);
  final StatsMergePrel statsMergePrel = new StatsMergePrel(exch.getCluster(), singleDistTrait, exch,
      mapFields2, analyze.getSamplePercent());
  newAnalyze = new UnpivotMapsPrel(statsMergePrel.getCluster(), singleDistTrait, statsMergePrel, mapFields3);

  call.transformTo(newAnalyze);
}
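The sampling branch above keeps each row independently with probability samplePercent / 100 by comparing RAND (seeded with the literal 1 when deterministic sampling is enabled, so repeated runs select the same rows) against that fraction. A plain-Java sketch of the same Bernoulli predicate, detached from Calcite's RexBuilder (the class and method names here are illustrative, not Drill API):

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

public class BernoulliSampleSketch {

  // Keeps each row with probability samplePercent / 100, mirroring the
  // rand() <= samplePercent / 100.0 filter built in onMatch. A non-null seed
  // models the deterministic-sampling branch.
  public static <T> List<T> sample(List<T> rows, double samplePercent, Long seed) {
    Random rand = seed == null ? new Random() : new Random(seed);
    double fraction = samplePercent / 100.0;
    List<T> sampled = new ArrayList<>();
    for (T row : rows) {
      if (rand.nextDouble() <= fraction) {
        sampled.add(row);
      }
    }
    return sampled;
  }
}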