Use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by apache.
The class PlanningBase, method testSqlPlan.
protected void testSqlPlan(String sqlCommands) throws Exception {
  final DrillbitContext dbContext = mock(DrillbitContext.class);
  final QueryContext context = mock(QueryContext.class);
  final String[] sqlStrings = sqlCommands.split(";");
  final LocalPersistentStoreProvider provider = new LocalPersistentStoreProvider(config);
  provider.start();
  final ScanResult scanResult = ClassPathScanner.fromPrescan(config);
  final LogicalPlanPersistence logicalPlanPersistence = new LogicalPlanPersistence(config, scanResult);
  final SystemOptionManager systemOptions = new SystemOptionManager(logicalPlanPersistence, provider, config);
  systemOptions.init();
  final UserSession userSession = UserSession.Builder.newBuilder().withOptionManager(systemOptions).build();
  final SessionOptionManager sessionOptions = userSession.getOptions();
  final QueryOptionManager queryOptions = new QueryOptionManager(sessionOptions);
  final ExecutionControls executionControls = new ExecutionControls(queryOptions, DrillbitEndpoint.getDefaultInstance());
  when(dbContext.getMetrics()).thenReturn(new MetricRegistry());
  when(dbContext.getAllocator()).thenReturn(allocator);
  when(dbContext.getConfig()).thenReturn(config);
  when(dbContext.getOptionManager()).thenReturn(systemOptions);
  when(dbContext.getStoreProvider()).thenReturn(provider);
  when(dbContext.getClasspathScan()).thenReturn(scanResult);
  when(dbContext.getLpPersistence()).thenReturn(logicalPlanPersistence);
  final StoragePluginRegistry registry = new StoragePluginRegistryImpl(dbContext);
  registry.init();
  final FunctionImplementationRegistry functionRegistry = new FunctionImplementationRegistry(config);
  final DrillOperatorTable table = new DrillOperatorTable(functionRegistry, systemOptions);
  SchemaConfig schemaConfig = SchemaConfig.newBuilder("foo", context).build();
  SchemaPlus root = DynamicSchema.createRootSchema(registry, schemaConfig, new AliasRegistryProvider(dbContext));
  when(context.getNewDefaultSchema()).thenReturn(root);
  when(context.getLpPersistence()).thenReturn(new LogicalPlanPersistence(config, ClassPathScanner.fromPrescan(config)));
  when(context.getStorage()).thenReturn(registry);
  when(context.getFunctionRegistry()).thenReturn(functionRegistry);
  when(context.getSession()).thenReturn(UserSession.Builder.newBuilder().withOptionManager(sessionOptions).setSupportComplexTypes(true).build());
  when(context.getCurrentEndpoint()).thenReturn(DrillbitEndpoint.getDefaultInstance());
  when(context.getActiveEndpoints()).thenReturn(ImmutableList.of(DrillbitEndpoint.getDefaultInstance()));
  when(context.getPlannerSettings()).thenReturn(new PlannerSettings(queryOptions, functionRegistry));
  when(context.getOptions()).thenReturn(queryOptions);
  when(context.getConfig()).thenReturn(config);
  when(context.getDrillOperatorTable()).thenReturn(table);
  when(context.getAllocator()).thenReturn(allocator);
  when(context.getExecutionControls()).thenReturn(executionControls);
  when(context.getLpPersistence()).thenReturn(logicalPlanPersistence);
  // Mocks for the org.apache.drill.TestTpchPlanning#tpch06 test. With the changes
  // for decimal types, the subtract UDF for decimals is used.
  when(context.getManagedBuffer()).thenReturn(allocator.buffer(4));
  when(context.getConstantValueHolder(eq("0.03"), eq(TypeProtos.MinorType.VARDECIMAL), Matchers.<Function<DrillBuf, ValueHolder>>any()))
      .thenReturn(ValueHolderHelper.getVarDecimalHolder(allocator.buffer(4), "0.03"));
  when(context.getConstantValueHolder(eq("0.01"), eq(TypeProtos.MinorType.VARDECIMAL), Matchers.<Function<DrillBuf, ValueHolder>>any()))
      .thenReturn(ValueHolderHelper.getVarDecimalHolder(allocator.buffer(4), "0.01"));
  when(context.getOption(anyString())).thenCallRealMethod();
  for (final String sql : sqlStrings) {
    if (sql.trim().isEmpty()) {
      continue;
    }
    @SuppressWarnings("unused")
    final PhysicalPlan p = DrillSqlWorker.getPlan(context, sql);
  }
}
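
A minimal sketch of how a subclass could drive this helper; the test class name and the SQL statements are illustrative assumptions, not taken from the Drill sources:

import org.junit.Test;

// Hypothetical subclass: testSqlPlan() splits its argument on ';' and plans each
// non-empty statement through DrillSqlWorker.getPlan(), so an unplannable
// statement throws and fails the test.
public class ExamplePlanningTest extends PlanningBase {

  @Test
  public void testPlansTwoStatements() throws Exception {
    testSqlPlan("SELECT 1 FROM cp.`employee.json`; SELECT full_name FROM cp.`employee.json` LIMIT 1");
  }
}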
Use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by apache.
The class ConvertHiveParquetScanToDrillParquetScan, method onMatch.
@Override
public void onMatch(RelOptRuleCall call) {
  try {
    final DrillScanRel hiveScanRel = call.rel(0);
    final HiveScan hiveScan = (HiveScan) hiveScanRel.getGroupScan();
    final PlannerSettings settings = PrelUtil.getPlannerSettings(call.getPlanner());
    final String partitionColumnLabel = settings.getFsPartitionColumnLabel();
    final Table hiveTable = hiveScan.getHiveReadEntry().getTable();
    final HiveReadEntry hiveReadEntry = hiveScan.getHiveReadEntry();
    final HiveMetadataProvider hiveMetadataProvider = new HiveMetadataProvider(hiveScan.getUserName(), hiveReadEntry, hiveScan.getHiveConf());
    final List<HiveMetadataProvider.LogicalInputSplit> logicalInputSplits = hiveMetadataProvider.getInputSplits(hiveReadEntry);
    if (logicalInputSplits.isEmpty()) {
      // the table is empty, keep the original scan
      return;
    }
    final Map<String, String> partitionColMapping = getPartitionColMapping(hiveTable, partitionColumnLabel);
    final DrillScanRel nativeScanRel = createNativeScanRel(partitionColMapping, hiveScanRel, logicalInputSplits, settings.getOptions());
    if (hiveScanRel.getRowType().getFieldCount() == 0) {
      call.transformTo(nativeScanRel);
    } else {
      final DrillProjectRel projectRel = createProjectRel(hiveScanRel, partitionColMapping, nativeScanRel);
      call.transformTo(projectRel);
    }
    /*
      The Drill native scan should take precedence over the Hive scan since it is more
      efficient and faster. Hive does not always produce correct costing (e.g. for
      external tables Hive does not know the number of rows, so we estimate it
      approximately), whereas Drill calculates the number of rows exactly; as a result
      the Hive scan could be chosen over the Drill native scan because its cost appears
      lower. To ensure the Drill native scan is chosen, reduce the Hive scan importance to 0.
    */
    call.getPlanner().setImportance(hiveScanRel, 0.0);
  } catch (final Exception e) {
    logger.warn("Failed to convert HiveScan to HiveDrillNativeParquetScan", e);
  }
}
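
For context, the partition-column mapping built above renames Hive partition keys to the filesystem partition label plus an index; below is a sketch under assumed inputs (the helper name and the example keys year/month are hypothetical, and the label is the value of the drill.exec.storage.file.partition.column.label option, typically "dir"):

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.Map;

// Hypothetical illustration: a table partitioned by (year, month) with label "dir"
// maps to {year=dir0, month=dir1}, mirroring what the filesystem plugin exposes.
static Map<String, String> examplePartitionColMapping(String partitionColumnLabel) {
  Map<String, String> mapping = new LinkedHashMap<>();
  int i = 0;
  for (String hivePartitionKey : Arrays.asList("year", "month")) {
    mapping.put(hivePartitionKey, partitionColumnLabel + i++);
  }
  return mapping;
}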
Use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by apache.
The class ConvertMetadataAggregateToDirectScanRule, method onMatch.
@Override
public void onMatch(RelOptRuleCall call) {
  MetadataAggRel agg = call.rel(0);
  DrillScanRel scan = call.rel(1);
  GroupScan oldGrpScan = scan.getGroupScan();
  PlannerSettings settings = PrelUtil.getPlannerSettings(call.getPlanner());
  // Only apply the rule to Parquet group scans, and only when the required column metadata is present.
  if (!(oldGrpScan instanceof ParquetGroupScan)
      || (oldGrpScan.getTableMetadata().getInterestingColumns() != null
          && !oldGrpScan.getTableMetadata().getInterestingColumns().containsAll(agg.getContext().interestingColumns()))) {
    return;
  }
  try {
    DirectGroupScan directScan = buildDirectScan(agg.getContext().interestingColumns(), scan, settings);
    if (directScan == null) {
      logger.warn("Unable to use parquet metadata for ANALYZE since some required metadata is absent within parquet metadata");
      return;
    }
    RelNode converted = new DrillDirectScanRel(scan.getCluster(), scan.getTraitSet().plus(DrillRel.DRILL_LOGICAL), directScan, scan.getRowType());
    if (agg.getContext().metadataLevel() != MetadataType.ROW_GROUP) {
      MetadataAggregateContext updatedContext = agg.getContext().toBuilder().createNewAggregations(false).build();
      converted = new MetadataAggRel(agg.getCluster(), agg.getTraitSet(), converted, updatedContext);
    }
    call.transformTo(converted);
  } catch (Exception e) {
    logger.warn("Unable to use parquet metadata for ANALYZE: {}", e.getMessage(), e);
  }
}
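
The applicability guard at the top of onMatch is dense; restated as a hypothetical helper (the helper name is assumed, not from the Drill sources), it reads: the rule fires only for a Parquet group scan whose table metadata covers every column the metadata aggregation needs:

// Hypothetical restatement of the guard above.
static boolean ruleApplies(GroupScan scan, MetadataAggRel agg) {
  if (!(scan instanceof ParquetGroupScan)) {
    return false;
  }
  List<SchemaPath> interesting = scan.getTableMetadata().getInterestingColumns();
  // null means no column restriction is recorded, so any requested columns are acceptable
  return interesting == null || interesting.containsAll(agg.getContext().interestingColumns());
}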
Use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by apache.
The class IndexSelector, method getCandidateIndexes.
/**
 * Run the index selection algorithm and return the top N indexes.
 */
public void getCandidateIndexes(IndexConditionInfo.Builder infoBuilder, List<IndexGroup> coveringIndexes, List<IndexGroup> nonCoveringIndexes, List<IndexGroup> intersectIndexes) {
  RelOptPlanner planner = indexContext.getCall().getPlanner();
  PlannerSettings settings = PrelUtil.getPlannerSettings(planner);
  List<IndexGroup> candidateIndexes = Lists.newArrayList();
  logger.info("index_plan_info: Analyzing {} indexes for prefix matches: {}", indexPropList.size(), indexPropList);
  // analysis phase
  for (IndexProperties p : indexPropList) {
    analyzePrefixMatches(p);
    // the index must have leading filters or satisfy the required collation
    if (p.numLeadingFilters() > 0 || p.satisfiesCollation()) {
      double selThreshold = p.isCovering() ? settings.getIndexCoveringSelThreshold() : settings.getIndexNonCoveringSelThreshold();
      // admit all indexes when full table scan is disabled, to avoid a CannotPlanException
      if (settings.isDisableFullTableScan() || p.getLeadingSelectivity() <= selThreshold) {
        IndexGroup index = new IndexGroup();
        index.addIndexProp(p);
        candidateIndexes.add(index);
      } else {
        if (p.getLeadingSelectivity() > selThreshold) {
          logger.debug("Skipping index {}. The leading selectivity {} is larger than threshold {}", p.getIndexDesc().getIndexName(), p.getLeadingSelectivity(), selThreshold);
        }
      }
    }
  }
  if (candidateIndexes.size() == 0) {
    logger.info("index_plan_info: No suitable indexes found !");
    return;
  }
  int max_candidate_indexes = (int) PrelUtil.getPlannerSettings(planner).getIndexMaxChosenIndexesPerTable();
  // sort the candidates so the ranking logic is exercised even for few indexes
  if (candidateIndexes.size() > 1) {
    Collections.sort(candidateIndexes, new IndexComparator(planner, builder));
  }
  // Generate index intersections for ranking
  addIndexIntersections(candidateIndexes, infoBuilder, settings.getMaxIndexesToIntersect());
  // Sort again after the intersect plans are added to the list
  if (candidateIndexes.size() > 1) {
    Collections.sort(candidateIndexes, new IndexComparator(planner, builder));
  }
  logger.info("index_plan_info: The top ranked indexes are: ");
  int count = 0;
  boolean foundCovering = false;
  boolean foundCoveringCollation = false;
  boolean foundNonCoveringCollation = false;
  // pick the best N indexes
  for (int i = 0; i < candidateIndexes.size(); i++) {
    IndexGroup index = candidateIndexes.get(i);
    if (index.numIndexes() == 1 && index.getIndexProps().get(0).isCovering()) {
      IndexProperties indexProps = index.getIndexProps().get(0);
      if (foundCoveringCollation) {
        // if we previously found a higher-ranked covering index that satisfies collation,
        // skip this one (selectivity and cost considerations were already handled
        // by the ranking phase)
        logger.debug("index_plan_info: Skipping covering index {} because a higher ranked covering index with collation already exists.", indexProps.getIndexDesc().getIndexName());
        continue;
      }
      coveringIndexes.add(index);
      logger.info("index_plan_info: name: {}, covering, collation: {}, leadingSelectivity: {}, cost: {}", indexProps.getIndexDesc().getIndexName(), indexProps.satisfiesCollation(), indexProps.getLeadingSelectivity(), indexProps.getSelfCost(planner));
      count++;
      foundCovering = true;
      if (indexProps.satisfiesCollation()) {
        foundCoveringCollation = true;
      }
    } else if (index.numIndexes() == 1) {
      // non-covering
      IndexProperties indexProps = index.getIndexProps().get(0);
      // a non-covering index does not provide collation
      if (foundCoveringCollation || (foundCovering && !indexProps.satisfiesCollation())) {
        logger.debug("index_plan_info: Skipping non-covering index {} because it does not have collation and a higher ranked covering index already exists.", indexProps.getIndexDesc().getIndexName());
        continue;
      }
      if (indexProps.satisfiesCollation()) {
        foundNonCoveringCollation = true;
      }
      // all other non-covering indexes can be added to the list because two or more
      // non-covering indexes could be considered for intersection later; currently the
      // index selector does not cost the index intersection
      nonCoveringIndexes.add(index);
      logger.info("index_plan_info: name: {}, non-covering, collation: {}, leadingSelectivity: {}, cost: {}", indexProps.getIndexDesc().getIndexName(), indexProps.satisfiesCollation(), indexProps.getLeadingSelectivity(), indexProps.getSelfCost(planner));
      count++;
    } else {
      // intersect indexes
      if (foundCoveringCollation || (foundCovering && !index.getIndexProps().get(index.numIndexes() - 1).satisfiesCollation()) || foundNonCoveringCollation) {
        continue;
      }
      IndexGroup intersectIndex = new IndexGroup();
      double isectLeadingSel = 1.0;
      String isectName = "Intersect-" + count;
      for (IndexProperties indexProps : index.getIndexProps()) {
        intersectIndex.addIndexProp(indexProps);
        isectLeadingSel *= indexProps.getLeadingSelectivity();
        logger.info("name: {}, {}, collation: {}, leadingSelectivity: {}, cost: {}", indexProps.getIndexDesc().getIndexName(), isectName, indexProps.satisfiesCollation(), indexProps.getLeadingSelectivity(), indexProps.getSelfCost(planner));
      }
      logger.info("name: {}, intersect-idx, collation: {}, leadingSelectivity: {}, cost: {}", isectName, index.getIndexProps().get(index.numIndexes() - 1).satisfiesCollation(), isectLeadingSel, index.getIndexProps().get(0).getIntersectCost(index, builder, planner));
      intersectIndexes.add(intersectIndex);
    }
    if (count == max_candidate_indexes) {
      break;
    }
  }
}
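
The admission test inside the analysis loop can be summarized as a small predicate; below is a condensed restatement of the logic shown above (the helper name is assumed):

// Keep an index as a candidate when full table scan is disabled, or when its
// leading selectivity is within the covering/non-covering threshold.
static boolean admitCandidate(IndexProperties p, PlannerSettings settings) {
  double selThreshold = p.isCovering()
      ? settings.getIndexCoveringSelThreshold()
      : settings.getIndexNonCoveringSelThreshold();
  return settings.isDisableFullTableScan() || p.getLeadingSelectivity() <= selThreshold;
}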
Use of org.apache.drill.exec.planner.physical.PlannerSettings in project drill by apache.
The class ConvertCountToDirectScanRule, method onMatch.
@Override
public void onMatch(RelOptRuleCall call) {
  final Aggregate agg = call.rel(0);
  final TableScan scan = call.rel(call.rels.length - 1);
  final Project project = call.rels.length == 3 ? (Project) call.rel(1) : null;
  // The rule does not apply when there is a GROUP BY or a DISTINCT aggregate call;
  // additional checks are done further below.
  if (agg.getGroupCount() > 0 || agg.containsDistinctCall()) {
    return;
  }
  DrillTable drillTable = DrillRelOptUtil.getDrillTable(scan);
  if (drillTable == null) {
    logger.debug("Rule does not apply since an eligible drill table instance was not found.");
    return;
  }
  Object selection = drillTable.getSelection();
  if (!(selection instanceof FormatSelection)) {
    logger.debug("Rule does not apply since only Parquet file format is eligible.");
    return;
  }
  PlannerSettings settings = call.getPlanner().getContext().unwrap(PlannerSettings.class);
  // The rule is applicable only if the statistics for row count and null count are available from the metadata.
  FormatSelection formatSelection = (FormatSelection) selection;
  // The rule cannot be applied if the selection had a wildcard, since the total row count
  // cannot be read from the parent directory.
  if (formatSelection.getSelection().hadWildcard()) {
    logger.debug("Rule does not apply when there is a wild card since the COUNT could not be determined from metadata.");
    return;
  }
  Pair<Boolean, Metadata_V4.MetadataSummary> status = checkMetadataForScanStats(settings, drillTable, formatSelection);
  if (!status.getLeft()) {
    logger.debug("Rule does not apply since MetadataSummary metadata was not found.");
    return;
  }
  Metadata_V4.MetadataSummary metadataSummary = status.getRight();
  Map<String, Long> result = collectCounts(settings, metadataSummary, agg, scan, project);
  logger.trace("Calculated the following aggregate counts: {}", result);
  // if the counts could not be determined, the rule won't be applied
  if (result.isEmpty()) {
    logger.debug("Rule does not apply since one or more COUNTs could not be determined from metadata.");
    return;
  }
  Path summaryFileName = Metadata.getSummaryFileName(formatSelection.getSelection().getSelectionRoot());
  final RelDataType scanRowType = CountToDirectScanUtils.constructDataType(agg, result.keySet());
  final DynamicPojoRecordReader<Long> reader = new DynamicPojoRecordReader<>(CountToDirectScanUtils.buildSchema(scanRowType.getFieldNames()), Collections.singletonList(new ArrayList<>(result.values())));
  final ScanStats scanStats = new ScanStats(ScanStats.GroupScanProperty.EXACT_ROW_COUNT, 1, 1, scanRowType.getFieldCount());
  final MetadataDirectGroupScan directScan = new MetadataDirectGroupScan(reader, summaryFileName, 1, scanStats, true, false);
  final DrillDirectScanRel newScan = new DrillDirectScanRel(scan.getCluster(), scan.getTraitSet().plus(DrillRel.DRILL_LOGICAL), directScan, scanRowType);
  final DrillProjectRel newProject = new DrillProjectRel(agg.getCluster(), agg.getTraitSet().plus(DrillRel.DRILL_LOGICAL), newScan, CountToDirectScanUtils.prepareFieldExpressions(scanRowType), agg.getRowType());
  call.transformTo(newProject);
}
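
Taken together, the snippets show the two idioms Drill rules use to obtain PlannerSettings during planning; below is a condensed sketch inside a hypothetical onMatch (both calls resolve to the per-query PlannerSettings registered with the planner):

@Override
public void onMatch(RelOptRuleCall call) {
  // Idiom used by the Hive, metadata-aggregate, and index rules above.
  PlannerSettings viaPrelUtil = PrelUtil.getPlannerSettings(call.getPlanner());
  // Idiom used by ConvertCountToDirectScanRule above.
  PlannerSettings viaContext = call.getPlanner().getContext().unwrap(PlannerSettings.class);
}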