use of org.apache.drill.exec.physical.base.GroupScan in project drill by apache.
the class MapRDBPushLimitIntoScan method doPushLimitIntoGroupScan.
/**
 * Tries to push the given limit into the group scan and, on success, hands the rewritten
 * sub-plan to the planner via {@code call.transformTo(...)}.
 *
 * <p>If {@code getGroupScanWithLimit} returns {@code null} the method returns without
 * transforming anything. Any exception raised while building the new plan is caught and
 * logged at WARN level, so a failed pushdown never propagates to the caller.
 *
 * @param call      the rule call on which the transformed node is registered
 * @param limit     the limit to push into the scan
 * @param project   optional project sitting above the scan; when non-null it is re-created
 *                  on top of the new scan, otherwise the new scan itself is the result
 * @param scan      the original scan whose cluster, traits, row type and table are reused
 * @param groupScan the group scan the limit should be applied to
 */
protected void doPushLimitIntoGroupScan(RelOptRuleCall call, LimitPrel limit, final ProjectPrel project, ScanPrel scan, GroupScan groupScan) {
  try {
    final GroupScan newGroupScan = getGroupScanWithLimit(groupScan, limit);
    if (newGroupScan == null) {
      // Nothing to push down: leave the plan untouched.
      return;
    }
    final ScanPrel newScan = new ScanPrel(scan.getCluster(), scan.getTraitSet(), newGroupScan, scan.getRowType(), scan.getTable());
    final RelNode newChild;
    if (project != null) {
      // Rebuild the project on top of the limit-aware scan.
      newChild = new ProjectPrel(project.getCluster(), project.getTraitSet(), newScan, project.getProjects(), project.getRowType());
    } else {
      newChild = newScan;
    }
    call.transformTo(newChild);
    // Parameterized message: the group scan is only stringified when DEBUG is enabled.
    logger.debug("pushLimitIntoGroupScan: Converted to a new ScanPrel {}", newScan.getGroupScan());
  } catch (Exception e) {
    // Deliberate best-effort: a failed pushdown is logged and planning continues.
    logger.warn("pushLimitIntoGroupScan: Exception while trying limit pushdown!", e);
  }
}
use of org.apache.drill.exec.physical.base.GroupScan in project drill by apache.
the class MapRDBStatistics method populateStats.
/**
 * Core routine that populates the statistics caches for a given query condition.
 * Different plan types need statistics for different combinations of predicates. There is
 * currently no tree-walker for {@link QueryCondition}, so instead of combining the statistics
 * of individual predicates into statistics for the overall predicate, the final predicates are
 * used directly. As a consequence, the cached statistics are susceptible to predicates being
 * modified after the statistics were generated.
 *
 * <p>Statistics computed/stored are row counts, leading row counts and average row size.
 * Row counts and leading row counts (i.e. those corresponding to predicates on the leading
 * index columns) are stored in the statsCache. Average row sizes are stored in the
 * fiStatsCache (FI stands for Filter Independent).
 *
 * <p>The method returns early, without setting {@code statsAvailable}, when the scan is not
 * backed by a {@code JsonTableGroupScan} or when the full-table payload is null or reports
 * zero rows.
 *
 * @param condition - The condition for which to obtain statistics; when null, the no-filter path is taken
 * @param indexes - The collection of indexes to use for getting statistics
 * @param scanRel - The current scanRel
 * @param context - The index plan call context
 */
private void populateStats(RexNode condition, IndexCollection indexes, DrillScanRelBase scanRel, IndexCallContext context) {
JsonTableGroupScan jTabGrpScan;
Map<IndexDescriptor, IndexConditionInfo> firstKeyIdxConditionMap;
Map<IndexDescriptor, IndexConditionInfo> idxConditionMap;
/* Map containing the individual base conditions of an ANDed/ORed condition and their selectivities.
 * This is used to compute the overall selectivity of a complex ANDed/ORed condition using its base
 * conditions. Helps prevent over/under estimates and guessed selectivity for ORed predicates.
 */
Map<String, Double> baseConditionMap;
GroupScan grpScan = IndexPlanUtils.getGroupScan(scanRel);
// Only DrillScanRel/ScanPrel nodes whose group scan is a JsonTableGroupScan are handled here.
if ((scanRel instanceof DrillScanRel || scanRel instanceof ScanPrel) && grpScan instanceof JsonTableGroupScan) {
jTabGrpScan = (JsonTableGroupScan) grpScan;
} else {
logger.debug("Statistics: populateStats exit early - not an instance of JsonTableGroupScan!");
return;
}
// No filter condition: delegate to the no-filter path and mark statistics as available.
if (condition == null) {
populateStatsForNoFilter(jTabGrpScan, indexes, scanRel, context);
statsAvailable = true;
return;
}
RexBuilder builder = scanRel.getCluster().getRexBuilder();
PlannerSettings settings = PrelUtil.getSettings(scanRel.getCluster());
// Get the stats payload for full table (has total rows in the table)
StatisticsPayload ftsPayload = jTabGrpScan.getFirstKeyEstimatedStats(null, null, scanRel);
// Get the average row size for table and all indexes
// NOTE(review): ftsPayload is handed to addToCache before the null check below - confirm addToCache tolerates null.
addToCache(null, jTabGrpScan.getAverageRowSizeStats(null), ftsPayload);
// Without a usable full-table row count nothing further is computed; statsAvailable is left unchanged.
if (ftsPayload == null || ftsPayload.getRowCount() == 0) {
return;
}
for (IndexDescriptor idx : indexes) {
StatisticsPayload idxRowSizePayload = jTabGrpScan.getAverageRowSizeStats(idx);
addToCache(idx, idxRowSizePayload, ftsPayload);
}
/* Only use indexes with distinct first key */
IndexCollection distFKeyIndexes = distinctFKeyIndexes(indexes, scanRel);
IndexConditionInfo.Builder infoBuilder = IndexConditionInfo.newBuilder(condition, distFKeyIndexes, builder, scanRel);
idxConditionMap = infoBuilder.getIndexConditionMap();
firstKeyIdxConditionMap = infoBuilder.getFirstKeyIndexConditionMap();
baseConditionMap = new HashMap<>();
// First pass: for each index in the first-key condition map, fetch estimates from the group scan
// and record them (also seeding baseConditionMap used by the selectivity computations below).
for (IndexDescriptor idx : firstKeyIdxConditionMap.keySet()) {
// Skip indexes for which conditionIndexed reports NONE.
if (IndexPlanUtils.conditionIndexed(context.getOrigMarker(), idx) == IndexPlanUtils.ConditionIndexed.NONE) {
continue;
}
RexNode idxCondition = firstKeyIdxConditionMap.get(idx).indexCondition;
/* Use the pre-processed condition only for getting actual statistic from MapR-DB APIs. Use the
 * original condition everywhere else (cache store/lookups) since the RexNode condition and its
 * corresponding QueryCondition will be used to get statistics. e.g. we convert LIKE into RANGE
 * condition to get statistics. However, statistics are always asked for LIKE and NOT the RANGE
 */
RexNode preProcIdxCondition = convertToStatsCondition(idxCondition, idx, context, scanRel, Arrays.asList(SqlKind.CAST, SqlKind.LIKE));
RelDataType newRowType;
FunctionalIndexInfo functionInfo = idx.getFunctionalInfo();
// Functional indexes use a rewritten row type; all others use the scan's own row type.
if (functionInfo.hasFunctional()) {
newRowType = FunctionalIndexHelper.rewriteFunctionalRowType(scanRel, context, functionInfo);
} else {
newRowType = scanRel.getRowType();
}
QueryCondition queryCondition = jTabGrpScan.convertToQueryCondition(convertToLogicalExpression(preProcIdxCondition, newRowType, settings, builder));
// Cap rows/size at total rows in case of issues with DB APIs
// NOTE(review): idxPayload is dereferenced without a null check, unlike ftsPayload above - confirm it cannot be null.
StatisticsPayload idxPayload = jTabGrpScan.getFirstKeyEstimatedStats(queryCondition, idx, scanRel);
double rowCount = Math.min(idxPayload.getRowCount(), ftsPayload.getRowCount());
double leadingRowCount = Math.min(idxPayload.getLeadingRowCount(), rowCount);
double avgRowSize = Math.min(idxPayload.getAvgRowSize(), ftsPayload.getAvgRowSize());
StatisticsPayload payload = new MapRDBStatisticsPayload(rowCount, leadingRowCount, avgRowSize);
addToCache(idxCondition, idx, context, payload, jTabGrpScan, scanRel, newRowType);
addBaseConditions(idxCondition, payload, false, baseConditionMap, scanRel.getRowType());
}
/* Add the row count for index conditions on all indexes. Stats are only computed for leading
 * keys but index conditions can be pushed and would be required for access path costing
 */
for (IndexDescriptor idx : idxConditionMap.keySet()) {
// Skip indexes for which conditionIndexed reports NONE.
if (IndexPlanUtils.conditionIndexed(context.getOrigMarker(), idx) == IndexPlanUtils.ConditionIndexed.NONE) {
continue;
}
Map<LogicalExpression, RexNode> leadingPrefixMap = Maps.newHashMap();
double rowCount, leadingRowCount, avgRowSize;
RexNode idxCondition = idxConditionMap.get(idx).indexCondition;
// Ignore conditions which always evaluate to true
if (idxCondition.isAlwaysTrue()) {
continue;
}
// Derive the leading-column, remainder and total filters for this index from its index/remainder conditions.
RexNode idxIncColCondition = idxConditionMap.get(idx).remainderCondition;
RexNode idxRemColCondition = IndexPlanUtils.getLeadingPrefixMap(leadingPrefixMap, idx.getIndexColumns(), infoBuilder, idxCondition);
RexNode idxLeadColCondition = IndexPlanUtils.getLeadingColumnsFilter(IndexPlanUtils.getLeadingFilters(leadingPrefixMap, idx.getIndexColumns()), builder);
RexNode idxTotRemColCondition = IndexPlanUtils.getTotalRemainderFilter(idxRemColCondition, idxIncColCondition, builder);
RexNode idxTotColCondition = IndexPlanUtils.getTotalFilter(idxLeadColCondition, idxTotRemColCondition, builder);
FunctionalIndexInfo functionInfo = idx.getFunctionalInfo();
RelDataType newRowType = scanRel.getRowType();
if (functionInfo.hasFunctional()) {
newRowType = FunctionalIndexHelper.rewriteFunctionalRowType(scanRel, context, functionInfo);
}
/* For non-covering plans we would need the index leading condition */
rowCount = ftsPayload.getRowCount() * computeSelectivity(idxLeadColCondition, idx, ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
// The leading row count is fixed here and reused for every payload cached for this index below.
leadingRowCount = rowCount;
// NOTE(review): assumes fIStatsCache already holds an entry for this index (presumably added by the
// average-row-size loop above); a missing entry would cause a NullPointerException here - confirm.
avgRowSize = fIStatsCache.get(buildUniqueIndexIdentifier(idx)).getAvgRowSize();
addToCache(idxLeadColCondition, idx, context, new MapRDBStatisticsPayload(rowCount, leadingRowCount, avgRowSize), jTabGrpScan, scanRel, newRowType);
/* For covering plans we would need the full condition */
rowCount = ftsPayload.getRowCount() * computeSelectivity(idxTotColCondition, idx, ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
addToCache(idxTotColCondition, idx, context, new MapRDBStatisticsPayload(rowCount, leadingRowCount, avgRowSize), jTabGrpScan, scanRel, newRowType);
/* For intersect plans we would need the index condition */
rowCount = ftsPayload.getRowCount() * computeSelectivity(idxCondition, idx, ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
addToCache(idxCondition, idx, context, new MapRDBStatisticsPayload(rowCount, leadingRowCount, avgRowSize), jTabGrpScan, scanRel, newRowType);
/* Add the rowCount for condition on only included columns - no leading columns here! */
if (idxIncColCondition != null) {
// Selectivity is computed with a null index, and the same value is stored as both row count and leading row count.
rowCount = ftsPayload.getRowCount() * computeSelectivity(idxIncColCondition, null, ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
addToCache(idxIncColCondition, idx, context, new MapRDBStatisticsPayload(rowCount, rowCount, avgRowSize), jTabGrpScan, scanRel, newRowType);
}
}
// Add the rowCount for the complete condition - based on table
double rowCount = ftsPayload.getRowCount() * computeSelectivity(condition, null, ftsPayload.getRowCount(), scanRel, baseConditionMap).left;
// Here, ftsLeadingKey rowcount is based on _id predicates
StatisticsPayload ftsLeadingKeyPayload = jTabGrpScan.getFirstKeyEstimatedStats(jTabGrpScan.convertToQueryCondition(convertToLogicalExpression(condition, scanRel.getRowType(), settings, builder)), null, scanRel);
addToCache(condition, null, null, new MapRDBStatisticsPayload(rowCount, ftsLeadingKeyPayload.getRowCount(), ftsPayload.getAvgRowSize()), jTabGrpScan, scanRel, scanRel.getRowType());
// Add the full table rows while we are at it - represented by <NULL> RexNode, <NULL> QueryCondition.
// No ftsLeadingKey so leadingKeyRowcount = totalRowCount
addToCache(null, null, null, new MapRDBStatisticsPayload(ftsPayload.getRowCount(), ftsPayload.getRowCount(), ftsPayload.getAvgRowSize()), jTabGrpScan, scanRel, scanRel.getRowType());
// Mark the statistics as available
statsAvailable = true;
}
use of org.apache.drill.exec.physical.base.GroupScan in project drill by apache.
the class MapRDBStatistics method initialize.
/**
 * Populates the statistics for the given condition unless the stats cache already has an
 * entry for it.
 *
 * <p>The row-key join cost factor is always refreshed from the planner settings. Statistics
 * are populated only when the scan's group scan is a {@code DbGroupScan} and the stats
 * cache has no entry for the string form of the condition.
 *
 * @param condition the filter condition to gather statistics for
 * @param scanRel   the scan the statistics relate to
 * @param context   the index plan call context
 * @return {@code true} if {@code populateStats} was invoked by this call, {@code false} otherwise
 */
public boolean initialize(RexNode condition, DrillScanRelBase scanRel, IndexCallContext context) {
  final GroupScan groupScan = IndexPlanUtils.getGroupScan(scanRel);
  rowKeyJoinBackIOFactor = PrelUtil.getPlannerSettings(scanRel.getCluster().getPlanner()).getIndexRowKeyJoinCostFactor();

  // Statistics are only gathered for DB-backed group scans.
  if (!(groupScan instanceof DbGroupScan)) {
    return false;
  }

  // An existing cache entry for this condition means there is nothing to do.
  final String cacheKey = convertRexToString(condition, scanRel.getRowType());
  if (statsCache.get(cacheKey) != null) {
    return false;
  }

  final IndexCollection secondaryIndexes = ((DbGroupScan) groupScan).getSecondaryIndexCollection(scanRel);
  populateStats(condition, secondaryIndexes, scanRel, context);
  logger.info("index_plan_info: initialize: scanRel #{} and groupScan {} got fulltable {}, statsCache: {}, fiStatsCache: {}", scanRel.getId(), System.identityHashCode(groupScan), fullTableScanPayload, statsCache, fIStatsCache);
  return true;
}
use of org.apache.drill.exec.physical.base.GroupScan in project drill by apache.
the class IcebergPluginImplementor method implement.
/**
 * Visits the project's input, converts every projected expression into a {@code SchemaPath}
 * and replaces the current group scan with a clone restricted to those columns.
 *
 * @param project the project node being implemented
 * @throws IOException declared by the overridden method
 */
@Override
public void implement(PluginProjectRel project) throws IOException {
  visitChild(project.getInput());

  DrillParseContext parseContext = new DrillParseContext(PrelUtil.getPlannerSettings(project.getCluster().getPlanner()));
  RelNode child = project.getInput();

  // Translate each projected expression to a Drill expression, then narrow it to a SchemaPath.
  List<SchemaPath> columns = project.getProjects().stream()
      .map(rex -> DrillOptiq.toDrill(parseContext, child, rex))
      .map(SchemaPath.class::cast)
      .collect(Collectors.toList());

  groupScan = groupScan.clone(columns);
}
use of org.apache.drill.exec.physical.base.GroupScan in project drill by apache.
the class IcebergPluginImplementor method canImplement.
/**
 * Decides whether the given filter can be implemented by this plugin.
 *
 * <p>The filter condition is converted to a Drill expression and then translated with
 * {@code DrillExprToIcebergTranslator}. If the translation yields no expression, the group
 * scan found for the filter is not an {@code IcebergGroupScan}, or binding the expression
 * against the table schema raises a {@code ValidationException}, the filter is rejected.
 *
 * @param filter the filter to check
 * @return {@code true} if the condition was translated and bound successfully, {@code false} otherwise
 */
@Override
public boolean canImplement(Filter filter) {
  RexNode filterCondition = filter.getCondition();
  LogicalExpression drillExpr = DrillOptiq.toDrill(new DrillParseContext(PrelUtil.getPlannerSettings(filter.getCluster().getPlanner())), filter.getInput(), filterCondition);
  Expression icebergExpr = drillExpr.accept(DrillExprToIcebergTranslator.INSTANCE, null);

  // The translator produced nothing for this condition.
  if (icebergExpr == null) {
    return false;
  }

  try {
    GroupScan found = findGroupScan(filter);
    if (!(found instanceof IcebergGroupScan)) {
      return false;
    }
    IcebergGroupScan icebergScan = (IcebergGroupScan) found;
    // ensures that expression compatible with table schema
    icebergExpr = Binder.bind(icebergScan.getTableScan().schema().asStruct(), icebergExpr, true);
  } catch (ValidationException e) {
    return false;
  }
  return icebergExpr != null;
}
Aggregations