use of org.apache.drill.exec.physical.base.GroupScan in project drill by apache.
the class ParquetPartitionDescriptor method createTableScan.
@Override
public TableScan createTableScan(List<PartitionLocation> newPartitionLocation, String cacheFileRoot, boolean wasAllPartitionsPruned, MetadataContext metaContext) throws Exception {
List<String> newFiles = Lists.newArrayList();
for (final PartitionLocation location : newPartitionLocation) {
newFiles.add(location.getEntirePartitionLocation());
}
final GroupScan newGroupScan = createNewGroupScan(newFiles, cacheFileRoot, wasAllPartitionsPruned, metaContext);
return new DrillScanRel(scanRel.getCluster(), scanRel.getTraitSet().plus(DrillRel.DRILL_LOGICAL), scanRel.getTable(), newGroupScan, scanRel.getRowType(), scanRel.getColumns(), true);
}
use of org.apache.drill.exec.physical.base.GroupScan in project drill by axbaretto.
the class HivePushPartitionFilterIntoScan method getFilterOnProject.
public static final StoragePluginOptimizerRule getFilterOnProject(OptimizerRulesContext optimizerRulesContext, final String defaultPartitionValue) {
return new PruneScanRule(RelOptHelper.some(DrillFilterRel.class, RelOptHelper.some(DrillProjectRel.class, RelOptHelper.any(DrillScanRel.class))), "HivePushPartitionFilterIntoScan:Filter_On_Project_Hive", optimizerRulesContext) {
@Override
public PartitionDescriptor getPartitionDescriptor(PlannerSettings settings, TableScan scanRel) {
return new HivePartitionDescriptor(settings, (DrillScanRel) scanRel, getOptimizerRulesContext().getManagedBuffer(), defaultPartitionValue);
}
@Override
public boolean matches(RelOptRuleCall call) {
final DrillScanRel scan = (DrillScanRel) call.rel(2);
GroupScan groupScan = scan.getGroupScan();
// this rule is applicable only for Hive based partition pruning
if (PrelUtil.getPlannerSettings(scan.getCluster().getPlanner()).isHepPartitionPruningEnabled()) {
return groupScan instanceof HiveScan && groupScan.supportsPartitionFilterPushdown() && !scan.partitionFilterPushdown();
} else {
return groupScan instanceof HiveScan && groupScan.supportsPartitionFilterPushdown();
}
}
@Override
public void onMatch(RelOptRuleCall call) {
final DrillFilterRel filterRel = call.rel(0);
final DrillProjectRel projectRel = call.rel(1);
final DrillScanRel scanRel = call.rel(2);
doOnMatch(call, filterRel, projectRel, scanRel);
}
};
}
use of org.apache.drill.exec.physical.base.GroupScan in project drill by axbaretto.
the class HivePushPartitionFilterIntoScan method getFilterOnScan.
public static final StoragePluginOptimizerRule getFilterOnScan(OptimizerRulesContext optimizerRulesContext, final String defaultPartitionValue) {
return new PruneScanRule(RelOptHelper.some(DrillFilterRel.class, RelOptHelper.any(DrillScanRel.class)), "HivePushPartitionFilterIntoScan:Filter_On_Scan_Hive", optimizerRulesContext) {
@Override
public PartitionDescriptor getPartitionDescriptor(PlannerSettings settings, TableScan scanRel) {
return new HivePartitionDescriptor(settings, (DrillScanRel) scanRel, getOptimizerRulesContext().getManagedBuffer(), defaultPartitionValue);
}
@Override
public boolean matches(RelOptRuleCall call) {
final DrillScanRel scan = (DrillScanRel) call.rel(1);
GroupScan groupScan = scan.getGroupScan();
// this rule is applicable only for Hive based partition pruning
if (PrelUtil.getPlannerSettings(scan.getCluster().getPlanner()).isHepPartitionPruningEnabled()) {
return groupScan instanceof HiveScan && groupScan.supportsPartitionFilterPushdown() && !scan.partitionFilterPushdown();
} else {
return groupScan instanceof HiveScan && groupScan.supportsPartitionFilterPushdown();
}
}
@Override
public void onMatch(RelOptRuleCall call) {
final DrillFilterRel filterRel = call.rel(0);
final DrillScanRel scanRel = call.rel(1);
doOnMatch(call, filterRel, null, scanRel);
}
};
}
use of org.apache.drill.exec.physical.base.GroupScan in project drill by axbaretto.
the class ParquetPruneScanRule method getFilterOnProjectParquet.
public static final RelOptRule getFilterOnProjectParquet(OptimizerRulesContext optimizerRulesContext) {
return new PruneScanRule(RelOptHelper.some(DrillFilterRel.class, RelOptHelper.some(DrillProjectRel.class, RelOptHelper.any(DrillScanRel.class))), "PruneScanRule:Filter_On_Project_Parquet", optimizerRulesContext) {
@Override
public PartitionDescriptor getPartitionDescriptor(PlannerSettings settings, TableScan scanRel) {
return new ParquetPartitionDescriptor(settings, (DrillScanRel) scanRel);
}
@Override
public boolean matches(RelOptRuleCall call) {
final DrillScanRel scan = call.rel(2);
GroupScan groupScan = scan.getGroupScan();
// this rule is applicable only for parquet based partition pruning
if (PrelUtil.getPlannerSettings(scan.getCluster().getPlanner()).isHepPartitionPruningEnabled()) {
return groupScan instanceof ParquetGroupScan && groupScan.supportsPartitionFilterPushdown() && !scan.partitionFilterPushdown();
} else {
return groupScan instanceof ParquetGroupScan && groupScan.supportsPartitionFilterPushdown();
}
}
@Override
public void onMatch(RelOptRuleCall call) {
final DrillFilterRel filterRel = call.rel(0);
final DrillProjectRel projectRel = call.rel(1);
final DrillScanRel scanRel = call.rel(2);
doOnMatch(call, filterRel, projectRel, scanRel);
}
};
}
use of org.apache.drill.exec.physical.base.GroupScan in project drill by axbaretto.
the class ConvertCountToDirectScan method collectCounts.
/**
* Collects counts for each aggregation call.
* Will return empty result map if was not able to determine count for at least one aggregation call,
*
* For each aggregate call will determine if count can be calculated. Collects counts only for COUNT function.
* For star, not null expressions and implicit columns sets count to total record number.
* For other cases obtains counts from group scan operator. Also count can not be calculated for parition columns.
*
* @param agg aggregate relational expression
* @param scan scan relational expression
* @param project project relational expression
* @return result map where key is count column name, value is count value
*/
private Map<String, Long> collectCounts(PlannerSettings settings, DrillAggregateRel agg, DrillScanRel scan, DrillProjectRel project) {
final Set<String> implicitColumnsNames = ColumnExplorer.initImplicitFileColumns(settings.getOptions()).keySet();
final GroupScan oldGrpScan = scan.getGroupScan();
final long totalRecordCount = oldGrpScan.getScanStats(settings).getRecordCount();
final LinkedHashMap<String, Long> result = new LinkedHashMap<>();
for (int i = 0; i < agg.getAggCallList().size(); i++) {
AggregateCall aggCall = agg.getAggCallList().get(i);
// for (AggregateCall aggCall : agg.getAggCallList()) {
long cnt;
// rule can be applied only for count function, return empty counts
if (!"count".equalsIgnoreCase(aggCall.getAggregation().getName())) {
return ImmutableMap.of();
}
if (containsStarOrNotNullInput(aggCall, agg)) {
cnt = totalRecordCount;
} else if (aggCall.getArgList().size() == 1) {
// count(columnName) ==> Agg ( Scan )) ==> columnValueCount
int index = aggCall.getArgList().get(0);
if (project != null) {
// return count of "col2" in Scan's metadata, if found.
if (!(project.getProjects().get(index) instanceof RexInputRef)) {
// do not apply for all other cases.
return ImmutableMap.of();
}
index = ((RexInputRef) project.getProjects().get(index)).getIndex();
}
String columnName = scan.getRowType().getFieldNames().get(index).toLowerCase();
// for implicit column count will the same as total record count
if (implicitColumnsNames.contains(columnName)) {
cnt = totalRecordCount;
} else {
SchemaPath simplePath = SchemaPath.getSimplePath(columnName);
if (ColumnExplorer.isPartitionColumn(settings.getOptions(), simplePath)) {
return ImmutableMap.of();
}
cnt = oldGrpScan.getColumnValueCount(simplePath);
if (cnt == GroupScan.NO_COLUMN_STATS) {
// if column stats is not available don't apply this rule, return empty counts
return ImmutableMap.of();
}
}
} else {
return ImmutableMap.of();
}
String name = "count" + i + "$" + (aggCall.getName() == null ? aggCall.toString() : aggCall.getName());
result.put(name, cnt);
}
return ImmutableMap.copyOf(result);
}
Aggregations