use of org.apache.drill.exec.planner.logical.DrillScanRel in project drill by apache.
the class ParquetPruneScanRule method getFilterOnProjectParquet.
public static final RelOptRule getFilterOnProjectParquet(OptimizerRulesContext optimizerRulesContext) {
return new PruneScanRule(RelOptHelper.some(DrillFilterRel.class, RelOptHelper.some(DrillProjectRel.class, RelOptHelper.any(DrillScanRel.class))), "PruneScanRule:Filter_On_Project_Parquet", optimizerRulesContext) {
@Override
public PartitionDescriptor getPartitionDescriptor(PlannerSettings settings, TableScan scanRel) {
return new ParquetPartitionDescriptor(settings, (DrillScanRel) scanRel);
}
@Override
public boolean matches(RelOptRuleCall call) {
final DrillScanRel scan = call.rel(2);
GroupScan groupScan = scan.getGroupScan();
// this rule is applicable only for parquet based partition pruning
if (PrelUtil.getPlannerSettings(scan.getCluster().getPlanner()).isHepPartitionPruningEnabled()) {
return groupScan instanceof ParquetGroupScan && groupScan.supportsPartitionFilterPushdown() && !scan.partitionFilterPushdown();
} else {
return groupScan instanceof ParquetGroupScan && groupScan.supportsPartitionFilterPushdown();
}
}
@Override
public void onMatch(RelOptRuleCall call) {
final DrillFilterRel filterRel = call.rel(0);
final DrillProjectRel projectRel = call.rel(1);
final DrillScanRel scanRel = call.rel(2);
doOnMatch(call, filterRel, projectRel, scanRel);
}
};
}
use of org.apache.drill.exec.planner.logical.DrillScanRel in project drill by apache.
the class ConvertCountToDirectScan method onMatch.
@Override
public void onMatch(RelOptRuleCall call) {
final DrillAggregateRel agg = (DrillAggregateRel) call.rel(0);
final DrillScanRel scan = (DrillScanRel) call.rel(call.rels.length - 1);
final DrillProjectRel proj = call.rels.length == 3 ? (DrillProjectRel) call.rel(1) : null;
final GroupScan oldGrpScan = scan.getGroupScan();
final PlannerSettings settings = PrelUtil.getPlannerSettings(call.getPlanner());
// 4) No distinct agg call.
if (!(oldGrpScan.getScanStats(settings).getGroupScanProperty().hasExactRowCount() && agg.getGroupCount() == 0 && agg.getAggCallList().size() == 1 && !agg.containsDistinctCall())) {
return;
}
AggregateCall aggCall = agg.getAggCallList().get(0);
if (aggCall.getAggregation().getName().equals("COUNT")) {
long cnt = 0;
// count(Not-null-input) ==> rowCount
if (aggCall.getArgList().isEmpty() || (aggCall.getArgList().size() == 1 && !agg.getInput().getRowType().getFieldList().get(aggCall.getArgList().get(0).intValue()).getType().isNullable())) {
cnt = (long) oldGrpScan.getScanStats(settings).getRecordCount();
} else if (aggCall.getArgList().size() == 1) {
// count(columnName) ==> Agg ( Scan )) ==> columnValueCount
int index = aggCall.getArgList().get(0);
if (proj != null) {
if (proj.getProjects().get(index) instanceof RexInputRef) {
index = ((RexInputRef) proj.getProjects().get(index)).getIndex();
} else {
// do not apply for all other cases.
return;
}
}
String columnName = scan.getRowType().getFieldNames().get(index).toLowerCase();
cnt = oldGrpScan.getColumnValueCount(SchemaPath.getSimplePath(columnName));
if (cnt == GroupScan.NO_COLUMN_STATS) {
// if column stats are not available don't apply this rule
return;
}
} else {
// do nothing.
return;
}
RelDataType scanRowType = getCountDirectScanRowType(agg.getCluster().getTypeFactory());
final ScanPrel newScan = ScanPrel.create(scan, scan.getTraitSet().plus(Prel.DRILL_PHYSICAL).plus(DrillDistributionTrait.SINGLETON), getCountDirectScan(cnt), scanRowType);
List<RexNode> exprs = Lists.newArrayList();
exprs.add(RexInputRef.of(0, scanRowType));
final ProjectPrel newProj = new ProjectPrel(agg.getCluster(), agg.getTraitSet().plus(Prel.DRILL_PHYSICAL).plus(DrillDistributionTrait.SINGLETON), newScan, exprs, agg.getRowType());
call.transformTo(newProj);
}
}
use of org.apache.drill.exec.planner.logical.DrillScanRel in project drill by apache.
the class ParquetPartitionDescriptor method createTableScan.
@Override
public TableScan createTableScan(List<PartitionLocation> newPartitionLocation, String cacheFileRoot, boolean wasAllPartitionsPruned, MetadataContext metaContext) throws Exception {
List<String> newFiles = Lists.newArrayList();
for (final PartitionLocation location : newPartitionLocation) {
newFiles.add(location.getEntirePartitionLocation());
}
final GroupScan newGroupScan = createNewGroupScan(newFiles, cacheFileRoot, wasAllPartitionsPruned, metaContext);
return new DrillScanRel(scanRel.getCluster(), scanRel.getTraitSet().plus(DrillRel.DRILL_LOGICAL), scanRel.getTable(), newGroupScan, scanRel.getRowType(), scanRel.getColumns(), true);
}
use of org.apache.drill.exec.planner.logical.DrillScanRel in project drill by apache.
the class FileSystemPartitionDescriptor method getFileLocationsAndStatus.
protected Pair<Collection<String>, Boolean> getFileLocationsAndStatus() {
Collection<String> fileLocations = null;
Pair<Collection<String>, Boolean> fileLocationsAndStatus = null;
boolean isExpandedPartial = false;
if (scanRel instanceof DrillScanRel) {
// If a particular GroupScan provides files, get the list of files from there rather than
// DrillTable because GroupScan would have the updated version of the selection
final DrillScanRel drillScan = (DrillScanRel) scanRel;
if (drillScan.getGroupScan().hasFiles()) {
fileLocations = drillScan.getGroupScan().getFiles();
isExpandedPartial = false;
} else {
FileSelection selection = ((FormatSelection) table.getSelection()).getSelection();
fileLocations = selection.getFiles();
isExpandedPartial = selection.isExpandedPartial();
}
} else if (scanRel instanceof EnumerableTableScan) {
FileSelection selection = ((FormatSelection) table.getSelection()).getSelection();
fileLocations = selection.getFiles();
isExpandedPartial = selection.isExpandedPartial();
}
fileLocationsAndStatus = Pair.of(fileLocations, isExpandedPartial);
return fileLocationsAndStatus;
}
use of org.apache.drill.exec.planner.logical.DrillScanRel in project drill by apache.
the class ConvertHiveParquetScanToDrillParquetScan method createNativeScanRel.
/**
* Helper method which creates a DrillScalRel with native HiveScan.
*/
private DrillScanRel createNativeScanRel(final Map<String, String> partitionColMapping, final DrillScanRel hiveScanRel) throws Exception {
final RelDataTypeFactory typeFactory = hiveScanRel.getCluster().getTypeFactory();
final RelDataType varCharType = typeFactory.createSqlType(SqlTypeName.VARCHAR);
final List<String> nativeScanColNames = Lists.newArrayList();
final List<RelDataType> nativeScanColTypes = Lists.newArrayList();
for (RelDataTypeField field : hiveScanRel.getRowType().getFieldList()) {
final String dirColName = partitionColMapping.get(field.getName());
if (dirColName != null) {
// partition column
nativeScanColNames.add(dirColName);
nativeScanColTypes.add(varCharType);
} else {
nativeScanColNames.add(field.getName());
nativeScanColTypes.add(field.getType());
}
}
final RelDataType nativeScanRowType = typeFactory.createStructType(nativeScanColTypes, nativeScanColNames);
// Create the list of projected columns set in HiveScan. The order of this list may not be same as the order of
// columns in HiveScan row type. Note: If the HiveScan.getColumn() contains a '*', we just need to add it as it is,
// unlike above where we expanded the '*'. HiveScan and related (subscan) can handle '*'.
final List<SchemaPath> nativeScanCols = Lists.newArrayList();
for (SchemaPath colName : hiveScanRel.getColumns()) {
final String partitionCol = partitionColMapping.get(colName.getAsUnescapedPath());
if (partitionCol != null) {
nativeScanCols.add(SchemaPath.getSimplePath(partitionCol));
} else {
nativeScanCols.add(colName);
}
}
final HiveScan hiveScan = (HiveScan) hiveScanRel.getGroupScan();
final HiveDrillNativeParquetScan nativeHiveScan = new HiveDrillNativeParquetScan(hiveScan.getUserName(), hiveScan.hiveReadEntry, hiveScan.storagePlugin, nativeScanCols, null);
return new DrillScanRel(hiveScanRel.getCluster(), hiveScanRel.getTraitSet(), hiveScanRel.getTable(), nativeHiveScan, nativeScanRowType, nativeScanCols);
}
Aggregations