use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rex.RexInputRef in project drill by apache.
the class DrillProjectPushIntoLateralJoinRule method onMatch.
// Rewrites Project(LateralJoin) into a lateral join that is constructed with its boolean
// constructor flag set to true, re-adding a project on top only when the original project is
// not trivial. The rule bails out when star columns are involved, when the join's
// excludeCorrelateColumn flag is already set, or when the project still references the
// correlate column.
public void onMatch(RelOptRuleCall call) {
  final DrillProjectRel project = call.rel(0);
  final DrillLateralJoinRel lateralJoin = call.rel(1);

  // Star columns cannot be remapped, and a join that already excludes the correlate column
  // has nothing left to rewrite.
  final boolean hasStarColumn = StarColumnHelper.containsStarColumn(project.getRowType())
      || StarColumnHelper.containsStarColumn(lateralJoin.getRowType());
  if (hasStarColumn || lateralJoin.excludeCorrelateColumn) {
    return;
  }

  // Gather every input reference used by the project expressions.
  final DrillRelOptUtil.InputRefVisitor refVisitor = new DrillRelOptUtil.InputRefVisitor();
  for (RexNode projectExpr : project.getChildExps()) {
    projectExpr.accept(refVisitor);
  }

  // If the project reads the correlate column, the column must stay in the join output.
  final int correlateColumn = lateralJoin.getRequiredColumns().nextSetBit(0);
  for (RexInputRef ref : refVisitor.getInputRefs()) {
    if (correlateColumn == ref.getIndex()) {
      return;
    }
  }

  final RelNode leftInput = lateralJoin.getLeft();
  final RelNode rightInput = lateralJoin.getRight();
  final RelNode logicalLeft =
      convert(leftInput, leftInput.getTraitSet().plus(DrillRel.DRILL_LOGICAL).simplify());
  final RelNode logicalRight =
      convert(rightInput, rightInput.getTraitSet().plus(DrillRel.DRILL_LOGICAL).simplify());

  final RelTraitSet joinTraits = lateralJoin.getTraitSet().plus(DrillRel.DRILL_LOGICAL);
  final boolean trivialProject = DrillRelOptUtil.isTrivialProject(project, true);
  final RelNode rewrittenJoin = new DrillLateralJoinRel(
      lateralJoin.getCluster(),
      joinTraits,
      logicalLeft,
      logicalRight,
      true,
      lateralJoin.getCorrelationId(),
      lateralJoin.getRequiredColumns(),
      lateralJoin.getJoinType());

  if (trivialProject) {
    // A trivial project adds nothing on top of the rewritten join.
    call.transformTo(rewrittenJoin);
    return;
  }

  // Re-target the project expressions at the join output that no longer has the correlate column.
  final Map<Integer, Integer> indexMapping = buildMapWithoutCorrColumn(lateralJoin, correlateColumn);
  final List<RexNode> remappedExprs = DrillRelOptUtil.transformExprs(
      project.getCluster().getRexBuilder(), project.getChildExps(), indexMapping);
  final RelNode rewrittenProject = new DrillProjectRel(
      project.getCluster(),
      leftInput.getTraitSet().plus(DrillRel.DRILL_LOGICAL),
      rewrittenJoin,
      remappedExprs,
      project.getRowType());
  call.transformTo(rewrittenProject);
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rex.RexInputRef in project drill by apache.
the class DrillPushRowKeyJoinToScanRule method isRowKeyColumn.
/* Determines whether the column at position `index` in the row type of `rel` refers to the
 * rowkey (primary-key) column of the scan underneath it.
 * The method walks down the operator tree until it reaches a DrillScanRel, translating the
 * column position at every step: projections may reorder incoming columns, and
 * HepRelVertex/RelSubset wrappers have to be unwrapped before their rel can be inspected.
 * Returns false when the scan cannot be reached, when an expression (rather than a plain
 * input reference) sits on the path, or when the scan has no restricted group scan.
 */
private static boolean isRowKeyColumn(int index, RelNode rel) {
  int position = index;
  RelNode node = rel;
  while (node != null && !(node instanceof DrillScanRel)) {
    logger.debug("IsRowKeyColumn: Rel={}, RowTypePos={}, RowType={}", node.toString(), position, node.getRowType().toString());
    if (node instanceof HepRelVertex) {
      node = ((HepRelVertex) node).getCurrentRel();
    } else if (node instanceof RelSubset) {
      // Prefer the best plan of the subset; fall back to the original rel when none is set.
      final RelSubset subset = (RelSubset) node;
      node = subset.getBest() != null ? subset.getBest() : subset.getOriginal();
    } else {
      // For a rel with several inputs, find the input that owns the column and turn the
      // position into a 0-based index within that input before descending into it.
      RelNode owningInput = null;
      for (RelNode input : node.getInputs()) {
        final int inputWidth = input.getRowType().getFieldList().size();
        if (position < inputWidth) {
          owningInput = input;
          break;
        }
        position -= inputWidth;
      }
      node = owningInput;
    }
    // A projection without expressions leaves the position untouched; otherwise the position
    // becomes the index of the `RexInputRef` found at that position.
    if (node instanceof DrillProjectRel) {
      final List<RexNode> projectExprs = node.getChildExps();
      if (projectExprs != null && !projectExprs.isEmpty()) {
        final RexNode projected = projectExprs.get(position);
        if (!(projected instanceof RexInputRef)) {
          // Expressions on the rowkey column are not supported, so give up here.
          logger.debug("IsRowKeyColumn: ABORT: Primary-key EXPR$={}", projected.toString());
          return false;
        }
        position = ((RexInputRef) projected).getIndex();
      }
    }
  }
  logger.debug("IsRowKeyColumn:Primary-key Col={} ", node != null ? node.getRowType().getFieldNames().get(position) : "??");
  // Compare the referenced column name with the primary-key column name reported by the scan.
  if (node instanceof DrillScanRel) {
    final DrillScanRel scanRel = (DrillScanRel) node;
    if (scanRel.getGroupScan() instanceof DbGroupScan) {
      final DbGroupScan dbGroupScan = (DbGroupScan) scanRel.getGroupScan();
      final String rowKeyName = dbGroupScan.getRowKeyName();
      // The scan must also be able to provide a restricted group scan (random seeks).
      final DbGroupScan restrictedScan = dbGroupScan.getRestrictedScan(scanRel.getColumns());
      if (restrictedScan != null && node.getRowType().getFieldNames().get(position).equalsIgnoreCase(rowKeyName)) {
        logger.debug("IsRowKeyColumn: FOUND: Rel={}, RowTypePos={}, RowType={}", node.toString(), position, node.getRowType().toString());
        return true;
      }
    }
  }
  logger.debug("IsRowKeyColumn: NOT FOUND");
  return false;
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rex.RexInputRef in project drill by apache.
the class ElasticsearchProjectRule method convert.
// Converts a Project into the `out` convention. Returns null when the input tree already
// contains an ElasticsearchProject. When the project mixes input references with other
// expressions, an inner project that forwards every input field is created in the `out`
// convention and the original project is copied on top of it.
@Override
public RelNode convert(RelNode relNode) {
  final Project project = (Project) relNode;
  final RelNode input = project.getInput();

  final NodeTypeFinder existingProjectFinder = new NodeTypeFinder(ElasticsearchProject.class);
  input.accept(existingProjectFinder);
  if (existingProjectFinder.containsNode) {
    // Calcite adapter allows only a single Elasticsearch project per tree
    return null;
  }

  final RelTraitSet convertedTraits = project.getTraitSet().replace(out);

  // Find out whether the project reads anything from its input or consists of literals only.
  final DrillRelOptUtil.InputRefVisitor refVisitor = new DrillRelOptUtil.InputRefVisitor();
  for (RexNode expr : project.getChildExps()) {
    expr.accept(refVisitor);
  }
  final boolean readsInput = !refVisitor.getInputRefs().isEmpty();

  // One input reference per input field, used for the inner pass-through project.
  final List<RexNode> passThroughRefs = new ArrayList<>();
  if (readsInput) {
    final RelDataType inputRowType = input.getRowType();
    for (RelDataTypeField field : inputRowType.getFieldList()) {
      passThroughRefs.add(project.getCluster().getRexBuilder().makeInputRef(input, field.getIndex()));
    }
  }

  final boolean onlyInputRefs = project.getChildExps().stream().allMatch(RexInputRef.class::isInstance);
  if (!readsInput || onlyInputRefs) {
    // Literal-only or reference-only projects are converted as a whole.
    return CalciteUtils.createProject(convertedTraits, convert(input, out), project.getProjects(), project.getRowType());
  }

  final Project elasticsearchProject =
      CalciteUtils.createProject(convertedTraits, convert(input, out), passThroughRefs, input.getRowType());
  return project.copy(project.getTraitSet(), elasticsearchProject, project.getProjects(), project.getRowType());
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rex.RexInputRef in project drill by apache.
the class ConvertCountToDirectScanRule method collectCounts.
/**
 * Computes the value of every aggregate call from the metadata summary.
 * An empty map is returned as soon as the count of any single aggregate call cannot be determined.
 *
 * Only the COUNT function is supported. For each call:
 * 1. The total row count is taken from the metadata summary.
 * 2. COUNT(*), COUNT(<non null column>) and COUNT(<implicit column>) yield the total row count.
 * 3. COUNT(<nullable column>) yields (total row count - column's null count).
 * 4. Counts can not be calculated for partition columns.
 * 5. Columns that are not present in the summary (non-existent columns) yield 0.
 *
 * @param settings planner options
 * @param metadataSummary metadata summary containing row counts and column counts
 * @param agg aggregate relational expression
 * @param scan scan relational expression
 * @param project project relational expression, may be null
 * @return result map where key is count column name, value is count value
 */
private Map<String, Long> collectCounts(PlannerSettings settings, Metadata_V4.MetadataSummary metadataSummary, Aggregate agg, TableScan scan, Project project) {
  final Set<String> implicitColumnsNames = ColumnExplorer.initImplicitFileColumns(settings.getOptions()).keySet();
  final long totalRecordCount = metadataSummary.getTotalRowCount();
  final Map<String, Long> counts = new LinkedHashMap<>();

  int ordinal = 0;
  for (AggregateCall aggCall : agg.getAggCallList()) {
    // Anything other than COUNT disables the rule.
    if (!"count".equalsIgnoreCase(aggCall.getAggregation().getName())) {
      return ImmutableMap.of();
    }

    final long value;
    if (CountToDirectScanUtils.containsStarOrNotNullInput(aggCall, agg)) {
      value = totalRecordCount;
    } else {
      // Only the single-argument form count(columnName) can be answered from column metadata.
      if (aggCall.getArgList().size() != 1) {
        return ImmutableMap.of();
      }
      int fieldIndex = aggCall.getArgList().get(0);
      if (project != null) {
        // With a project between the aggregate and the scan, the argument can be traced back
        // to a scan field only when the projected expression is a plain input reference.
        if (!(project.getProjects().get(fieldIndex) instanceof RexInputRef)) {
          return ImmutableMap.of();
        }
        fieldIndex = ((RexInputRef) project.getProjects().get(fieldIndex)).getIndex();
      }
      final String columnName = scan.getRowType().getFieldNames().get(fieldIndex).toLowerCase();

      if (implicitColumnsNames.contains(columnName)) {
        // An implicit column has a value in every row.
        value = totalRecordCount;
      } else {
        final SchemaPath simplePath = SchemaPath.getSimplePath(columnName);
        if (ColumnExplorer.isPartitionColumn(settings.getOptions(), simplePath)) {
          return ImmutableMap.of();
        }
        final Metadata_V4.ColumnTypeMetadata_v4 columnMetadata =
            metadataSummary.getColumnTypeInfo(new Metadata_V4.ColumnTypeMetadata_v4.Key(simplePath));
        if (columnMetadata == null) {
          // The column does not exist in the table, so its count is 0.
          value = 0;
        } else if (columnMetadata.totalNullCount == Statistic.NO_COLUMN_STATS) {
          // Without column statistics the rule cannot be applied.
          return ImmutableMap.of();
        } else {
          // Nullable column: every row that is not null is counted.
          value = totalRecordCount - columnMetadata.totalNullCount;
        }
      }
    }

    final String callLabel = aggCall.getName() != null ? aggCall.getName() : aggCall.toString();
    counts.put("count" + ordinal + "$" + callLabel, value);
    ordinal++;
  }
  return ImmutableMap.copyOf(counts);
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rex.RexInputRef in project drill by apache.
the class ConvertCountToDirectScanPrule method collectCounts.
/**
 * Computes the value of every aggregate call.
 * An empty map is returned as soon as the count of any single aggregate call cannot be determined.
 *
 * Only the COUNT function is supported. Star, not null expressions and implicit columns yield
 * the total record number; every other column count is obtained from the group scan operator.
 * Counts can not be calculated for partition columns.
 *
 * @param settings planner options
 * @param agg aggregate relational expression
 * @param scan scan relational expression
 * @param project project relational expression, may be null
 * @return result map where key is count column name, value is count value
 */
private Map<String, Long> collectCounts(PlannerSettings settings, DrillAggregateRel agg, DrillScanRel scan, DrillProjectRel project) {
  final Set<String> implicitColumnsNames = ColumnExplorer.initImplicitFileColumns(settings.getOptions()).keySet();
  final GroupScan oldGrpScan = scan.getGroupScan();
  final long totalRecordCount = (long) oldGrpScan.getScanStats(settings).getRecordCount();
  final Map<String, Long> counts = new LinkedHashMap<>();

  int ordinal = 0;
  for (AggregateCall aggCall : agg.getAggCallList()) {
    // Anything other than COUNT disables the rule.
    if (!"count".equalsIgnoreCase(aggCall.getAggregation().getName())) {
      return ImmutableMap.of();
    }

    final long value;
    if (CountToDirectScanUtils.containsStarOrNotNullInput(aggCall, agg)) {
      value = totalRecordCount;
    } else {
      // Only the single-argument form count(columnName) can be answered by the group scan.
      if (aggCall.getArgList().size() != 1) {
        return ImmutableMap.of();
      }
      int fieldIndex = aggCall.getArgList().get(0);
      if (project != null) {
        // With a project between the aggregate and the scan, the argument can be traced back
        // to a scan field only when the projected expression is a plain input reference.
        if (!(project.getProjects().get(fieldIndex) instanceof RexInputRef)) {
          return ImmutableMap.of();
        }
        fieldIndex = ((RexInputRef) project.getProjects().get(fieldIndex)).getIndex();
      }
      final String columnName = scan.getRowType().getFieldNames().get(fieldIndex).toLowerCase();

      if (implicitColumnsNames.contains(columnName)) {
        // For an implicit column the count will be the same as the total record count.
        value = totalRecordCount;
      } else {
        final SchemaPath simplePath = SchemaPath.getSimplePath(columnName);
        if (ColumnExplorer.isPartitionColumn(settings.getOptions(), simplePath)) {
          return ImmutableMap.of();
        }
        final long columnValueCount = oldGrpScan.getColumnValueCount(simplePath);
        if (columnValueCount == Statistic.NO_COLUMN_STATS) {
          // Without column statistics the rule cannot be applied.
          return ImmutableMap.of();
        }
        value = columnValueCount;
      }
    }

    final String callLabel = aggCall.getName() != null ? aggCall.getName() : aggCall.toString();
    counts.put("count" + ordinal + "$" + callLabel, value);
    ordinal++;
  }
  return ImmutableMap.copyOf(counts);
}
Aggregations