Use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project calcite by apache.
The class DruidQuery, method deriveQuerySpec.
protected QuerySpec deriveQuerySpec() {
  final RelDataType rowType = table.getRowType();
  // rels.get(0) is the table scan; the remaining rels are consumed in a fixed order:
  // filter, project, aggregate, having-filter, post-project, sort.
  int i = 1;

  Filter filterRel = null;
  if (i < rels.size() && rels.get(i) instanceof Filter) {
    filterRel = (Filter) rels.get(i++);
  }

  Project project = null;
  if (i < rels.size() && rels.get(i) instanceof Project) {
    project = (Project) rels.get(i++);
  }

  ImmutableBitSet groupSet = null;
  List<AggregateCall> aggCalls = null;
  List<String> aggNames = null;
  if (i < rels.size() && rels.get(i) instanceof Aggregate) {
    final Aggregate aggregate = (Aggregate) rels.get(i++);
    groupSet = aggregate.getGroupSet();
    aggCalls = aggregate.getAggCallList();
    // The aggregate's output lists the group fields first; the remaining
    // field names belong to the aggregate calls.
    aggNames = Util.skip(aggregate.getRowType().getFieldNames(), groupSet.cardinality());
  }

  Filter havingFilter = null;
  if (i < rels.size() && rels.get(i) instanceof Filter) {
    havingFilter = (Filter) rels.get(i++);
  }

  Project postProject = null;
  if (i < rels.size() && rels.get(i) instanceof Project) {
    postProject = (Project) rels.get(i++);
  }

  List<Integer> collationIndexes = null;
  List<Direction> collationDirections = null;
  ImmutableBitSet.Builder numericCollationBitSetBuilder = ImmutableBitSet.builder();
  Integer fetch = null;
  if (i < rels.size() && rels.get(i) instanceof Sort) {
    final Sort sort = (Sort) rels.get(i++);
    collationIndexes = new ArrayList<>();
    collationDirections = new ArrayList<>();
    for (RelFieldCollation fCol : sort.collation.getFieldCollations()) {
      collationIndexes.add(fCol.getFieldIndex());
      collationDirections.add(fCol.getDirection());
      // Remember which sort keys are numeric, so they can be sorted
      // numerically rather than lexicographically.
      if (sort.getRowType().getFieldList().get(fCol.getFieldIndex()).getType().getFamily()
          == SqlTypeFamily.NUMERIC) {
        numericCollationBitSetBuilder.set(fCol.getFieldIndex());
      }
    }
    fetch = sort.fetch != null ? RexLiteral.intValue(sort.fetch) : null;
  }

  if (i != rels.size()) {
    throw new AssertionError("could not implement all rels");
  }

  return getQuery(rowType, filterRel, project, groupSet, aggCalls, aggNames,
      collationIndexes, collationDirections, numericCollationBitSetBuilder.build(),
      fetch, postProject, havingFilter);
}
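Since every snippet on this page centers on ImmutableBitSet, here is a minimal, self-contained sketch of the builder pattern used above (the bit indexes are invented for illustration; the class lives under org.apache.calcite.util, or the vendored Beam path in the page title):

  // Collect field indexes incrementally, then freeze them into an immutable set.
  ImmutableBitSet.Builder builder = ImmutableBitSet.builder();
  builder.set(0);
  builder.set(3);
  ImmutableBitSet bits = builder.build();          // {0, 3}
  int keyCount = bits.cardinality();               // 2, as fed to Util.skip above
  java.util.List<Integer> indexes = bits.asList(); // [0, 3]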
Use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project drill by apache.
The class StreamAggPrule, method onMatch.
@Override
public void onMatch(RelOptRuleCall call) {
  final DrillAggregateRel aggregate = call.rel(0);
  RelNode input = aggregate.getInput();
  final RelCollation collation = getCollation(aggregate);
  RelTraitSet traits;
  if (aggregate.containsDistinctCall()) {
    // Currently we don't use StreamingAggregate if any of the logical aggregates contains DISTINCT
    return;
  }
  try {
    if (aggregate.getGroupSet().isEmpty()) {
      DrillDistributionTrait singleDist = DrillDistributionTrait.SINGLETON;
      final RelTraitSet singleDistTrait =
          call.getPlanner().emptyTraitSet().plus(Prel.DRILL_PHYSICAL).plus(singleDist);
      if (create2PhasePlan(call, aggregate)) {
        traits = call.getPlanner().emptyTraitSet().plus(Prel.DRILL_PHYSICAL);
        RelNode convertedInput = convert(input, traits);
        new SubsetTransformer<DrillAggregateRel, InvalidRelException>(call) {
          @Override
          public RelNode convertChild(final DrillAggregateRel join, final RelNode rel)
              throws InvalidRelException {
            DrillDistributionTrait toDist =
                rel.getTraitSet().getTrait(DrillDistributionTraitDef.INSTANCE);
            RelTraitSet traits = newTraitSet(Prel.DRILL_PHYSICAL, toDist);
            RelNode newInput = convert(rel, traits);
            // Phase 1: partial aggregation on each fragment
            StreamAggPrel phase1Agg = new StreamAggPrel(aggregate.getCluster(), traits,
                newInput, aggregate.getGroupSet(), aggregate.getGroupSets(),
                aggregate.getAggCallList(), OperatorPhase.PHASE_1of2);
            UnionExchangePrel exch =
                new UnionExchangePrel(phase1Agg.getCluster(), singleDistTrait, phase1Agg);
            // Phase 2: final aggregation over the exchange, with group sets
            // remapped onto the phase-1 output positions
            ImmutableBitSet newGroupSet = remapGroupSet(aggregate.getGroupSet());
            List<ImmutableBitSet> newGroupSets = Lists.newArrayList();
            for (ImmutableBitSet groupSet : aggregate.getGroupSets()) {
              newGroupSets.add(remapGroupSet(groupSet));
            }
            return new StreamAggPrel(aggregate.getCluster(), singleDistTrait, exch,
                newGroupSet, newGroupSets, phase1Agg.getPhase2AggCalls(),
                OperatorPhase.PHASE_2of2);
          }
        }.go(aggregate, convertedInput);
      } else {
        createTransformRequest(call, aggregate, input, singleDistTrait);
      }
    } else {
      // Hash distribute on all grouping keys
      final DrillDistributionTrait distOnAllKeys = new DrillDistributionTrait(
          DrillDistributionTrait.DistributionType.HASH_DISTRIBUTED,
          ImmutableList.copyOf(getDistributionField(aggregate, true)));
      traits = call.getPlanner().emptyTraitSet().plus(Prel.DRILL_PHYSICAL)
          .plus(collation).plus(distOnAllKeys);
      createTransformRequest(call, aggregate, input, traits);
      // Hash distribute on one grouping key
      DrillDistributionTrait distOnOneKey = new DrillDistributionTrait(
          DrillDistributionTrait.DistributionType.HASH_DISTRIBUTED,
          ImmutableList.copyOf(getDistributionField(aggregate, false)));
      traits = call.getPlanner().emptyTraitSet().plus(Prel.DRILL_PHYSICAL)
          .plus(collation).plus(distOnOneKey);
      if (create2PhasePlan(call, aggregate)) {
        traits = call.getPlanner().emptyTraitSet().plus(Prel.DRILL_PHYSICAL);
        RelNode convertedInput = convert(input, traits);
        new SubsetTransformer<DrillAggregateRel, InvalidRelException>(call) {
          @Override
          public RelNode convertChild(final DrillAggregateRel aggregate, final RelNode rel)
              throws InvalidRelException {
            DrillDistributionTrait toDist =
                rel.getTraitSet().getTrait(DrillDistributionTraitDef.INSTANCE);
            RelTraitSet traits = newTraitSet(Prel.DRILL_PHYSICAL, collation, toDist);
            RelNode newInput = convert(rel, traits);
            StreamAggPrel phase1Agg = new StreamAggPrel(aggregate.getCluster(), traits,
                newInput, aggregate.getGroupSet(), aggregate.getGroupSets(),
                aggregate.getAggCallList(), OperatorPhase.PHASE_1of2);
            int numEndPoints = PrelUtil.getSettings(phase1Agg.getCluster()).numEndPoints();
            HashToMergeExchangePrel exch = new HashToMergeExchangePrel(
                phase1Agg.getCluster(),
                phase1Agg.getTraitSet().plus(Prel.DRILL_PHYSICAL).plus(distOnAllKeys),
                phase1Agg,
                ImmutableList.copyOf(getDistributionField(aggregate, true)),
                collation, numEndPoints);
            ImmutableBitSet newGroupSet = remapGroupSet(aggregate.getGroupSet());
            List<ImmutableBitSet> newGroupSets = Lists.newArrayList();
            for (ImmutableBitSet groupSet : aggregate.getGroupSets()) {
              newGroupSets.add(remapGroupSet(groupSet));
            }
            return new StreamAggPrel(aggregate.getCluster(), exch.getTraitSet(), exch,
                newGroupSet, newGroupSets, phase1Agg.getPhase2AggCalls(),
                OperatorPhase.PHASE_2of2);
          }
        }.go(aggregate, convertedInput);
      }
    }
  } catch (InvalidRelException e) {
    tracer.warn(e.toString());
  }
}
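Both phases above rely on remapGroupSet to rewrite the group set for the phase-2 aggregate. A minimal sketch of what such a helper typically does (an assumption about Drill's helper, not its verbatim source): after phase 1, the grouping keys occupy the leading positions of the output row, so the phase-2 group set is simply the contiguous range starting at 0.

  // Assumed behavior, for illustration only: map an arbitrary group set such as
  // {2, 5} onto the leading output positions {0, 1} consumed by the phase-2 aggregate.
  static ImmutableBitSet remapGroupSet(ImmutableBitSet groupSet) {
    return ImmutableBitSet.range(0, groupSet.cardinality());
  }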
Use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project drill by apache.
The class DrillRelMdDistinctRowCount, method getDistinctRowCountInternal.
private Double getDistinctRowCountInternal(DrillJoinRelBase joinRel, RelMetadataQuery mq,
    ImmutableBitSet groupKey, RexNode predicate) {
  if (DrillRelOptUtil.guessRows(joinRel)) {
    return super.getDistinctRowCount(joinRel, mq, groupKey, predicate);
  }
  // Assume NDV is unaffected by the join when groupKey comes from one side of the join.
  // This alleviates NDV over-estimates.
  ImmutableBitSet.Builder leftMask = ImmutableBitSet.builder();
  ImmutableBitSet.Builder rightMask = ImmutableBitSet.builder();
  JoinRelType joinType = joinRel.getJoinType();
  RelNode left = joinRel.getInputs().get(0);
  RelNode right = joinRel.getInputs().get(1);
  RelMdUtil.setLeftRightBitmaps(groupKey, leftMask, rightMask, left.getRowType().getFieldCount());
  RexNode leftPred = null;
  RexNode rightPred = null;
  // Identify predicates which can be pushed onto the left and right sides of the join
  if (predicate != null) {
    List<RexNode> leftFilters = new ArrayList<>();
    List<RexNode> rightFilters = new ArrayList<>();
    List<RexNode> joinFilters = new ArrayList<>();
    List<RexNode> predList = RelOptUtil.conjunctions(predicate);
    RelOptUtil.classifyFilters(joinRel, predList, joinType, joinType == JoinRelType.INNER,
        !joinType.generatesNullsOnLeft(), !joinType.generatesNullsOnRight(),
        joinFilters, leftFilters, rightFilters);
    RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder();
    leftPred = RexUtil.composeConjunction(rexBuilder, leftFilters, true);
    rightPred = RexUtil.composeConjunction(rexBuilder, rightFilters, true);
  }
  double distRowCount = 1;
  int gbyCols = 0;
  PlannerSettings plannerSettings = PrelUtil.getPlannerSettings(joinRel.getCluster().getPlanner());
  /*
   * The NDV for a multi-column GBY key past a join is determined as follows:
   *   GBY(s1, s2, s3) = CNDV(s1) * CNDV(s2) * CNDV(s3)
   * where CNDV is determined as follows:
   *   A) If sX is present as a join column (sX = tX), CNDV(sX) = MIN(NDV(sX), NDV(tX)),
   *      where X = 1, 2, 3, etc.
   *   B) Otherwise, based on the independence assumption, CNDV(sX) = NDV(sX).
   */
  Set<ImmutableBitSet> joinFiltersSet = new HashSet<>();
  for (RexNode filter : RelOptUtil.conjunctions(joinRel.getCondition())) {
    final RelOptUtil.InputFinder inputFinder = RelOptUtil.InputFinder.analyze(filter);
    joinFiltersSet.add(inputFinder.inputBitSet.build());
  }
  for (int idx = 0; idx < groupKey.length(); idx++) {
    if (groupKey.get(idx)) {
      // GBY key is present - now try options A) and B) as described above
      double ndvSGby = Double.MAX_VALUE;
      Double ndv;
      boolean presentInFilter = false;
      ImmutableBitSet sGby = getSingleGbyKey(groupKey, idx);
      if (sGby != null) {
        // If we see any null NDV (i.e. we cannot process it), we bail out.
        for (ImmutableBitSet jFilter : joinFiltersSet) {
          if (jFilter.contains(sGby)) {
            presentInFilter = true;
            // Found a join condition containing this GBY key.
            // Pick the min NDV across all columns in this join condition.
            for (int fidx : jFilter) {
              if (fidx < left.getRowType().getFieldCount()) {
                ndv = mq.getDistinctRowCount(left, ImmutableBitSet.of(fidx), leftPred);
                if (ndv == null) {
                  return super.getDistinctRowCount(joinRel, mq, groupKey, predicate);
                }
                ndvSGby = Math.min(ndvSGby, ndv);
              } else {
                ndv = mq.getDistinctRowCount(right,
                    ImmutableBitSet.of(fidx - left.getRowType().getFieldCount()), rightPred);
                if (ndv == null) {
                  return super.getDistinctRowCount(joinRel, mq, groupKey, predicate);
                }
                ndvSGby = Math.min(ndvSGby, ndv);
              }
            }
            break;
          }
        }
        // Did not find it in any join condition(s)
        if (!presentInFilter) {
          for (int sidx : sGby) {
            if (sidx < left.getRowType().getFieldCount()) {
              ndv = mq.getDistinctRowCount(left, ImmutableBitSet.of(sidx), leftPred);
              if (ndv == null) {
                return super.getDistinctRowCount(joinRel, mq, groupKey, predicate);
              }
              ndvSGby = ndv;
            } else {
              ndv = mq.getDistinctRowCount(right,
                  ImmutableBitSet.of(sidx - left.getRowType().getFieldCount()), rightPred);
              if (ndv == null) {
                return super.getDistinctRowCount(joinRel, mq, groupKey, predicate);
              }
              ndvSGby = ndv;
            }
          }
        }
        ++gbyCols;
        // Multiply NDVs of the different GBY columns to determine the overall NDV
        distRowCount *= ndvSGby;
      }
    }
  }
  if (gbyCols > 1) {
    // Scale with the multi-column NDV factor if more than one GBY column was found
    distRowCount *= plannerSettings.getStatisticsMultiColNdvAdjustmentFactor();
  }
  double joinRowCount = mq.getRowCount(joinRel);
  // Cap NDV to the join row count
  distRowCount = Math.min(distRowCount, joinRowCount);
  return RelMdUtil.numDistinctVals(distRowCount, joinRowCount);
}
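A quick numeric illustration of the CNDV rule documented in the comment above (all numbers invented):

  // Group key (s1, s2): s1 appears in the join condition s1 = t1, s2 does not.
  double ndvS1 = 100, ndvT1 = 40, ndvS2 = 10;
  double cndvS1 = Math.min(ndvS1, ndvT1); // option A: 40
  double cndvS2 = ndvS2;                  // option B: 10
  double distRowCount = cndvS1 * cndvS2;  // 400, then scaled by the multi-column
                                          // factor and capped by the join row count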
Use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project drill by apache.
The class AggPrelBase, method prepareForLateralUnnestPipeline.
@Override
public Prel prepareForLateralUnnestPipeline(List<RelNode> children) {
  // Add a new grouping key at index 0 and shift the existing keys right by one,
  // making room for the extra column the lateral/unnest pipeline prepends.
  List<Integer> groupingCols = Lists.newArrayList();
  groupingCols.add(0);
  for (int groupingCol : groupSet.asList()) {
    groupingCols.add(groupingCol + 1);
  }
  ImmutableBitSet groupingSet = ImmutableBitSet.of(groupingCols);
  List<ImmutableBitSet> groupingSets = Lists.newArrayList();
  groupingSets.add(groupingSet);
  List<AggregateCall> aggregateCalls = Lists.newArrayList();
  for (AggregateCall aggCall : aggCalls) {
    // Shift the aggregate-call argument indexes by one as well.
    List<Integer> arglist = Lists.newArrayList();
    for (int arg : aggCall.getArgList()) {
      arglist.add(arg + 1);
    }
    aggregateCalls.add(AggregateCall.create(aggCall.getAggregation(), aggCall.isDistinct(),
        aggCall.isApproximate(), arglist, aggCall.filterArg, aggCall.type, aggCall.name));
  }
  return (Prel) copy(traitSet, children.get(0), indicator, groupingSet, groupingSets, aggregateCalls);
}
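The effect of the two "+ 1" shifts above, in isolation (indexes invented): an aggregate that grouped on fields {1, 3} is rewritten to group on {0, 2, 4}, where 0 is the prepended column.

  // Illustration only: shift a group set right by one and prepend index 0.
  ImmutableBitSet original = ImmutableBitSet.of(1, 3);
  List<Integer> shifted = Lists.newArrayList(0);
  for (int bit : original) {
    shifted.add(bit + 1);
  }
  ImmutableBitSet remapped = ImmutableBitSet.of(shifted); // {0, 2, 4}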
Use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project flink by apache.
The class HiveParserCalcitePlanner, method genUDTFPlan.
private RelNode genUDTFPlan(SqlOperator sqlOperator, String genericUDTFName,
    String outputTableAlias, List<String> colAliases, HiveParserQB qb, List<RexNode> operands,
    List<ColumnInfo> opColInfos, RelNode input, boolean inSelect, boolean isOuter)
    throws SemanticException {
  Preconditions.checkState(!isOuter || !inSelect, "OUTER is not supported for SELECT UDTF");
  // No GROUP BY / DISTRIBUTE BY / SORT BY / CLUSTER BY
  HiveParserQBParseInfo qbp = qb.getParseInfo();
  if (inSelect && !qbp.getDestToGroupBy().isEmpty()) {
    throw new SemanticException(ErrorMsg.UDTF_NO_GROUP_BY.getMsg());
  }
  if (inSelect && !qbp.getDestToDistributeBy().isEmpty()) {
    throw new SemanticException(ErrorMsg.UDTF_NO_DISTRIBUTE_BY.getMsg());
  }
  if (inSelect && !qbp.getDestToSortBy().isEmpty()) {
    throw new SemanticException(ErrorMsg.UDTF_NO_SORT_BY.getMsg());
  }
  if (inSelect && !qbp.getDestToClusterBy().isEmpty()) {
    throw new SemanticException(ErrorMsg.UDTF_NO_CLUSTER_BY.getMsg());
  }
  if (inSelect && !qbp.getAliasToLateralViews().isEmpty()) {
    throw new SemanticException(ErrorMsg.UDTF_LATERAL_VIEW.getMsg());
  }
  LOG.debug("Table alias: " + outputTableAlias + " Col aliases: " + colAliases);
  // Create the object inspector for the input columns and initialize the UDTF
  RelDataType relDataType =
      HiveParserUtils.inferReturnTypeForOperands(sqlOperator, operands, cluster.getTypeFactory());
  DataType dataType = HiveParserUtils.toDataType(relDataType);
  StructObjectInspector outputOI = (StructObjectInspector) HiveInspectors.getObjectInspector(
      HiveTypeUtil.toHiveTypeInfo(dataType, false));
  // This should only happen for a SELECT UDTF
  if (outputTableAlias == null) {
    Preconditions.checkState(inSelect, "Table alias not specified for lateral view");
    String prefix = "select_" + genericUDTFName + "_alias_";
    int i = 0;
    while (qb.getAliases().contains(prefix + i)) {
      i++;
    }
    outputTableAlias = prefix + i;
  }
  if (colAliases.isEmpty()) {
    // The user did not specify alias names; infer them from outputOI
    for (StructField field : outputOI.getAllStructFieldRefs()) {
      colAliases.add(field.getFieldName());
    }
  }
  // Make sure that the number of column aliases in the AS clause matches the number of
  // columns output by the UDTF
  int numOutputCols = outputOI.getAllStructFieldRefs().size();
  int numSuppliedAliases = colAliases.size();
  if (numOutputCols != numSuppliedAliases) {
    throw new SemanticException(ErrorMsg.UDTF_ALIAS_MISMATCH.getMsg(
        "expected " + numOutputCols + " aliases but got " + numSuppliedAliases));
  }
  // Generate the output column infos / row resolver using internal names
  ArrayList<ColumnInfo> udtfOutputCols = new ArrayList<>();
  Iterator<String> colAliasesIter = colAliases.iterator();
  for (StructField sf : outputOI.getAllStructFieldRefs()) {
    String colAlias = colAliasesIter.next();
    assert (colAlias != null);
    // Since the UDTF operator feeds into an LVJ operator that will rename all the internal
    // names, we can just use the field name from the UDTF's OI as the internal name
    ColumnInfo col = new ColumnInfo(sf.getFieldName(),
        TypeInfoUtils.getTypeInfoFromObjectInspector(sf.getFieldObjectInspector()),
        outputTableAlias, false);
    udtfOutputCols.add(col);
  }
  // Create the row resolver for the table function scan
  HiveParserRowResolver udtfOutRR = new HiveParserRowResolver();
  for (int i = 0; i < udtfOutputCols.size(); i++) {
    udtfOutRR.put(outputTableAlias, colAliases.get(i), udtfOutputCols.get(i));
  }
  // Build the row type from field <type, name>
  RelDataType retType = HiveParserTypeConverter.getType(cluster, udtfOutRR, null);
  List<RelDataType> argTypes = new ArrayList<>();
  RelDataTypeFactory dtFactory = cluster.getRexBuilder().getTypeFactory();
  for (ColumnInfo ci : opColInfos) {
    argTypes.add(HiveParserUtils.toRelDataType(ci.getType(), dtFactory));
  }
  SqlOperator calciteOp =
      HiveParserSqlFunctionConverter.getCalciteFn(genericUDTFName, argTypes, retType, false);
  RexNode rexNode = cluster.getRexBuilder().makeCall(calciteOp, operands);
  // Convert the rex call
  TableFunctionConverter udtfConverter = new TableFunctionConverter(cluster, input,
      frameworkConfig.getOperatorTable(), catalogReader.nameMatcher());
  RexCall convertedCall = (RexCall) rexNode.accept(udtfConverter);
  SqlOperator convertedOperator = convertedCall.getOperator();
  Preconditions.checkState(convertedOperator instanceof SqlUserDefinedTableFunction,
      "Expect operator to be " + SqlUserDefinedTableFunction.class.getSimpleName()
          + ", actually got " + convertedOperator.getClass().getSimpleName());
  // TODO: how to decide this?
  Type elementType = Object[].class;
  // Create the LogicalTableFunctionScan
  RelNode tableFunctionScan = LogicalTableFunctionScan.create(input.getCluster(),
      Collections.emptyList(), convertedCall, elementType, retType, null);
  // Remember the table alias for the UDTF so that we can reference the cols later
  qb.addAlias(outputTableAlias);
  RelNode correlRel;
  RexBuilder rexBuilder = cluster.getRexBuilder();
  // Find correlation in the converted call
  Pair<List<CorrelationId>, ImmutableBitSet> correlUse = getCorrelationUse(convertedCall);
  // Create the correlate node
  if (correlUse == null) {
    // No correlation: a plain join with a literal TRUE condition suffices
    correlRel = plannerContext
        .createRelBuilder(catalogManager.getCurrentCatalog(), catalogManager.getCurrentDatabase())
        .push(input)
        .push(tableFunctionScan)
        .join(isOuter ? JoinRelType.LEFT : JoinRelType.INNER, rexBuilder.makeLiteral(true))
        .build();
  } else {
    if (correlUse.left.size() > 1) {
      tableFunctionScan = DeduplicateCorrelateVariables.go(
          rexBuilder, correlUse.left.get(0), Util.skip(correlUse.left), tableFunctionScan);
    }
    correlRel = LogicalCorrelate.create(input, tableFunctionScan, correlUse.left.get(0),
        correlUse.right, isOuter ? JoinRelType.LEFT : JoinRelType.INNER);
  }
  // Add the new rel & its RR to the maps
  relToHiveColNameCalcitePosMap.put(tableFunctionScan, buildHiveToCalciteColumnMap(udtfOutRR));
  relToRowResolver.put(tableFunctionScan, udtfOutRR);
  HiveParserRowResolver correlRR = HiveParserRowResolver.getCombinedRR(
      relToRowResolver.get(input), relToRowResolver.get(tableFunctionScan));
  relToHiveColNameCalcitePosMap.put(correlRel, buildHiveToCalciteColumnMap(correlRR));
  relToRowResolver.put(correlRel, correlRR);
  if (!inSelect) {
    return correlRel;
  }
  // Create the project node that keeps only the UDTF output columns
  List<RexNode> projects = new ArrayList<>();
  HiveParserRowResolver projectRR = new HiveParserRowResolver();
  int j = 0;
  for (int i = input.getRowType().getFieldCount(); i < correlRel.getRowType().getFieldCount(); i++) {
    projects.add(cluster.getRexBuilder().makeInputRef(correlRel, i));
    ColumnInfo inputColInfo = correlRR.getRowSchema().getSignature().get(i);
    String colAlias = inputColInfo.getAlias();
    ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(j++),
        inputColInfo.getObjectInspector(), null, false);
    projectRR.put(null, colAlias, colInfo);
  }
  RelNode projectNode = LogicalProject.create(correlRel, Collections.emptyList(), projects,
      tableFunctionScan.getRowType());
  relToHiveColNameCalcitePosMap.put(projectNode, buildHiveToCalciteColumnMap(projectRR));
  relToRowResolver.put(projectNode, projectRR);
  return projectNode;
}
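For orientation, the ImmutableBitSet in correlUse.right above is the requiredColumns argument of LogicalCorrelate.create: it marks which fields of the left input the UDTF call reads through the correlation variable. A tiny hypothetical example (indexes invented):

  // If the converted UDTF call referenced left-input fields 0 and 2 via the
  // correlation variable, getCorrelationUse would report:
  ImmutableBitSet requiredColumns = ImmutableBitSet.of(0, 2);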