Search in sources :

Example 81 with ImmutableBitSet

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project hive by apache.

the class HiveRelFieldTrimmer method trimFields.

/**
 * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
 * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin}.
 *
 * <p>The fields requested by the consumer are extended with the fields referenced
 * by the join condition. Every input is then trimmed to the slice of those fields
 * that falls inside its own column range, and the per-input mappings are stitched
 * together into a single mapping over the join's row type. That mapping is used to
 * rewrite the join condition and the join filters of the new multi-join.
 *
 * @param join        the multi-join to trim
 * @param fieldsUsed  fields of the join output required by the consumer
 * @param extraFields extra fields requested by the consumer
 * @return the (possibly unchanged) join together with the mapping from its old
 *         fields to its new fields
 */
public TrimResult trimFields(HiveMultiJoin join, ImmutableBitSet fieldsUsed, Set<RelDataTypeField> extraFields) {
    final int fieldCount = join.getRowType().getFieldCount();
    final RexNode conditionExpr = join.getCondition();
    final List<RexNode> joinFilters = join.getJoinFilters();

    // The join condition may reference fields the consumer did not ask for.
    final Set<RelDataTypeField> conditionExtraFields = new LinkedHashSet<>(extraFields);
    final RelOptUtil.InputFinder conditionInputs = new RelOptUtil.InputFinder(conditionExtraFields, fieldsUsed);
    conditionExpr.accept(conditionInputs);
    final ImmutableBitSet fieldsUsedPlus = conditionInputs.build();

    // Trim every input, remembering its mapping and whether it was replaced.
    final List<RelNode> newInputs = new ArrayList<>();
    final List<Mapping> inputMappings = new ArrayList<>();
    boolean anyInputReplaced = false;
    int newFieldCount = 0;
    int inputStartPos = 0;
    for (RelNode input : join.getInputs()) {
        final int inputFieldCount = input.getRowType().getFieldCount();

        // Translate the join-level field indexes that belong to this input
        // into indexes local to the input.
        final ImmutableBitSet.Builder localFieldsUsed = ImmutableBitSet.builder();
        for (int joinFieldIndex : fieldsUsedPlus) {
            final boolean belongsToInput =
                joinFieldIndex >= inputStartPos && joinFieldIndex < inputStartPos + inputFieldCount;
            if (belongsToInput) {
                localFieldsUsed.set(joinFieldIndex - inputStartPos);
            }
        }

        final Set<RelDataTypeField> noExtraFields = Collections.<RelDataTypeField>emptySet();
        final TrimResult trimmed = trimChild(join, input, localFieldsUsed.build(), noExtraFields);
        final RelNode trimmedInput = trimmed.left;
        final Mapping trimmedMapping = trimmed.right;
        newInputs.add(trimmedInput);
        inputMappings.add(trimmedMapping);
        if (trimmedInput != input) {
            anyInputReplaced = true;
        }

        // Advance to the first column of the next input.
        inputStartPos += inputFieldCount;
        newFieldCount += trimmedMapping.getTargetCount();
    }

    // Concatenate the per-input mappings into one mapping over the whole join row.
    final Mapping mapping = Mappings.create(MappingType.INVERSE_SURJECTION, fieldCount, newFieldCount);
    int sourceBase = 0;
    int targetBase = 0;
    for (Mapping inputMapping : inputMappings) {
        for (IntPair pair : inputMapping) {
            mapping.set(sourceBase + pair.source, targetBase + pair.target);
        }
        sourceBase += inputMapping.getSourceCount();
        targetBase += inputMapping.getTargetCount();
    }

    // Nothing was trimmed anywhere: hand back the original join.
    if (!anyInputReplaced && mapping.isIdentity()) {
        return new TrimResult(join, Mappings.createIdentity(fieldCount));
    }

    // Build the new join, re-pointing condition and filters at the trimmed inputs.
    final RelNode[] newInputArray = newInputs.toArray(new RelNode[newInputs.size()]);
    final RexVisitor<RexNode> shuttle = new RexPermuteInputsShuttle(mapping, newInputArray);
    final RexNode newConditionExpr = conditionExpr.accept(shuttle);
    final List<RexNode> newJoinFilters = new ArrayList<>();
    for (RexNode joinFilter : joinFilters) {
        newJoinFilters.add(joinFilter.accept(shuttle));
    }
    final RelDataType newRowType =
        RelOptUtil.permute(join.getCluster().getTypeFactory(), join.getRowType(), mapping);
    final RelNode newJoin = new HiveMultiJoin(join.getCluster(), newInputs, newConditionExpr, newRowType,
        join.getJoinInputs(), join.getJoinTypes(), newJoinFilters);
    return new TrimResult(newJoin, mapping);
}
Also used : LinkedHashSet(java.util.LinkedHashSet) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) RelOptUtil(org.apache.calcite.plan.RelOptUtil) ArrayList(java.util.ArrayList) Mapping(org.apache.calcite.util.mapping.Mapping) RelDataType(org.apache.calcite.rel.type.RelDataType) IntPair(org.apache.calcite.util.mapping.IntPair) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RelNode(org.apache.calcite.rel.RelNode) RexPermuteInputsShuttle(org.apache.calcite.rex.RexPermuteInputsShuttle) RexNode(org.apache.calcite.rex.RexNode)

Example 82 with ImmutableBitSet

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project hive by apache.

the class HiveRelFieldTrimmer method trimFields.

/**
 * Trims the fields of an {@link Aggregate}.
 *
 * <p>Output layout of an aggregate:
 *
 * <pre>| sys fields | group fields | indicator fields | agg functions |</pre>
 *
 * <p>Aggregate calls whose output is not used by the consumer are dropped, and the
 * input is trimmed to the columns needed by the group set and the aggregate
 * calls. Group keys are kept unless {@code generateNewGroupset} removes them.
 *
 * @param aggregate   the aggregate to trim
 * @param fieldsUsed  fields of the aggregate output required by the consumer
 * @param extraFields extra fields requested by the consumer (not read here)
 * @return the trimmed aggregate and the mapping from old to new output fields
 */
@Override
public TrimResult trimFields(Aggregate aggregate, ImmutableBitSet fieldsUsed, Set<RelDataTypeField> extraFields) {
    // Collect the input columns read by the aggregate calls. This is done before
    // looking at the group set because rewriteGBConstantKeys needs the result.
    final ImmutableBitSet.Builder aggCallInputs = ImmutableBitSet.builder();
    for (AggregateCall call : aggregate.getAggCallList()) {
        for (int argIndex : call.getArgList()) {
            aggCallInputs.set(argIndex);
        }
        if (call.filterArg >= 0) {
            aggCallInputs.set(call.filterArg);
        }
    }
    final ImmutableBitSet aggCallFieldsUsed = aggCallInputs.build();

    // Transform the aggregate if the group by contains constant keys.
    aggregate = rewriteGBConstantKeys(aggregate, fieldsUsed, aggCallFieldsUsed);

    // Input columns required = group keys + aggregate call arguments.
    final ImmutableBitSet.Builder inputFieldsUsed = aggregate.getGroupSet().rebuild();
    inputFieldsUsed.addAll(aggCallFieldsUsed);
    final RelDataType rowType = aggregate.getRowType();

    // Create input with trimmed columns.
    final RelNode input = aggregate.getInput();
    final Set<RelDataTypeField> noExtraFields = Collections.emptySet();
    final TrimResult trimmedChild = trimChild(aggregate, input, inputFieldsUsed.build(), noExtraFields);
    final RelNode newInput = trimmedChild.left;
    final Mapping inputMapping = trimmedChild.right;

    final ImmutableBitSet originalGroupSet = aggregate.getGroupSet();
    final ImmutableBitSet updatedGroupSet = generateNewGroupset(aggregate, fieldsUsed);
    final ImmutableBitSet gbKeysDeleted = originalGroupSet.except(updatedGroupSet);
    final int updatedGroupCount = updatedGroupSet.cardinality();

    // Start from "every group field is used" and clear the bits
    // corresponding to deleted group-by keys.
    ImmutableBitSet updatedGroupFields = ImmutableBitSet.range(originalGroupSet.cardinality());
    for (int deletedKey : gbKeysDeleted) {
        updatedGroupFields = updatedGroupFields.clear(deletedKey);
    }
    fieldsUsed = fieldsUsed.union(updatedGroupFields);

    // If the input is unchanged and every output field is needed,
    // there's nothing to do.
    final int outputFieldCount = rowType.getFieldCount();
    if (input == newInput && fieldsUsed.equals(ImmutableBitSet.range(outputFieldCount))) {
        return result(aggregate, Mappings.createIdentity(outputFieldCount));
    }

    // Update the group by keys based on inputMapping.
    final ImmutableBitSet newGroupSet = Mappings.apply(inputMapping, updatedGroupSet);

    // Which agg calls are used by our consumer? Their output positions start
    // right after the original group fields.
    final int originalGroupCount = originalGroupSet.cardinality();
    final int aggCallCount = aggregate.getAggCallList().size();
    int usedAggCallCount = 0;
    for (int outputPos = originalGroupCount; outputPos < originalGroupCount + aggCallCount; outputPos++) {
        if (fieldsUsed.get(outputPos)) {
            usedAggCallCount++;
        }
    }

    final Mapping mapping = Mappings.create(MappingType.INVERSE_SURJECTION, outputFieldCount,
        updatedGroupCount + usedAggCallCount);

    // If group keys were reduced, it means we didn't have grouping sets, therefore
    // there is no need to transform them; otherwise remap every grouping set.
    final ImmutableList<ImmutableBitSet> newGroupSets;
    if (updatedGroupSet.equals(originalGroupSet)) {
        newGroupSets = ImmutableList.copyOf(
            Iterables.transform(aggregate.getGroupSets(), groupSet -> Mappings.apply(inputMapping, groupSet)));
    } else {
        newGroupSets = ImmutableList.of(newGroupSet);
    }

    // Populate mapping of where to find the surviving group key fields.
    int nextGroupTarget = 0;
    for (int groupPos = 0; groupPos < originalGroupCount; groupPos++) {
        if (fieldsUsed.get(groupPos)) {
            mapping.set(groupPos, nextGroupTarget++);
        }
    }

    // Now create new agg calls, and populate mapping for them.
    final RelBuilder relBuilder = REL_BUILDER.get();
    relBuilder.push(newInput);
    final List<RelBuilder.AggCall> newAggCallList = new ArrayList<>();
    final List<AggregateCall> oldAggCalls = aggregate.getAggCallList();
    for (int callIdx = 0; callIdx < oldAggCalls.size(); callIdx++) {
        // Lookup in fieldsUsed is done using the original group count.
        final int oldOutputPos = originalGroupCount + callIdx;
        if (!fieldsUsed.get(oldOutputPos)) {
            continue;
        }
        final AggregateCall oldCall = oldAggCalls.get(callIdx);
        final ImmutableList<RexNode> args =
            relBuilder.fields(Mappings.apply2(inputMapping, oldCall.getArgList()));
        RexNode filterArg = null;
        if (oldCall.filterArg >= 0) {
            filterArg = relBuilder.field(Mappings.apply(inputMapping, oldCall.filterArg));
        }
        final RelBuilder.AggCall newCall = relBuilder.aggregateCall(oldCall.getAggregation(),
            oldCall.isDistinct(), oldCall.isApproximate(), filterArg, oldCall.name, args);
        mapping.set(oldOutputPos, updatedGroupCount + newAggCallList.size());
        newAggCallList.add(newCall);
    }

    final RelBuilder.GroupKey groupKey = relBuilder.groupKey(newGroupSet, newGroupSets);
    relBuilder.aggregate(groupKey, newAggCallList);
    return result(relBuilder.build(), mapping);
}
Also used : RelBuilder(org.apache.calcite.tools.RelBuilder) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ArrayList(java.util.ArrayList) RelDataType(org.apache.calcite.rel.type.RelDataType) Mapping(org.apache.calcite.util.mapping.Mapping) AggregateCall(org.apache.calcite.rel.core.AggregateCall) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RelNode(org.apache.calcite.rel.RelNode) RexNode(org.apache.calcite.rex.RexNode)

Example 83 with ImmutableBitSet

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project hive by apache.

the class HiveRelFieldTrimmer method trimFields.

/**
 * Trims a {@link HiveTableScan} by delegating to the superclass, and additionally
 * records which columns were accessed and, if enabled, fetches column statistics.
 *
 * @param tableAccessRel the table scan to trim
 * @param fieldsUsed     fields of the scan required by the consumer
 * @param extraFields    extra fields requested by the consumer
 * @return the result produced by the superclass trimming
 */
public TrimResult trimFields(HiveTableScan tableAccessRel, ImmutableBitSet fieldsUsed, Set<RelDataTypeField> extraFields) {
    final TrimResult result = super.trimFields(tableAccessRel, fieldsUsed, extraFields);

    final ColumnAccessInfo columnAccessInfo = COLUMN_ACCESS_INFO.get();
    if (columnAccessInfo != null) {
        // Remember the columns accessed on this table so that only this
        // information needs to be sent for column masking.
        final RelOptHiveTable hiveTable = (RelOptHiveTable) tableAccessRel.getTable();
        final String qualifiedName = hiveTable.getHiveTableMD().getCompleteName();
        final List<FieldSchema> allCols = hiveTable.getHiveTableMD().getAllCols();
        final boolean insideView = tableAccessRel.isInsideView();
        for (int columnIndex : fieldsUsed) {
            // Indexes at or beyond the non-virtual column count are not recorded.
            if (columnIndex >= hiveTable.getNoOfNonVirtualCols()) {
                continue;
            }
            final String columnName = allCols.get(columnIndex).getName();
            if (insideView) {
                columnAccessInfo.addIndirect(qualifiedName, columnName);
            } else {
                columnAccessInfo.add(qualifiedName, columnName);
            }
        }
    }

    if (fetchStats) {
        fetchColStats(result.getKey(), tableAccessRel, fieldsUsed, extraFields);
    }
    return result;
}
Also used : ColumnAccessInfo(org.apache.hadoop.hive.ql.parse.ColumnAccessInfo) Mappings(org.apache.calcite.util.mapping.Mappings) MappingType(org.apache.calcite.util.mapping.MappingType) LoggerFactory(org.slf4j.LoggerFactory) IntPair(org.apache.calcite.util.mapping.IntPair) HiveProject(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject) HiveTableScan(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan) RexUtil(org.apache.calcite.rex.RexUtil) CorrelationId(org.apache.calcite.rel.core.CorrelationId) RexNode(org.apache.calcite.rex.RexNode) RelBuilder(org.apache.calcite.tools.RelBuilder) Map(java.util.Map) HiveAggregate(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate) HiveSortExchange(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) TableFunctionScan(org.apache.calcite.rel.core.TableFunctionScan) SqlKind(org.apache.calcite.sql.SqlKind) RexLiteral(org.apache.calcite.rex.RexLiteral) Set(java.util.Set) RelFieldCollation(org.apache.calcite.rel.RelFieldCollation) List(java.util.List) RelMetadataQuery(org.apache.calcite.rel.metadata.RelMetadataQuery) RelCollation(org.apache.calcite.rel.RelCollation) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RexCorrelVariable(org.apache.calcite.rex.RexCorrelVariable) RexTableInputRef(org.apache.calcite.rex.RexTableInputRef) RexCall(org.apache.calcite.rex.RexCall) Project(org.apache.calcite.rel.core.Project) TableScan(org.apache.calcite.rel.core.TableScan) Iterables(com.google.common.collect.Iterables) RexFieldAccess(org.apache.calcite.rex.RexFieldAccess) HiveTableFunctionScan(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan) HashMap(java.util.HashMap) Ord(org.apache.calcite.linq4j.Ord) RelOptUtil(org.apache.calcite.plan.RelOptUtil) ArrayList(java.util.ArrayList) RelOptTable(org.apache.calcite.plan.RelOptTable) Lists(com.google.common.collect.Lists) 
ImmutableList(com.google.common.collect.ImmutableList) Pair(org.apache.calcite.util.Pair) Mapping(org.apache.calcite.util.mapping.Mapping) RexPermuteInputsShuttle(org.apache.calcite.rex.RexPermuteInputsShuttle) DruidQuery(org.apache.calcite.adapter.druid.DruidQuery) LinkedHashSet(java.util.LinkedHashSet) RelDataType(org.apache.calcite.rel.type.RelDataType) Logger(org.slf4j.Logger) RexBuilder(org.apache.calcite.rex.RexBuilder) RelOptHiveTable(org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable) Table(org.apache.hadoop.hive.ql.metadata.Table) RelNode(org.apache.calcite.rel.RelNode) Aggregate(org.apache.calcite.rel.core.Aggregate) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) HiveCalciteUtil(org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil) RexVisitor(org.apache.calcite.rex.RexVisitor) HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) RelDistribution(org.apache.calcite.rel.RelDistribution) AggregateCall(org.apache.calcite.rel.core.AggregateCall) CorrelationReferenceFinder(org.apache.calcite.sql2rel.CorrelationReferenceFinder) Collections(java.util.Collections) RelOptHiveTable(org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ColumnAccessInfo(org.apache.hadoop.hive.ql.parse.ColumnAccessInfo)

Example 84 with ImmutableBitSet

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project hive by apache.

the class HiveJoinConstraintsRule method onMatch.

/**
 * Matches a {@link Project} on top of a {@link Join} and tries to remove or
 * transform the join when {@code HiveRelOptUtil.isRewritablePKFKJoin} reports
 * that it can be rewritten for the chosen FK / non-FK sides.
 *
 * <p>Steps:
 * <ol>
 *   <li>Decide which join input is the potential FK side from the columns the
 *       project references. For inner and semi joins that reference both sides,
 *       first try to re-express the project through the equi-join columns so that
 *       it references one side only.</li>
 *   <li>Ask {@code isRewritablePKFKJoin} whether the join can be rewritten with
 *       that choice of sides, and call {@code rewrite} if so.</li>
 *   <li>Otherwise, for inner joins only, retry with the sides swapped; the rewrite
 *       is then performed only if every join conjunct is an equality, the join
 *       columns on the swapped FK side are not nullable, and those columns are
 *       unique on that side.</li>
 * </ol>
 *
 * @param call the rule call; {@code rel(0)} is the project, {@code rel(1)} the join
 */
@Override
public void onMatch(RelOptRuleCall call) {
    final Project project = call.rel(0);
    List<RexNode> topProjExprs = project.getProjects();
    Join join = call.rel(1);
    final JoinRelType joinType = join.getJoinType();
    final RelNode leftInput = join.getLeft();
    final RelNode rightInput = join.getRight();
    final RexNode cond = join.getCondition();
    // Anti joins are not handled yet.
    // TODO:https://issues.apache.org/jira/browse/HIVE-23920
    if (joinType == JoinRelType.ANTI) {
        return;
    }
    // 1) If it is an inner, check whether project only uses columns from one side.
    // That side will need to be the FK side.
    // If it is a left outer, left will be the FK side.
    // If it is a right outer, right will be the FK side.
    final RelNode fkInput;
    final RelNode nonFkInput;
    final ImmutableBitSet topRefs = RelOptUtil.InputFinder.bits(topProjExprs, null);
    // Join output columns coming from the left input: [0, leftFieldCount).
    final ImmutableBitSet leftBits = ImmutableBitSet.range(leftInput.getRowType().getFieldCount());
    // Join output columns coming from the right input: [leftFieldCount, joinFieldCount).
    final ImmutableBitSet rightBits = ImmutableBitSet.range(leftInput.getRowType().getFieldCount(), join.getRowType().getFieldCount());
    // These boolean values represent corresponding left, right input which is potential FK
    boolean leftInputPotentialFK = topRefs.intersects(leftBits);
    boolean rightInputPotentialFK = topRefs.intersects(rightBits);
    if (leftInputPotentialFK && rightInputPotentialFK && (joinType == JoinRelType.INNER || joinType == JoinRelType.SEMI)) {
        // Both inputs are referenced. Before making a decision, try to swap
        // references in join condition if it is an inner join, i.e. if a join
        // condition column is referenced above the join, then we can just
        // reference the column from the other side.
        // For example, given two relations R(a1,a2), S(b1) :
        // SELECT a2, b1 FROM R, S ON R.a1=S.b1 =>
        // SELECT a2, a1 FROM R, S ON R.a1=S.b1
        int joinFieldCount = join.getRowType().getFieldCount();
        // mappingLR rewrites left references to their right equivalents,
        // mappingRL rewrites right references to their left equivalents.
        Mapping mappingLR = Mappings.create(MappingType.PARTIAL_FUNCTION, joinFieldCount, joinFieldCount);
        Mapping mappingRL = Mappings.create(MappingType.PARTIAL_FUNCTION, joinFieldCount, joinFieldCount);
        for (RexNode conj : RelOptUtil.conjunctions(cond)) {
            if (!conj.isA(SqlKind.EQUALS)) {
                continue;
            }
            RexCall eq = (RexCall) conj;
            RexNode op1 = eq.getOperands().get(0);
            RexNode op2 = eq.getOperands().get(1);
            if (op1 instanceof RexInputRef && op2 instanceof RexInputRef) {
                // Check references
                int ref1 = ((RexInputRef) op1).getIndex();
                int ref2 = ((RexInputRef) op2).getIndex();
                int leftRef = -1;
                int rightRef = -1;
                if (leftBits.get(ref1) && rightBits.get(ref2)) {
                    leftRef = ref1;
                    rightRef = ref2;
                } else if (rightBits.get(ref1) && leftBits.get(ref2)) {
                    leftRef = ref2;
                    rightRef = ref1;
                }
                if (leftRef != -1 && rightRef != -1) {
                    // Keep only the first mapping found for each source column;
                    // adding more than one mapping per source is useless
                    if (mappingLR.getTargetOpt(leftRef) == -1) {
                        mappingLR.set(leftRef, rightRef);
                    }
                    if (mappingRL.getTargetOpt(rightRef) == -1) {
                        mappingRL.set(rightRef, leftRef);
                    }
                }
            }
        }
        if (mappingLR.size() != 0) {
            // First insert missing elements into the mapping as identity mappings
            for (int i = 0; i < joinFieldCount; i++) {
                if (mappingLR.getTargetOpt(i) == -1) {
                    mappingLR.set(i, i);
                }
                if (mappingRL.getTargetOpt(i) == -1) {
                    mappingRL.set(i, i);
                }
            }
            // Then, we start by trying to reference only left side in top projections
            List<RexNode> swappedTopProjExprs = topProjExprs.stream().map(projExpr -> projExpr.accept(new RexPermuteInputsShuttle(mappingRL, call.rel(1)))).collect(Collectors.toList());
            rightInputPotentialFK = RelOptUtil.InputFinder.bits(swappedTopProjExprs, null).intersects(rightBits);
            if (!rightInputPotentialFK) {
                topProjExprs = swappedTopProjExprs;
            } else {
                // If it did not work, we try to reference only right side in top projections
                swappedTopProjExprs = topProjExprs.stream().map(projExpr -> projExpr.accept(new RexPermuteInputsShuttle(mappingLR, call.rel(1)))).collect(Collectors.toList());
                leftInputPotentialFK = RelOptUtil.InputFinder.bits(swappedTopProjExprs, null).intersects(leftBits);
                if (!leftInputPotentialFK) {
                    topProjExprs = swappedTopProjExprs;
                }
            }
        }
    } else if (!leftInputPotentialFK && !rightInputPotentialFK) {
        // TODO: There are no references in the project operator above.
        // In this case, we should probably do two passes, one for
        // left as FK and one for right as FK, although it may be expensive.
        // Currently we only assume left as FK
        leftInputPotentialFK = true;
    }
    final Mode mode;
    switch(joinType) {
        case SEMI:
        case INNER:
            // case ANTI: //TODO:https://issues.apache.org/jira/browse/HIVE-23920
            if (leftInputPotentialFK && rightInputPotentialFK) {
                // Both inputs are still referenced by the project, so neither
                // side can be dropped and there is nothing to transform
                return;
            }
            fkInput = leftInputPotentialFK ? leftInput : rightInput;
            nonFkInput = leftInputPotentialFK ? rightInput : leftInput;
            mode = Mode.REMOVE;
            break;
        case LEFT:
            fkInput = leftInput;
            nonFkInput = rightInput;
            // Remove the join only when the project reads nothing from the right side.
            mode = leftInputPotentialFK && !rightInputPotentialFK ? Mode.REMOVE : Mode.TRANSFORM;
            break;
        case RIGHT:
            fkInput = rightInput;
            nonFkInput = leftInput;
            // Remove the join only when the project reads nothing from the left side.
            mode = !leftInputPotentialFK && rightInputPotentialFK ? Mode.REMOVE : Mode.TRANSFORM;
            break;
        default:
            // Other type, bail out
            return;
    }
    // 2) Check whether this join can be rewritten or removed
    RewritablePKFKJoinInfo r = HiveRelOptUtil.isRewritablePKFKJoin(join, fkInput, nonFkInput, call.getMetadataQuery());
    // 3) If it is the only condition, we can trigger the rewriting
    if (r.rewritable) {
        rewrite(mode, fkInput, nonFkInput, join, topProjExprs, call, project, r.nullableNodes);
    } else {
        // The swapped-sides rewriting below is only attempted for inner joins.
        // Possibly this could be enhanced to take other join type into consideration.
        if (joinType != JoinRelType.INNER) {
            return;
        }
        // first swap fk and non-fk input and see if we can rewrite them
        RewritablePKFKJoinInfo fkRemoval = HiveRelOptUtil.isRewritablePKFKJoin(join, nonFkInput, fkInput, call.getMetadataQuery());
        if (fkRemoval.rewritable) {
            // we have established that nonFkInput is FK, and fkInput is PK
            // and there is no row filtering on FK side
            // check that FK side join column is distinct (i.e. have a group by)
            ImmutableBitSet fkSideBitSet;
            if (nonFkInput == leftInput) {
                fkSideBitSet = leftBits;
            } else {
                fkSideBitSet = rightBits;
            }
            // Join columns of the FK side, expressed as positions local to that input.
            ImmutableBitSet.Builder fkJoinColBuilder = ImmutableBitSet.builder();
            for (RexNode conj : RelOptUtil.conjunctions(cond)) {
                // Any non-equality conjunct disables this rewriting.
                if (!conj.isA(SqlKind.EQUALS)) {
                    return;
                }
                RexCall eq = (RexCall) conj;
                RexNode op1 = eq.getOperands().get(0);
                RexNode op2 = eq.getOperands().get(1);
                if (op1 instanceof RexInputRef && op2 instanceof RexInputRef) {
                    // Check references
                    int ref1 = ((RexInputRef) op1).getIndex();
                    int ref2 = ((RexInputRef) op2).getIndex();
                    if (fkSideBitSet.get(ref1)) {
                        // check that join columns are not nullable
                        if (op1.getType().isNullable()) {
                            return;
                        }
                        // indexOf converts the join-level index into the input-local position
                        fkJoinColBuilder.set(fkSideBitSet.indexOf(ref1));
                    } else {
                        // ref1 is not on the FK side, so the second operand is
                        // taken as the FK side join column
                        if (op2.getType().isNullable()) {
                            return;
                        }
                        fkJoinColBuilder.set(fkSideBitSet.indexOf(ref2));
                    }
                }
            }
            if (!call.getMetadataQuery().areColumnsUnique(nonFkInput, fkJoinColBuilder.build())) {
                return;
            }
            // all conditions are met, therefore we can perform rewrite to remove fk side
            rewrite(mode, fkInput, nonFkInput, join, topProjExprs, call, project, fkRemoval.nullableNodes);
        }
    }
}
Also used : Project(org.apache.calcite.rel.core.Project) Mappings(org.apache.calcite.util.mapping.Mappings) MappingType(org.apache.calcite.util.mapping.MappingType) LoggerFactory(org.slf4j.LoggerFactory) RelOptUtil(org.apache.calcite.plan.RelOptUtil) Join(org.apache.calcite.rel.core.Join) ArrayList(java.util.ArrayList) RexUtil(org.apache.calcite.rex.RexUtil) RexNode(org.apache.calcite.rex.RexNode) Mapping(org.apache.calcite.util.mapping.Mapping) RelBuilderFactory(org.apache.calcite.tools.RelBuilderFactory) HiveRelFactories(org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories) RexPermuteInputsShuttle(org.apache.calcite.rex.RexPermuteInputsShuttle) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) SqlKind(org.apache.calcite.sql.SqlKind) Logger(org.slf4j.Logger) RexBuilder(org.apache.calcite.rex.RexBuilder) HiveRelOptUtil(org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil) RelNode(org.apache.calcite.rel.RelNode) Collectors(java.util.stream.Collectors) RelOptRuleCall(org.apache.calcite.plan.RelOptRuleCall) RexInputRef(org.apache.calcite.rex.RexInputRef) RewritablePKFKJoinInfo(org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.RewritablePKFKJoinInfo) RelOptRule(org.apache.calcite.plan.RelOptRule) HiveCalciteUtil(org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil) List(java.util.List) SqlStdOperatorTable(org.apache.calcite.sql.fun.SqlStdOperatorTable) JoinRelType(org.apache.calcite.rel.core.JoinRelType) RexCall(org.apache.calcite.rex.RexCall) RewritablePKFKJoinInfo(org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.RewritablePKFKJoinInfo) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) Join(org.apache.calcite.rel.core.Join) Mapping(org.apache.calcite.util.mapping.Mapping) RexCall(org.apache.calcite.rex.RexCall) Project(org.apache.calcite.rel.core.Project) JoinRelType(org.apache.calcite.rel.core.JoinRelType) RelNode(org.apache.calcite.rel.RelNode) RexBuilder(org.apache.calcite.rex.RexBuilder) 
RexInputRef(org.apache.calcite.rex.RexInputRef) RexPermuteInputsShuttle(org.apache.calcite.rex.RexPermuteInputsShuttle) RexNode(org.apache.calcite.rex.RexNode)

Example 85 with ImmutableBitSet

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project hive by apache.

the class HiveIntersectRewriteRule method onMatch.

// ~ Methods ----------------------------------------------------------------
/**
 * Rewrites the matched {@code HiveIntersect} into a plan built from aggregates, a union,
 * a filter and a final projection, then registers it through {@code call.transformTo}.
 *
 * <p>Plan shape produced here:
 * <ol>
 *   <li>per branch: a project that appends the BIGINT literal 1 to the branch columns, then an
 *       aggregate grouping on every branch column with a "count" call over the appended
 *       column (referred to as {@code c} below);</li>
 *   <li>a union over all per-branch aggregates;</li>
 *   <li>an aggregate over the union grouping on every column except {@code c}, with a "count"
 *       call on {@code c} and, for intersect all only, an additional "min" call on {@code c};</li>
 *   <li>a filter keeping the rows whose count(c) equals the number of branches;</li>
 *   <li>intersect distinct: a project dropping the last column; intersect all: a project
 *       emitting the last column followed by the grouping columns, the table function scan
 *       returned by {@code HiveCalciteUtil.createUDTFForSetOp}, and a project dropping the
 *       first column.</li>
 * </ol>
 *
 * @param call rule call whose rel at ordinal 0 is the intersect to rewrite
 * @throws RuntimeException wrapping any {@code CalciteSemanticException} or
 *         {@code SemanticException} raised while building the plan
 */
public void onMatch(RelOptRuleCall call) {
    final HiveIntersect hiveIntersect = call.rel(0);
    final RelOptCluster cluster = hiveIntersect.getCluster();
    final RexBuilder rexBuilder = cluster.getRexBuilder();
    final int numOfBranch = hiveIntersect.getInputs().size();
    final Builder<RelNode> branchAggregates = new ImmutableList.Builder<RelNode>();

    // 1st level GB: one aggregate (col0, col1, ..., count(1) as c) per branch.
    for (RelNode branch : hiveIntersect.getInputs()) {
        final int branchFieldCount = branch.getRowType().getFieldList().size();
        final List<RexNode> projectExprs = Lists.newArrayList();
        final List<Integer> branchGroupKeys = Lists.newArrayList();
        for (int col = 0; col < branchFieldCount; col++) {
            projectExprs.add(rexBuilder.makeInputRef(branch, col));
            branchGroupKeys.add(col);
        }
        // The extra constant column '1' is what the branch-level count is computed over,
        // so a fresh project is needed underneath the aggregate.
        projectExprs.add(rexBuilder.makeBigintLiteral(new BigDecimal(1)));

        final RelNode projectWithOne;
        try {
            projectWithOne = HiveProject.create(branch, projectExprs, null);
        } catch (CalciteSemanticException e) {
            LOG.debug(e.toString());
            throw new RuntimeException(e);
        }

        // Group on every original column of the branch.
        final ImmutableBitSet branchGroupSet = ImmutableBitSet.of(branchGroupKeys);
        final List<AggregateCall> branchAggCalls = Lists.newArrayList();
        final RelDataType branchCountType =
            TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory());
        // The appended '1' sits right after the original columns, i.e. at branchFieldCount.
        branchAggCalls.add(
            HiveCalciteUtil.createSingleArgAggCall(
                "count", cluster, TypeInfoFactory.longTypeInfo, branchFieldCount, branchCountType));
        final HiveRelNode branchAggregate =
            new HiveAggregate(
                cluster,
                cluster.traitSetOf(HiveRelNode.CONVENTION),
                projectWithOne,
                branchGroupSet,
                null,
                branchAggCalls);
        branchAggregates.add(branchAggregate);
    }

    // Union all per-branch aggregates.
    final HiveRelNode union =
        new HiveUnion(cluster, TraitsUtil.getDefaultTraitSet(cluster), branchAggregates.build());

    // 2nd level GB over the union: group on everything except c, which is the last column.
    final int cInd = union.getRowType().getFieldList().size() - 1;
    final List<Integer> unionGroupKeys = Lists.newArrayList();
    for (int pos = 0; pos < cInd; pos++) {
        unionGroupKeys.add(pos);
    }
    final List<AggregateCall> unionAggCalls = Lists.newArrayList();
    final RelDataType unionAggType =
        TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory());
    unionAggCalls.add(
        HiveCalciteUtil.createSingleArgAggCall(
            "count", cluster, TypeInfoFactory.longTypeInfo, cInd, unionAggType));
    if (hiveIntersect.all) {
        // Intersect all additionally carries min(c) alongside count(c).
        unionAggCalls.add(
            HiveCalciteUtil.createSingleArgAggCall(
                "min", cluster, TypeInfoFactory.longTypeInfo, cInd, unionAggType));
    }
    final ImmutableBitSet unionGroupSet = ImmutableBitSet.of(unionGroupKeys);
    final HiveRelNode aggregateRel =
        new HiveAggregate(
            cluster,
            cluster.traitSetOf(HiveRelNode.CONVENTION),
            union,
            unionGroupSet,
            null,
            unionAggCalls);

    // Filter: count(c) = #branches. In the aggregate output count(c) directly follows the
    // group keys, so its index equals cInd.
    final List<RexNode> comparisonOperands = new ArrayList<RexNode>();
    final RexInputRef countRef = rexBuilder.makeInputRef(aggregateRel, cInd);
    final RexLiteral branchTotal = rexBuilder.makeBigintLiteral(new BigDecimal(numOfBranch));
    comparisonOperands.add(countRef);
    comparisonOperands.add(branchTotal);
    final ImmutableList.Builder<RelDataType> operandTypes = new ImmutableList.Builder<RelDataType>();
    operandTypes.add(TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()));
    operandTypes.add(TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()));

    final RexNode countEqualsBranches;
    try {
        countEqualsBranches =
            rexBuilder.makeCall(
                SqlFunctionConverter.getCalciteFn(
                    "=",
                    operandTypes.build(),
                    TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()),
                    true,
                    false),
                comparisonOperands);
    } catch (CalciteSemanticException e) {
        LOG.debug(e.toString());
        throw new RuntimeException(e);
    }
    final RelNode filterRel =
        new HiveFilter(
            cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), aggregateRel, countEqualsBranches);

    if (hiveIntersect.all) {
        // Filter schema for intersect all: R3 + count(c) as cnt + min(c) as m.
        // The input project for the udtf is laid out as: min(c) as m + R3.
        final List<RelDataTypeField> filterFields = filterRel.getRowType().getFieldList();
        final int filterFieldCount = filterFields.size();
        final List<RexNode> udtfInputRefs = new ArrayList<>();
        final RelDataTypeField minField = filterFields.get(filterFieldCount - 1);
        udtfInputRefs.add(new RexInputRef(minField.getIndex(), minField.getType()));
        // Stop two short of the end: the trailing cnt and m columns are not part of R3.
        for (int i = 0; i < filterFieldCount - 2; i++) {
            final RelDataTypeField field = filterFields.get(i);
            udtfInputRefs.add(new RexInputRef(field.getIndex(), field.getType()));
        }
        try {
            final RelNode srcRel = HiveProject.create(filterRel, udtfInputRefs, null);
            final HiveTableFunctionScan udtf = HiveCalciteUtil.createUDTFForSetOp(cluster, srcRel);
            // Finally project out the 1st column.
            final Set<Integer> leadingColumn = new HashSet<>();
            leadingColumn.add(0);
            call.transformTo(HiveCalciteUtil.createProjectWithoutColumn(udtf, leadingColumn));
        } catch (SemanticException e) {
            LOG.debug(e.toString());
            throw new RuntimeException(e);
        }
        return;
    }

    // Filter schema for intersect distinct: R3 on all attributes + count(c) as cnt.
    // Finally project out the last column.
    final Set<Integer> trailingColumn = new HashSet<>();
    trailingColumn.add(filterRel.getRowType().getFieldList().size() - 1);
    try {
        call.transformTo(HiveCalciteUtil.createProjectWithoutColumn(filterRel, trailingColumn));
    } catch (CalciteSemanticException e) {
        LOG.debug(e.toString());
        throw new RuntimeException(e);
    }
}
Also used : RelOptCluster(org.apache.calcite.plan.RelOptCluster) RexLiteral(org.apache.calcite.rex.RexLiteral) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ImmutableList(com.google.common.collect.ImmutableList) RexBuilder(org.apache.calcite.rex.RexBuilder) Builder(com.google.common.collect.ImmutableList.Builder) HiveTableFunctionScan(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan) HiveRelNode(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode) ArrayList(java.util.ArrayList) RelDataType(org.apache.calcite.rel.type.RelDataType) RexBuilder(org.apache.calcite.rex.RexBuilder) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) HashSet(java.util.HashSet) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) HiveIntersect(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect) HiveUnion(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion) BigDecimal(java.math.BigDecimal) HiveFilter(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter) AggregateCall(org.apache.calcite.rel.core.AggregateCall) HiveAggregate(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) HiveRelNode(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode) RelNode(org.apache.calcite.rel.RelNode) RexInputRef(org.apache.calcite.rex.RexInputRef) RexNode(org.apache.calcite.rex.RexNode)

Aggregations

ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet)208 RexNode (org.apache.calcite.rex.RexNode)127 RelNode (org.apache.calcite.rel.RelNode)110 ArrayList (java.util.ArrayList)101 RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField)66 RexBuilder (org.apache.calcite.rex.RexBuilder)60 AggregateCall (org.apache.calcite.rel.core.AggregateCall)55 RexInputRef (org.apache.calcite.rex.RexInputRef)45 RelDataType (org.apache.calcite.rel.type.RelDataType)39 HashMap (java.util.HashMap)36 RelBuilder (org.apache.calcite.tools.RelBuilder)36 RelMetadataQuery (org.apache.calcite.rel.metadata.RelMetadataQuery)30 Mapping (org.apache.calcite.util.mapping.Mapping)30 Pair (org.apache.calcite.util.Pair)29 Aggregate (org.apache.calcite.rel.core.Aggregate)27 ImmutableList (com.google.common.collect.ImmutableList)23 LinkedHashSet (java.util.LinkedHashSet)23 List (java.util.List)22 HashSet (java.util.HashSet)20 RelOptUtil (org.apache.calcite.plan.RelOptUtil)18