
Example 16 with Mapping

Use of org.apache.calcite.util.mapping.Mapping in the Apache Hive project.

The class RelFieldTrimmer, method createMapping.

protected Mapping createMapping(ImmutableBitSet fieldsUsed, int fieldCount) {
    final Mapping mapping = Mappings.create(MappingType.INVERSE_SURJECTION, fieldCount, fieldsUsed.cardinality());
    int i = 0;
    for (int field : fieldsUsed) {
        mapping.set(field, i++);
    }
    return mapping;
}
Also used : Mapping(org.apache.calcite.util.mapping.Mapping)
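
For reference, here is a minimal standalone sketch of what createMapping produces; the class name CreateMappingDemo and the concrete field numbers are illustrative, not part of the Hive source. With fieldCount = 5 and fieldsUsed = {1, 3}, the resulting INVERSE_SURJECTION renumbers the used fields densely and leaves the other fields without a target.

import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.mapping.Mapping;
import org.apache.calcite.util.mapping.MappingType;
import org.apache.calcite.util.mapping.Mappings;

public class CreateMappingDemo {
    public static void main(String[] args) {
        // A relation with 5 fields of which the consumer only uses fields 1 and 3.
        ImmutableBitSet fieldsUsed = ImmutableBitSet.of(1, 3);
        int fieldCount = 5;

        // Same construction as RelFieldTrimmer.createMapping above.
        Mapping mapping = Mappings.create(MappingType.INVERSE_SURJECTION,
                fieldCount, fieldsUsed.cardinality());
        int i = 0;
        for (int field : fieldsUsed) {
            mapping.set(field, i++);
        }

        // Used fields are renumbered densely; unused fields have no target.
        System.out.println(mapping.getTargetOpt(1));  // 0
        System.out.println(mapping.getTargetOpt(3));  // 1
        System.out.println(mapping.getTargetOpt(0));  // -1 (not mapped)
    }
}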

Example 17 with Mapping

Use of org.apache.calcite.util.mapping.Mapping in the Apache Hive project.

The class RelFieldTrimmer, method trimFields.

/**
 * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
 * {@link org.apache.calcite.rel.core.SetOp} (including UNION and UNION ALL).
 */
public TrimResult trimFields(SetOp setOp, ImmutableBitSet fieldsUsed, Set<RelDataTypeField> extraFields) {
    final RelDataType rowType = setOp.getRowType();
    final int fieldCount = rowType.getFieldCount();
    int changeCount = 0;
    // If no fields are used, pretend the consumer wants the last field. (The
    // last field is the least likely to be a system field.)
    if (fieldsUsed.isEmpty()) {
        fieldsUsed = ImmutableBitSet.of(rowType.getFieldCount() - 1);
    }
    // Compute the desired field mapping. Give the consumer the fields they
    // want, in the order that they appear in the bitset.
    final Mapping mapping = createMapping(fieldsUsed, fieldCount);
    // Create input with trimmed columns.
    for (RelNode input : setOp.getInputs()) {
        TrimResult trimResult = trimChild(setOp, input, fieldsUsed, extraFields);
        // We want "mapping", the input gave us "inputMapping", compute
        // "remaining" mapping.
        // |                   |                |
        // |---------------- mapping ---------->|
        // |-- inputMapping -->|                |
        // |                   |-- remaining -->|
        // 
        // For instance, suppose we have columns [a, b, c, d],
        // the consumer asked for mapping = [b, d],
        // and the transformed input has columns inputMapping = [d, a, b].
        // remaining will permute [d, a, b] to [b, d].
        Mapping remaining = Mappings.divide(mapping, trimResult.right);
        // Create a projection; does nothing if remaining is identity.
        final RelBuilder relBuilder = REL_BUILDER.get();
        relBuilder.push(trimResult.left);
        relBuilder.permute(remaining);
        if (input != relBuilder.peek()) {
            ++changeCount;
        }
    }
    final RelBuilder relBuilder = REL_BUILDER.get();
    // If no input was changed and the mapping is the identity, there's nothing
    // to do.
    if (changeCount == 0 && mapping.isIdentity()) {
        for (RelNode input : setOp.getInputs()) {
            relBuilder.build();
        }
        return result(setOp, mapping);
    }
    switch(setOp.kind) {
        case UNION:
            relBuilder.union(setOp.all, setOp.getInputs().size());
            break;
        case INTERSECT:
            relBuilder.intersect(setOp.all, setOp.getInputs().size());
            break;
        case EXCEPT:
            assert setOp.getInputs().size() == 2;
            relBuilder.minus(setOp.all);
            break;
        default:
            throw new AssertionError("unknown setOp " + setOp);
    }
    return result(relBuilder.build(), mapping);
}
Also used : RelBuilder(org.apache.calcite.tools.RelBuilder), RelNode(org.apache.calcite.rel.RelNode), RelDataType(org.apache.calcite.rel.type.RelDataType), Mapping(org.apache.calcite.util.mapping.Mapping)
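
The Mappings.divide call above computes the "remaining" step from the diagram in the comment. Below is a small sketch of that exact scenario (columns [a, b, c, d], consumer wants [b, d], trimmed input yields [d, a, b]); the class name DivideMappingDemo is illustrative, and the expected values assume divide composes the mappings the way the diagram shows.

import org.apache.calcite.util.mapping.Mapping;
import org.apache.calcite.util.mapping.MappingType;
import org.apache.calcite.util.mapping.Mappings;

public class DivideMappingDemo {
    public static void main(String[] args) {
        // Columns [a, b, c, d]; the consumer asked for [b, d].
        Mapping mapping = Mappings.create(MappingType.INVERSE_SURJECTION, 4, 2);
        mapping.set(1, 0); // b -> output 0
        mapping.set(3, 1); // d -> output 1

        // The trimmed input produced columns [d, a, b].
        Mapping inputMapping = Mappings.create(MappingType.INVERSE_SURJECTION, 4, 3);
        inputMapping.set(3, 0); // d -> input column 0
        inputMapping.set(0, 1); // a -> input column 1
        inputMapping.set(1, 2); // b -> input column 2

        // "remaining" should take the trimmed input's columns [d, a, b]
        // to the desired [b, d]: input column 2 (b) -> 0, input column 0 (d) -> 1.
        Mapping remaining = Mappings.divide(mapping, inputMapping);
        System.out.println(remaining.getTargetOpt(2));  // expect 0
        System.out.println(remaining.getTargetOpt(0));  // expect 1
    }
}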

Example 18 with Mapping

Use of org.apache.calcite.util.mapping.Mapping in the Apache Hive project.

The class RelFieldTrimmer, method trimFields.

/**
 * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
 * {@link org.apache.calcite.rel.logical.LogicalValues}.
 */
public TrimResult trimFields(LogicalValues values, ImmutableBitSet fieldsUsed, Set<RelDataTypeField> extraFields) {
    final RelDataType rowType = values.getRowType();
    final int fieldCount = rowType.getFieldCount();
    // If no fields are used, keep only the last field, which is unlikely to be
    // a system field.
    if (fieldsUsed.isEmpty()) {
        fieldsUsed = ImmutableBitSet.range(fieldCount - 1, fieldCount);
    }
    // If all fields are used, return unchanged.
    if (fieldsUsed.equals(ImmutableBitSet.range(fieldCount))) {
        Mapping mapping = Mappings.createIdentity(fieldCount);
        return result(values, mapping);
    }
    final ImmutableList.Builder<ImmutableList<RexLiteral>> newTuples = ImmutableList.builder();
    for (ImmutableList<RexLiteral> tuple : values.getTuples()) {
        ImmutableList.Builder<RexLiteral> newTuple = ImmutableList.builder();
        for (int field : fieldsUsed) {
            newTuple.add(tuple.get(field));
        }
        newTuples.add(newTuple.build());
    }
    final Mapping mapping = createMapping(fieldsUsed, fieldCount);
    final RelDataType newRowType = RelOptUtil.permute(values.getCluster().getTypeFactory(), rowType, mapping);
    final LogicalValues newValues = LogicalValues.create(values.getCluster(), newRowType, newTuples.build());
    return result(newValues, mapping);
}
Also used : RexLiteral(org.apache.calcite.rex.RexLiteral), ImmutableList(com.google.common.collect.ImmutableList), RelDataType(org.apache.calcite.rel.type.RelDataType), Mapping(org.apache.calcite.util.mapping.Mapping), LogicalValues(org.apache.calcite.rel.logical.LogicalValues)
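
The RelOptUtil.permute call is what gives the new Values its narrowed row type. A minimal sketch of that call in isolation follows; the class name PermuteRowTypeDemo, the field names, and the types are illustrative, not taken from the Hive source.

import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeSystem;
import org.apache.calcite.sql.type.SqlTypeFactoryImpl;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.util.mapping.Mapping;
import org.apache.calcite.util.mapping.MappingType;
import org.apache.calcite.util.mapping.Mappings;

public class PermuteRowTypeDemo {
    public static void main(String[] args) {
        SqlTypeFactoryImpl typeFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT);

        // A three-column row type (a, b, c).
        RelDataType rowType = typeFactory.builder()
                .add("a", SqlTypeName.INTEGER)
                .add("b", SqlTypeName.BIGINT)
                .add("c", SqlTypeName.INTEGER)
                .build();

        // Keep only fields 0 and 2, as createMapping would for fieldsUsed = {0, 2}.
        Mapping mapping = Mappings.create(MappingType.INVERSE_SURJECTION, 3, 2);
        mapping.set(0, 0);
        mapping.set(2, 1);

        // The trimmed row type keeps just (a, c).
        RelDataType newRowType = RelOptUtil.permute(typeFactory, rowType, mapping);
        System.out.println(newRowType.getFieldNames());  // expect [a, c]
    }
}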

Example 19 with Mapping

Use of org.apache.calcite.util.mapping.Mapping in the Apache Hive project.

The class RelFieldTrimmer, method trimFields.

/**
 * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
 * {@link org.apache.calcite.rel.logical.LogicalFilter}.
 */
public TrimResult trimFields(Filter filter, ImmutableBitSet fieldsUsed, Set<RelDataTypeField> extraFields) {
    final RelDataType rowType = filter.getRowType();
    final int fieldCount = rowType.getFieldCount();
    final RexNode conditionExpr = filter.getCondition();
    final RelNode input = filter.getInput();
    // We use the fields used by the consumer, plus any fields used in the
    // filter.
    final Set<RelDataTypeField> inputExtraFields = new LinkedHashSet<>(extraFields);
    RelOptUtil.InputFinder inputFinder = new RelOptUtil.InputFinder(inputExtraFields, fieldsUsed);
    conditionExpr.accept(inputFinder);
    final ImmutableBitSet inputFieldsUsed = inputFinder.build();
    // Create input with trimmed columns.
    TrimResult trimResult = trimChild(filter, input, inputFieldsUsed, inputExtraFields);
    RelNode newInput = trimResult.left;
    final Mapping inputMapping = trimResult.right;
    // If the input is unchanged and we need to project all columns, there's
    // nothing we can do.
    if (newInput == input && fieldsUsed.cardinality() == fieldCount) {
        return result(filter, Mappings.createIdentity(fieldCount));
    }
    // Build new project expressions, and populate the mapping.
    final RexVisitor<RexNode> shuttle = new RexPermuteInputsShuttle(inputMapping, newInput);
    RexNode newConditionExpr = conditionExpr.accept(shuttle);
    // Build new filter with trimmed input and condition.
    final RelBuilder relBuilder = REL_BUILDER.get();
    relBuilder.push(newInput).filter(filter.getVariablesSet(), newConditionExpr);
    // The result has the same mapping as the input gave us. Sometimes we return
    // fields that the consumer didn't ask for, because the filter needs them for
    // its condition.
    return result(relBuilder.build(), inputMapping);
}
Also used : LinkedHashSet(java.util.LinkedHashSet), RelBuilder(org.apache.calcite.tools.RelBuilder), ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet), RelOptUtil(org.apache.calcite.plan.RelOptUtil), RelDataType(org.apache.calcite.rel.type.RelDataType), Mapping(org.apache.calcite.util.mapping.Mapping), RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField), RelNode(org.apache.calcite.rel.RelNode), RexPermuteInputsShuttle(org.apache.calcite.rex.RexPermuteInputsShuttle), RexNode(org.apache.calcite.rex.RexNode)
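
The RexPermuteInputsShuttle is what rewrites the filter condition so its input references point at the trimmed input's columns. A self-contained sketch is below; the class name PermuteConditionDemo, the condition, and the field numbers are illustrative, and it assumes a shuttle constructed without input rels still rewrites plain input references by index.

import java.math.BigDecimal;

import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeSystem;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexPermuteInputsShuttle;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.type.SqlTypeFactoryImpl;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.util.mapping.Mapping;
import org.apache.calcite.util.mapping.MappingType;
import org.apache.calcite.util.mapping.Mappings;

public class PermuteConditionDemo {
    public static void main(String[] args) {
        SqlTypeFactoryImpl typeFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT);
        RexBuilder rexBuilder = new RexBuilder(typeFactory);
        RelDataType intType = typeFactory.createSqlType(SqlTypeName.INTEGER);

        // Original condition on a 4-column input: $3 > 10.
        RexNode condition = rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN,
                rexBuilder.makeInputRef(intType, 3),
                rexBuilder.makeExactLiteral(BigDecimal.TEN));

        // The trimmed input keeps only fields 1 and 3, so field 3 moves to position 1.
        Mapping inputMapping = Mappings.create(MappingType.INVERSE_SURJECTION, 4, 2);
        inputMapping.set(1, 0);
        inputMapping.set(3, 1);

        // Rewrite the condition against the trimmed input, as trimFields(Filter...) does.
        RexNode newCondition = condition.accept(new RexPermuteInputsShuttle(inputMapping));
        System.out.println(newCondition);  // expect something like >($1, 10)
    }
}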

Example 20 with Mapping

Use of org.apache.calcite.util.mapping.Mapping in the Apache Hive project.

The class HiveAggregateJoinTransposeRule, method onMatch.

@Override
public void onMatch(RelOptRuleCall call) {
    try {
        final Aggregate aggregate = call.rel(0);
        final Join join = call.rel(1);
        final RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder();
        final RelBuilder relBuilder = call.builder();
        // If any aggregate call is not splittable, or has a filter, bail out
        for (AggregateCall aggregateCall : aggregate.getAggCallList()) {
            if (aggregateCall.getAggregation().unwrap(SqlSplittableAggFunction.class) == null) {
                return;
            }
            if (aggregateCall.filterArg >= 0) {
                return;
            }
        }
        // If the join is not an inner join, we do not push down the
        // aggregate operator
        if (join.getJoinType() != JoinRelType.INNER) {
            return;
        }
        if (!allowFunctions && !aggregate.getAggCallList().isEmpty()) {
            return;
        }
        boolean groupingUnique = isGroupingUnique(join, aggregate.getGroupSet());
        if (!groupingUnique && !costBased) {
            // there is no need to check further - the transformation may not happen
            return;
        }
        // Do the columns used by the join appear in the output of the aggregate?
        final ImmutableBitSet aggregateColumns = aggregate.getGroupSet();
        final RelMetadataQuery mq = call.getMetadataQuery();
        final ImmutableBitSet keyColumns = keyColumns(aggregateColumns, mq.getPulledUpPredicates(join).pulledUpPredicates);
        final ImmutableBitSet joinColumns = RelOptUtil.InputFinder.bits(join.getCondition());
        final boolean allColumnsInAggregate = keyColumns.contains(joinColumns);
        final ImmutableBitSet belowAggregateColumns = aggregateColumns.union(joinColumns);
        // Split join condition
        final List<Integer> leftKeys = Lists.newArrayList();
        final List<Integer> rightKeys = Lists.newArrayList();
        final List<Boolean> filterNulls = Lists.newArrayList();
        RexNode nonEquiConj = RelOptUtil.splitJoinCondition(join.getLeft(), join.getRight(), join.getCondition(), leftKeys, rightKeys, filterNulls);
        // If it contains non-equi join conditions, we bail out
        if (!nonEquiConj.isAlwaysTrue()) {
            return;
        }
        // Push each aggregate function down to each side that contains all of its
        // arguments. Note that COUNT(*), because it has no arguments, can go to
        // both sides.
        final Map<Integer, Integer> map = new HashMap<>();
        final List<Side> sides = new ArrayList<>();
        int uniqueCount = 0;
        int offset = 0;
        int belowOffset = 0;
        for (int s = 0; s < 2; s++) {
            final Side side = new Side();
            final RelNode joinInput = join.getInput(s);
            int fieldCount = joinInput.getRowType().getFieldCount();
            final ImmutableBitSet fieldSet = ImmutableBitSet.range(offset, offset + fieldCount);
            final ImmutableBitSet belowAggregateKeyNotShifted = belowAggregateColumns.intersect(fieldSet);
            for (Ord<Integer> c : Ord.zip(belowAggregateKeyNotShifted)) {
                map.put(c.e, belowOffset + c.i);
            }
            final ImmutableBitSet belowAggregateKey = belowAggregateKeyNotShifted.shift(-offset);
            final boolean unique;
            if (!allowFunctions) {
                assert aggregate.getAggCallList().isEmpty();
                // If there are no functions, it doesn't matter as much whether we
                // aggregate the inputs before the join, because there will not be
                // any functions experiencing a cartesian product effect.
                // 
                // But finding out whether the input is already unique requires a call
                // to areColumnsUnique that currently (until [CALCITE-1048] "Make
                // metadata more robust" is fixed) places a heavy load on
                // the metadata system.
                // 
                // So we choose to imagine the input is already unique, which is
                // untrue but harmless.
                // 
                unique = true;
            } else {
                final Boolean unique0 = mq.areColumnsUnique(joinInput, belowAggregateKey, true);
                unique = unique0 != null && unique0;
            }
            if (unique) {
                ++uniqueCount;
                relBuilder.push(joinInput);
                relBuilder.project(belowAggregateKey.asList().stream().map(relBuilder::field).collect(Collectors.toList()));
                side.newInput = relBuilder.build();
            } else {
                List<AggregateCall> belowAggCalls = new ArrayList<>();
                final SqlSplittableAggFunction.Registry<AggregateCall> belowAggCallRegistry = registry(belowAggCalls);
                final Mappings.TargetMapping mapping = s == 0 ? Mappings.createIdentity(fieldCount) : Mappings.createShiftMapping(fieldCount + offset, 0, offset, fieldCount);
                for (Ord<AggregateCall> aggCall : Ord.zip(aggregate.getAggCallList())) {
                    final SqlAggFunction aggregation = aggCall.e.getAggregation();
                    final SqlSplittableAggFunction splitter = Preconditions.checkNotNull(aggregation.unwrap(SqlSplittableAggFunction.class));
                    final AggregateCall call1;
                    if (fieldSet.contains(ImmutableBitSet.of(aggCall.e.getArgList()))) {
                        call1 = splitter.split(aggCall.e, mapping);
                    } else {
                        call1 = splitter.other(rexBuilder.getTypeFactory(), aggCall.e);
                    }
                    if (call1 != null) {
                        side.split.put(aggCall.i, belowAggregateKey.cardinality() + belowAggCallRegistry.register(call1));
                    }
                }
                side.newInput = relBuilder.push(joinInput).aggregate(relBuilder.groupKey(belowAggregateKey, null), belowAggCalls).build();
            }
            offset += fieldCount;
            belowOffset += side.newInput.getRowType().getFieldCount();
            sides.add(side);
        }
        if (uniqueCount == 2) {
            // Both inputs to the join are unique, so there is nothing to be gained
            // by this rule; this aggregate may even be the result of a previous
            // invocation of this rule; if we continue we might loop forever.
            return;
        }
        // Update condition
        final Mapping mapping = (Mapping) Mappings.target(map::get, join.getRowType().getFieldCount(), belowOffset);
        final RexNode newCondition = RexUtil.apply(mapping, join.getCondition());
        // Create new join
        relBuilder.push(sides.get(0).newInput).push(sides.get(1).newInput).join(join.getJoinType(), newCondition);
        // Aggregate above to sum up the sub-totals
        final List<AggregateCall> newAggCalls = new ArrayList<>();
        final int groupIndicatorCount = aggregate.getGroupCount() + aggregate.getIndicatorCount();
        final int newLeftWidth = sides.get(0).newInput.getRowType().getFieldCount();
        final List<RexNode> projects = new ArrayList<>(rexBuilder.identityProjects(relBuilder.peek().getRowType()));
        for (Ord<AggregateCall> aggCall : Ord.zip(aggregate.getAggCallList())) {
            final SqlAggFunction aggregation = aggCall.e.getAggregation();
            final SqlSplittableAggFunction splitter = Preconditions.checkNotNull(aggregation.unwrap(SqlSplittableAggFunction.class));
            final Integer leftSubTotal = sides.get(0).split.get(aggCall.i);
            final Integer rightSubTotal = sides.get(1).split.get(aggCall.i);
            newAggCalls.add(splitter.topSplit(rexBuilder, registry(projects), groupIndicatorCount, relBuilder.peek().getRowType(), aggCall.e, leftSubTotal == null ? -1 : leftSubTotal, rightSubTotal == null ? -1 : rightSubTotal + newLeftWidth));
        }
        relBuilder.project(projects);
        boolean aggConvertedToProjects = false;
        if (allColumnsInAggregate) {
            // let's see if we can convert aggregate into projects
            List<RexNode> projects2 = new ArrayList<>();
            for (int key : Mappings.apply(mapping, aggregate.getGroupSet())) {
                projects2.add(relBuilder.field(key));
            }
            for (AggregateCall newAggCall : newAggCalls) {
                final SqlSplittableAggFunction splitter = newAggCall.getAggregation().unwrap(SqlSplittableAggFunction.class);
                if (splitter != null) {
                    final RelDataType rowType = relBuilder.peek().getRowType();
                    projects2.add(splitter.singleton(rexBuilder, rowType, newAggCall));
                }
            }
            if (projects2.size() == aggregate.getGroupSet().cardinality() + newAggCalls.size()) {
                // We successfully converted agg calls into projects.
                relBuilder.project(projects2);
                aggConvertedToProjects = true;
            }
        }
        if (!aggConvertedToProjects) {
            relBuilder.aggregate(relBuilder.groupKey(Mappings.apply(mapping, aggregate.getGroupSet()), Mappings.apply2(mapping, aggregate.getGroupSets())), newAggCalls);
        }
        RelNode r = relBuilder.build();
        boolean transform = false;
        if (uniqueBased && aggConvertedToProjects) {
            transform = groupingUnique;
        }
        if (!transform && costBased) {
            RelOptCost afterCost = mq.getCumulativeCost(r);
            RelOptCost beforeCost = mq.getCumulativeCost(aggregate);
            transform = afterCost.isLt(beforeCost);
        }
        if (transform) {
            call.transformTo(r);
        }
    } catch (Exception e) {
        if (noColsMissingStats.get() > 0) {
            LOG.warn("Missing column stats (see previous messages), skipping aggregate-join transpose in CBO");
            noColsMissingStats.set(0);
        } else {
            throw e;
        }
    }
}
Also used : RelMetadataQuery(org.apache.calcite.rel.metadata.RelMetadataQuery), ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet), HashMap(java.util.HashMap), ArrayList(java.util.ArrayList), Mapping(org.apache.calcite.util.mapping.Mapping), RelDataType(org.apache.calcite.rel.type.RelDataType), RelOptCost(org.apache.calcite.plan.RelOptCost), RexBuilder(org.apache.calcite.rex.RexBuilder), SqlSplittableAggFunction(org.apache.calcite.sql.SqlSplittableAggFunction), RelBuilder(org.apache.calcite.tools.RelBuilder), Join(org.apache.calcite.rel.core.Join), HiveJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin), SqlAggFunction(org.apache.calcite.sql.SqlAggFunction), AggregateCall(org.apache.calcite.rel.core.AggregateCall), AtomicInteger(java.util.concurrent.atomic.AtomicInteger), RelNode(org.apache.calcite.rel.RelNode), Mappings(org.apache.calcite.util.mapping.Mappings), HiveAggregate(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate), Aggregate(org.apache.calcite.rel.core.Aggregate), RexNode(org.apache.calcite.rex.RexNode)
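
Example 20 builds its Mapping from a plain HashMap via Mappings.target and then uses Mappings.apply to shift the aggregate's group keys onto the new join's columns. A minimal sketch of those two calls in isolation follows; the class name TargetMappingDemo, the map contents, and the field counts are illustrative.

import java.util.HashMap;
import java.util.Map;

import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.mapping.Mapping;
import org.apache.calcite.util.mapping.Mappings;

public class TargetMappingDemo {
    public static void main(String[] args) {
        // Old (pre-transpose) column index -> new column index below the new join.
        Map<Integer, Integer> map = new HashMap<>();
        map.put(0, 2);
        map.put(3, 0);
        map.put(5, 1);

        // Same construction as in onMatch: 6 columns before, 3 columns after.
        Mapping mapping = (Mapping) Mappings.target(map::get, 6, 3);

        // Remap a group set the way the rule remaps aggregate.getGroupSet().
        ImmutableBitSet groupSet = ImmutableBitSet.of(0, 5);
        System.out.println(Mappings.apply(mapping, groupSet));  // expect {1, 2}
    }
}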

Aggregations

Mapping (org.apache.calcite.util.mapping.Mapping) - 44
RelNode (org.apache.calcite.rel.RelNode) - 36
ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet) - 29
RelDataType (org.apache.calcite.rel.type.RelDataType) - 26
RexNode (org.apache.calcite.rex.RexNode) - 25
ArrayList (java.util.ArrayList) - 22
RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField) - 20
RelBuilder (org.apache.calcite.tools.RelBuilder) - 15
RexPermuteInputsShuttle (org.apache.calcite.rex.RexPermuteInputsShuttle) - 13
RexBuilder (org.apache.calcite.rex.RexBuilder) - 11
RelOptUtil (org.apache.calcite.plan.RelOptUtil) - 9
LinkedHashSet (java.util.LinkedHashSet) - 8
AggregateCall (org.apache.calcite.rel.core.AggregateCall) - 8
Aggregate (org.apache.calcite.rel.core.Aggregate) - 6
Join (org.apache.calcite.rel.core.Join) - 6
RexInputRef (org.apache.calcite.rex.RexInputRef) - 6
RexLiteral (org.apache.calcite.rex.RexLiteral) - 6
Mappings (org.apache.calcite.util.mapping.Mappings) - 6
ImmutableList (com.google.common.collect.ImmutableList) - 5
HashMap (java.util.HashMap) - 5