Search in sources :

Example 61 with RelMetadataQuery

use of org.apache.calcite.rel.metadata.RelMetadataQuery in project flink by apache.

the class FlinkAggregateJoinTransposeRule method onMatch.

public void onMatch(RelOptRuleCall call) {
    final Aggregate aggregate = call.rel(0);
    final Join join = call.rel(1);
    final RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder();
    final RelBuilder relBuilder = call.builder();
    // If any aggregate call has a filter, bail out
    for (AggregateCall aggregateCall : aggregate.getAggCallList()) {
        if (aggregateCall.getAggregation().unwrap(SqlSplittableAggFunction.class) == null) {
            return;
        }
        if (aggregateCall.filterArg >= 0) {
            return;
        }
    }
    // aggregate operator
    if (join.getJoinType() != JoinRelType.INNER) {
        return;
    }
    if (!allowFunctions && !aggregate.getAggCallList().isEmpty()) {
        return;
    }
    // Do the columns used by the join appear in the output of the aggregate?
    final ImmutableBitSet aggregateColumns = aggregate.getGroupSet();
    final RelMetadataQuery mq = RelMetadataQuery.instance();
    final ImmutableBitSet keyColumns = keyColumns(aggregateColumns, mq.getPulledUpPredicates(join).pulledUpPredicates);
    final ImmutableBitSet joinColumns = RelOptUtil.InputFinder.bits(join.getCondition());
    final boolean allColumnsInAggregate = keyColumns.contains(joinColumns);
    final ImmutableBitSet belowAggregateColumns = aggregateColumns.union(joinColumns);
    // Split join condition
    final List<Integer> leftKeys = Lists.newArrayList();
    final List<Integer> rightKeys = Lists.newArrayList();
    final List<Boolean> filterNulls = Lists.newArrayList();
    RexNode nonEquiConj = RelOptUtil.splitJoinCondition(join.getLeft(), join.getRight(), join.getCondition(), leftKeys, rightKeys, filterNulls);
    // If it contains non-equi join conditions, we bail out
    if (!nonEquiConj.isAlwaysTrue()) {
        return;
    }
    // Push each aggregate function down to each side that contains all of its
    // arguments. Note that COUNT(*), because it has no arguments, can go to
    // both sides.
    final Map<Integer, Integer> map = new HashMap<>();
    final List<Side> sides = new ArrayList<>();
    int uniqueCount = 0;
    int offset = 0;
    int belowOffset = 0;
    for (int s = 0; s < 2; s++) {
        final Side side = new Side();
        final RelNode joinInput = join.getInput(s);
        int fieldCount = joinInput.getRowType().getFieldCount();
        final ImmutableBitSet fieldSet = ImmutableBitSet.range(offset, offset + fieldCount);
        final ImmutableBitSet belowAggregateKeyNotShifted = belowAggregateColumns.intersect(fieldSet);
        for (Ord<Integer> c : Ord.zip(belowAggregateKeyNotShifted)) {
            map.put(c.e, belowOffset + c.i);
        }
        final ImmutableBitSet belowAggregateKey = belowAggregateKeyNotShifted.shift(-offset);
        final boolean unique;
        if (!allowFunctions) {
            assert aggregate.getAggCallList().isEmpty();
            // If there are no functions, it doesn't matter as much whether we
            // aggregate the inputs before the join, because there will not be
            // any functions experiencing a cartesian product effect.
            //
            // But finding out whether the input is already unique requires a call
            // to areColumnsUnique that currently (until [CALCITE-1048] "Make
            // metadata more robust" is fixed) places a heavy load on
            // the metadata system.
            //
            // So we choose to imagine the the input is already unique, which is
            // untrue but harmless.
            //
            Util.discard(Bug.CALCITE_1048_FIXED);
            unique = true;
        } else {
            final Boolean unique0 = mq.areColumnsUnique(joinInput, belowAggregateKey);
            unique = unique0 != null && unique0;
        }
        if (unique) {
            ++uniqueCount;
            side.aggregate = false;
            side.newInput = joinInput;
        } else {
            side.aggregate = true;
            List<AggregateCall> belowAggCalls = new ArrayList<>();
            final SqlSplittableAggFunction.Registry<AggregateCall> belowAggCallRegistry = registry(belowAggCalls);
            final Mappings.TargetMapping mapping = s == 0 ? Mappings.createIdentity(fieldCount) : Mappings.createShiftMapping(fieldCount + offset, 0, offset, fieldCount);
            for (Ord<AggregateCall> aggCall : Ord.zip(aggregate.getAggCallList())) {
                final SqlAggFunction aggregation = aggCall.e.getAggregation();
                final SqlSplittableAggFunction splitter = Preconditions.checkNotNull(aggregation.unwrap(SqlSplittableAggFunction.class));
                final AggregateCall call1;
                if (fieldSet.contains(ImmutableBitSet.of(aggCall.e.getArgList()))) {
                    call1 = splitter.split(aggCall.e, mapping);
                } else {
                    call1 = splitter.other(rexBuilder.getTypeFactory(), aggCall.e);
                }
                if (call1 != null) {
                    side.split.put(aggCall.i, belowAggregateKey.cardinality() + belowAggCallRegistry.register(call1));
                }
            }
            side.newInput = relBuilder.push(joinInput).aggregate(relBuilder.groupKey(belowAggregateKey, false, null), belowAggCalls).build();
        }
        offset += fieldCount;
        belowOffset += side.newInput.getRowType().getFieldCount();
        sides.add(side);
    }
    if (uniqueCount == 2) {
        // invocation of this rule; if we continue we might loop forever.
        return;
    }
    // Update condition
    final Mapping mapping = (Mapping) Mappings.target(new Function<Integer, Integer>() {

        public Integer apply(Integer a0) {
            return map.get(a0);
        }
    }, join.getRowType().getFieldCount(), belowOffset);
    final RexNode newCondition = RexUtil.apply(mapping, join.getCondition());
    // Create new join
    relBuilder.push(sides.get(0).newInput).push(sides.get(1).newInput).join(join.getJoinType(), newCondition);
    // Aggregate above to sum up the sub-totals
    final List<AggregateCall> newAggCalls = new ArrayList<>();
    final int groupIndicatorCount = aggregate.getGroupCount() + aggregate.getIndicatorCount();
    final int newLeftWidth = sides.get(0).newInput.getRowType().getFieldCount();
    final List<RexNode> projects = new ArrayList<>(rexBuilder.identityProjects(relBuilder.peek().getRowType()));
    for (Ord<AggregateCall> aggCall : Ord.zip(aggregate.getAggCallList())) {
        final SqlAggFunction aggregation = aggCall.e.getAggregation();
        final SqlSplittableAggFunction splitter = Preconditions.checkNotNull(aggregation.unwrap(SqlSplittableAggFunction.class));
        final Integer leftSubTotal = sides.get(0).split.get(aggCall.i);
        final Integer rightSubTotal = sides.get(1).split.get(aggCall.i);
        newAggCalls.add(splitter.topSplit(rexBuilder, registry(projects), groupIndicatorCount, relBuilder.peek().getRowType(), aggCall.e, leftSubTotal == null ? -1 : leftSubTotal, rightSubTotal == null ? -1 : rightSubTotal + newLeftWidth));
    }
    relBuilder.project(projects);
    boolean aggConvertedToProjects = false;
    if (allColumnsInAggregate) {
        // let's see if we can convert aggregate into projects
        List<RexNode> projects2 = new ArrayList<>();
        for (int key : Mappings.apply(mapping, aggregate.getGroupSet())) {
            projects2.add(relBuilder.field(key));
        }
        for (AggregateCall newAggCall : newAggCalls) {
            final SqlSplittableAggFunction splitter = newAggCall.getAggregation().unwrap(SqlSplittableAggFunction.class);
            if (splitter != null) {
                projects2.add(splitter.singleton(rexBuilder, relBuilder.peek().getRowType(), newAggCall));
            }
        }
        if (projects2.size() == aggregate.getGroupSet().cardinality() + newAggCalls.size()) {
            // We successfully converted agg calls into projects.
            relBuilder.project(projects2);
            aggConvertedToProjects = true;
        }
    }
    if (!aggConvertedToProjects) {
        relBuilder.aggregate(relBuilder.groupKey(Mappings.apply(mapping, aggregate.getGroupSet()), aggregate.indicator, Mappings.apply2(mapping, aggregate.getGroupSets())), newAggCalls);
    }
    call.transformTo(relBuilder.build());
}
Also used : RelMetadataQuery(org.apache.calcite.rel.metadata.RelMetadataQuery) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Mapping(org.apache.calcite.util.mapping.Mapping) Function(com.google.common.base.Function) SqlSplittableAggFunction(org.apache.calcite.sql.SqlSplittableAggFunction) SqlAggFunction(org.apache.calcite.sql.SqlAggFunction) RexBuilder(org.apache.calcite.rex.RexBuilder) SqlSplittableAggFunction(org.apache.calcite.sql.SqlSplittableAggFunction) RelBuilder(org.apache.calcite.tools.RelBuilder) Join(org.apache.calcite.rel.core.Join) LogicalJoin(org.apache.calcite.rel.logical.LogicalJoin) SqlAggFunction(org.apache.calcite.sql.SqlAggFunction) AggregateCall(org.apache.calcite.rel.core.AggregateCall) RelNode(org.apache.calcite.rel.RelNode) Mappings(org.apache.calcite.util.mapping.Mappings) Aggregate(org.apache.calcite.rel.core.Aggregate) LogicalAggregate(org.apache.calcite.rel.logical.LogicalAggregate) RexNode(org.apache.calcite.rex.RexNode)

Example 62 with RelMetadataQuery

use of org.apache.calcite.rel.metadata.RelMetadataQuery in project hive by apache.

the class HiveAlgorithmsUtil method isFittingIntoMemory.

public static boolean isFittingIntoMemory(Double maxSize, RelNode input, int buckets) {
    final RelMetadataQuery mq = input.getCluster().getMetadataQuery();
    Double currentMemory = mq.cumulativeMemoryWithinPhase(input);
    if (currentMemory != null) {
        if (currentMemory / buckets > maxSize) {
            return false;
        }
        return true;
    }
    return false;
}
Also used : RelMetadataQuery(org.apache.calcite.rel.metadata.RelMetadataQuery)

Example 63 with RelMetadataQuery

use of org.apache.calcite.rel.metadata.RelMetadataQuery in project hive by apache.

the class HiveAlgorithmsUtil method getJoinMemory.

public static Double getJoinMemory(HiveJoin join, MapJoinStreamingRelation streamingSide) {
    Double memory = 0.0;
    final RelMetadataQuery mq = join.getCluster().getMetadataQuery();
    if (streamingSide == MapJoinStreamingRelation.NONE || streamingSide == MapJoinStreamingRelation.RIGHT_RELATION) {
        // Left side
        final Double leftAvgRowSize = mq.getAverageRowSize(join.getLeft());
        final Double leftRowCount = mq.getRowCount(join.getLeft());
        if (leftAvgRowSize == null || leftRowCount == null) {
            return null;
        }
        memory += leftAvgRowSize * leftRowCount;
    }
    if (streamingSide == MapJoinStreamingRelation.NONE || streamingSide == MapJoinStreamingRelation.LEFT_RELATION) {
        // Right side
        final Double rightAvgRowSize = mq.getAverageRowSize(join.getRight());
        final Double rightRowCount = mq.getRowCount(join.getRight());
        if (rightAvgRowSize == null || rightRowCount == null) {
            return null;
        }
        memory += rightAvgRowSize * rightRowCount;
    }
    return memory;
}
Also used : RelMetadataQuery(org.apache.calcite.rel.metadata.RelMetadataQuery)

Example 64 with RelMetadataQuery

use of org.apache.calcite.rel.metadata.RelMetadataQuery in project hive by apache.

the class HiveAlgorithmsUtil method getSplitCountWithRepartition.

public static Integer getSplitCountWithRepartition(HiveJoin join) {
    final Double maxSplitSize = join.getCluster().getPlanner().getContext().unwrap(HiveAlgorithmsConf.class).getMaxSplitSize();
    // We repartition: new number of splits
    final RelMetadataQuery mq = join.getCluster().getMetadataQuery();
    final Double averageRowSize = mq.getAverageRowSize(join);
    final Double rowCount = mq.getRowCount(join);
    if (averageRowSize == null || rowCount == null) {
        return null;
    }
    final Double totalSize = averageRowSize * rowCount;
    final Double splitCount = totalSize / maxSplitSize;
    return splitCount.intValue();
}
Also used : RelMetadataQuery(org.apache.calcite.rel.metadata.RelMetadataQuery)

Example 65 with RelMetadataQuery

use of org.apache.calcite.rel.metadata.RelMetadataQuery in project hive by apache.

the class HiveOnTezCostModel method getAggregateCost.

@Override
public RelOptCost getAggregateCost(HiveAggregate aggregate) {
    if (aggregate.isBucketedInput()) {
        return HiveCost.FACTORY.makeZeroCost();
    } else {
        final RelMetadataQuery mq = aggregate.getCluster().getMetadataQuery();
        // 1. Sum of input cardinalities
        final Double rCount = mq.getRowCount(aggregate.getInput());
        if (rCount == null) {
            return null;
        }
        // 2. CPU cost = sorting cost
        final double cpuCost = algoUtils.computeSortCPUCost(rCount);
        // 3. IO cost = cost of writing intermediary results to local FS +
        // cost of reading from local FS for transferring to GBy +
        // cost of transferring map outputs to GBy operator
        final Double rAverageSize = mq.getAverageRowSize(aggregate.getInput());
        if (rAverageSize == null) {
            return null;
        }
        final double ioCost = algoUtils.computeSortIOCost(new Pair<Double, Double>(rCount, rAverageSize));
        // 4. Result
        return HiveCost.FACTORY.makeCost(rCount, cpuCost, ioCost);
    }
}
Also used : RelMetadataQuery(org.apache.calcite.rel.metadata.RelMetadataQuery)

Aggregations

RelMetadataQuery (org.apache.calcite.rel.metadata.RelMetadataQuery)122 RelNode (org.apache.calcite.rel.RelNode)88 Test (org.junit.Test)43 RexNode (org.apache.calcite.rex.RexNode)38 RelOptPredicateList (org.apache.calcite.plan.RelOptPredicateList)24 ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet)21 ArrayList (java.util.ArrayList)19 RexBuilder (org.apache.calcite.rex.RexBuilder)19 RelBuilder (org.apache.calcite.tools.RelBuilder)19 ImmutableList (com.google.common.collect.ImmutableList)12 Aggregate (org.apache.calcite.rel.core.Aggregate)12 RexTableInputRef (org.apache.calcite.rex.RexTableInputRef)12 RelOptCluster (org.apache.calcite.plan.RelOptCluster)11 RelTraitSet (org.apache.calcite.plan.RelTraitSet)11 AggregateCall (org.apache.calcite.rel.core.AggregateCall)11 RelCollation (org.apache.calcite.rel.RelCollation)10 RelDistribution (org.apache.calcite.rel.RelDistribution)10 LogicalAggregate (org.apache.calcite.rel.logical.LogicalAggregate)10 HashMap (java.util.HashMap)9 List (java.util.List)9