Search in sources :

Example 16 with Pair

use of org.apache.calcite.util.Pair in project hive by apache.

the class HiveJoinToMultiJoinRule method mergeJoin.

// This method tries to merge the join with its left child. The left
// child should be a join for this to happen.
private static RelNode mergeJoin(HiveJoin join, RelNode left, RelNode right) {
    final RexBuilder rexBuilder = join.getCluster().getRexBuilder();
    // We check whether the join can be combined with any of its children
    final List<RelNode> newInputs = Lists.newArrayList();
    final List<RexNode> newJoinCondition = Lists.newArrayList();
    final List<Pair<Integer, Integer>> joinInputs = Lists.newArrayList();
    final List<JoinRelType> joinTypes = Lists.newArrayList();
    final List<RexNode> joinFilters = Lists.newArrayList();
    // Left child
    if (left instanceof HiveJoin || left instanceof HiveMultiJoin) {
        final RexNode leftCondition;
        final List<Pair<Integer, Integer>> leftJoinInputs;
        final List<JoinRelType> leftJoinTypes;
        final List<RexNode> leftJoinFilters;
        boolean combinable;
        if (left instanceof HiveJoin) {
            HiveJoin hj = (HiveJoin) left;
            leftCondition = hj.getCondition();
            leftJoinInputs = ImmutableList.of(Pair.of(0, 1));
            leftJoinTypes = ImmutableList.of(hj.getJoinType());
            leftJoinFilters = ImmutableList.of(hj.getJoinFilter());
            try {
                combinable = isCombinableJoin(join, hj);
            } catch (CalciteSemanticException e) {
                LOG.trace("Failed to merge join-join", e);
                combinable = false;
            }
        } else {
            HiveMultiJoin hmj = (HiveMultiJoin) left;
            leftCondition = hmj.getCondition();
            leftJoinInputs = hmj.getJoinInputs();
            leftJoinTypes = hmj.getJoinTypes();
            leftJoinFilters = hmj.getJoinFilters();
            try {
                combinable = isCombinableJoin(join, hmj);
            } catch (CalciteSemanticException e) {
                LOG.trace("Failed to merge join-multijoin", e);
                combinable = false;
            }
        }
        if (combinable) {
            newJoinCondition.add(leftCondition);
            for (int i = 0; i < leftJoinInputs.size(); i++) {
                joinInputs.add(leftJoinInputs.get(i));
                joinTypes.add(leftJoinTypes.get(i));
                joinFilters.add(leftJoinFilters.get(i));
            }
            newInputs.addAll(left.getInputs());
        } else {
            // The join operation in the child is not on the same keys
            return null;
        }
    } else {
        // The left child is not a join or multijoin operator
        return null;
    }
    final int numberLeftInputs = newInputs.size();
    // Right child
    newInputs.add(right);
    // If we cannot combine any of the children, we bail out
    newJoinCondition.add(join.getCondition());
    if (newJoinCondition.size() == 1) {
        return null;
    }
    final List<RelDataTypeField> systemFieldList = ImmutableList.of();
    List<List<RexNode>> joinKeyExprs = new ArrayList<List<RexNode>>();
    List<Integer> filterNulls = new ArrayList<Integer>();
    for (int i = 0; i < newInputs.size(); i++) {
        joinKeyExprs.add(new ArrayList<RexNode>());
    }
    RexNode filters;
    try {
        filters = HiveRelOptUtil.splitHiveJoinCondition(systemFieldList, newInputs, join.getCondition(), joinKeyExprs, filterNulls, null);
    } catch (CalciteSemanticException e) {
        LOG.trace("Failed to merge joins", e);
        return null;
    }
    ImmutableBitSet.Builder keysInInputsBuilder = ImmutableBitSet.builder();
    for (int i = 0; i < newInputs.size(); i++) {
        List<RexNode> partialCondition = joinKeyExprs.get(i);
        if (!partialCondition.isEmpty()) {
            keysInInputsBuilder.set(i);
        }
    }
    // If we cannot merge, we bail out
    ImmutableBitSet keysInInputs = keysInInputsBuilder.build();
    ImmutableBitSet leftReferencedInputs = keysInInputs.intersect(ImmutableBitSet.range(numberLeftInputs));
    ImmutableBitSet rightReferencedInputs = keysInInputs.intersect(ImmutableBitSet.range(numberLeftInputs, newInputs.size()));
    if (join.getJoinType() != JoinRelType.INNER && (leftReferencedInputs.cardinality() > 1 || rightReferencedInputs.cardinality() > 1)) {
        return null;
    }
    // Otherwise, we add to the join specs
    if (join.getJoinType() != JoinRelType.INNER) {
        int leftInput = keysInInputs.nextSetBit(0);
        int rightInput = keysInInputs.nextSetBit(numberLeftInputs);
        joinInputs.add(Pair.of(leftInput, rightInput));
        joinTypes.add(join.getJoinType());
        joinFilters.add(filters);
    } else {
        for (int i : leftReferencedInputs) {
            for (int j : rightReferencedInputs) {
                joinInputs.add(Pair.of(i, j));
                joinTypes.add(join.getJoinType());
                joinFilters.add(filters);
            }
        }
    }
    // We can now create a multijoin operator
    RexNode newCondition = RexUtil.flatten(rexBuilder, RexUtil.composeConjunction(rexBuilder, newJoinCondition, false));
    List<RelNode> newInputsArray = Lists.newArrayList(newInputs);
    JoinPredicateInfo joinPredInfo = null;
    try {
        joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(newInputsArray, systemFieldList, newCondition);
    } catch (CalciteSemanticException e) {
        throw new RuntimeException(e);
    }
    // If the number of joins < number of input tables-1, this is not a star join.
    if (joinPredInfo.getEquiJoinPredicateElements().size() < newInputs.size() - 1) {
        return null;
    }
    // Validate that the multi-join is a valid star join before returning it.
    for (int i = 0; i < newInputs.size(); i++) {
        List<RexNode> joinKeys = null;
        for (int j = 0; j < joinPredInfo.getEquiJoinPredicateElements().size(); j++) {
            List<RexNode> currJoinKeys = joinPredInfo.getEquiJoinPredicateElements().get(j).getJoinExprs(i);
            if (currJoinKeys.isEmpty()) {
                continue;
            }
            if (joinKeys == null) {
                joinKeys = currJoinKeys;
            } else {
                // Bail out if this is the case.
                if (!joinKeys.containsAll(currJoinKeys) || !currJoinKeys.containsAll(joinKeys)) {
                    return null;
                }
            }
        }
    }
    return new HiveMultiJoin(join.getCluster(), newInputsArray, newCondition, join.getRowType(), joinInputs, joinTypes, joinFilters, joinPredInfo);
}
Also used : HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ArrayList(java.util.ArrayList) HiveJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin) RexBuilder(org.apache.calcite.rex.RexBuilder) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) Pair(org.apache.calcite.util.Pair) JoinRelType(org.apache.calcite.rel.core.JoinRelType) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RelNode(org.apache.calcite.rel.RelNode) JoinPredicateInfo(org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo) RexNode(org.apache.calcite.rex.RexNode)

Example 17 with Pair

use of org.apache.calcite.util.Pair in project hive by apache.

the class Vectorizer method validateAggregationDesc.

private Pair<Boolean, Boolean> validateAggregationDesc(AggregationDesc aggDesc, ProcessingMode processingMode, boolean hasKeys) {
    String udfName = aggDesc.getGenericUDAFName().toLowerCase();
    if (!supportedAggregationUdfs.contains(udfName)) {
        setExpressionIssue("Aggregation Function", "UDF " + udfName + " not supported");
        return new Pair<Boolean, Boolean>(false, false);
    }
    if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters(), "Aggregation Function UDF " + udfName + " parameter")) {
        return new Pair<Boolean, Boolean>(false, false);
    }
    // See if we can vectorize the aggregation.
    VectorizationContext vc = new ValidatorVectorizationContext(hiveConf);
    VectorAggregateExpression vectorAggrExpr;
    try {
        vectorAggrExpr = vc.getAggregatorExpression(aggDesc);
    } catch (Exception e) {
        // We should have already attempted to vectorize in validateAggregationDesc.
        if (LOG.isDebugEnabled()) {
            LOG.debug("Vectorization of aggregation should have succeeded ", e);
        }
        setExpressionIssue("Aggregation Function", "Vectorization of aggreation should have succeeded " + e);
        return new Pair<Boolean, Boolean>(false, false);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Aggregation " + aggDesc.getExprString() + " --> " + " vector expression " + vectorAggrExpr.toString());
    }
    ObjectInspector.Category outputCategory = aggregationOutputCategory(vectorAggrExpr);
    boolean outputIsPrimitive = (outputCategory == ObjectInspector.Category.PRIMITIVE);
    if (processingMode == ProcessingMode.MERGE_PARTIAL && hasKeys && !outputIsPrimitive) {
        setOperatorIssue("Vectorized Reduce MergePartial GROUP BY keys can only handle aggregate outputs that are primitive types");
        return new Pair<Boolean, Boolean>(false, false);
    }
    return new Pair<Boolean, Boolean>(true, outputIsPrimitive);
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) VectorAggregateExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString) UDFToBoolean(org.apache.hadoop.hive.ql.udf.UDFToBoolean) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Pair(org.apache.calcite.util.Pair) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair)

Example 18 with Pair

use of org.apache.calcite.util.Pair in project flink by apache.

the class FlinkAggregateExpandDistinctAggregatesRule method convertSingletonDistinct.

/**
	 * Converts an aggregate with one distinct aggregate and one or more
	 * non-distinct aggregates to multi-phase aggregates (see reference example
	 * below).
	 *
	 * @param relBuilder Contains the input relational expression
	 * @param aggregate  Original aggregate
	 * @param argLists   Arguments and filters to the distinct aggregate function
	 *
	 */
private RelBuilder convertSingletonDistinct(RelBuilder relBuilder, Aggregate aggregate, Set<Pair<List<Integer>, Integer>> argLists) {
    // For example,
    //	SELECT deptno, COUNT(*), SUM(bonus), MIN(DISTINCT sal)
    //	FROM emp
    //	GROUP BY deptno
    //
    // becomes
    //
    //	SELECT deptno, SUM(cnt), SUM(bonus), MIN(sal)
    //	FROM (
    //		  SELECT deptno, COUNT(*) as cnt, SUM(bonus), sal
    //		  FROM EMP
    //		  GROUP BY deptno, sal)			// Aggregate B
    //	GROUP BY deptno						// Aggregate A
    relBuilder.push(aggregate.getInput());
    final List<Pair<RexNode, String>> projects = new ArrayList<>();
    final Map<Integer, Integer> sourceOf = new HashMap<>();
    SortedSet<Integer> newGroupSet = new TreeSet<>();
    final List<RelDataTypeField> childFields = relBuilder.peek().getRowType().getFieldList();
    final boolean hasGroupBy = aggregate.getGroupSet().size() > 0;
    SortedSet<Integer> groupSet = new TreeSet<>(aggregate.getGroupSet().asList());
    // Add the distinct aggregate column(s) to the group-by columns,
    // if not already a part of the group-by
    newGroupSet.addAll(aggregate.getGroupSet().asList());
    for (Pair<List<Integer>, Integer> argList : argLists) {
        newGroupSet.addAll(argList.getKey());
    }
    // transformation.
    for (int arg : newGroupSet) {
        sourceOf.put(arg, projects.size());
        projects.add(RexInputRef.of2(arg, childFields));
    }
    // Generate the intermediate aggregate B
    final List<AggregateCall> aggCalls = aggregate.getAggCallList();
    final List<AggregateCall> newAggCalls = new ArrayList<>();
    final List<Integer> fakeArgs = new ArrayList<>();
    final Map<AggregateCall, Integer> callArgMap = new HashMap<>();
    // e.g. if real arguments are 0, 1, 3. Then the fake arguments will be 2, 4
    for (final AggregateCall aggCall : aggCalls) {
        if (!aggCall.isDistinct()) {
            for (int arg : aggCall.getArgList()) {
                if (!groupSet.contains(arg)) {
                    sourceOf.put(arg, projects.size());
                }
            }
        }
    }
    int fakeArg0 = 0;
    for (final AggregateCall aggCall : aggCalls) {
        // We will deal with non-distinct aggregates below
        if (!aggCall.isDistinct()) {
            boolean isGroupKeyUsedInAgg = false;
            for (int arg : aggCall.getArgList()) {
                if (groupSet.contains(arg)) {
                    isGroupKeyUsedInAgg = true;
                    break;
                }
            }
            if (aggCall.getArgList().size() == 0 || isGroupKeyUsedInAgg) {
                while (sourceOf.get(fakeArg0) != null) {
                    ++fakeArg0;
                }
                fakeArgs.add(fakeArg0);
                ++fakeArg0;
            }
        }
    }
    for (final AggregateCall aggCall : aggCalls) {
        if (!aggCall.isDistinct()) {
            for (int arg : aggCall.getArgList()) {
                if (!groupSet.contains(arg)) {
                    sourceOf.remove(arg);
                }
            }
        }
    }
    // Compute the remapped arguments using fake arguments for non-distinct
    // aggregates with no arguments e.g. count(*).
    int fakeArgIdx = 0;
    for (final AggregateCall aggCall : aggCalls) {
        // as-is all the non-distinct aggregates
        if (!aggCall.isDistinct()) {
            final AggregateCall newCall = AggregateCall.create(aggCall.getAggregation(), false, aggCall.getArgList(), -1, ImmutableBitSet.of(newGroupSet).cardinality(), relBuilder.peek(), null, aggCall.name);
            newAggCalls.add(newCall);
            if (newCall.getArgList().size() == 0) {
                int fakeArg = fakeArgs.get(fakeArgIdx);
                callArgMap.put(newCall, fakeArg);
                sourceOf.put(fakeArg, projects.size());
                projects.add(Pair.of((RexNode) new RexInputRef(fakeArg, newCall.getType()), newCall.getName()));
                ++fakeArgIdx;
            } else {
                for (int arg : newCall.getArgList()) {
                    if (groupSet.contains(arg)) {
                        int fakeArg = fakeArgs.get(fakeArgIdx);
                        callArgMap.put(newCall, fakeArg);
                        sourceOf.put(fakeArg, projects.size());
                        projects.add(Pair.of((RexNode) new RexInputRef(fakeArg, newCall.getType()), newCall.getName()));
                        ++fakeArgIdx;
                    } else {
                        sourceOf.put(arg, projects.size());
                        projects.add(Pair.of((RexNode) new RexInputRef(arg, newCall.getType()), newCall.getName()));
                    }
                }
            }
        }
    }
    // Generate the aggregate B (see the reference example above)
    relBuilder.push(aggregate.copy(aggregate.getTraitSet(), relBuilder.build(), false, ImmutableBitSet.of(newGroupSet), null, newAggCalls));
    // Convert the existing aggregate to aggregate A (see the reference example above)
    final List<AggregateCall> newTopAggCalls = Lists.newArrayList(aggregate.getAggCallList());
    // Use the remapped arguments for the (non)distinct aggregate calls
    for (int i = 0; i < newTopAggCalls.size(); i++) {
        // Re-map arguments.
        final AggregateCall aggCall = newTopAggCalls.get(i);
        final int argCount = aggCall.getArgList().size();
        final List<Integer> newArgs = new ArrayList<>(argCount);
        final AggregateCall newCall;
        for (int j = 0; j < argCount; j++) {
            final Integer arg = aggCall.getArgList().get(j);
            if (callArgMap.containsKey(aggCall)) {
                newArgs.add(sourceOf.get(callArgMap.get(aggCall)));
            } else {
                newArgs.add(sourceOf.get(arg));
            }
        }
        if (aggCall.isDistinct()) {
            newCall = AggregateCall.create(aggCall.getAggregation(), false, newArgs, -1, aggregate.getGroupSet().cardinality(), relBuilder.peek(), aggCall.getType(), aggCall.name);
        } else {
            // aggregate A must be SUM. For other aggregates, it remains the same.
            if (aggCall.getAggregation() instanceof SqlCountAggFunction) {
                if (aggCall.getArgList().size() == 0) {
                    newArgs.add(sourceOf.get(callArgMap.get(aggCall)));
                }
                if (hasGroupBy) {
                    SqlSumAggFunction sumAgg = new SqlSumAggFunction(null);
                    newCall = AggregateCall.create(sumAgg, false, newArgs, -1, aggregate.getGroupSet().cardinality(), relBuilder.peek(), aggCall.getType(), aggCall.getName());
                } else {
                    SqlSumEmptyIsZeroAggFunction sumAgg = new SqlSumEmptyIsZeroAggFunction();
                    newCall = AggregateCall.create(sumAgg, false, newArgs, -1, aggregate.getGroupSet().cardinality(), relBuilder.peek(), aggCall.getType(), aggCall.getName());
                }
            } else {
                newCall = AggregateCall.create(aggCall.getAggregation(), false, newArgs, -1, aggregate.getGroupSet().cardinality(), relBuilder.peek(), aggCall.getType(), aggCall.name);
            }
        }
        newTopAggCalls.set(i, newCall);
    }
    // Populate the group-by keys with the remapped arguments for aggregate A
    newGroupSet.clear();
    for (int arg : aggregate.getGroupSet()) {
        newGroupSet.add(sourceOf.get(arg));
    }
    relBuilder.push(aggregate.copy(aggregate.getTraitSet(), relBuilder.build(), aggregate.indicator, ImmutableBitSet.of(newGroupSet), null, newTopAggCalls));
    return relBuilder;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SqlCountAggFunction(org.apache.calcite.sql.fun.SqlCountAggFunction) AggregateCall(org.apache.calcite.rel.core.AggregateCall) SqlSumEmptyIsZeroAggFunction(org.apache.calcite.sql.fun.SqlSumEmptyIsZeroAggFunction) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) TreeSet(java.util.TreeSet) SqlSumAggFunction(org.apache.calcite.sql.fun.SqlSumAggFunction) RexInputRef(org.apache.calcite.rex.RexInputRef) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) ImmutableIntList(org.apache.calcite.util.ImmutableIntList) List(java.util.List) Pair(org.apache.calcite.util.Pair) RexNode(org.apache.calcite.rex.RexNode)

Example 19 with Pair

use of org.apache.calcite.util.Pair in project flink by apache.

the class FlinkAggregateExpandDistinctAggregatesRule method rewriteUsingGroupingSets.

/*
	public RelBuilder convertSingletonDistinct(RelBuilder relBuilder,
											   Aggregate aggregate, Set<Pair<List<Integer>, Integer>> argLists) {
		// For example,
		//	SELECT deptno, COUNT(*), SUM(bonus), MIN(DISTINCT sal)
		//	FROM emp
		//	GROUP BY deptno
		//
		// becomes
		//
		//	SELECT deptno, SUM(cnt), SUM(bonus), MIN(sal)
		//	FROM (
		//		  SELECT deptno, COUNT(*) as cnt, SUM(bonus), sal
		//		  FROM EMP
		//		  GROUP BY deptno, sal)			// Aggregate B
		//	GROUP BY deptno						// Aggregate A
		relBuilder.push(aggregate.getInput());
		final List<Pair<RexNode, String>> projects = new ArrayList<>();
		final Map<Integer, Integer> sourceOf = new HashMap<>();
		SortedSet<Integer> newGroupSet = new TreeSet<>();
		final List<RelDataTypeField> childFields =
				relBuilder.peek().getRowType().getFieldList();
		final boolean hasGroupBy = aggregate.getGroupSet().size() > 0;

		// Add the distinct aggregate column(s) to the group-by columns,
		// if not already a part of the group-by
		newGroupSet.addAll(aggregate.getGroupSet().asList());
		for (Pair<List<Integer>, Integer> argList : argLists) {
			newGroupSet.addAll(argList.getKey());
		}

		// Re-map the arguments to the aggregate A. These arguments will get
		// remapped because of the intermediate aggregate B generated as part of the
		// transformation.
		for (int arg : newGroupSet) {
			sourceOf.put(arg, projects.size());
			projects.add(RexInputRef.of2(arg, childFields));
		}
		// Generate the intermediate aggregate B
		final List<AggregateCall> aggCalls = aggregate.getAggCallList();
		final List<AggregateCall> newAggCalls = new ArrayList<>();
		final List<Integer> fakeArgs = new ArrayList<>();
		final Map<AggregateCall, Integer> callArgMap = new HashMap<>();
		// First identify the real arguments, then use the rest for fake arguments
		// e.g. if real arguments are 0, 1, 3. Then the fake arguments will be 2, 4
		for (final AggregateCall aggCall : aggCalls) {
			if (!aggCall.isDistinct()) {
				for (int arg : aggCall.getArgList()) {
					if (!sourceOf.containsKey(arg)) {
						sourceOf.put(arg, projects.size());
					}
				}
			}
		}
		int fakeArg0 = 0;
		for (final AggregateCall aggCall : aggCalls) {
			// We will deal with non-distinct aggregates below
			if (!aggCall.isDistinct()) {
				boolean isGroupKeyUsedInAgg = false;
				for (int arg : aggCall.getArgList()) {
					if (sourceOf.containsKey(arg)) {
						isGroupKeyUsedInAgg = true;
						break;
					}
				}
				if (aggCall.getArgList().size() == 0 || isGroupKeyUsedInAgg) {
					while (sourceOf.get(fakeArg0) != null) {
						++fakeArg0;
					}
					fakeArgs.add(fakeArg0);
				}
			}
		}
		for (final AggregateCall aggCall : aggCalls) {
			if (!aggCall.isDistinct()) {
				for (int arg : aggCall.getArgList()) {
					if (!sourceOf.containsKey(arg)) {
						sourceOf.remove(arg);
					}
				}
			}
		}
		// Compute the remapped arguments using fake arguments for non-distinct
		// aggregates with no arguments e.g. count(*).
		int fakeArgIdx = 0;
		for (final AggregateCall aggCall : aggCalls) {
			// Project the column corresponding to the distinct aggregate. Project
			// as-is all the non-distinct aggregates
			if (!aggCall.isDistinct()) {
				final AggregateCall newCall =
						AggregateCall.create(aggCall.getAggregation(), false,
								aggCall.getArgList(), -1,
								ImmutableBitSet.of(newGroupSet).cardinality(),
								relBuilder.peek(), null, aggCall.name);
				newAggCalls.add(newCall);
				if (newCall.getArgList().size() == 0) {
					int fakeArg = fakeArgs.get(fakeArgIdx);
					callArgMap.put(newCall, fakeArg);
					sourceOf.put(fakeArg, projects.size());
					projects.add(
							Pair.of((RexNode) new RexInputRef(fakeArg, newCall.getType()),
									newCall.getName()));
					++fakeArgIdx;
				} else {
					for (int arg : newCall.getArgList()) {
						if (sourceOf.containsKey(arg)) {
							int fakeArg = fakeArgs.get(fakeArgIdx);
							callArgMap.put(newCall, fakeArg);
							sourceOf.put(fakeArg, projects.size());
							projects.add(
									Pair.of((RexNode) new RexInputRef(fakeArg, newCall.getType()),
											newCall.getName()));
							++fakeArgIdx;
						} else {
							sourceOf.put(arg, projects.size());
							projects.add(
									Pair.of((RexNode) new RexInputRef(arg, newCall.getType()),
											newCall.getName()));
						}
					}
				}
			}
		}
		// Generate the aggregate B (see the reference example above)
		relBuilder.push(
				aggregate.copy(
						aggregate.getTraitSet(), relBuilder.build(),
						false, ImmutableBitSet.of(newGroupSet), null, newAggCalls));
		// Convert the existing aggregate to aggregate A (see the reference example above)
		final List<AggregateCall> newTopAggCalls =
				Lists.newArrayList(aggregate.getAggCallList());
		// Use the remapped arguments for the (non)distinct aggregate calls
		for (int i = 0; i < newTopAggCalls.size(); i++) {
			// Re-map arguments.
			final AggregateCall aggCall = newTopAggCalls.get(i);
			final int argCount = aggCall.getArgList().size();
			final List<Integer> newArgs = new ArrayList<>(argCount);
			final AggregateCall newCall;


			for (int j = 0; j < argCount; j++) {
				final Integer arg = aggCall.getArgList().get(j);
				if (callArgMap.containsKey(aggCall)) {
					newArgs.add(sourceOf.get(callArgMap.get(aggCall)));
				}
				else {
					newArgs.add(sourceOf.get(arg));
				}
			}
			if (aggCall.isDistinct()) {
				newCall =
						AggregateCall.create(aggCall.getAggregation(), false, newArgs,
								-1, aggregate.getGroupSet().cardinality(), relBuilder.peek(),
								aggCall.getType(), aggCall.name);
			} else {
				// If aggregate B had a COUNT aggregate call the corresponding aggregate at
				// aggregate A must be SUM. For other aggregates, it remains the same.
				if (aggCall.getAggregation() instanceof SqlCountAggFunction) {
					if (aggCall.getArgList().size() == 0) {
						newArgs.add(sourceOf.get(callArgMap.get(aggCall)));
					}
					if (hasGroupBy) {
						SqlSumAggFunction sumAgg = new SqlSumAggFunction(null);
						newCall =
								AggregateCall.create(sumAgg, false, newArgs, -1,
										aggregate.getGroupSet().cardinality(), relBuilder.peek(),
										aggCall.getType(), aggCall.getName());
					} else {
						SqlSumEmptyIsZeroAggFunction sumAgg = new SqlSumEmptyIsZeroAggFunction();
						newCall =
								AggregateCall.create(sumAgg, false, newArgs, -1,
										aggregate.getGroupSet().cardinality(), relBuilder.peek(),
										aggCall.getType(), aggCall.getName());
					}
				} else {
					newCall =
							AggregateCall.create(aggCall.getAggregation(), false, newArgs, -1,
									aggregate.getGroupSet().cardinality(),
									relBuilder.peek(), aggCall.getType(), aggCall.name);
				}
			}
			newTopAggCalls.set(i, newCall);
		}
		// Populate the group-by keys with the remapped arguments for aggregate A
		newGroupSet.clear();
		for (int arg : aggregate.getGroupSet()) {
			newGroupSet.add(sourceOf.get(arg));
		}
		relBuilder.push(
				aggregate.copy(aggregate.getTraitSet(),
						relBuilder.build(), aggregate.indicator,
						ImmutableBitSet.of(newGroupSet), null, newTopAggCalls));
		return relBuilder;
	}
	*/
@SuppressWarnings("DanglingJavadoc")
private void rewriteUsingGroupingSets(RelOptRuleCall call, Aggregate aggregate, Set<Pair<List<Integer>, Integer>> argLists) {
    final Set<ImmutableBitSet> groupSetTreeSet = new TreeSet<>(ImmutableBitSet.ORDERING);
    groupSetTreeSet.add(aggregate.getGroupSet());
    for (Pair<List<Integer>, Integer> argList : argLists) {
        groupSetTreeSet.add(ImmutableBitSet.of(argList.left).setIf(argList.right, argList.right >= 0).union(aggregate.getGroupSet()));
    }
    final ImmutableList<ImmutableBitSet> groupSets = ImmutableList.copyOf(groupSetTreeSet);
    final ImmutableBitSet fullGroupSet = ImmutableBitSet.union(groupSets);
    final List<AggregateCall> distinctAggCalls = new ArrayList<>();
    for (Pair<AggregateCall, String> aggCall : aggregate.getNamedAggCalls()) {
        if (!aggCall.left.isDistinct()) {
            distinctAggCalls.add(aggCall.left.rename(aggCall.right));
        }
    }
    final RelBuilder relBuilder = call.builder();
    relBuilder.push(aggregate.getInput());
    relBuilder.aggregate(relBuilder.groupKey(fullGroupSet, groupSets.size() > 1, groupSets), distinctAggCalls);
    final RelNode distinct = relBuilder.peek();
    final int groupCount = fullGroupSet.cardinality();
    final int indicatorCount = groupSets.size() > 1 ? groupCount : 0;
    final RelOptCluster cluster = aggregate.getCluster();
    final RexBuilder rexBuilder = cluster.getRexBuilder();
    final RelDataTypeFactory typeFactory = cluster.getTypeFactory();
    final RelDataType booleanType = typeFactory.createTypeWithNullability(typeFactory.createSqlType(SqlTypeName.BOOLEAN), false);
    final List<Pair<RexNode, String>> predicates = new ArrayList<>();
    final Map<ImmutableBitSet, Integer> filters = new HashMap<>();
    /** Function to register a filter for a group set. */
    class Registrar {

        RexNode group = null;

        private int register(ImmutableBitSet groupSet) {
            if (group == null) {
                group = makeGroup(groupCount - 1);
            }
            final RexNode node = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, group, rexBuilder.makeExactLiteral(toNumber(remap(fullGroupSet, groupSet))));
            predicates.add(Pair.of(node, toString(groupSet)));
            return groupCount + indicatorCount + distinctAggCalls.size() + predicates.size() - 1;
        }

        private RexNode makeGroup(int i) {
            final RexInputRef ref = rexBuilder.makeInputRef(booleanType, groupCount + i);
            final RexNode kase = rexBuilder.makeCall(SqlStdOperatorTable.CASE, ref, rexBuilder.makeExactLiteral(BigDecimal.ZERO), rexBuilder.makeExactLiteral(TWO.pow(i)));
            if (i == 0) {
                return kase;
            } else {
                return rexBuilder.makeCall(SqlStdOperatorTable.PLUS, makeGroup(i - 1), kase);
            }
        }

        private BigDecimal toNumber(ImmutableBitSet bitSet) {
            BigDecimal n = BigDecimal.ZERO;
            for (int key : bitSet) {
                n = n.add(TWO.pow(key));
            }
            return n;
        }

        private String toString(ImmutableBitSet bitSet) {
            final StringBuilder buf = new StringBuilder("$i");
            for (int key : bitSet) {
                buf.append(key).append('_');
            }
            return buf.substring(0, buf.length() - 1);
        }
    }
    final Registrar registrar = new Registrar();
    for (ImmutableBitSet groupSet : groupSets) {
        filters.put(groupSet, registrar.register(groupSet));
    }
    if (!predicates.isEmpty()) {
        List<Pair<RexNode, String>> nodes = new ArrayList<>();
        for (RelDataTypeField f : relBuilder.peek().getRowType().getFieldList()) {
            final RexNode node = rexBuilder.makeInputRef(f.getType(), f.getIndex());
            nodes.add(Pair.of(node, f.getName()));
        }
        nodes.addAll(predicates);
        relBuilder.project(Pair.left(nodes), Pair.right(nodes));
    }
    int x = groupCount + indicatorCount;
    final List<AggregateCall> newCalls = new ArrayList<>();
    for (AggregateCall aggCall : aggregate.getAggCallList()) {
        final int newFilterArg;
        final List<Integer> newArgList;
        final SqlAggFunction aggregation;
        if (!aggCall.isDistinct()) {
            aggregation = SqlStdOperatorTable.MIN;
            newArgList = ImmutableIntList.of(x++);
            newFilterArg = filters.get(aggregate.getGroupSet());
        } else {
            aggregation = aggCall.getAggregation();
            newArgList = remap(fullGroupSet, aggCall.getArgList());
            newFilterArg = filters.get(ImmutableBitSet.of(aggCall.getArgList()).setIf(aggCall.filterArg, aggCall.filterArg >= 0).union(aggregate.getGroupSet()));
        }
        final AggregateCall newCall = AggregateCall.create(aggregation, false, newArgList, newFilterArg, aggregate.getGroupCount(), distinct, null, aggCall.name);
        newCalls.add(newCall);
    }
    relBuilder.aggregate(relBuilder.groupKey(remap(fullGroupSet, aggregate.getGroupSet()), aggregate.indicator, remap(fullGroupSet, aggregate.getGroupSets())), newCalls);
    relBuilder.convert(aggregate.getRowType(), true);
    call.transformTo(relBuilder.build());
}
Also used : RelOptCluster(org.apache.calcite.plan.RelOptCluster) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) RelDataType(org.apache.calcite.rel.type.RelDataType) TreeSet(java.util.TreeSet) RelDataTypeFactory(org.apache.calcite.rel.type.RelDataTypeFactory) RexBuilder(org.apache.calcite.rex.RexBuilder) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) ImmutableIntList(org.apache.calcite.util.ImmutableIntList) List(java.util.List) Pair(org.apache.calcite.util.Pair) RelBuilder(org.apache.calcite.tools.RelBuilder) SqlAggFunction(org.apache.calcite.sql.SqlAggFunction) BigDecimal(java.math.BigDecimal) AggregateCall(org.apache.calcite.rel.core.AggregateCall) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RelNode(org.apache.calcite.rel.RelNode) RexInputRef(org.apache.calcite.rex.RexInputRef) RexNode(org.apache.calcite.rex.RexNode)

Example 20 with Pair

use of org.apache.calcite.util.Pair in project flink by apache.

the class FlinkRelDecorrelator method aggregateCorrelatorOutput.

/**
	 * Pulls a {@link Project} above a {@link Correlate} from its RHS input.
	 * Enforces nullability for join output.
	 *
	 * @param correlate Correlate
	 * @param project   the original project as the RHS input of the join
	 * @param isCount   Positions which are calls to the <code>COUNT</code>
	 *                  aggregation function
	 * @return the subtree with the new LogicalProject at the root
	 */
private RelNode aggregateCorrelatorOutput(Correlate correlate, LogicalProject project, Set<Integer> isCount) {
    final RelNode left = correlate.getLeft();
    final JoinRelType joinType = correlate.getJoinType().toJoinType();
    // now create the new project
    final List<Pair<RexNode, String>> newProjects = Lists.newArrayList();
    // Project everything from the LHS and then those from the original
    // project
    final List<RelDataTypeField> leftInputFields = left.getRowType().getFieldList();
    for (int i = 0; i < leftInputFields.size(); i++) {
        newProjects.add(RexInputRef.of2(i, leftInputFields));
    }
    // Marked where the projected expr is coming from so that the types will
    // become nullable for the original projections which are now coming out
    // of the nullable side of the OJ.
    boolean projectPulledAboveLeftCorrelator = joinType.generatesNullsOnRight();
    for (Pair<RexNode, String> pair : project.getNamedProjects()) {
        RexNode newProjExpr = removeCorrelationExpr(pair.left, projectPulledAboveLeftCorrelator, isCount);
        newProjects.add(Pair.of(newProjExpr, pair.right));
    }
    return RelOptUtil.createProject(correlate, newProjects, false);
}
Also used : JoinRelType(org.apache.calcite.rel.core.JoinRelType) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RelNode(org.apache.calcite.rel.RelNode) Pair(org.apache.calcite.util.Pair) RexNode(org.apache.calcite.rex.RexNode)

Aggregations

Pair (org.apache.calcite.util.Pair)29 RexNode (org.apache.calcite.rex.RexNode)25 RelNode (org.apache.calcite.rel.RelNode)20 RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField)19 ArrayList (java.util.ArrayList)16 ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet)11 HashMap (java.util.HashMap)10 RexInputRef (org.apache.calcite.rex.RexInputRef)8 HiveRelNode (org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode)8 TreeMap (java.util.TreeMap)7 AggregateCall (org.apache.calcite.rel.core.AggregateCall)7 ImmutableMap (com.google.common.collect.ImmutableMap)6 ImmutableSortedMap (com.google.common.collect.ImmutableSortedMap)6 Map (java.util.Map)6 NavigableMap (java.util.NavigableMap)6 SortedMap (java.util.SortedMap)6 RexBuilder (org.apache.calcite.rex.RexBuilder)6 JoinRelType (org.apache.calcite.rel.core.JoinRelType)5 ImmutableList (com.google.common.collect.ImmutableList)4 List (java.util.List)4