Search in sources :

Example 16 with RelOptCluster

use of org.apache.calcite.plan.RelOptCluster in project hive by apache.

the class HiveExceptRewriteRule method onMatch.

// ~ Methods ----------------------------------------------------------------
public void onMatch(RelOptRuleCall call) {
    final HiveExcept hiveExcept = call.rel(0);
    final RelOptCluster cluster = hiveExcept.getCluster();
    final RexBuilder rexBuilder = cluster.getRexBuilder();
    Builder<RelNode> bldr = new ImmutableList.Builder<RelNode>();
    // branch
    try {
        bldr.add(createFirstGB(hiveExcept.getInputs().get(0), true, cluster, rexBuilder));
        bldr.add(createFirstGB(hiveExcept.getInputs().get(1), false, cluster, rexBuilder));
    } catch (CalciteSemanticException e) {
        LOG.debug(e.toString());
        throw new RuntimeException(e);
    }
    // create a union above all the branches
    // the schema of union looks like this
    // all keys + VCol + c
    HiveRelNode union = new HiveUnion(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build());
    // 2nd level GB: create a GB (all keys + sum(c) as a + sum(VCol*c) as b) for
    // each branch
    final List<RexNode> gbChildProjLst = Lists.newArrayList();
    final List<Integer> groupSetPositions = Lists.newArrayList();
    int unionColumnSize = union.getRowType().getFieldList().size();
    for (int cInd = 0; cInd < unionColumnSize; cInd++) {
        gbChildProjLst.add(rexBuilder.makeInputRef(union, cInd));
        // the last 2 columns are VCol and c
        if (cInd < unionColumnSize - 2) {
            groupSetPositions.add(cInd);
        }
    }
    try {
        gbChildProjLst.add(multiply(rexBuilder.makeInputRef(union, unionColumnSize - 2), rexBuilder.makeInputRef(union, unionColumnSize - 1), cluster, rexBuilder));
    } catch (CalciteSemanticException e) {
        LOG.debug(e.toString());
        throw new RuntimeException(e);
    }
    RelNode gbInputRel = null;
    try {
        // Here we create a project for the following reasons:
        // (1) GBy only accepts arg as a position of the input, however, we need to sum on VCol*c
        // (2) This can better reuse the function createSingleArgAggCall.
        gbInputRel = HiveProject.create(union, gbChildProjLst, null);
    } catch (CalciteSemanticException e) {
        LOG.debug(e.toString());
        throw new RuntimeException(e);
    }
    // gbInputRel's schema is like this
    // all keys + VCol + c + VCol*c
    List<AggregateCall> aggregateCalls = Lists.newArrayList();
    RelDataType aggFnRetType = TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory());
    // sum(c)
    AggregateCall aggregateCall = HiveCalciteUtil.createSingleArgAggCall("sum", cluster, TypeInfoFactory.longTypeInfo, unionColumnSize - 1, aggFnRetType);
    aggregateCalls.add(aggregateCall);
    // sum(VCol*c)
    aggregateCall = HiveCalciteUtil.createSingleArgAggCall("sum", cluster, TypeInfoFactory.longTypeInfo, unionColumnSize, aggFnRetType);
    aggregateCalls.add(aggregateCall);
    final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions);
    HiveRelNode aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), gbInputRel, false, groupSet, null, aggregateCalls);
    if (!hiveExcept.all) {
        RelNode filterRel = null;
        try {
            filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), aggregateRel, makeFilterExprForExceptDistinct(aggregateRel, unionColumnSize, cluster, rexBuilder));
        } catch (CalciteSemanticException e) {
            LOG.debug(e.toString());
            throw new RuntimeException(e);
        }
        // finally add a project to project out the last 2 columns
        Set<Integer> projectOutColumnPositions = new HashSet<>();
        projectOutColumnPositions.add(filterRel.getRowType().getFieldList().size() - 2);
        projectOutColumnPositions.add(filterRel.getRowType().getFieldList().size() - 1);
        try {
            call.transformTo(HiveCalciteUtil.createProjectWithoutColumn(filterRel, projectOutColumnPositions));
        } catch (CalciteSemanticException e) {
            LOG.debug(e.toString());
            throw new RuntimeException(e);
        }
    } else {
        List<RexNode> originalInputRefs = Lists.transform(aggregateRel.getRowType().getFieldList(), new Function<RelDataTypeField, RexNode>() {

            @Override
            public RexNode apply(RelDataTypeField input) {
                return new RexInputRef(input.getIndex(), input.getType());
            }
        });
        List<RexNode> copyInputRefs = new ArrayList<>();
        try {
            copyInputRefs.add(makeExprForExceptAll(aggregateRel, unionColumnSize, cluster, rexBuilder));
        } catch (CalciteSemanticException e) {
            LOG.debug(e.toString());
            throw new RuntimeException(e);
        }
        for (int i = 0; i < originalInputRefs.size() - 2; i++) {
            copyInputRefs.add(originalInputRefs.get(i));
        }
        RelNode srcRel = null;
        try {
            srcRel = HiveProject.create(aggregateRel, copyInputRefs, null);
            HiveTableFunctionScan udtf = HiveCalciteUtil.createUDTFForSetOp(cluster, srcRel);
            // finally add a project to project out the 1st columns
            Set<Integer> projectOutColumnPositions = new HashSet<>();
            projectOutColumnPositions.add(0);
            call.transformTo(HiveCalciteUtil.createProjectWithoutColumn(udtf, projectOutColumnPositions));
        } catch (SemanticException e) {
            LOG.debug(e.toString());
            throw new RuntimeException(e);
        }
    }
}
Also used : RelOptCluster(org.apache.calcite.plan.RelOptCluster) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) RelBuilder(org.apache.calcite.tools.RelBuilder) RexBuilder(org.apache.calcite.rex.RexBuilder) Builder(com.google.common.collect.ImmutableList.Builder) HiveTableFunctionScan(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan) HiveRelNode(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode) ArrayList(java.util.ArrayList) RelDataType(org.apache.calcite.rel.type.RelDataType) RexBuilder(org.apache.calcite.rex.RexBuilder) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) HashSet(java.util.HashSet) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) HiveExcept(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExcept) HiveUnion(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion) HiveFilter(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter) AggregateCall(org.apache.calcite.rel.core.AggregateCall) HiveAggregate(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) HiveRelNode(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode) RelNode(org.apache.calcite.rel.RelNode) RexInputRef(org.apache.calcite.rex.RexInputRef) RexNode(org.apache.calcite.rex.RexNode)

Example 17 with RelOptCluster

use of org.apache.calcite.plan.RelOptCluster in project hive by apache.

the class HiveRelOptUtil method splitJoinCondition.

private static void splitJoinCondition(List<RelDataTypeField> sysFieldList, List<RelNode> inputs, RexNode condition, List<List<RexNode>> joinKeys, List<Integer> filterNulls, List<SqlOperator> rangeOp, List<RexNode> nonEquiList) throws CalciteSemanticException {
    final int sysFieldCount = sysFieldList.size();
    final RelOptCluster cluster = inputs.get(0).getCluster();
    final RexBuilder rexBuilder = cluster.getRexBuilder();
    if (condition instanceof RexCall) {
        RexCall call = (RexCall) condition;
        if (call.getOperator() == SqlStdOperatorTable.AND) {
            for (RexNode operand : call.getOperands()) {
                splitJoinCondition(sysFieldList, inputs, operand, joinKeys, filterNulls, rangeOp, nonEquiList);
            }
            return;
        }
        RexNode leftKey = null;
        RexNode rightKey = null;
        int leftInput = 0;
        int rightInput = 0;
        List<RelDataTypeField> leftFields = null;
        List<RelDataTypeField> rightFields = null;
        boolean reverse = false;
        SqlKind kind = call.getKind();
        // Only consider range operators if we haven't already seen one
        if ((kind == SqlKind.EQUALS) || (filterNulls != null && kind == SqlKind.IS_NOT_DISTINCT_FROM) || (rangeOp != null && rangeOp.isEmpty() && (kind == SqlKind.GREATER_THAN || kind == SqlKind.GREATER_THAN_OR_EQUAL || kind == SqlKind.LESS_THAN || kind == SqlKind.LESS_THAN_OR_EQUAL))) {
            final List<RexNode> operands = call.getOperands();
            RexNode op0 = operands.get(0);
            RexNode op1 = operands.get(1);
            final ImmutableBitSet projRefs0 = InputFinder.bits(op0);
            final ImmutableBitSet projRefs1 = InputFinder.bits(op1);
            final ImmutableBitSet[] inputsRange = new ImmutableBitSet[inputs.size()];
            int totalFieldCount = 0;
            for (int i = 0; i < inputs.size(); i++) {
                final int firstField = totalFieldCount + sysFieldCount;
                totalFieldCount = firstField + inputs.get(i).getRowType().getFieldCount();
                inputsRange[i] = ImmutableBitSet.range(firstField, totalFieldCount);
            }
            boolean foundBothInputs = false;
            for (int i = 0; i < inputs.size() && !foundBothInputs; i++) {
                if (projRefs0.intersects(inputsRange[i]) && projRefs0.union(inputsRange[i]).equals(inputsRange[i])) {
                    if (leftKey == null) {
                        leftKey = op0;
                        leftInput = i;
                        leftFields = inputs.get(leftInput).getRowType().getFieldList();
                    } else {
                        rightKey = op0;
                        rightInput = i;
                        rightFields = inputs.get(rightInput).getRowType().getFieldList();
                        reverse = true;
                        foundBothInputs = true;
                    }
                } else if (projRefs1.intersects(inputsRange[i]) && projRefs1.union(inputsRange[i]).equals(inputsRange[i])) {
                    if (leftKey == null) {
                        leftKey = op1;
                        leftInput = i;
                        leftFields = inputs.get(leftInput).getRowType().getFieldList();
                    } else {
                        rightKey = op1;
                        rightInput = i;
                        rightFields = inputs.get(rightInput).getRowType().getFieldList();
                        foundBothInputs = true;
                    }
                }
            }
            if ((leftKey != null) && (rightKey != null)) {
                // adjustment array
                int[] adjustments = new int[totalFieldCount];
                for (int i = 0; i < inputs.size(); i++) {
                    final int adjustment = inputsRange[i].nextSetBit(0);
                    for (int j = adjustment; j < inputsRange[i].length(); j++) {
                        adjustments[j] = -adjustment;
                    }
                }
                // replace right Key input ref
                rightKey = rightKey.accept(new RelOptUtil.RexInputConverter(rexBuilder, rightFields, rightFields, adjustments));
                // left key only needs to be adjusted if there are system
                // fields, but do it for uniformity
                leftKey = leftKey.accept(new RelOptUtil.RexInputConverter(rexBuilder, leftFields, leftFields, adjustments));
                RelDataType leftKeyType = leftKey.getType();
                RelDataType rightKeyType = rightKey.getType();
                if (leftKeyType != rightKeyType) {
                    // perform casting using Hive rules
                    TypeInfo rType = TypeConverter.convert(rightKeyType);
                    TypeInfo lType = TypeConverter.convert(leftKeyType);
                    TypeInfo tgtType = FunctionRegistry.getCommonClassForComparison(lType, rType);
                    if (tgtType == null) {
                        throw new CalciteSemanticException("Cannot find common type for join keys " + leftKey + " (type " + leftKeyType + ") and " + rightKey + " (type " + rightKeyType + ")");
                    }
                    RelDataType targetKeyType = TypeConverter.convert(tgtType, rexBuilder.getTypeFactory());
                    if (leftKeyType != targetKeyType && TypeInfoUtils.isConversionRequiredForComparison(tgtType, lType)) {
                        leftKey = rexBuilder.makeCast(targetKeyType, leftKey);
                    }
                    if (rightKeyType != targetKeyType && TypeInfoUtils.isConversionRequiredForComparison(tgtType, rType)) {
                        rightKey = rexBuilder.makeCast(targetKeyType, rightKey);
                    }
                }
            }
        }
        if ((leftKey != null) && (rightKey != null)) {
            // found suitable join keys
            // add them to key list, ensuring that if there is a
            // non-equi join predicate, it appears at the end of the
            // key list; also mark the null filtering property
            addJoinKey(joinKeys.get(leftInput), leftKey, (rangeOp != null) && !rangeOp.isEmpty());
            addJoinKey(joinKeys.get(rightInput), rightKey, (rangeOp != null) && !rangeOp.isEmpty());
            if (filterNulls != null && kind == SqlKind.EQUALS) {
                // nulls are considered not matching for equality comparison
                // add the position of the most recently inserted key
                filterNulls.add(joinKeys.get(leftInput).size() - 1);
            }
            if (rangeOp != null && kind != SqlKind.EQUALS && kind != SqlKind.IS_DISTINCT_FROM) {
                if (reverse) {
                    kind = reverse(kind);
                }
                rangeOp.add(op(kind, call.getOperator()));
            }
            return;
        }
    // else fall through and add this condition as nonEqui condition
    }
    // The operator is not of RexCall type
    // So we fail. Fall through.
    // Add this condition to the list of non-equi-join conditions.
    nonEquiList.add(condition);
}
Also used : RelOptCluster(org.apache.calcite.plan.RelOptCluster) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) RelDataType(org.apache.calcite.rel.type.RelDataType) SqlKind(org.apache.calcite.sql.SqlKind) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) RexCall(org.apache.calcite.rex.RexCall) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RexBuilder(org.apache.calcite.rex.RexBuilder) RexNode(org.apache.calcite.rex.RexNode)

Aggregations

RelOptCluster (org.apache.calcite.plan.RelOptCluster)17 RelNode (org.apache.calcite.rel.RelNode)13 RexBuilder (org.apache.calcite.rex.RexBuilder)9 ArrayList (java.util.ArrayList)7 RexNode (org.apache.calcite.rex.RexNode)7 RelDataType (org.apache.calcite.rel.type.RelDataType)6 HashSet (java.util.HashSet)5 RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField)5 ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet)5 CalciteSemanticException (org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)5 ImmutableList (com.google.common.collect.ImmutableList)4 HashMap (java.util.HashMap)3 List (java.util.List)3 RelTraitSet (org.apache.calcite.plan.RelTraitSet)3 AggregateCall (org.apache.calcite.rel.core.AggregateCall)3 RexInputRef (org.apache.calcite.rex.RexInputRef)3 RelBuilder (org.apache.calcite.tools.RelBuilder)3 HiveRelNode (org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode)3 Builder (com.google.common.collect.ImmutableList.Builder)2 BigDecimal (java.math.BigDecimal)2