Search in sources :

Example 16 with RexBuilder

use of org.apache.calcite.rex.RexBuilder in project hive by apache.

the class HiveIntersectRewriteRule method onMatch.

// ~ Methods ----------------------------------------------------------------
/**
 * Rewrites the matched {@code HiveIntersect} into an equivalent plan made of
 * projects, aggregates, a union and a filter, and hands the result to
 * {@code call.transformTo}.
 *
 * <p>The plan is assembled in this order:
 * <ol>
 *   <li>For every input branch: project all columns plus a literal 1, then
 *       group by all original columns with a count over the literal column
 *       (called {@code c} below).</li>
 *   <li>Union all per-branch aggregates.</li>
 *   <li>Group the union by every column except {@code c}, computing
 *       {@code count(c)} and, only when {@code hiveIntersect.all} is set,
 *       {@code min(c)}.</li>
 *   <li>Filter on {@code count(c) = number of branches}.</li>
 *   <li>When {@code hiveIntersect.all} is not set: project away the trailing
 *       count column. Otherwise: project {@code min(c)} followed by the
 *       original columns, pass that to
 *       {@code HiveCalciteUtil.createUDTFForSetOp}, and project away the first
 *       column of the UDTF output.</li>
 * </ol>
 *
 * <p>Any {@code CalciteSemanticException} / {@code SemanticException} raised
 * while building the plan is logged at debug level and rethrown wrapped in a
 * {@code RuntimeException}.
 *
 * @param call the rule call; operand 0 is the {@code HiveIntersect} to rewrite
 */
public void onMatch(RelOptRuleCall call) {
    final HiveIntersect hiveIntersect = call.rel(0);
    final RelOptCluster cluster = hiveIntersect.getCluster();
    final RexBuilder rexBuilder = cluster.getRexBuilder();
    int numOfBranch = hiveIntersect.getInputs().size();
    // collects one aggregate per input branch; these become the union's inputs
    Builder<RelNode> bldr = new ImmutableList.Builder<RelNode>();
    // 1st level GB: create a GB (col0, col1, count(1) as c) for each branch
    for (int index = 0; index < numOfBranch; index++) {
        RelNode input = hiveIntersect.getInputs().get(index);
        final List<RexNode> gbChildProjLst = Lists.newArrayList();
        final List<Integer> groupSetPositions = Lists.newArrayList();
        // reference every input column and mark each one as a grouping key
        for (int cInd = 0; cInd < input.getRowType().getFieldList().size(); cInd++) {
            gbChildProjLst.add(rexBuilder.makeInputRef(input, cInd));
            groupSetPositions.add(cInd);
        }
        // append the literal 1 as the last projected column; it is the argument of count below
        gbChildProjLst.add(rexBuilder.makeBigintLiteral(new BigDecimal(1)));
        // create the project before GB because we need a new project with extra column '1'.
        RelNode gbInputRel = null;
        try {
            gbInputRel = HiveProject.create(input, gbChildProjLst, null);
        } catch (CalciteSemanticException e) {
            LOG.debug(e.toString());
            throw new RuntimeException(e);
        }
        // groupSetPosition includes all the positions of the original input columns
        final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions);
        List<AggregateCall> aggregateCalls = Lists.newArrayList();
        RelDataType aggFnRetType = TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory());
        // count(1), 1's position is input.getRowType().getFieldList().size()
        AggregateCall aggregateCall = HiveCalciteUtil.createSingleArgAggCall("count", cluster, TypeInfoFactory.longTypeInfo, input.getRowType().getFieldList().size(), aggFnRetType);
        aggregateCalls.add(aggregateCall);
        HiveRelNode aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), gbInputRel, false, groupSet, null, aggregateCalls);
        bldr.add(aggregateRel);
    }
    // create a union above all the branches
    HiveRelNode union = new HiveUnion(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build());
    // 2nd level GB: create a single GB (col0, col1, count(c)) on top of the union
    final List<Integer> groupSetPositions = Lists.newArrayList();
    // the index of c: it is the last column of the union output
    int cInd = union.getRowType().getFieldList().size() - 1;
    // group by every union column except c
    for (int index = 0; index < union.getRowType().getFieldList().size(); index++) {
        if (index != cInd) {
            groupSetPositions.add(index);
        }
    }
    List<AggregateCall> aggregateCalls = Lists.newArrayList();
    RelDataType aggFnRetType = TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory());
    // count(c) is always computed; it drives the filter below
    AggregateCall aggregateCall = HiveCalciteUtil.createSingleArgAggCall("count", cluster, TypeInfoFactory.longTypeInfo, cInd, aggFnRetType);
    aggregateCalls.add(aggregateCall);
    if (hiveIntersect.all) {
        // intersect all additionally needs min(c); it is appended after count(c)
        aggregateCall = HiveCalciteUtil.createSingleArgAggCall("min", cluster, TypeInfoFactory.longTypeInfo, cInd, aggFnRetType);
        aggregateCalls.add(aggregateCall);
    }
    final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions);
    HiveRelNode aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), union, false, groupSet, null, aggregateCalls);
    // add a filter count(c) = #branches
    // count(c) directly follows the grouping columns (see the schema notes below),
    // so it has the same index that c had in the union
    int countInd = cInd;
    List<RexNode> childRexNodeLst = new ArrayList<RexNode>();
    RexInputRef ref = rexBuilder.makeInputRef(aggregateRel, countInd);
    RexLiteral literal = rexBuilder.makeBigintLiteral(new BigDecimal(numOfBranch));
    childRexNodeLst.add(ref);
    childRexNodeLst.add(literal);
    // both operands of "=" are typed as long when looking up the Calcite function
    ImmutableList.Builder<RelDataType> calciteArgTypesBldr = new ImmutableList.Builder<RelDataType>();
    calciteArgTypesBldr.add(TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()));
    calciteArgTypesBldr.add(TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()));
    RexNode factoredFilterExpr = null;
    try {
        factoredFilterExpr = rexBuilder.makeCall(SqlFunctionConverter.getCalciteFn("=", calciteArgTypesBldr.build(), TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()), true), childRexNodeLst);
    } catch (CalciteSemanticException e) {
        LOG.debug(e.toString());
        throw new RuntimeException(e);
    }
    RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), aggregateRel, factoredFilterExpr);
    if (!hiveIntersect.all) {
        // the schema for intersect distinct is like this
        // R3 on all attributes + count(c) as cnt
        // finally add a project to project out the last column
        Set<Integer> projectOutColumnPositions = new HashSet<>();
        projectOutColumnPositions.add(filterRel.getRowType().getFieldList().size() - 1);
        try {
            call.transformTo(HiveCalciteUtil.createProjectWithoutColumn(filterRel, projectOutColumnPositions));
        } catch (CalciteSemanticException e) {
            LOG.debug(e.toString());
            throw new RuntimeException(e);
        }
    } else {
        // the schema for intersect all is like this
        // R3 + count(c) as cnt + min(c) as m
        // we create a input project for udtf whose schema is like this
        // min(c) as m + R3
        List<RexNode> originalInputRefs = Lists.transform(filterRel.getRowType().getFieldList(), new Function<RelDataTypeField, RexNode>() {

            @Override
            public RexNode apply(RelDataTypeField input) {
                return new RexInputRef(input.getIndex(), input.getType());
            }
        });
        List<RexNode> copyInputRefs = new ArrayList<>();
        // min(c) (the last column) goes first ...
        copyInputRefs.add(originalInputRefs.get(originalInputRefs.size() - 1));
        // ... followed by the original columns; the "- 2" skips both count(c) and min(c)
        for (int i = 0; i < originalInputRefs.size() - 2; i++) {
            copyInputRefs.add(originalInputRefs.get(i));
        }
        RelNode srcRel = null;
        try {
            srcRel = HiveProject.create(filterRel, copyInputRefs, null);
            // NOTE(review): presumably the UDTF emits each row min(c) times - confirm in createUDTFForSetOp
            HiveTableFunctionScan udtf = HiveCalciteUtil.createUDTFForSetOp(cluster, srcRel);
            // finally add a project to project out the 1st column
            Set<Integer> projectOutColumnPositions = new HashSet<>();
            projectOutColumnPositions.add(0);
            call.transformTo(HiveCalciteUtil.createProjectWithoutColumn(udtf, projectOutColumnPositions));
        } catch (SemanticException e) {
            LOG.debug(e.toString());
            throw new RuntimeException(e);
        }
    }
}
Also used : RelOptCluster(org.apache.calcite.plan.RelOptCluster) RexLiteral(org.apache.calcite.rex.RexLiteral) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ImmutableList(com.google.common.collect.ImmutableList) RelBuilder(org.apache.calcite.tools.RelBuilder) RexBuilder(org.apache.calcite.rex.RexBuilder) Builder(com.google.common.collect.ImmutableList.Builder) HiveTableFunctionScan(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan) HiveRelNode(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode) ArrayList(java.util.ArrayList) RelDataType(org.apache.calcite.rel.type.RelDataType) RexBuilder(org.apache.calcite.rex.RexBuilder) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) HashSet(java.util.HashSet) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) HiveIntersect(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect) HiveUnion(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion) BigDecimal(java.math.BigDecimal) HiveFilter(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter) AggregateCall(org.apache.calcite.rel.core.AggregateCall) HiveAggregate(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) HiveRelNode(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode) RelNode(org.apache.calcite.rel.RelNode) RexInputRef(org.apache.calcite.rex.RexInputRef) RexNode(org.apache.calcite.rex.RexNode)

Example 17 with RexBuilder

use of org.apache.calcite.rex.RexBuilder in project hive by apache.

the class HiveJoinAddNotNullRule method onMatch.

//~ Methods ----------------------------------------------------------------
/**
 * Adds filters on the inputs of an inner join using the conditions returned by
 * {@code getNotNullConditions} for the join's equi-join key positions.
 *
 * <p>Nothing is done when the join is not an inner join, when its condition is
 * always true, when the join predicate info cannot be constructed, or when no
 * new condition is produced for either side. Otherwise each side that received
 * a non-trivial predicate is wrapped in a filter, the join is copied on top of
 * the new inputs, and the pushed predicates are recorded in the
 * {@code HiveRulesRegistry} for the new join.
 *
 * @param call the rule call; operand 0 is the join to process
 */
@Override
public void onMatch(RelOptRuleCall call) {
    final Join join = call.rel(0);
    RelNode leftInput = join.getLeft();
    RelNode rightInput = join.getRight();
    HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class);
    assert registry != null;

    // Only inner joins with a non-trivial condition are handled
    if (join.getJoinType() != JoinRelType.INNER || join.getCondition().isAlwaysTrue()) {
        return;
    }

    JoinPredicateInfo predicateInfo;
    try {
        predicateInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(join);
    } catch (CalciteSemanticException e) {
        // The join predicate could not be analyzed; leave the plan untouched
        return;
    }

    // Gather the key positions of every equi-join predicate, per side
    final Set<Integer> leftKeyPositions = new HashSet<>();
    final Set<Integer> rightKeyPositions = new HashSet<>();
    for (JoinLeafPredicateInfo leafInfo : predicateInfo.getEquiJoinPredicateElements()) {
        leftKeyPositions.addAll(leafInfo.getProjsFromLeftPartOfJoinKeysInChildSchema());
        rightKeyPositions.addAll(leafInfo.getProjsFromRightPartOfJoinKeysInChildSchema());
    }

    // Build not null conditions, working on copies of the registered predicate sets
    final RelOptCluster cluster = join.getCluster();
    final RexBuilder rexBuilder = cluster.getRexBuilder();
    final Set<String> leftPushed = Sets.newHashSet(registry.getPushedPredicates(join, 0));
    final List<RexNode> leftConditions = getNotNullConditions(cluster, rexBuilder, leftInput, leftKeyPositions, leftPushed);
    final Set<String> rightPushed = Sets.newHashSet(registry.getPushedPredicates(join, 1));
    final List<RexNode> rightConditions = getNotNullConditions(cluster, rexBuilder, rightInput, rightKeyPositions, rightPushed);

    final RexNode leftPredicate = RexUtil.composeConjunction(rexBuilder, leftConditions, false);
    final RexNode rightPredicate = RexUtil.composeConjunction(rexBuilder, rightConditions, false);
    final boolean leftTrivial = leftPredicate.isAlwaysTrue();
    final boolean rightTrivial = rightPredicate.isAlwaysTrue();
    if (leftTrivial && rightTrivial) {
        // Nothing will be added to the expression
        return;
    }

    // Wrap each side that got a real predicate and notify the planner of the copy
    if (!leftTrivial) {
        final RelNode previous = leftInput;
        leftInput = filterFactory.createFilter(previous, leftPredicate);
        call.getPlanner().onCopy(previous, leftInput);
    }
    if (!rightTrivial) {
        final RelNode previous = rightInput;
        rightInput = filterFactory.createFilter(previous, rightPredicate);
        call.getPlanner().onCopy(previous, rightInput);
    }

    final Join rewrittenJoin = join.copy(join.getTraitSet(), join.getCondition(), leftInput, rightInput,
            join.getJoinType(), join.isSemiJoinDone());
    call.getPlanner().onCopy(join, rewrittenJoin);

    // Register information about created predicates
    registry.getPushedPredicates(rewrittenJoin, 0).addAll(leftPushed);
    registry.getPushedPredicates(rewrittenJoin, 1).addAll(rightPushed);

    call.transformTo(rewrittenJoin);
}
Also used : RelOptCluster(org.apache.calcite.plan.RelOptCluster) HiveSemiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin) Join(org.apache.calcite.rel.core.Join) HiveJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin) JoinLeafPredicateInfo(org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo) RelNode(org.apache.calcite.rel.RelNode) JoinPredicateInfo(org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo) RexBuilder(org.apache.calcite.rex.RexBuilder) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) HashSet(java.util.HashSet) RexNode(org.apache.calcite.rex.RexNode)

Example 18 with RexBuilder

use of org.apache.calcite.rex.RexBuilder in project hive by apache.

the class HiveSemiJoinRule method onMatch.

/**
 * Handles a project over a join whose right side is an aggregate.
 *
 * <p>The rule gives up when the project references any field coming from the
 * right side of the join, or when the right-hand join keys are not exactly the
 * first {@code aggregate.getGroupCount()} fields. For a LEFT join the right
 * side is dropped and the project is re-created directly on the left input.
 * For an INNER equi-join the join is replaced with a semi join between the
 * left input and the aggregate's input, and the project is re-created on top.
 * Every other case leaves the plan unchanged.
 *
 * @param call the rule call; operands are project (0), join (1), left input (2), aggregate (3)
 */
@Override
public void onMatch(RelOptRuleCall call) {
    LOG.debug("Matched HiveSemiJoinRule");
    final Project topProject = call.rel(0);
    final Join join = call.rel(1);
    final RelNode leftInput = call.rel(2);
    final Aggregate rightAggregate = call.rel(3);
    final RexBuilder rexBuilder = join.getCluster().getRexBuilder();

    // The project must not use any field that comes from the right side of the join
    final ImmutableBitSet usedFields = RelOptUtil.InputFinder.bits(topProject.getProjects(), null);
    final ImmutableBitSet rightFields = ImmutableBitSet.range(
            leftInput.getRowType().getFieldCount(), join.getRowType().getFieldCount());
    if (usedFields.intersects(rightFields)) {
        return;
    }

    // The right join keys must be exactly the aggregate's group columns.
    // By the way, neither a super-set nor a sub-set would work.
    final JoinInfo joinInfo = join.analyzeCondition();
    final ImmutableBitSet groupColumns = ImmutableBitSet.range(rightAggregate.getGroupCount());
    if (!joinInfo.rightSet().equals(groupColumns)) {
        return;
    }

    if (join.getJoinType() == JoinRelType.LEFT) {
        // since for LEFT join we are only interested in rows from LEFT we can get rid of right side
        call.transformTo(call.builder()
                .push(leftInput)
                .project(topProject.getProjects(), topProject.getRowType().getFieldNames())
                .build());
        return;
    }
    if (join.getJoinType() != JoinRelType.INNER || !joinInfo.isEqui()) {
        return;
    }
    LOG.debug("All conditions matched for HiveSemiJoinRule. Going to apply transformation.");

    // Translate the join's right keys into positions in the aggregate's input
    final List<Integer> groupKeys = rightAggregate.getGroupSet().asList();
    final List<Integer> remappedKeys = new ArrayList<>();
    for (int rightKey : joinInfo.rightKeys) {
        remappedKeys.add(groupKeys.get(rightKey));
    }
    final ImmutableIntList newRightKeys = ImmutableIntList.copyOf(remappedKeys);
    final RelNode aggregateInput = rightAggregate.getInput();
    final RexNode semiJoinCondition = RelOptUtil.createEquiJoinCondition(
            leftInput, joinInfo.leftKeys, aggregateInput, newRightKeys, rexBuilder);

    // NOTE(review): the original comment here was truncated; it indicated that some
    // shape involving a join on the right side "is not expected further down the
    // pipeline" and pointed to a JIRA. When the aggregate's input is a HepRelVertex
    // wrapping a Join, a project of all of that join's fields is placed on top of it.
    final RelNode semiJoinRight;
    if (aggregateInput instanceof HepRelVertex
            && ((HepRelVertex) aggregateInput).getCurrentRel() instanceof Join) {
        final Join wrappedJoin = (Join) ((HepRelVertex) aggregateInput).getCurrentRel();
        final int wrappedFieldCount = wrappedJoin.getRowType().getFieldCount();
        final List<RexNode> passThroughRefs = new ArrayList<>();
        for (int fieldIdx = 0; fieldIdx < wrappedFieldCount; fieldIdx++) {
            passThroughRefs.add(rexBuilder.makeInputRef(wrappedJoin, fieldIdx));
        }
        semiJoinRight = call.builder()
                .push(wrappedJoin)
                .project(passThroughRefs, wrappedJoin.getRowType().getFieldNames(), true)
                .build();
    } else {
        semiJoinRight = aggregateInput;
    }

    final RelNode semiJoin = call.builder()
            .push(leftInput)
            .push(semiJoinRight)
            .semiJoin(semiJoinCondition)
            .build();
    call.transformTo(call.builder()
            .push(semiJoin)
            .project(topProject.getProjects(), topProject.getRowType().getFieldNames())
            .build());
}
Also used : RelOptCluster(org.apache.calcite.plan.RelOptCluster) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ArrayList(java.util.ArrayList) Join(org.apache.calcite.rel.core.Join) JoinInfo(org.apache.calcite.rel.core.JoinInfo) Project(org.apache.calcite.rel.core.Project) HepRelVertex(org.apache.calcite.plan.hep.HepRelVertex) RelNode(org.apache.calcite.rel.RelNode) RexBuilder(org.apache.calcite.rex.RexBuilder) ImmutableIntList(org.apache.calcite.util.ImmutableIntList) Aggregate(org.apache.calcite.rel.core.Aggregate) RexNode(org.apache.calcite.rex.RexNode)

Example 19 with RexBuilder

use of org.apache.calcite.rex.RexBuilder in project hive by apache.

the class HivePointLookupOptimizerRule method onMatch.

/**
 * Rewrites the condition of the matched filter by running it through
 * {@code RexTransformIntoInClause} and then {@code RexMergeInClause}.
 *
 * <p>The filter is replaced only when the string form of the rewritten
 * condition differs from the string form of the factored original condition.
 *
 * @param call the rule call; operand 0 is the filter to process
 */
public void onMatch(RelOptRuleCall call) {
    final Filter filter = call.rel(0);
    final RexBuilder rexBuilder = filter.getCluster().getRexBuilder();
    final RexNode factoredCondition = RexUtil.pullFactors(rexBuilder, filter.getCondition());

    // 1. Try to transform possible candidates into IN clauses
    final RexNode withInClauses =
            new RexTransformIntoInClause(rexBuilder, filter, minNumORClauses).apply(factoredCondition);

    // 2. Merge IN expressions
    final RexNode rewrittenCondition = new RexMergeInClause(rexBuilder).apply(withInClauses);

    // 3. Nothing changed: bail out
    final boolean unchanged = rewrittenCondition.toString().equals(factoredCondition.toString());
    if (unchanged) {
        return;
    }

    // 4. Replace the filter with a copy that carries the new condition
    call.transformTo(filter.copy(filter.getTraitSet(), filter.getInput(), rewrittenCondition));
}
Also used : RelNode(org.apache.calcite.rel.RelNode) Filter(org.apache.calcite.rel.core.Filter) RexBuilder(org.apache.calcite.rex.RexBuilder) RexNode(org.apache.calcite.rex.RexNode)

Example 20 with RexBuilder

use of org.apache.calcite.rex.RexBuilder in project hive by apache.

the class HivePreFilteringRule method onMatch.

/**
 * Splits the matched filter into a new child filter holding predicates that
 * were extracted from its condition, and a top filter that keeps the
 * (possibly reduced) original condition.
 *
 * <p>For an AND condition, common operands are extracted from every OR
 * conjunct; if the condition also contains non-deterministic conjuncts, all
 * deterministic conjuncts are moved to the child filter and the top condition
 * is reduced to the non-deterministic ones. For an OR condition, only the
 * common operands are extracted. Any other kind of condition is left alone.
 * The rule also bails out when nothing was extracted or when every extracted
 * predicate is already reported as pushed by
 * {@code HiveCalciteUtil.getPredsNotPushedAlready}.
 *
 * @param call the rule call; operand 0 is the filter to process
 */
@Override
public void onMatch(RelOptRuleCall call) {
    final Filter filter = call.rel(0);

    // 0. Register that we have visited this operator in this rule
    final HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class);
    if (registry != null) {
        registry.registerVisited(this, filter);
    }

    final RexBuilder rexBuilder = filter.getCluster().getRexBuilder();

    // 1. Recompose filter possibly by pulling out common elements from DNF expressions
    RexNode topFilterCondition = RexUtil.pullFactors(rexBuilder, filter.getCondition());

    // 2. Extract possible candidates to be pushed down
    List<RexNode> pushDownCandidates = new ArrayList<>();
    final SqlKind conditionKind = topFilterCondition.getKind();
    if (conditionKind == SqlKind.AND) {
        final List<RexNode> deterministic = new ArrayList<>();
        final List<RexNode> nonDeterministic = new ArrayList<>();
        // String forms of the candidates collected so far, used to skip duplicates
        final Set<String> candidateDigests = new HashSet<>();
        final ImmutableList<RexNode> conjuncts = RexUtil.flattenAnd(((RexCall) topFilterCondition).getOperands());
        for (RexNode conjunct : conjuncts) {
            if (conjunct.getKind() == SqlKind.OR) {
                for (RexNode common : extractCommonOperands(rexBuilder, conjunct, maxCNFNodeCount)) {
                    if (candidateDigests.add(common.toString())) {
                        pushDownCandidates.add(common);
                    }
                }
            }
            // Only the top-level conjuncts are classified here; their inner
            // elements are not traversed.
            if (!HiveCalciteUtil.isDeterministic(conjunct)) {
                nonDeterministic.add(conjunct);
            } else {
                deterministic.add(conjunct);
            }
        }
        // NOTE: Hive by convention doesn't pushdown non deterministic expressions
        if (!nonDeterministic.isEmpty()) {
            for (RexNode expr : deterministic) {
                if (candidateDigests.add(expr.toString())) {
                    pushDownCandidates.add(expr);
                }
            }
            final RexNode remaining = RexUtil.composeConjunction(rexBuilder, nonDeterministic, false);
            topFilterCondition = RexUtil.pullFactors(rexBuilder, remaining);
        }
    } else if (conditionKind == SqlKind.OR) {
        pushDownCandidates = extractCommonOperands(rexBuilder, topFilterCondition, maxCNFNodeCount);
    } else {
        return;
    }

    // 3. If we did not generate anything for the new predicate, we bail out
    if (pushDownCandidates.isEmpty()) {
        return;
    }

    // 4. If the new conjuncts are already present in the plan, we bail out
    final List<RexNode> freshConjuncts = HiveCalciteUtil.getPredsNotPushedAlready(filter.getInput(), pushDownCandidates);
    final RexNode pushDownPredicate = RexUtil.composeConjunction(rexBuilder, freshConjuncts, false);
    if (pushDownPredicate.isAlwaysTrue()) {
        return;
    }

    // 5. Otherwise, we create a new condition and the filters that carry it
    final RexNode childCondition = RexUtil.pullFactors(rexBuilder, pushDownPredicate);
    final RelNode childFilter = filterFactory.createFilter(filter.getInput(), childCondition);
    final RelNode topFilter = filterFactory.createFilter(childFilter, topFilterCondition);

    // 6. We register both so we do not fire the rule on them again
    if (registry != null) {
        registry.registerVisited(this, childFilter);
        registry.registerVisited(this, topFilter);
    }

    call.transformTo(topFilter);
}
Also used : RelNode(org.apache.calcite.rel.RelNode) Filter(org.apache.calcite.rel.core.Filter) ArrayList(java.util.ArrayList) RexBuilder(org.apache.calcite.rex.RexBuilder) RexNode(org.apache.calcite.rex.RexNode) HashSet(java.util.HashSet)

Aggregations

RexBuilder (org.apache.calcite.rex.RexBuilder): 60, RexNode (org.apache.calcite.rex.RexNode): 52, ArrayList (java.util.ArrayList): 32, RelDataType (org.apache.calcite.rel.type.RelDataType): 26, RelNode (org.apache.calcite.rel.RelNode): 24, RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField): 20, ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet): 13, AggregateCall (org.apache.calcite.rel.core.AggregateCall): 11, RelDataTypeFactory (org.apache.calcite.rel.type.RelDataTypeFactory): 11, RexInputRef (org.apache.calcite.rex.RexInputRef): 10, RelOptCluster (org.apache.calcite.plan.RelOptCluster): 9, HashMap (java.util.HashMap): 8, RelBuilder (org.apache.calcite.tools.RelBuilder): 8, ImmutableList (com.google.common.collect.ImmutableList): 6, BigDecimal (java.math.BigDecimal): 6, RelOptPredicateList (org.apache.calcite.plan.RelOptPredicateList): 6, RexLiteral (org.apache.calcite.rex.RexLiteral): 6, SqlAggFunction (org.apache.calcite.sql.SqlAggFunction): 6, Pair (org.apache.calcite.util.Pair): 6, CalciteSemanticException (org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException): 6