Search in sources :

Example 36 with Join

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Join in project hive by apache.

the class HiveJoinAddNotNullRule method onMatch.

// ~ Methods ----------------------------------------------------------------
@Override
public void onMatch(RelOptRuleCall call) {
    Join join = call.rel(0);
    // eg  select * from left_tbl where (select 1 from all_null_right limit 1) is null
    if (join.getJoinType() == JoinRelType.FULL || (join.getJoinType() != JoinRelType.ANTI && join.getCondition().isAlwaysTrue())) {
        return;
    }
    JoinPredicateInfo joinPredInfo;
    try {
        joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(join);
    } catch (CalciteSemanticException e) {
        return;
    }
    HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class);
    assert registry != null;
    Set<String> leftPushedPredicates = Sets.newHashSet(registry.getPushedPredicates(join, 0));
    Set<String> rightPushedPredicates = Sets.newHashSet(registry.getPushedPredicates(join, 1));
    boolean genPredOnLeft = join.getJoinType() == JoinRelType.RIGHT || join.getJoinType() == JoinRelType.INNER || join.isSemiJoin();
    boolean genPredOnRight = join.getJoinType() == JoinRelType.LEFT || join.getJoinType() == JoinRelType.INNER || join.isSemiJoin() || join.getJoinType() == JoinRelType.ANTI;
    RexNode newLeftPredicate = getNewPredicate(join, registry, joinPredInfo, leftPushedPredicates, genPredOnLeft, 0);
    RexNode newRightPredicate = getNewPredicate(join, registry, joinPredInfo, rightPushedPredicates, genPredOnRight, 1);
    if (newLeftPredicate.isAlwaysTrue() && newRightPredicate.isAlwaysTrue()) {
        return;
    }
    RelNode lChild = getNewChild(call, join.getLeft(), newLeftPredicate);
    RelNode rChild = getNewChild(call, join.getRight(), newRightPredicate);
    Join newJoin = join.copy(join.getTraitSet(), join.getCondition(), lChild, rChild, join.getJoinType(), join.isSemiJoinDone());
    call.getPlanner().onCopy(join, newJoin);
    // Register information about created predicates
    registry.getPushedPredicates(newJoin, 0).addAll(leftPushedPredicates);
    registry.getPushedPredicates(newJoin, 1).addAll(rightPushedPredicates);
    call.transformTo(newJoin);
}
Also used : RelNode(org.apache.calcite.rel.RelNode) JoinPredicateInfo(org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo) HiveSemiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin) Join(org.apache.calcite.rel.core.Join) HiveAntiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAntiJoin) HiveJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) RexNode(org.apache.calcite.rex.RexNode)

Example 37 with Join

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Join in project hive by apache.

the class HiveAntiSemiJoinRule method onMatch.

// is null filter over a left join.
public void onMatch(final RelOptRuleCall call) {
    final Project project = call.rel(0);
    final Filter filter = call.rel(1);
    final Join join = call.rel(2);
    perform(call, project, filter, join);
}
Also used : Project(org.apache.calcite.rel.core.Project) Filter(org.apache.calcite.rel.core.Filter) Join(org.apache.calcite.rel.core.Join) HiveAntiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAntiJoin)

Example 38 with Join

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Join in project hive by apache.

the class JoinVisitor method genJoin.

private JoinOperator genJoin(RelNode join, ExprNodeDesc[][] joinExpressions, List<List<ExprNodeDesc>> filterExpressions, List<Operator<?>> children, String[] baseSrc, String tabAlias) throws SemanticException {
    // 1. Extract join type
    JoinCondDesc[] joinCondns;
    boolean semiJoin;
    boolean noOuterJoin;
    if (join instanceof HiveMultiJoin) {
        HiveMultiJoin hmj = (HiveMultiJoin) join;
        joinCondns = new JoinCondDesc[hmj.getJoinInputs().size()];
        for (int i = 0; i < hmj.getJoinInputs().size(); i++) {
            joinCondns[i] = new JoinCondDesc(new JoinCond(hmj.getJoinInputs().get(i).left, hmj.getJoinInputs().get(i).right, transformJoinType(hmj.getJoinTypes().get(i))));
        }
        semiJoin = false;
        noOuterJoin = !hmj.isOuterJoin();
    } else {
        joinCondns = new JoinCondDesc[1];
        JoinRelType joinRelType = JoinRelType.INNER;
        if (join instanceof Join) {
            joinRelType = ((Join) join).getJoinType();
        }
        JoinType joinType;
        switch(joinRelType) {
            case SEMI:
                joinType = JoinType.LEFTSEMI;
                semiJoin = true;
                break;
            case ANTI:
                joinType = JoinType.ANTI;
                semiJoin = true;
                break;
            default:
                assert join instanceof Join;
                joinType = transformJoinType(((Join) join).getJoinType());
                semiJoin = false;
        }
        joinCondns[0] = new JoinCondDesc(new JoinCond(0, 1, joinType));
        noOuterJoin = joinType != JoinType.FULLOUTER && joinType != JoinType.LEFTOUTER && joinType != JoinType.RIGHTOUTER;
    }
    // 2. We create the join aux structures
    ArrayList<ColumnInfo> outputColumns = new ArrayList<ColumnInfo>();
    ArrayList<String> outputColumnNames = new ArrayList<String>(join.getRowType().getFieldNames());
    Operator<?>[] childOps = new Operator[children.size()];
    Map<String, Byte> reversedExprs = new HashMap<String, Byte>();
    Map<Byte, List<ExprNodeDesc>> exprMap = new HashMap<Byte, List<ExprNodeDesc>>();
    Map<Byte, List<ExprNodeDesc>> filters = new HashMap<Byte, List<ExprNodeDesc>>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    HashMap<Integer, Set<String>> posToAliasMap = new HashMap<Integer, Set<String>>();
    int outputPos = 0;
    for (int pos = 0; pos < children.size(); pos++) {
        // 2.1. Backtracking from RS
        ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
        if (inputRS.getNumParent() != 1) {
            throw new SemanticException("RS should have single parent");
        }
        Operator<?> parent = inputRS.getParentOperators().get(0);
        ReduceSinkDesc rsDesc = inputRS.getConf();
        int[] index = inputRS.getValueIndex();
        Byte tag = (byte) rsDesc.getTag();
        // 2.1.1. If semijoin...
        if (semiJoin && pos != 0) {
            exprMap.put(tag, new ArrayList<ExprNodeDesc>());
            childOps[pos] = inputRS;
            continue;
        }
        posToAliasMap.put(pos, new HashSet<String>(inputRS.getSchema().getTableNames()));
        List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
        List<String> valColNames = rsDesc.getOutputValueColumnNames();
        Map<String, ExprNodeDesc> descriptors = buildBacktrackFromReduceSinkForJoin(outputPos, outputColumnNames, keyColNames, valColNames, index, parent, baseSrc[pos]);
        List<ColumnInfo> parentColumns = parent.getSchema().getSignature();
        for (int i = 0; i < index.length; i++) {
            ColumnInfo info = new ColumnInfo(parentColumns.get(i));
            info.setInternalName(outputColumnNames.get(outputPos));
            info.setTabAlias(tabAlias);
            outputColumns.add(info);
            reversedExprs.put(outputColumnNames.get(outputPos), tag);
            outputPos++;
        }
        exprMap.put(tag, new ArrayList<ExprNodeDesc>(descriptors.values()));
        colExprMap.putAll(descriptors);
        childOps[pos] = inputRS;
    }
    // 3. We populate the filters and filterMap structure needed in the join descriptor
    List<List<ExprNodeDesc>> filtersPerInput = Lists.newArrayList();
    int[][] filterMap = new int[children.size()][];
    for (int i = 0; i < children.size(); i++) {
        filtersPerInput.add(new ArrayList<ExprNodeDesc>());
    }
    // 3. We populate the filters structure
    for (int i = 0; i < filterExpressions.size(); i++) {
        int leftPos = joinCondns[i].getLeft();
        int rightPos = joinCondns[i].getRight();
        for (ExprNodeDesc expr : filterExpressions.get(i)) {
            // We need to update the exprNode, as currently
            // they refer to columns in the output of the join;
            // they should refer to the columns output by the RS
            int inputPos = updateExprNode(expr, reversedExprs, colExprMap);
            if (inputPos == -1) {
                inputPos = leftPos;
            }
            filtersPerInput.get(inputPos).add(expr);
            if (joinCondns[i].getType() == JoinDesc.FULL_OUTER_JOIN || joinCondns[i].getType() == JoinDesc.LEFT_OUTER_JOIN || joinCondns[i].getType() == JoinDesc.RIGHT_OUTER_JOIN) {
                if (inputPos == leftPos) {
                    updateFilterMap(filterMap, leftPos, rightPos);
                } else {
                    updateFilterMap(filterMap, rightPos, leftPos);
                }
            }
        }
    }
    for (int pos = 0; pos < children.size(); pos++) {
        ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
        ReduceSinkDesc rsDesc = inputRS.getConf();
        Byte tag = (byte) rsDesc.getTag();
        filters.put(tag, filtersPerInput.get(pos));
    }
    // 4. We create the join operator with its descriptor
    JoinDesc desc = new JoinDesc(exprMap, outputColumnNames, noOuterJoin, joinCondns, filters, joinExpressions, null);
    desc.setReversedExprs(reversedExprs);
    desc.setFilterMap(filterMap);
    JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(childOps[0].getCompilationOpContext(), desc, new RowSchema(outputColumns), childOps);
    joinOp.setColumnExprMap(colExprMap);
    joinOp.setPosToAliasMap(posToAliasMap);
    joinOp.getConf().setBaseSrc(baseSrc);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + joinOp + " with row schema: [" + joinOp.getSchema() + "]");
    }
    return joinOp;
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) HashSet(java.util.HashSet) Set(java.util.Set) HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) JoinCond(org.apache.hadoop.hive.ql.parse.JoinCond) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) HiveSemiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin) Join(org.apache.calcite.rel.core.Join) HiveAntiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAntiJoin) HiveJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin) HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) JoinType(org.apache.hadoop.hive.ql.parse.JoinType) JoinRelType(org.apache.calcite.rel.core.JoinRelType) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc)

Example 39 with Join

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Join in project hive by apache.

the class JoinVisitor method visit.

@Override
OpAttr visit(RelNode joinRel) throws SemanticException {
    // 0. Additional data structures needed for the join optimization
    // through Hive
    String[] baseSrc = new String[joinRel.getInputs().size()];
    String tabAlias = hiveOpConverter.getHiveDerivedTableAlias();
    // 1. Convert inputs
    OpAttr[] inputs = new OpAttr[joinRel.getInputs().size()];
    List<Operator<?>> children = new ArrayList<Operator<?>>(joinRel.getInputs().size());
    for (int i = 0; i < inputs.length; i++) {
        inputs[i] = hiveOpConverter.dispatch(joinRel.getInput(i));
        children.add(inputs[i].inputs.get(0));
        baseSrc[i] = inputs[i].tabAlias;
    }
    // 2. Generate tags
    for (int tag = 0; tag < children.size(); tag++) {
        ReduceSinkOperator reduceSinkOp = (ReduceSinkOperator) children.get(tag);
        reduceSinkOp.getConf().setTag(tag);
    }
    // 3. Virtual columns
    Set<Integer> newVcolsInCalcite = new HashSet<Integer>();
    newVcolsInCalcite.addAll(inputs[0].vcolsInCalcite);
    if (joinRel instanceof HiveMultiJoin || !((joinRel instanceof Join) && ((((Join) joinRel).isSemiJoin()) || (((Join) joinRel).getJoinType() == JoinRelType.ANTI)))) {
        int shift = inputs[0].inputs.get(0).getSchema().getSignature().size();
        for (int i = 1; i < inputs.length; i++) {
            newVcolsInCalcite.addAll(HiveCalciteUtil.shiftVColsSet(inputs[i].vcolsInCalcite, shift));
            shift += inputs[i].inputs.get(0).getSchema().getSignature().size();
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Translating operator rel#" + joinRel.getId() + ":" + joinRel.getRelTypeName() + " with row type: [" + joinRel.getRowType() + "]");
    }
    // 4. Extract join key expressions from HiveSortExchange
    ExprNodeDesc[][] joinExpressions = new ExprNodeDesc[inputs.length][];
    for (int i = 0; i < inputs.length; i++) {
        joinExpressions[i] = ((HiveSortExchange) joinRel.getInput(i)).getKeyExpressions();
    }
    // 5. Extract rest of join predicate info. We infer the rest of join condition
    // that will be added to the filters (join conditions that are not part of
    // the join key)
    List<RexNode> joinFilters;
    if (joinRel instanceof HiveJoin) {
        joinFilters = ImmutableList.of(((HiveJoin) joinRel).getJoinFilter());
    } else if (joinRel instanceof HiveMultiJoin) {
        joinFilters = ((HiveMultiJoin) joinRel).getJoinFilters();
    } else if (joinRel instanceof HiveSemiJoin) {
        joinFilters = ImmutableList.of(((HiveSemiJoin) joinRel).getJoinFilter());
    } else if (joinRel instanceof HiveAntiJoin) {
        joinFilters = ImmutableList.of(((HiveAntiJoin) joinRel).getJoinFilter());
    } else {
        throw new SemanticException("Can't handle join type: " + joinRel.getClass().getName());
    }
    List<List<ExprNodeDesc>> filterExpressions = Lists.newArrayList();
    for (int i = 0; i < joinFilters.size(); i++) {
        List<ExprNodeDesc> filterExpressionsForInput = new ArrayList<ExprNodeDesc>();
        if (joinFilters.get(i) != null) {
            for (RexNode conj : RelOptUtil.conjunctions(joinFilters.get(i))) {
                ExprNodeDesc expr = HiveOpConverterUtils.convertToExprNode(conj, joinRel, null, newVcolsInCalcite);
                filterExpressionsForInput.add(expr);
            }
        }
        filterExpressions.add(filterExpressionsForInput);
    }
    // 6. Generate Join operator
    JoinOperator joinOp = genJoin(joinRel, joinExpressions, filterExpressions, children, baseSrc, tabAlias);
    // 7. Return result
    return new OpAttr(tabAlias, newVcolsInCalcite, joinOp);
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) ArrayList(java.util.ArrayList) HiveAntiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAntiJoin) OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr) HiveJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) HashSet(java.util.HashSet) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveSemiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin) Join(org.apache.calcite.rel.core.Join) HiveAntiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAntiJoin) HiveJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin) HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) HiveSemiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin) RexNode(org.apache.calcite.rex.RexNode)

Example 40 with Join

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Join in project hive by apache.

the class SubstitutionVisitor method toMutable.

private static MutableRel toMutable(RelNode rel) {
    if (rel instanceof TableScan) {
        return MutableScan.of((TableScan) rel);
    }
    if (rel instanceof Values) {
        return MutableValues.of((Values) rel);
    }
    if (rel instanceof Project) {
        final Project project = (Project) rel;
        final MutableRel input = toMutable(project.getInput());
        return MutableProject.of(input, project.getProjects(), project.getRowType().getFieldNames());
    }
    if (rel instanceof Filter) {
        final Filter filter = (Filter) rel;
        final MutableRel input = toMutable(filter.getInput());
        return MutableFilter.of(input, filter.getCondition());
    }
    if (rel instanceof Aggregate) {
        final Aggregate aggregate = (Aggregate) rel;
        final MutableRel input = toMutable(aggregate.getInput());
        return MutableAggregate.of(input, aggregate.indicator, aggregate.getGroupSet(), aggregate.getGroupSets(), aggregate.getAggCallList());
    }
    if (rel instanceof Join) {
        final Join join = (Join) rel;
        final MutableRel left = toMutable(join.getLeft());
        final MutableRel right = toMutable(join.getRight());
        return MutableJoin.of(join.getCluster(), left, right, join.getCondition(), join.getJoinType(), join.getVariablesSet());
    }
    if (rel instanceof Sort) {
        final Sort sort = (Sort) rel;
        final MutableRel input = toMutable(sort.getInput());
        return MutableSort.of(input, sort.getCollation(), sort.offset, sort.fetch);
    }
    throw new RuntimeException("cannot translate " + rel + " to MutableRel");
}
Also used : TableScan(org.apache.calcite.rel.core.TableScan) Project(org.apache.calcite.rel.core.Project) Filter(org.apache.calcite.rel.core.Filter) Values(org.apache.calcite.rel.core.Values) LogicalJoin(org.apache.calcite.rel.logical.LogicalJoin) Join(org.apache.calcite.rel.core.Join) LogicalSort(org.apache.calcite.rel.logical.LogicalSort) Sort(org.apache.calcite.rel.core.Sort) Aggregate(org.apache.calcite.rel.core.Aggregate) LogicalAggregate(org.apache.calcite.rel.logical.LogicalAggregate)

Aggregations

Join (org.apache.calcite.rel.core.Join)73 RelNode (org.apache.calcite.rel.RelNode)45 RexNode (org.apache.calcite.rex.RexNode)40 ArrayList (java.util.ArrayList)31 LogicalJoin (org.apache.calcite.rel.logical.LogicalJoin)25 Project (org.apache.calcite.rel.core.Project)22 RexBuilder (org.apache.calcite.rex.RexBuilder)20 ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet)18 RelBuilder (org.apache.calcite.tools.RelBuilder)17 HiveJoin (org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin)14 Aggregate (org.apache.calcite.rel.core.Aggregate)13 Test (org.junit.Test)13 Filter (org.apache.calcite.rel.core.Filter)12 RelNode (org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelNode)11 SemiJoin (org.apache.calcite.rel.core.SemiJoin)11 RelOptCluster (org.apache.calcite.plan.RelOptCluster)10 JoinRelType (org.apache.calcite.rel.core.JoinRelType)9 RelMetadataQuery (org.apache.calcite.rel.metadata.RelMetadataQuery)9 Mappings (org.apache.calcite.util.mapping.Mappings)9 List (java.util.List)8