Search in sources :

Example 1 with JoinType

use of org.apache.hadoop.hive.ql.parse.JoinType in project hive by apache.

the class HiveOpConverter method genJoin.

private static JoinOperator genJoin(RelNode join, ExprNodeDesc[][] joinExpressions, List<List<ExprNodeDesc>> filterExpressions, List<Operator<?>> children, String[] baseSrc, String tabAlias) throws SemanticException {
    // 1. Extract join type
    JoinCondDesc[] joinCondns;
    boolean semiJoin;
    boolean noOuterJoin;
    if (join instanceof HiveMultiJoin) {
        HiveMultiJoin hmj = (HiveMultiJoin) join;
        joinCondns = new JoinCondDesc[hmj.getJoinInputs().size()];
        for (int i = 0; i < hmj.getJoinInputs().size(); i++) {
            joinCondns[i] = new JoinCondDesc(new JoinCond(hmj.getJoinInputs().get(i).left, hmj.getJoinInputs().get(i).right, transformJoinType(hmj.getJoinTypes().get(i))));
        }
        semiJoin = false;
        noOuterJoin = !hmj.isOuterJoin();
    } else {
        joinCondns = new JoinCondDesc[1];
        semiJoin = join instanceof SemiJoin;
        JoinType joinType;
        if (semiJoin) {
            joinType = JoinType.LEFTSEMI;
        } else {
            joinType = extractJoinType((Join) join);
        }
        joinCondns[0] = new JoinCondDesc(new JoinCond(0, 1, joinType));
        noOuterJoin = joinType != JoinType.FULLOUTER && joinType != JoinType.LEFTOUTER && joinType != JoinType.RIGHTOUTER;
    }
    // 2. We create the join aux structures
    ArrayList<ColumnInfo> outputColumns = new ArrayList<ColumnInfo>();
    ArrayList<String> outputColumnNames = new ArrayList<String>(join.getRowType().getFieldNames());
    Operator<?>[] childOps = new Operator[children.size()];
    Map<String, Byte> reversedExprs = new HashMap<String, Byte>();
    Map<Byte, List<ExprNodeDesc>> exprMap = new HashMap<Byte, List<ExprNodeDesc>>();
    Map<Byte, List<ExprNodeDesc>> filters = new HashMap<Byte, List<ExprNodeDesc>>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    HashMap<Integer, Set<String>> posToAliasMap = new HashMap<Integer, Set<String>>();
    int outputPos = 0;
    for (int pos = 0; pos < children.size(); pos++) {
        // 2.1. Backtracking from RS
        ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
        if (inputRS.getNumParent() != 1) {
            throw new SemanticException("RS should have single parent");
        }
        Operator<?> parent = inputRS.getParentOperators().get(0);
        ReduceSinkDesc rsDesc = inputRS.getConf();
        int[] index = inputRS.getValueIndex();
        Byte tag = (byte) rsDesc.getTag();
        // 2.1.1. If semijoin...
        if (semiJoin && pos != 0) {
            exprMap.put(tag, new ArrayList<ExprNodeDesc>());
            childOps[pos] = inputRS;
            continue;
        }
        posToAliasMap.put(pos, new HashSet<String>(inputRS.getSchema().getTableNames()));
        List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
        List<String> valColNames = rsDesc.getOutputValueColumnNames();
        Map<String, ExprNodeDesc> descriptors = buildBacktrackFromReduceSinkForJoin(outputPos, outputColumnNames, keyColNames, valColNames, index, parent, baseSrc[pos]);
        List<ColumnInfo> parentColumns = parent.getSchema().getSignature();
        for (int i = 0; i < index.length; i++) {
            ColumnInfo info = new ColumnInfo(parentColumns.get(i));
            info.setInternalName(outputColumnNames.get(outputPos));
            info.setTabAlias(tabAlias);
            outputColumns.add(info);
            reversedExprs.put(outputColumnNames.get(outputPos), tag);
            outputPos++;
        }
        exprMap.put(tag, new ArrayList<ExprNodeDesc>(descriptors.values()));
        colExprMap.putAll(descriptors);
        childOps[pos] = inputRS;
    }
    // 3. We populate the filters and filterMap structure needed in the join descriptor
    List<List<ExprNodeDesc>> filtersPerInput = Lists.newArrayList();
    int[][] filterMap = new int[children.size()][];
    for (int i = 0; i < children.size(); i++) {
        filtersPerInput.add(new ArrayList<ExprNodeDesc>());
    }
    // 3. We populate the filters structure
    for (int i = 0; i < filterExpressions.size(); i++) {
        int leftPos = joinCondns[i].getLeft();
        int rightPos = joinCondns[i].getRight();
        for (ExprNodeDesc expr : filterExpressions.get(i)) {
            // We need to update the exprNode, as currently
            // they refer to columns in the output of the join;
            // they should refer to the columns output by the RS
            int inputPos = updateExprNode(expr, reversedExprs, colExprMap);
            if (inputPos == -1) {
                inputPos = leftPos;
            }
            filtersPerInput.get(inputPos).add(expr);
            if (joinCondns[i].getType() == JoinDesc.FULL_OUTER_JOIN || joinCondns[i].getType() == JoinDesc.LEFT_OUTER_JOIN || joinCondns[i].getType() == JoinDesc.RIGHT_OUTER_JOIN) {
                if (inputPos == leftPos) {
                    updateFilterMap(filterMap, leftPos, rightPos);
                } else {
                    updateFilterMap(filterMap, rightPos, leftPos);
                }
            }
        }
    }
    for (int pos = 0; pos < children.size(); pos++) {
        ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
        ReduceSinkDesc rsDesc = inputRS.getConf();
        Byte tag = (byte) rsDesc.getTag();
        filters.put(tag, filtersPerInput.get(pos));
    }
    // 4. We create the join operator with its descriptor
    JoinDesc desc = new JoinDesc(exprMap, outputColumnNames, noOuterJoin, joinCondns, filters, joinExpressions);
    desc.setReversedExprs(reversedExprs);
    desc.setFilterMap(filterMap);
    JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(childOps[0].getCompilationOpContext(), desc, new RowSchema(outputColumns), childOps);
    joinOp.setColumnExprMap(colExprMap);
    joinOp.setPosToAliasMap(posToAliasMap);
    joinOp.getConf().setBaseSrc(baseSrc);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + joinOp + " with row schema: [" + joinOp.getSchema() + "]");
    }
    return joinOp;
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashSet(java.util.HashSet) HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) JoinCond(org.apache.hadoop.hive.ql.parse.JoinCond) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SemiJoin(org.apache.calcite.rel.core.SemiJoin) HiveSemiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) JoinType(org.apache.hadoop.hive.ql.parse.JoinType) HiveJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin) SemiJoin(org.apache.calcite.rel.core.SemiJoin) HiveSemiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin) Join(org.apache.calcite.rel.core.Join) HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc)

Aggregations

ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 LinkedHashMap (java.util.LinkedHashMap)1 List (java.util.List)1 Set (java.util.Set)1 Join (org.apache.calcite.rel.core.Join)1 SemiJoin (org.apache.calcite.rel.core.SemiJoin)1 ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet)1 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)1 FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator)1 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)1 LimitOperator (org.apache.hadoop.hive.ql.exec.LimitOperator)1 Operator (org.apache.hadoop.hive.ql.exec.Operator)1 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)1 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)1 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)1 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)1