Search in sources :

Example 31 with JoinOperator

use of org.apache.hadoop.hive.ql.exec.JoinOperator in project hive by apache.

the class SemanticAnalyzer method genJoinOperator.

private Operator genJoinOperator(QB qb, QBJoinTree joinTree, Map<String, Operator> map, Operator joiningOp) throws SemanticException {
    QBJoinTree leftChild = joinTree.getJoinSrc();
    Operator joinSrcOp = joiningOp instanceof JoinOperator ? joiningOp : null;
    if (joinSrcOp == null && leftChild != null) {
        joinSrcOp = genJoinOperator(qb, leftChild, map, null);
    }
    if (joinSrcOp != null) {
        ArrayList<ASTNode> filter = joinTree.getFiltersForPushing().get(0);
        for (ASTNode cond : filter) {
            joinSrcOp = genFilterPlan(qb, cond, joinSrcOp, false);
        }
    }
    String[] baseSrc = joinTree.getBaseSrc();
    Operator[] srcOps = new Operator[baseSrc.length];
    // set of input to the join that should be
    HashSet<Integer> omitOpts = null;
    // omitted by the output
    int pos = 0;
    for (String src : baseSrc) {
        if (src != null) {
            Operator srcOp = map.get(src.toLowerCase());
            // for left-semi join, generate an additional selection & group-by
            // operator before ReduceSink
            ArrayList<ASTNode> fields = joinTree.getRHSSemijoinColumns(src);
            if (fields != null) {
                // the RHS table columns should be not be output from the join
                if (omitOpts == null) {
                    omitOpts = new HashSet<Integer>();
                }
                omitOpts.add(pos);
                // generate a selection operator for group-by keys only
                srcOp = insertSelectForSemijoin(fields, srcOp);
                // generate a groupby operator (HASH mode) for a map-side partial
                // aggregation for semijoin
                srcOps[pos++] = genMapGroupByForSemijoin(qb, fields, srcOp, GroupByDesc.Mode.HASH);
            } else {
                srcOps[pos++] = srcOp;
            }
        } else {
            assert pos == 0;
            srcOps[pos++] = joinSrcOp;
        }
    }
    ExprNodeDesc[][] joinKeys = genJoinKeys(joinTree, srcOps);
    for (int i = 0; i < srcOps.length; i++) {
        // generate a ReduceSink operator for the join
        String[] srcs = baseSrc[i] != null ? new String[] { baseSrc[i] } : joinTree.getLeftAliases();
        if (!isCBOExecuted()) {
            srcOps[i] = genNotNullFilterForJoinSourcePlan(qb, srcOps[i], joinTree, joinKeys[i]);
        }
        srcOps[i] = genJoinReduceSinkChild(qb, joinKeys[i], srcOps[i], srcs, joinTree.getNextTag());
    }
    JoinOperator joinOp = (JoinOperator) genJoinOperatorChildren(joinTree, joinSrcOp, srcOps, omitOpts, joinKeys);
    joinOp.getConf().setQBJoinTreeProps(joinTree);
    joinContext.put(joinOp, joinTree);
    if (joinTree.getPostJoinFilters().size() != 0) {
        // Safety check for postconditions
        assert joinTree.getNoOuterJoin();
        Operator op = joinOp;
        for (ASTNode condn : joinTree.getPostJoinFilters()) {
            op = genFilterPlan(qb, condn, op, false);
            if (LOG.isDebugEnabled()) {
                LOG.debug("Generated " + op + " with post-filtering conditions after JOIN operator");
            }
        }
        return op;
    }
    return joinOp;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator)

Example 32 with JoinOperator

use of org.apache.hadoop.hive.ql.exec.JoinOperator in project hive by apache.

the class SemanticAnalyzer method genJoinOperatorChildren.

private Operator genJoinOperatorChildren(QBJoinTree join, Operator left, Operator[] right, HashSet<Integer> omitOpts, ExprNodeDesc[][] joinKeys) throws SemanticException {
    RowResolver outputRR = new RowResolver();
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    // all children are base classes
    Operator<?>[] rightOps = new Operator[right.length];
    int outputPos = 0;
    Map<String, Byte> reversedExprs = new HashMap<String, Byte>();
    HashMap<Byte, List<ExprNodeDesc>> exprMap = new HashMap<Byte, List<ExprNodeDesc>>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    HashMap<Integer, Set<String>> posToAliasMap = new HashMap<Integer, Set<String>>();
    HashMap<Byte, List<ExprNodeDesc>> filterMap = new HashMap<Byte, List<ExprNodeDesc>>();
    for (int pos = 0; pos < right.length; ++pos) {
        Operator<?> input = right[pos] == null ? left : right[pos];
        if (input == null) {
            input = left;
        }
        ReduceSinkOperator rs = (ReduceSinkOperator) input;
        if (rs.getNumParent() != 1) {
            throw new SemanticException("RS should have single parent");
        }
        Operator<?> parent = rs.getParentOperators().get(0);
        ReduceSinkDesc rsDesc = (ReduceSinkDesc) (input.getConf());
        int[] index = rs.getValueIndex();
        ArrayList<ExprNodeDesc> valueDesc = new ArrayList<ExprNodeDesc>();
        ArrayList<ExprNodeDesc> filterDesc = new ArrayList<ExprNodeDesc>();
        Byte tag = (byte) rsDesc.getTag();
        // check whether this input operator produces output
        if (omitOpts != null && omitOpts.contains(pos)) {
            exprMap.put(tag, valueDesc);
            filterMap.put(tag, filterDesc);
            rightOps[pos] = input;
            continue;
        }
        List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
        List<String> valColNames = rsDesc.getOutputValueColumnNames();
        // prepare output descriptors for the input opt
        RowResolver inputRR = opParseCtx.get(input).getRowResolver();
        RowResolver parentRR = opParseCtx.get(parent).getRowResolver();
        posToAliasMap.put(pos, new HashSet<String>(inputRR.getTableNames()));
        List<ColumnInfo> columns = parentRR.getColumnInfos();
        for (int i = 0; i < index.length; i++) {
            ColumnInfo prev = columns.get(i);
            String[] nm = parentRR.reverseLookup(prev.getInternalName());
            String[] nm2 = parentRR.getAlternateMappings(prev.getInternalName());
            if (outputRR.get(nm[0], nm[1]) != null) {
                continue;
            }
            ColumnInfo info = new ColumnInfo(prev);
            String field;
            if (index[i] >= 0) {
                field = Utilities.ReduceField.KEY + "." + keyColNames.get(index[i]);
            } else {
                field = Utilities.ReduceField.VALUE + "." + valColNames.get(-index[i] - 1);
            }
            String internalName = getColumnInternalName(outputColumnNames.size());
            ExprNodeColumnDesc desc = new ExprNodeColumnDesc(info.getType(), field, info.getTabAlias(), info.getIsVirtualCol());
            info.setInternalName(internalName);
            colExprMap.put(internalName, desc);
            outputRR.put(nm[0], nm[1], info);
            if (nm2 != null) {
                outputRR.addMappingOnly(nm2[0], nm2[1], info);
            }
            valueDesc.add(desc);
            outputColumnNames.add(internalName);
            reversedExprs.put(internalName, tag);
        }
        for (ASTNode cond : join.getFilters().get(tag)) {
            filterDesc.add(genExprNodeDesc(cond, inputRR));
        }
        exprMap.put(tag, valueDesc);
        filterMap.put(tag, filterDesc);
        rightOps[pos] = input;
    }
    JoinCondDesc[] joinCondns = new JoinCondDesc[join.getJoinCond().length];
    for (int i = 0; i < join.getJoinCond().length; i++) {
        JoinCond condn = join.getJoinCond()[i];
        joinCondns[i] = new JoinCondDesc(condn);
    }
    JoinDesc desc = new JoinDesc(exprMap, outputColumnNames, join.getNoOuterJoin(), joinCondns, filterMap, joinKeys);
    desc.setReversedExprs(reversedExprs);
    desc.setFilterMap(join.getFilterMap());
    // For outer joins, add filters that apply to more than one input
    if (!join.getNoOuterJoin() && join.getPostJoinFilters().size() != 0) {
        List<ExprNodeDesc> residualFilterExprs = new ArrayList<ExprNodeDesc>();
        for (ASTNode cond : join.getPostJoinFilters()) {
            residualFilterExprs.add(genExprNodeDesc(cond, outputRR));
        }
        desc.setResidualFilterExprs(residualFilterExprs);
        // Clean post-conditions
        join.getPostJoinFilters().clear();
    }
    JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(getOpContext(), desc, new RowSchema(outputRR.getColumnInfos()), rightOps);
    joinOp.setColumnExprMap(colExprMap);
    joinOp.setPosToAliasMap(posToAliasMap);
    if (join.getNullSafes() != null) {
        boolean[] nullsafes = new boolean[join.getNullSafes().size()];
        for (int i = 0; i < nullsafes.length; i++) {
            nullsafes[i] = join.getNullSafes().get(i);
        }
        desc.setNullSafes(nullsafes);
    }
    return putOpInsertMap(joinOp, outputRR);
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) HashSet(java.util.HashSet) Set(java.util.Set) TreeSet(java.util.TreeSet) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) LinkedList(java.util.LinkedList) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) LateralViewJoinDesc(org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc)

Aggregations

JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)32 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)18 Operator (org.apache.hadoop.hive.ql.exec.Operator)18 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)15 ArrayList (java.util.ArrayList)14 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)14 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)12 HashMap (java.util.HashMap)11 SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator)11 OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)11 List (java.util.List)9 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)9 JoinDesc (org.apache.hadoop.hive.ql.plan.JoinDesc)9 AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator)8 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)8 HashSet (java.util.HashSet)7 LinkedHashMap (java.util.LinkedHashMap)7 Path (org.apache.hadoop.fs.Path)7 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)7 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)7