
Example 11 with JoinCondDesc

use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in project hive by apache.

the class CommonMergeJoinOperator method getFetchInputAtCloseList.

/*
   * In case of outer joins, we need to push records through even if one of the sides is done
   * sending records. For e.g. In the case of full outer join, the right side needs to send in data
   * for the join even after the left side has completed sending all the records on its side. This
   * can be done once at initialize time and at close, these tags will still forward records until
   * they have no more to send. Also, subsequent joins need to fetch their data as well since
   * any join following the outer join could produce results with one of the outer sides depending on
   * the join condition. We could optimize for the case of inner joins in the future here.
   */
private Set<Integer> getFetchInputAtCloseList() {
    Set<Integer> retval = new TreeSet<Integer>();
    for (JoinCondDesc joinCondDesc : conf.getConds()) {
        retval.add(joinCondDesc.getLeft());
        retval.add(joinCondDesc.getRight());
    }
    return retval;
}
Also used : TreeSet(java.util.TreeSet) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc)
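
To make the tag bookkeeping above concrete, here is a minimal, self-contained sketch of the same idea. It does not use the real Hive classes: SimpleJoinCond is a hypothetical stand-in for JoinCondDesc, with left()/right() playing the role of getLeft()/getRight().

import java.util.Set;
import java.util.TreeSet;

public class FetchAtCloseSketch {

    // Hypothetical stand-in for JoinCondDesc: each condition records the
    // positions (tags) of its left and right inputs.
    record SimpleJoinCond(int left, int right) { }

    // Mirror of getFetchInputAtCloseList(): collect every input position that
    // appears in some join condition; those inputs keep fetching at close.
    static Set<Integer> fetchInputsAtClose(SimpleJoinCond[] conds) {
        Set<Integer> retval = new TreeSet<>();
        for (SimpleJoinCond cond : conds) {
            retval.add(cond.left());
            retval.add(cond.right());
        }
        return retval;
    }

    public static void main(String[] args) {
        // e.g. a three-way join (a JOIN b) JOIN c with conditions (0,1) and (0,2)
        SimpleJoinCond[] conds = { new SimpleJoinCond(0, 1), new SimpleJoinCond(0, 2) };
        System.out.println(fetchInputsAtClose(conds)); // prints [0, 1, 2]
    }
}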

Example 12 with JoinCondDesc

use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in project hive by apache.

the class AbstractSMBJoinProc method canConvertJoinToBucketMapJoin.

// Can the join operator be converted to a bucket map-merge join operator ?
@SuppressWarnings("unchecked")
protected boolean canConvertJoinToBucketMapJoin(JoinOperator joinOp, SortBucketJoinProcCtx context) throws SemanticException {
    // This has already been inspected and rejected
    if (context.getRejectedJoinOps().contains(joinOp)) {
        return false;
    }
    if (!this.pGraphContext.getJoinOps().contains(joinOp)) {
        return false;
    }
    Class<? extends BigTableSelectorForAutoSMJ> bigTableMatcherClass = null;
    try {
        String selector = HiveConf.getVar(pGraphContext.getConf(), HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN_BIGTABLE_SELECTOR);
        bigTableMatcherClass = JavaUtils.loadClass(selector);
    } catch (ClassNotFoundException e) {
        throw new SemanticException(e.getMessage());
    }
    BigTableSelectorForAutoSMJ bigTableMatcher = ReflectionUtils.newInstance(bigTableMatcherClass, null);
    JoinDesc joinDesc = joinOp.getConf();
    JoinCondDesc[] joinCondns = joinDesc.getConds();
    Set<Integer> joinCandidates = MapJoinProcessor.getBigTableCandidates(joinCondns);
    if (joinCandidates.isEmpty()) {
        // This is a full outer join; it can never be converted to a map-join
        // of any type. So return false.
        return false;
    }
    int bigTablePosition = bigTableMatcher.getBigTablePosition(pGraphContext, joinOp, joinCandidates);
    if (bigTablePosition < 0) {
        // no valid big table position was found (e.g. the join contains aliases from a sub-query)
        return false;
    }
    context.setBigTablePosition(bigTablePosition);
    String joinAlias = bigTablePosition == 0 ? joinOp.getConf().getLeftAlias() : joinOp.getConf().getRightAliases()[bigTablePosition - 1];
    joinAlias = QB.getAppendedAliasFromId(joinOp.getConf().getId(), joinAlias);
    Map<Byte, List<ExprNodeDesc>> keyExprMap = new HashMap<Byte, List<ExprNodeDesc>>();
    List<Operator<? extends OperatorDesc>> parentOps = joinOp.getParentOperators();
    // get the join keys from parent ReduceSink operators
    for (Operator<? extends OperatorDesc> parentOp : parentOps) {
        ReduceSinkDesc rsconf = ((ReduceSinkOperator) parentOp).getConf();
        Byte tag = (byte) rsconf.getTag();
        List<ExprNodeDesc> keys = rsconf.getKeyCols();
        keyExprMap.put(tag, keys);
    }
    context.setKeyExprMap(keyExprMap);
    // Make a deep copy of the aliases so that they are not changed in the context
    String[] joinSrcs = joinOp.getConf().getBaseSrc();
    String[] srcs = new String[joinSrcs.length];
    for (int srcPos = 0; srcPos < joinSrcs.length; srcPos++) {
        joinSrcs[srcPos] = QB.getAppendedAliasFromId(joinOp.getConf().getId(), joinSrcs[srcPos]);
        srcs[srcPos] = new String(joinSrcs[srcPos]);
    }
    // Check whether the candidate chosen by the big table matcher can actually
    // be converted to a bucket map join.
    return checkConvertBucketMapJoin(context, joinOp.getConf().getAliasToOpInfo(), keyExprMap, joinAlias, Arrays.asList(srcs));
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) HashMap(java.util.HashMap) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) SMBJoinDesc(org.apache.hadoop.hive.ql.plan.SMBJoinDesc) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
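
The early-exit on an empty candidate set is the interesting part: whether a position can be the big (streamed) table depends on the JoinCondDesc types. The sketch below is not the Hive MapJoinProcessor.getBigTableCandidates() implementation; it is a hypothetical, simplified model of the rule it relies on, namely that an outer join pins its preserved side as the only possible big table and a full outer join leaves no candidates at all, which is exactly the case the method above rejects.

import java.util.HashSet;
import java.util.Set;

public class BigTableCandidatesSketch {

    enum JoinType { INNER, LEFT_OUTER, RIGHT_OUTER, FULL_OUTER }

    // Hypothetical two-way condition: positions of the two inputs plus a join type.
    record Cond(int left, int right, JoinType type) { }

    // A position may be the big (streamed) table only if every other input can
    // live on the hashed side; outer joins restrict which side that can be.
    static Set<Integer> bigTableCandidates(Cond[] conds) {
        Set<Integer> candidates = new HashSet<>();
        for (Cond c : conds) {              // start with every mentioned position
            candidates.add(c.left());
            candidates.add(c.right());
        }
        for (Cond c : conds) {
            switch (c.type()) {
                case INNER       -> { /* no restriction */ }
                case LEFT_OUTER  -> candidates.remove(c.right()); // right side must be hashed
                case RIGHT_OUTER -> candidates.remove(c.left());  // left side must be hashed
                case FULL_OUTER  -> candidates.clear();           // nothing can be streamed
            }
        }
        return candidates;
    }

    public static void main(String[] args) {
        // a LEFT OUTER JOIN b: only position 0 may be the big table
        System.out.println(bigTableCandidates(new Cond[] { new Cond(0, 1, JoinType.LEFT_OUTER) }));
        // a FULL OUTER JOIN b: empty set, the situation the caller above rejects
        System.out.println(bigTableCandidates(new Cond[] { new Cond(0, 1, JoinType.FULL_OUTER) }));
    }
}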

Example 13 with JoinCondDesc

use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in project hive by apache.

the class SemanticAnalyzer method genJoinOperatorChildren.

private Operator genJoinOperatorChildren(QBJoinTree join, Operator left, Operator[] right, HashSet<Integer> omitOpts, ExprNodeDesc[][] joinKeys) throws SemanticException {
    RowResolver outputRR = new RowResolver();
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    // all children are base classes
    Operator<?>[] rightOps = new Operator[right.length];
    int outputPos = 0;
    Map<String, Byte> reversedExprs = new HashMap<String, Byte>();
    HashMap<Byte, List<ExprNodeDesc>> exprMap = new HashMap<Byte, List<ExprNodeDesc>>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    HashMap<Integer, Set<String>> posToAliasMap = new HashMap<Integer, Set<String>>();
    HashMap<Byte, List<ExprNodeDesc>> filterMap = new HashMap<Byte, List<ExprNodeDesc>>();
    for (int pos = 0; pos < right.length; ++pos) {
        Operator<?> input = right[pos] == null ? left : right[pos];
        if (input == null) {
            input = left;
        }
        ReduceSinkOperator rs = (ReduceSinkOperator) input;
        if (rs.getNumParent() != 1) {
            throw new SemanticException("RS should have single parent");
        }
        Operator<?> parent = rs.getParentOperators().get(0);
        ReduceSinkDesc rsDesc = (ReduceSinkDesc) (input.getConf());
        int[] index = rs.getValueIndex();
        ArrayList<ExprNodeDesc> valueDesc = new ArrayList<ExprNodeDesc>();
        ArrayList<ExprNodeDesc> filterDesc = new ArrayList<ExprNodeDesc>();
        Byte tag = (byte) rsDesc.getTag();
        // check whether this input operator produces output
        if (omitOpts != null && omitOpts.contains(pos)) {
            exprMap.put(tag, valueDesc);
            filterMap.put(tag, filterDesc);
            rightOps[pos] = input;
            continue;
        }
        List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
        List<String> valColNames = rsDesc.getOutputValueColumnNames();
        // prepare output descriptors for the input operator
        RowResolver inputRR = opParseCtx.get(input).getRowResolver();
        RowResolver parentRR = opParseCtx.get(parent).getRowResolver();
        posToAliasMap.put(pos, new HashSet<String>(inputRR.getTableNames()));
        List<ColumnInfo> columns = parentRR.getColumnInfos();
        for (int i = 0; i < index.length; i++) {
            ColumnInfo prev = columns.get(i);
            String[] nm = parentRR.reverseLookup(prev.getInternalName());
            String[] nm2 = parentRR.getAlternateMappings(prev.getInternalName());
            if (outputRR.get(nm[0], nm[1]) != null) {
                continue;
            }
            ColumnInfo info = new ColumnInfo(prev);
            String field;
            if (index[i] >= 0) {
                field = Utilities.ReduceField.KEY + "." + keyColNames.get(index[i]);
            } else {
                field = Utilities.ReduceField.VALUE + "." + valColNames.get(-index[i] - 1);
            }
            String internalName = getColumnInternalName(outputColumnNames.size());
            ExprNodeColumnDesc desc = new ExprNodeColumnDesc(info.getType(), field, info.getTabAlias(), info.getIsVirtualCol());
            info.setInternalName(internalName);
            colExprMap.put(internalName, desc);
            outputRR.put(nm[0], nm[1], info);
            if (nm2 != null) {
                outputRR.addMappingOnly(nm2[0], nm2[1], info);
            }
            valueDesc.add(desc);
            outputColumnNames.add(internalName);
            reversedExprs.put(internalName, tag);
        }
        for (ASTNode cond : join.getFilters().get(tag)) {
            filterDesc.add(genExprNodeDesc(cond, inputRR));
        }
        exprMap.put(tag, valueDesc);
        filterMap.put(tag, filterDesc);
        rightOps[pos] = input;
    }
    JoinCondDesc[] joinCondns = new JoinCondDesc[join.getJoinCond().length];
    for (int i = 0; i < join.getJoinCond().length; i++) {
        JoinCond condn = join.getJoinCond()[i];
        joinCondns[i] = new JoinCondDesc(condn);
    }
    JoinDesc desc = new JoinDesc(exprMap, outputColumnNames, join.getNoOuterJoin(), joinCondns, filterMap, joinKeys);
    desc.setReversedExprs(reversedExprs);
    desc.setFilterMap(join.getFilterMap());
    // For outer joins, add filters that apply to more than one input
    if (!join.getNoOuterJoin() && join.getPostJoinFilters().size() != 0) {
        List<ExprNodeDesc> residualFilterExprs = new ArrayList<ExprNodeDesc>();
        for (ASTNode cond : join.getPostJoinFilters()) {
            residualFilterExprs.add(genExprNodeDesc(cond, outputRR));
        }
        desc.setResidualFilterExprs(residualFilterExprs);
        // Clean post-conditions
        join.getPostJoinFilters().clear();
    }
    JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(getOpContext(), desc, new RowSchema(outputRR.getColumnInfos()), rightOps);
    joinOp.setColumnExprMap(colExprMap);
    joinOp.setPosToAliasMap(posToAliasMap);
    if (join.getNullSafes() != null) {
        boolean[] nullsafes = new boolean[join.getNullSafes().size()];
        for (int i = 0; i < nullsafes.length; i++) {
            nullsafes[i] = join.getNullSafes().get(i);
        }
        desc.setNullSafes(nullsafes);
    }
    return putOpInsertMap(joinOp, outputRR);
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) HashSet(java.util.HashSet) Set(java.util.Set) TreeSet(java.util.TreeSet) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) LinkedList(java.util.LinkedList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) LateralViewJoinDesc(org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc)
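
For orientation, the following small example builds the same kind of joinCondns array by hand instead of deriving it from a QBJoinTree. It assumes the three-argument JoinCondDesc(left, right, type) constructor and the join-type constants on JoinDesc; the positions shown are purely illustrative, since the real values come from how SemanticAnalyzer lays out the join tree.

import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
import org.apache.hadoop.hive.ql.plan.JoinDesc;

public class JoinCondArraySketch {
    public static void main(String[] args) {
        // Illustrative shape only: three inputs, 0 INNER JOIN 1, then LEFT OUTER JOIN 2.
        JoinCondDesc[] joinCondns = new JoinCondDesc[] {
            new JoinCondDesc(0, 1, JoinDesc.INNER_JOIN),
            new JoinCondDesc(0, 2, JoinDesc.LEFT_OUTER_JOIN)
        };
        for (JoinCondDesc cond : joinCondns) {
            System.out.println(cond.getLeft() + " -> " + cond.getRight()
                + " (type " + cond.getType() + ")");
        }
    }
}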

Aggregations

JoinCondDesc (org.apache.hadoop.hive.ql.plan.JoinCondDesc) 13
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException) 7
ArrayList (java.util.ArrayList) 6
JoinDesc (org.apache.hadoop.hive.ql.plan.JoinDesc) 6
List (java.util.List) 5
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator) 5
Operator (org.apache.hadoop.hive.ql.exec.Operator) 5
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) 5
HashMap (java.util.HashMap) 4
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc) 4
MapJoinDesc (org.apache.hadoop.hive.ql.plan.MapJoinDesc) 4
HashSet (java.util.HashSet) 3
LinkedHashMap (java.util.LinkedHashMap) 3
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo) 3
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator) 3
SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) 3
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator) 3
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator) 3
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc) 3
ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) 3