Search in sources :

Example 6 with JoinDesc

use of org.apache.hadoop.hive.ql.plan.JoinDesc in project hive by apache.

the class JoinVisitor method genJoin.

private JoinOperator genJoin(RelNode join, ExprNodeDesc[][] joinExpressions, List<List<ExprNodeDesc>> filterExpressions, List<Operator<?>> children, String[] baseSrc, String tabAlias) throws SemanticException {
    // 1. Extract join type
    JoinCondDesc[] joinCondns;
    boolean semiJoin;
    boolean noOuterJoin;
    if (join instanceof HiveMultiJoin) {
        HiveMultiJoin hmj = (HiveMultiJoin) join;
        joinCondns = new JoinCondDesc[hmj.getJoinInputs().size()];
        for (int i = 0; i < hmj.getJoinInputs().size(); i++) {
            joinCondns[i] = new JoinCondDesc(new JoinCond(hmj.getJoinInputs().get(i).left, hmj.getJoinInputs().get(i).right, transformJoinType(hmj.getJoinTypes().get(i))));
        }
        semiJoin = false;
        noOuterJoin = !hmj.isOuterJoin();
    } else {
        joinCondns = new JoinCondDesc[1];
        JoinRelType joinRelType = JoinRelType.INNER;
        if (join instanceof Join) {
            joinRelType = ((Join) join).getJoinType();
        }
        JoinType joinType;
        switch(joinRelType) {
            case SEMI:
                joinType = JoinType.LEFTSEMI;
                semiJoin = true;
                break;
            case ANTI:
                joinType = JoinType.ANTI;
                semiJoin = true;
                break;
            default:
                assert join instanceof Join;
                joinType = transformJoinType(((Join) join).getJoinType());
                semiJoin = false;
        }
        joinCondns[0] = new JoinCondDesc(new JoinCond(0, 1, joinType));
        noOuterJoin = joinType != JoinType.FULLOUTER && joinType != JoinType.LEFTOUTER && joinType != JoinType.RIGHTOUTER;
    }
    // 2. We create the join aux structures
    ArrayList<ColumnInfo> outputColumns = new ArrayList<ColumnInfo>();
    ArrayList<String> outputColumnNames = new ArrayList<String>(join.getRowType().getFieldNames());
    Operator<?>[] childOps = new Operator[children.size()];
    Map<String, Byte> reversedExprs = new HashMap<String, Byte>();
    Map<Byte, List<ExprNodeDesc>> exprMap = new HashMap<Byte, List<ExprNodeDesc>>();
    Map<Byte, List<ExprNodeDesc>> filters = new HashMap<Byte, List<ExprNodeDesc>>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    HashMap<Integer, Set<String>> posToAliasMap = new HashMap<Integer, Set<String>>();
    int outputPos = 0;
    for (int pos = 0; pos < children.size(); pos++) {
        // 2.1. Backtracking from RS
        ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
        if (inputRS.getNumParent() != 1) {
            throw new SemanticException("RS should have single parent");
        }
        Operator<?> parent = inputRS.getParentOperators().get(0);
        ReduceSinkDesc rsDesc = inputRS.getConf();
        int[] index = inputRS.getValueIndex();
        Byte tag = (byte) rsDesc.getTag();
        // 2.1.1. If semijoin...
        if (semiJoin && pos != 0) {
            exprMap.put(tag, new ArrayList<ExprNodeDesc>());
            childOps[pos] = inputRS;
            continue;
        }
        posToAliasMap.put(pos, new HashSet<String>(inputRS.getSchema().getTableNames()));
        List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
        List<String> valColNames = rsDesc.getOutputValueColumnNames();
        Map<String, ExprNodeDesc> descriptors = buildBacktrackFromReduceSinkForJoin(outputPos, outputColumnNames, keyColNames, valColNames, index, parent, baseSrc[pos]);
        List<ColumnInfo> parentColumns = parent.getSchema().getSignature();
        for (int i = 0; i < index.length; i++) {
            ColumnInfo info = new ColumnInfo(parentColumns.get(i));
            info.setInternalName(outputColumnNames.get(outputPos));
            info.setTabAlias(tabAlias);
            outputColumns.add(info);
            reversedExprs.put(outputColumnNames.get(outputPos), tag);
            outputPos++;
        }
        exprMap.put(tag, new ArrayList<ExprNodeDesc>(descriptors.values()));
        colExprMap.putAll(descriptors);
        childOps[pos] = inputRS;
    }
    // 3. We populate the filters and filterMap structure needed in the join descriptor
    List<List<ExprNodeDesc>> filtersPerInput = Lists.newArrayList();
    int[][] filterMap = new int[children.size()][];
    for (int i = 0; i < children.size(); i++) {
        filtersPerInput.add(new ArrayList<ExprNodeDesc>());
    }
    // 3. We populate the filters structure
    for (int i = 0; i < filterExpressions.size(); i++) {
        int leftPos = joinCondns[i].getLeft();
        int rightPos = joinCondns[i].getRight();
        for (ExprNodeDesc expr : filterExpressions.get(i)) {
            // We need to update the exprNode, as currently
            // they refer to columns in the output of the join;
            // they should refer to the columns output by the RS
            int inputPos = updateExprNode(expr, reversedExprs, colExprMap);
            if (inputPos == -1) {
                inputPos = leftPos;
            }
            filtersPerInput.get(inputPos).add(expr);
            if (joinCondns[i].getType() == JoinDesc.FULL_OUTER_JOIN || joinCondns[i].getType() == JoinDesc.LEFT_OUTER_JOIN || joinCondns[i].getType() == JoinDesc.RIGHT_OUTER_JOIN) {
                if (inputPos == leftPos) {
                    updateFilterMap(filterMap, leftPos, rightPos);
                } else {
                    updateFilterMap(filterMap, rightPos, leftPos);
                }
            }
        }
    }
    for (int pos = 0; pos < children.size(); pos++) {
        ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
        ReduceSinkDesc rsDesc = inputRS.getConf();
        Byte tag = (byte) rsDesc.getTag();
        filters.put(tag, filtersPerInput.get(pos));
    }
    // 4. We create the join operator with its descriptor
    JoinDesc desc = new JoinDesc(exprMap, outputColumnNames, noOuterJoin, joinCondns, filters, joinExpressions, null);
    desc.setReversedExprs(reversedExprs);
    desc.setFilterMap(filterMap);
    JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(childOps[0].getCompilationOpContext(), desc, new RowSchema(outputColumns), childOps);
    joinOp.setColumnExprMap(colExprMap);
    joinOp.setPosToAliasMap(posToAliasMap);
    joinOp.getConf().setBaseSrc(baseSrc);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + joinOp + " with row schema: [" + joinOp.getSchema() + "]");
    }
    return joinOp;
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) HashSet(java.util.HashSet) Set(java.util.Set) HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) JoinCond(org.apache.hadoop.hive.ql.parse.JoinCond) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) HiveSemiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin) Join(org.apache.calcite.rel.core.Join) HiveAntiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAntiJoin) HiveJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin) HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) JoinType(org.apache.hadoop.hive.ql.parse.JoinType) JoinRelType(org.apache.calcite.rel.core.JoinRelType) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc)

Example 7 with JoinDesc

use of org.apache.hadoop.hive.ql.plan.JoinDesc in project hive by apache.

the class CorrelationOptimizer method findPossibleAutoConvertedJoinOperators.

private void findPossibleAutoConvertedJoinOperators() throws SemanticException {
    // based on hive.auto.convert.join.noconditionaltask.size.
    for (JoinOperator joinOp : pCtx.getJoinOps()) {
        boolean isAbleToGuess = true;
        boolean mayConvert = false;
        // Get total size and individual alias's size
        long aliasTotalKnownInputSize = 0;
        Map<String, Long> aliasToSize = new HashMap<String, Long>();
        Map<Integer, Set<String>> posToAliases = new HashMap<Integer, Set<String>>();
        for (int pos = 0; pos < joinOp.getNumParent(); pos++) {
            Operator<? extends OperatorDesc> op = joinOp.getParentOperators().get(pos);
            Set<TableScanOperator> topOps = CorrelationUtilities.findTableScanOperators(op);
            if (topOps.isEmpty()) {
                isAbleToGuess = false;
                break;
            }
            Set<String> aliases = new LinkedHashSet<String>();
            for (TableScanOperator tsop : topOps) {
                Table table = tsop.getConf().getTableMetadata();
                if (table == null) {
                    // table should not be null.
                    throw new SemanticException("The table of " + tsop.getName() + " " + tsop.getIdentifier() + " is null, which is not expected.");
                }
                String alias = tsop.getConf().getAlias();
                aliases.add(alias);
                Path p = table.getPath();
                ContentSummary resultCs = null;
                try {
                    FileSystem fs = table.getPath().getFileSystem(pCtx.getConf());
                    resultCs = fs.getContentSummary(p);
                } catch (IOException e) {
                    LOG.warn("Encounter a error while querying content summary of table " + table.getCompleteName() + " from FileSystem. " + "Cannot guess if CommonJoinOperator will optimize " + joinOp.getName() + " " + joinOp.getIdentifier());
                }
                if (resultCs == null) {
                    isAbleToGuess = false;
                    break;
                }
                long size = resultCs.getLength();
                aliasTotalKnownInputSize += size;
                Long es = aliasToSize.get(alias);
                if (es == null) {
                    es = Long.valueOf(0);
                }
                es += size;
                aliasToSize.put(alias, es);
            }
            posToAliases.put(pos, aliases);
        }
        if (!isAbleToGuess) {
            LOG.info("Cannot guess if CommonJoinOperator will optimize " + joinOp.getName() + " " + joinOp.getIdentifier());
            continue;
        }
        JoinDesc joinDesc = joinOp.getConf();
        Byte[] order = joinDesc.getTagOrder();
        int numAliases = order.length;
        Set<Integer> bigTableCandidates = MapJoinProcessor.getBigTableCandidates(joinDesc.getConds());
        if (bigTableCandidates.isEmpty()) {
            continue;
        }
        long ThresholdOfSmallTblSizeSum = HiveConf.getLongVar(pCtx.getConf(), HiveConf.ConfVars.HIVESMALLTABLESFILESIZE);
        for (int i = 0; i < numAliases; i++) {
            // this table cannot be big table
            if (!bigTableCandidates.contains(i)) {
                continue;
            }
            Set<String> aliases = posToAliases.get(i);
            long aliasKnownSize = Utilities.sumOf(aliasToSize, aliases);
            if (!CommonJoinTaskDispatcher.cannotConvert(aliasKnownSize, aliasTotalKnownInputSize, ThresholdOfSmallTblSizeSum)) {
                mayConvert = true;
            }
        }
        if (mayConvert) {
            LOG.info(joinOp.getName() + " " + joinOp.getIdentifier() + " may be converted to MapJoin by CommonJoinResolver");
            skipedJoinOperators.add(joinOp);
        }
    }
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) LinkedHashSet(java.util.LinkedHashSet) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Set(java.util.Set) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) FileSystem(org.apache.hadoop.fs.FileSystem) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) Path(org.apache.hadoop.fs.Path) Table(org.apache.hadoop.hive.ql.metadata.Table) IOException(java.io.IOException) ContentSummary(org.apache.hadoop.fs.ContentSummary) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc)

Example 8 with JoinDesc

use of org.apache.hadoop.hive.ql.plan.JoinDesc in project hive by apache.

the class HiveOpConverter method genJoin.

private static JoinOperator genJoin(RelNode join, ExprNodeDesc[][] joinExpressions, List<List<ExprNodeDesc>> filterExpressions, List<Operator<?>> children, String[] baseSrc, String tabAlias) throws SemanticException {
    // 1. Extract join type
    JoinCondDesc[] joinCondns;
    boolean semiJoin;
    boolean noOuterJoin;
    if (join instanceof HiveMultiJoin) {
        HiveMultiJoin hmj = (HiveMultiJoin) join;
        joinCondns = new JoinCondDesc[hmj.getJoinInputs().size()];
        for (int i = 0; i < hmj.getJoinInputs().size(); i++) {
            joinCondns[i] = new JoinCondDesc(new JoinCond(hmj.getJoinInputs().get(i).left, hmj.getJoinInputs().get(i).right, transformJoinType(hmj.getJoinTypes().get(i))));
        }
        semiJoin = false;
        noOuterJoin = !hmj.isOuterJoin();
    } else {
        joinCondns = new JoinCondDesc[1];
        semiJoin = join instanceof SemiJoin;
        JoinType joinType;
        if (semiJoin) {
            joinType = JoinType.LEFTSEMI;
        } else {
            joinType = extractJoinType((Join) join);
        }
        joinCondns[0] = new JoinCondDesc(new JoinCond(0, 1, joinType));
        noOuterJoin = joinType != JoinType.FULLOUTER && joinType != JoinType.LEFTOUTER && joinType != JoinType.RIGHTOUTER;
    }
    // 2. We create the join aux structures
    ArrayList<ColumnInfo> outputColumns = new ArrayList<ColumnInfo>();
    ArrayList<String> outputColumnNames = new ArrayList<String>(join.getRowType().getFieldNames());
    Operator<?>[] childOps = new Operator[children.size()];
    Map<String, Byte> reversedExprs = new HashMap<String, Byte>();
    Map<Byte, List<ExprNodeDesc>> exprMap = new HashMap<Byte, List<ExprNodeDesc>>();
    Map<Byte, List<ExprNodeDesc>> filters = new HashMap<Byte, List<ExprNodeDesc>>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    HashMap<Integer, Set<String>> posToAliasMap = new HashMap<Integer, Set<String>>();
    int outputPos = 0;
    for (int pos = 0; pos < children.size(); pos++) {
        // 2.1. Backtracking from RS
        ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
        if (inputRS.getNumParent() != 1) {
            throw new SemanticException("RS should have single parent");
        }
        Operator<?> parent = inputRS.getParentOperators().get(0);
        ReduceSinkDesc rsDesc = inputRS.getConf();
        int[] index = inputRS.getValueIndex();
        Byte tag = (byte) rsDesc.getTag();
        // 2.1.1. If semijoin...
        if (semiJoin && pos != 0) {
            exprMap.put(tag, new ArrayList<ExprNodeDesc>());
            childOps[pos] = inputRS;
            continue;
        }
        posToAliasMap.put(pos, new HashSet<String>(inputRS.getSchema().getTableNames()));
        List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
        List<String> valColNames = rsDesc.getOutputValueColumnNames();
        Map<String, ExprNodeDesc> descriptors = buildBacktrackFromReduceSinkForJoin(outputPos, outputColumnNames, keyColNames, valColNames, index, parent, baseSrc[pos]);
        List<ColumnInfo> parentColumns = parent.getSchema().getSignature();
        for (int i = 0; i < index.length; i++) {
            ColumnInfo info = new ColumnInfo(parentColumns.get(i));
            info.setInternalName(outputColumnNames.get(outputPos));
            info.setTabAlias(tabAlias);
            outputColumns.add(info);
            reversedExprs.put(outputColumnNames.get(outputPos), tag);
            outputPos++;
        }
        exprMap.put(tag, new ArrayList<ExprNodeDesc>(descriptors.values()));
        colExprMap.putAll(descriptors);
        childOps[pos] = inputRS;
    }
    // 3. We populate the filters and filterMap structure needed in the join descriptor
    List<List<ExprNodeDesc>> filtersPerInput = Lists.newArrayList();
    int[][] filterMap = new int[children.size()][];
    for (int i = 0; i < children.size(); i++) {
        filtersPerInput.add(new ArrayList<ExprNodeDesc>());
    }
    // 3. We populate the filters structure
    for (int i = 0; i < filterExpressions.size(); i++) {
        int leftPos = joinCondns[i].getLeft();
        int rightPos = joinCondns[i].getRight();
        for (ExprNodeDesc expr : filterExpressions.get(i)) {
            // We need to update the exprNode, as currently
            // they refer to columns in the output of the join;
            // they should refer to the columns output by the RS
            int inputPos = updateExprNode(expr, reversedExprs, colExprMap);
            if (inputPos == -1) {
                inputPos = leftPos;
            }
            filtersPerInput.get(inputPos).add(expr);
            if (joinCondns[i].getType() == JoinDesc.FULL_OUTER_JOIN || joinCondns[i].getType() == JoinDesc.LEFT_OUTER_JOIN || joinCondns[i].getType() == JoinDesc.RIGHT_OUTER_JOIN) {
                if (inputPos == leftPos) {
                    updateFilterMap(filterMap, leftPos, rightPos);
                } else {
                    updateFilterMap(filterMap, rightPos, leftPos);
                }
            }
        }
    }
    for (int pos = 0; pos < children.size(); pos++) {
        ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
        ReduceSinkDesc rsDesc = inputRS.getConf();
        Byte tag = (byte) rsDesc.getTag();
        filters.put(tag, filtersPerInput.get(pos));
    }
    // 4. We create the join operator with its descriptor
    JoinDesc desc = new JoinDesc(exprMap, outputColumnNames, noOuterJoin, joinCondns, filters, joinExpressions, null);
    desc.setReversedExprs(reversedExprs);
    desc.setFilterMap(filterMap);
    JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(childOps[0].getCompilationOpContext(), desc, new RowSchema(outputColumns), childOps);
    joinOp.setColumnExprMap(colExprMap);
    joinOp.setPosToAliasMap(posToAliasMap);
    joinOp.getConf().setBaseSrc(baseSrc);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + joinOp + " with row schema: [" + joinOp.getSchema() + "]");
    }
    return joinOp;
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashSet(java.util.HashSet) HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) JoinCond(org.apache.hadoop.hive.ql.parse.JoinCond) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SemiJoin(org.apache.calcite.rel.core.SemiJoin) HiveSemiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) JoinType(org.apache.hadoop.hive.ql.parse.JoinType) HiveJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin) SemiJoin(org.apache.calcite.rel.core.SemiJoin) HiveSemiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin) Join(org.apache.calcite.rel.core.Join) HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc)

Example 9 with JoinDesc

use of org.apache.hadoop.hive.ql.plan.JoinDesc in project hive by apache.

the class SemanticAnalyzer method genJoinOperatorChildren.

private Operator genJoinOperatorChildren(QBJoinTree join, Operator left, Operator[] right, HashSet<Integer> omitOpts, ExprNodeDesc[][] joinKeys) throws SemanticException {
    RowResolver outputRR = new RowResolver();
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    // all children are base classes
    Operator<?>[] rightOps = new Operator[right.length];
    int outputPos = 0;
    Map<String, Byte> reversedExprs = new HashMap<String, Byte>();
    HashMap<Byte, List<ExprNodeDesc>> exprMap = new HashMap<Byte, List<ExprNodeDesc>>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    HashMap<Integer, Set<String>> posToAliasMap = new HashMap<Integer, Set<String>>();
    HashMap<Byte, List<ExprNodeDesc>> filterMap = new HashMap<Byte, List<ExprNodeDesc>>();
    // Only used for semijoin with residual predicates
    List<ColumnInfo> topSelectInputColumns = new ArrayList<>();
    for (int pos = 0; pos < right.length; ++pos) {
        Operator<?> input = right[pos] == null ? left : right[pos];
        if (input == null) {
            input = left;
        }
        ReduceSinkOperator rs = (ReduceSinkOperator) input;
        if (rs.getNumParent() != 1) {
            throw new SemanticException("RS should have single parent");
        }
        Operator<?> parent = rs.getParentOperators().get(0);
        ReduceSinkDesc rsDesc = (ReduceSinkDesc) (input.getConf());
        int[] index = rs.getValueIndex();
        ArrayList<ExprNodeDesc> valueDesc = new ArrayList<ExprNodeDesc>();
        ArrayList<ExprNodeDesc> filterDesc = new ArrayList<ExprNodeDesc>();
        Byte tag = (byte) rsDesc.getTag();
        // we will add a Select on top of the join
        if (omitOpts != null && omitOpts.contains(pos) && join.getPostJoinFilters().size() == 0) {
            exprMap.put(tag, valueDesc);
            filterMap.put(tag, filterDesc);
            rightOps[pos] = input;
            continue;
        }
        List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
        List<String> valColNames = rsDesc.getOutputValueColumnNames();
        // prepare output descriptors for the input opt
        RowResolver inputRR = opParseCtx.get(input).getRowResolver();
        RowResolver parentRR = opParseCtx.get(parent).getRowResolver();
        posToAliasMap.put(pos, new HashSet<String>(inputRR.getTableNames()));
        List<ColumnInfo> columns = parentRR.getColumnInfos();
        for (int i = 0; i < index.length; i++) {
            ColumnInfo prev = columns.get(i);
            String[] nm = parentRR.reverseLookup(prev.getInternalName());
            String[] nm2 = parentRR.getAlternateMappings(prev.getInternalName());
            if (outputRR.get(nm[0], nm[1]) != null) {
                continue;
            }
            ColumnInfo info = new ColumnInfo(prev);
            String field;
            if (index[i] >= 0) {
                field = Utilities.ReduceField.KEY + "." + keyColNames.get(index[i]);
            } else {
                field = Utilities.ReduceField.VALUE + "." + valColNames.get(-index[i] - 1);
            }
            String internalName = getColumnInternalName(outputColumnNames.size());
            ExprNodeColumnDesc desc = new ExprNodeColumnDesc(info.getType(), field, info.getTabAlias(), info.getIsVirtualCol());
            info.setInternalName(internalName);
            colExprMap.put(internalName, desc);
            outputRR.put(nm[0], nm[1], info);
            if (nm2 != null) {
                outputRR.addMappingOnly(nm2[0], nm2[1], info);
            }
            valueDesc.add(desc);
            outputColumnNames.add(internalName);
            reversedExprs.put(internalName, tag);
            // Populate semijoin select if needed
            if (omitOpts == null || !omitOpts.contains(pos)) {
                topSelectInputColumns.add(info);
            }
        }
        for (ASTNode cond : join.getFilters().get(tag)) {
            filterDesc.add(genExprNodeDesc(cond, inputRR));
        }
        exprMap.put(tag, valueDesc);
        filterMap.put(tag, filterDesc);
        rightOps[pos] = input;
    }
    JoinCondDesc[] joinCondns = new JoinCondDesc[join.getJoinCond().length];
    for (int i = 0; i < join.getJoinCond().length; i++) {
        JoinCond condn = join.getJoinCond()[i];
        joinCondns[i] = new JoinCondDesc(condn);
    }
    JoinDesc desc = new JoinDesc(exprMap, outputColumnNames, join.getNoOuterJoin(), joinCondns, filterMap, joinKeys, null);
    desc.setReversedExprs(reversedExprs);
    desc.setFilterMap(join.getFilterMap());
    // Add filters that apply to more than one input
    if (join.getPostJoinFilters().size() != 0 && (!join.getNoOuterJoin() || !join.getNoSemiJoin() || HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_PUSH_RESIDUAL_INNER))) {
        LOG.debug("Generate JOIN with post-filtering conditions");
        List<ExprNodeDesc> residualFilterExprs = new ArrayList<ExprNodeDesc>();
        for (ASTNode cond : join.getPostJoinFilters()) {
            residualFilterExprs.add(genExprNodeDesc(cond, outputRR, false, isCBOExecuted()));
        }
        desc.setResidualFilterExprs(residualFilterExprs);
        // Clean post-conditions
        join.getPostJoinFilters().clear();
    }
    JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(getOpContext(), desc, new RowSchema(outputRR.getColumnInfos()), rightOps);
    joinOp.setColumnExprMap(colExprMap);
    joinOp.setPosToAliasMap(posToAliasMap);
    if (join.getNullSafes() != null) {
        boolean[] nullsafes = new boolean[join.getNullSafes().size()];
        for (int i = 0; i < nullsafes.length; i++) {
            nullsafes[i] = join.getNullSafes().get(i);
        }
        desc.setNullSafes(nullsafes);
    }
    Operator<?> topOp = putOpInsertMap(joinOp, outputRR);
    if (omitOpts != null && !omitOpts.isEmpty() && desc.getResidualFilterExprs() != null && !desc.getResidualFilterExprs().isEmpty()) {
        // Adding a select operator to top of semijoin to ensure projection of only correct columns
        final List<ExprNodeDesc> topSelectExprs = new ArrayList<>();
        final List<String> topSelectOutputColNames = new ArrayList<>();
        final RowResolver topSelectRR = new RowResolver();
        final Map<String, ExprNodeDesc> topSelectColExprMap = new HashMap<String, ExprNodeDesc>();
        for (ColumnInfo colInfo : topSelectInputColumns) {
            ExprNodeColumnDesc columnExpr = new ExprNodeColumnDesc(colInfo);
            topSelectExprs.add(columnExpr);
            topSelectOutputColNames.add(colInfo.getInternalName());
            topSelectColExprMap.put(colInfo.getInternalName(), columnExpr);
            String[] nm = outputRR.reverseLookup(columnExpr.getColumn());
            String[] nm2 = outputRR.getAlternateMappings(columnExpr.getColumn());
            topSelectRR.put(nm[0], nm[1], colInfo);
            if (nm2 != null) {
                topSelectRR.addMappingOnly(nm2[0], nm2[1], colInfo);
            }
        }
        final SelectDesc topSelect = new SelectDesc(topSelectExprs, topSelectOutputColNames);
        topOp = putOpInsertMap(OperatorFactory.getAndMakeChild(topSelect, new RowSchema(topSelectRR.getColumnInfos()), topOp), topSelectRR);
        topOp.setColumnExprMap(topSelectColExprMap);
    }
    return topOp;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) HashSet(java.util.HashSet) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) Set(java.util.Set) TreeSet(java.util.TreeSet) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) LinkedList(java.util.LinkedList) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) LateralViewJoinDesc(org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc)

Example 10 with JoinDesc

use of org.apache.hadoop.hive.ql.plan.JoinDesc in project hive by apache.

the class GenSparkSkewJoinProcessor method processSkewJoin.

@SuppressWarnings("unchecked")
public static void processSkewJoin(JoinOperator joinOp, Task<? extends Serializable> currTask, ReduceWork reduceWork, ParseContext parseCtx) throws SemanticException {
    SparkWork currentWork = ((SparkTask) currTask).getWork();
    if (currentWork.getChildren(reduceWork).size() > 0) {
        LOG.warn("Skip runtime skew join as the ReduceWork has child work and hasn't been split.");
        return;
    }
    List<Task<? extends Serializable>> children = currTask.getChildTasks();
    Path baseTmpDir = parseCtx.getContext().getMRTmpPath();
    JoinDesc joinDescriptor = joinOp.getConf();
    Map<Byte, List<ExprNodeDesc>> joinValues = joinDescriptor.getExprs();
    int numAliases = joinValues.size();
    Map<Byte, Path> bigKeysDirMap = new HashMap<Byte, Path>();
    Map<Byte, Map<Byte, Path>> smallKeysDirMap = new HashMap<Byte, Map<Byte, Path>>();
    Map<Byte, Path> skewJoinJobResultsDir = new HashMap<Byte, Path>();
    Byte[] tags = joinDescriptor.getTagOrder();
    // for each joining table, set dir for big key and small keys properly
    for (int i = 0; i < numAliases; i++) {
        Byte alias = tags[i];
        bigKeysDirMap.put(alias, GenMRSkewJoinProcessor.getBigKeysDir(baseTmpDir, alias));
        Map<Byte, Path> smallKeysMap = new HashMap<Byte, Path>();
        smallKeysDirMap.put(alias, smallKeysMap);
        for (Byte src2 : tags) {
            if (!src2.equals(alias)) {
                smallKeysMap.put(src2, GenMRSkewJoinProcessor.getSmallKeysDir(baseTmpDir, alias, src2));
            }
        }
        skewJoinJobResultsDir.put(alias, GenMRSkewJoinProcessor.getBigKeysSkewJoinResultDir(baseTmpDir, alias));
    }
    joinDescriptor.setHandleSkewJoin(true);
    joinDescriptor.setBigKeysDirMap(bigKeysDirMap);
    joinDescriptor.setSmallKeysDirMap(smallKeysDirMap);
    joinDescriptor.setSkewKeyDefinition(HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVESKEWJOINKEY));
    // create proper table/column desc for spilled tables
    TableDesc keyTblDesc = (TableDesc) reduceWork.getKeyDesc().clone();
    List<String> joinKeys = Utilities.getColumnNames(keyTblDesc.getProperties());
    List<String> joinKeyTypes = Utilities.getColumnTypes(keyTblDesc.getProperties());
    Map<Byte, TableDesc> tableDescList = new HashMap<Byte, TableDesc>();
    Map<Byte, RowSchema> rowSchemaList = new HashMap<Byte, RowSchema>();
    Map<Byte, List<ExprNodeDesc>> newJoinValues = new HashMap<Byte, List<ExprNodeDesc>>();
    Map<Byte, List<ExprNodeDesc>> newJoinKeys = new HashMap<Byte, List<ExprNodeDesc>>();
    // used for create mapJoinDesc, should be in order
    List<TableDesc> newJoinValueTblDesc = new ArrayList<TableDesc>();
    for (int i = 0; i < tags.length; i++) {
        newJoinValueTblDesc.add(null);
    }
    for (int i = 0; i < numAliases; i++) {
        Byte alias = tags[i];
        List<ExprNodeDesc> valueCols = joinValues.get(alias);
        String colNames = "";
        String colTypes = "";
        int columnSize = valueCols.size();
        List<ExprNodeDesc> newValueExpr = new ArrayList<ExprNodeDesc>();
        List<ExprNodeDesc> newKeyExpr = new ArrayList<ExprNodeDesc>();
        ArrayList<ColumnInfo> columnInfos = new ArrayList<ColumnInfo>();
        boolean first = true;
        for (int k = 0; k < columnSize; k++) {
            TypeInfo type = valueCols.get(k).getTypeInfo();
            // any name, it does not matter.
            String newColName = i + "_VALUE_" + k;
            ColumnInfo columnInfo = new ColumnInfo(newColName, type, alias.toString(), false);
            columnInfos.add(columnInfo);
            newValueExpr.add(new ExprNodeColumnDesc(columnInfo.getType(), columnInfo.getInternalName(), columnInfo.getTabAlias(), false));
            if (!first) {
                colNames = colNames + ",";
                colTypes = colTypes + ",";
            }
            first = false;
            colNames = colNames + newColName;
            colTypes = colTypes + valueCols.get(k).getTypeString();
        }
        // we are putting join keys at last part of the spilled table
        for (int k = 0; k < joinKeys.size(); k++) {
            if (!first) {
                colNames = colNames + ",";
                colTypes = colTypes + ",";
            }
            first = false;
            colNames = colNames + joinKeys.get(k);
            colTypes = colTypes + joinKeyTypes.get(k);
            ColumnInfo columnInfo = new ColumnInfo(joinKeys.get(k), TypeInfoFactory.getPrimitiveTypeInfo(joinKeyTypes.get(k)), alias.toString(), false);
            columnInfos.add(columnInfo);
            newKeyExpr.add(new ExprNodeColumnDesc(columnInfo.getType(), columnInfo.getInternalName(), columnInfo.getTabAlias(), false));
        }
        newJoinValues.put(alias, newValueExpr);
        newJoinKeys.put(alias, newKeyExpr);
        tableDescList.put(alias, Utilities.getTableDesc(colNames, colTypes));
        rowSchemaList.put(alias, new RowSchema(columnInfos));
        // construct value table Desc
        String valueColNames = "";
        String valueColTypes = "";
        first = true;
        for (int k = 0; k < columnSize; k++) {
            // any name, it does not matter.
            String newColName = i + "_VALUE_" + k;
            if (!first) {
                valueColNames = valueColNames + ",";
                valueColTypes = valueColTypes + ",";
            }
            valueColNames = valueColNames + newColName;
            valueColTypes = valueColTypes + valueCols.get(k).getTypeString();
            first = false;
        }
        newJoinValueTblDesc.set((byte) i, Utilities.getTableDesc(valueColNames, valueColTypes));
    }
    joinDescriptor.setSkewKeysValuesTables(tableDescList);
    joinDescriptor.setKeyTableDesc(keyTblDesc);
    // create N-1 map join tasks
    HashMap<Path, Task<? extends Serializable>> bigKeysDirToTaskMap = new HashMap<Path, Task<? extends Serializable>>();
    List<Serializable> listWorks = new ArrayList<Serializable>();
    List<Task<? extends Serializable>> listTasks = new ArrayList<Task<? extends Serializable>>();
    for (int i = 0; i < numAliases - 1; i++) {
        Byte src = tags[i];
        HiveConf hiveConf = new HiveConf(parseCtx.getConf(), GenSparkSkewJoinProcessor.class);
        SparkWork sparkWork = new SparkWork(parseCtx.getConf().getVar(HiveConf.ConfVars.HIVEQUERYID));
        Task<? extends Serializable> skewJoinMapJoinTask = TaskFactory.get(sparkWork);
        skewJoinMapJoinTask.setFetchSource(currTask.isFetchSource());
        // create N TableScans
        Operator<? extends OperatorDesc>[] parentOps = new TableScanOperator[tags.length];
        for (int k = 0; k < tags.length; k++) {
            Operator<? extends OperatorDesc> ts = GenMapRedUtils.createTemporaryTableScanOperator(joinOp.getCompilationOpContext(), rowSchemaList.get((byte) k));
            ((TableScanOperator) ts).setTableDescSkewJoin(tableDescList.get((byte) k));
            parentOps[k] = ts;
        }
        // create the MapJoinOperator
        String dumpFilePrefix = "mapfile" + PlanUtils.getCountForMapJoinDumpFilePrefix();
        MapJoinDesc mapJoinDescriptor = new MapJoinDesc(newJoinKeys, keyTblDesc, newJoinValues, newJoinValueTblDesc, newJoinValueTblDesc, joinDescriptor.getOutputColumnNames(), i, joinDescriptor.getConds(), joinDescriptor.getFilters(), joinDescriptor.getNoOuterJoin(), dumpFilePrefix, joinDescriptor.getMemoryMonitorInfo(), joinDescriptor.getInMemoryDataSize());
        mapJoinDescriptor.setTagOrder(tags);
        mapJoinDescriptor.setHandleSkewJoin(false);
        mapJoinDescriptor.setNullSafes(joinDescriptor.getNullSafes());
        mapJoinDescriptor.setColumnExprMap(joinDescriptor.getColumnExprMap());
        // temporarily, mark it as child of all the TS
        MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(joinOp.getCompilationOpContext(), mapJoinDescriptor, null, parentOps);
        // clone the original join operator, and replace it with the MJ
        // this makes sure MJ has the same downstream operator plan as the original join
        List<Operator<?>> reducerList = new ArrayList<Operator<?>>();
        reducerList.add(reduceWork.getReducer());
        Operator<? extends OperatorDesc> reducer = SerializationUtilities.cloneOperatorTree(reducerList).get(0);
        Preconditions.checkArgument(reducer instanceof JoinOperator, "Reducer should be join operator, but actually is " + reducer.getName());
        JoinOperator cloneJoinOp = (JoinOperator) reducer;
        List<Operator<? extends OperatorDesc>> childOps = cloneJoinOp.getChildOperators();
        for (Operator<? extends OperatorDesc> childOp : childOps) {
            childOp.replaceParent(cloneJoinOp, mapJoinOp);
        }
        mapJoinOp.setChildOperators(childOps);
        // set memory usage for the MJ operator
        setMemUsage(mapJoinOp, skewJoinMapJoinTask, parseCtx);
        // create N MapWorks and add them to the SparkWork
        MapWork bigMapWork = null;
        Map<Byte, Path> smallTblDirs = smallKeysDirMap.get(src);
        for (int j = 0; j < tags.length; j++) {
            MapWork mapWork = PlanUtils.getMapRedWork().getMapWork();
            sparkWork.add(mapWork);
            // This code has been only added for testing
            boolean mapperCannotSpanPartns = parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS);
            mapWork.setMapperCannotSpanPartns(mapperCannotSpanPartns);
            Operator<? extends OperatorDesc> tableScan = parentOps[j];
            String alias = tags[j].toString();
            ArrayList<String> aliases = new ArrayList<String>();
            aliases.add(alias);
            Path path;
            if (j == i) {
                path = bigKeysDirMap.get(tags[j]);
                bigKeysDirToTaskMap.put(path, skewJoinMapJoinTask);
                bigMapWork = mapWork;
            } else {
                path = smallTblDirs.get(tags[j]);
            }
            mapWork.addPathToAlias(path, aliases);
            mapWork.getAliasToWork().put(alias, tableScan);
            PartitionDesc partitionDesc = new PartitionDesc(tableDescList.get(tags[j]), null);
            mapWork.addPathToPartitionInfo(path, partitionDesc);
            mapWork.getAliasToPartnInfo().put(alias, partitionDesc);
            mapWork.setName("Map " + GenSparkUtils.getUtils().getNextSeqNumber());
        }
        // connect all small dir map work to the big dir map work
        Preconditions.checkArgument(bigMapWork != null, "Haven't identified big dir MapWork");
        // these 2 flags are intended only for the big-key map work
        bigMapWork.setNumMapTasks(HiveConf.getIntVar(hiveConf, HiveConf.ConfVars.HIVESKEWJOINMAPJOINNUMMAPTASK));
        bigMapWork.setMinSplitSize(HiveConf.getLongVar(hiveConf, HiveConf.ConfVars.HIVESKEWJOINMAPJOINMINSPLIT));
        // use HiveInputFormat so that we can control the number of map tasks
        bigMapWork.setInputformat(HiveInputFormat.class.getName());
        for (BaseWork work : sparkWork.getRoots()) {
            Preconditions.checkArgument(work instanceof MapWork, "All root work should be MapWork, but got " + work.getClass().getSimpleName());
            if (work != bigMapWork) {
                sparkWork.connect(work, bigMapWork, new SparkEdgeProperty(SparkEdgeProperty.SHUFFLE_NONE));
            }
        }
        // insert SparkHashTableSink and Dummy operators
        for (int j = 0; j < tags.length; j++) {
            if (j != i) {
                insertSHTS(tags[j], (TableScanOperator) parentOps[j], bigMapWork);
            }
        }
        listWorks.add(skewJoinMapJoinTask.getWork());
        listTasks.add(skewJoinMapJoinTask);
    }
    if (children != null) {
        for (Task<? extends Serializable> tsk : listTasks) {
            for (Task<? extends Serializable> oldChild : children) {
                tsk.addDependentTask(oldChild);
            }
        }
        currTask.setChildTasks(new ArrayList<Task<? extends Serializable>>());
        for (Task<? extends Serializable> oldChild : children) {
            oldChild.getParentTasks().remove(currTask);
        }
        listTasks.addAll(children);
        for (Task<? extends Serializable> oldChild : children) {
            listWorks.add(oldChild.getWork());
        }
    }
    ConditionalResolverSkewJoin.ConditionalResolverSkewJoinCtx context = new ConditionalResolverSkewJoin.ConditionalResolverSkewJoinCtx(bigKeysDirToTaskMap, children);
    ConditionalWork cndWork = new ConditionalWork(listWorks);
    ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork);
    cndTsk.setListTasks(listTasks);
    cndTsk.setResolver(new ConditionalResolverSkewJoin());
    cndTsk.setResolverCtx(context);
    currTask.setChildTasks(new ArrayList<Task<? extends Serializable>>());
    currTask.addDependentTask(cndTsk);
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) Task(org.apache.hadoop.hive.ql.exec.Task) Serializable(java.io.Serializable) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ConditionalWork(org.apache.hadoop.hive.ql.plan.ConditionalWork) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) List(java.util.List) ArrayList(java.util.ArrayList) HiveConf(org.apache.hadoop.hive.conf.HiveConf) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) ConditionalResolverSkewJoin(org.apache.hadoop.hive.ql.plan.ConditionalResolverSkewJoin) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc) Map(java.util.Map) HashMap(java.util.HashMap) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) HashTableDummyOperator(org.apache.hadoop.hive.ql.exec.HashTableDummyOperator) SparkHashTableSinkOperator(org.apache.hadoop.hive.ql.exec.SparkHashTableSinkOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) HiveInputFormat(org.apache.hadoop.hive.ql.io.HiveInputFormat) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) Path(org.apache.hadoop.fs.Path) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) SparkWork(org.apache.hadoop.hive.ql.plan.SparkWork) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) SparkEdgeProperty(org.apache.hadoop.hive.ql.plan.SparkEdgeProperty) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc)

Aggregations

JoinDesc (org.apache.hadoop.hive.ql.plan.JoinDesc)24 ArrayList (java.util.ArrayList)15 MapJoinDesc (org.apache.hadoop.hive.ql.plan.MapJoinDesc)15 Operator (org.apache.hadoop.hive.ql.exec.Operator)14 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)13 List (java.util.List)12 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)12 JoinCondDesc (org.apache.hadoop.hive.ql.plan.JoinCondDesc)12 HashMap (java.util.HashMap)11 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)11 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)10 OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)10 LinkedHashMap (java.util.LinkedHashMap)9 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)9 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)9 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)8 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)8 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)8 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)7 HashSet (java.util.HashSet)6