
Example 1 with OpTraits

use of org.apache.hadoop.hive.ql.plan.OpTraits in project hive by apache.

the class ConvertJoinMapJoin method process.

/*
 * (non-Javadoc) we should ideally not modify the tree we traverse. However,
 * since we need to walk the tree at any time when we modify the operator, we
 * might as well do it here.
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
    OptimizeTezProcContext context = (OptimizeTezProcContext) procCtx;
    hashTableLoadFactor = context.conf.getFloatVar(ConfVars.HIVEHASHTABLELOADFACTOR);
    fastHashTableAvailable = context.conf.getBoolVar(ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED);
    JoinOperator joinOp = (JoinOperator) nd;
    // adjust noconditional task size threshold for LLAP
    LlapClusterStateForCompile llapInfo = null;
    if ("llap".equalsIgnoreCase(context.conf.getVar(ConfVars.HIVE_EXECUTION_MODE))) {
        llapInfo = LlapClusterStateForCompile.getClusterInfo(context.conf);
        llapInfo.initClusterInfo();
    }
    MemoryMonitorInfo memoryMonitorInfo = getMemoryMonitorInfo(context.conf, llapInfo);
    joinOp.getConf().setMemoryMonitorInfo(memoryMonitorInfo);
    maxJoinMemory = memoryMonitorInfo.getAdjustedNoConditionalTaskSize();
    LOG.info("maxJoinMemory: {}", maxJoinMemory);
    hashMapDataStructure = HashMapDataStructureType.of(joinOp.getConf());
    TezBucketJoinProcCtx tezBucketJoinProcCtx = new TezBucketJoinProcCtx(context.conf);
    boolean hiveConvertJoin = context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN) & !context.parseContext.getDisableMapJoin();
    if (!hiveConvertJoin) {
        // we are just converting to a common merge join operator. The shuffle
        // join in map-reduce case.
        Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx);
        if (retval == null) {
            return retval;
        } else {
            fallbackToReduceSideJoin(joinOp, context);
            return null;
        }
    }
    // if we have traits, and table info is present in the traits, we know the
    // exact number of buckets. Else choose the largest number of estimated
    // reducers from the parent operators.
    int numBuckets = -1;
    if (context.conf.getBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)) {
        numBuckets = estimateNumBuckets(joinOp, true);
    } else {
        numBuckets = 1;
    }
    LOG.info("Estimated number of buckets " + numBuckets);
    MapJoinConversion mapJoinConversion = getMapJoinConversion(joinOp, context, numBuckets, false, maxJoinMemory, true);
    if (mapJoinConversion == null) {
        Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx);
        if (retval == null) {
            return retval;
        } else {
            // only case is full outer join with SMB enabled which is not possible. Convert to regular
            // join.
            fallbackToReduceSideJoin(joinOp, context);
            return null;
        }
    }
    if (numBuckets > 1) {
        if (context.conf.getBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)) {
            // Check if we are in LLAP, if so it needs to be determined if we should use BMJ or DPHJ
            if (llapInfo != null) {
                if (selectJoinForLlap(context, joinOp, tezBucketJoinProcCtx, llapInfo, mapJoinConversion, numBuckets)) {
                    return null;
                }
            } else if (convertJoinBucketMapJoin(joinOp, context, mapJoinConversion, tezBucketJoinProcCtx)) {
                return null;
            }
        }
    }
    // check if we can convert to map join with no bucket scaling.
    LOG.info("Convert to non-bucketed map join");
    if (numBuckets != 1) {
        mapJoinConversion = getMapJoinConversion(joinOp, context, 1, false, maxJoinMemory, true);
    }
    if (mapJoinConversion == null) {
        // we are just converting to a common merge join operator. The shuffle
        // join in map-reduce case.
        fallbackToReduceSideJoin(joinOp, context);
        return null;
    }
    // Currently, this is a MJ path and we don't support FULL OUTER MapJoin yet.
    if (mapJoinConversion.getIsFullOuterJoin() && !mapJoinConversion.getIsFullOuterEnabledForMapJoin()) {
        fallbackToReduceSideJoin(joinOp, context);
        return null;
    }
    MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversion, true);
    if (mapJoinOp == null) {
        fallbackToReduceSideJoin(joinOp, context);
        return null;
    }
    // map join operator by default has no bucket cols and num of reduce sinks
    // reduced by 1
    mapJoinOp.setOpTraits(new OpTraits(null, -1, null, joinOp.getOpTraits().getNumReduceSinks()));
    preserveOperatorInfos(mapJoinOp, joinOp, context);
    // propagate this change till the next RS
    for (Operator<? extends OperatorDesc> childOp : mapJoinOp.getChildOperators()) {
        setAllChildrenTraits(childOp, mapJoinOp.getOpTraits());
    }
    return null;
}
Also used : CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) MemoryMonitorInfo(org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo) LlapClusterStateForCompile(org.apache.hadoop.hive.ql.optimizer.physical.LlapClusterStateForCompile) OpTraits(org.apache.hadoop.hive.ql.plan.OpTraits) OptimizeTezProcContext(org.apache.hadoop.hive.ql.parse.OptimizeTezProcContext)
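
The OpTraits step worth noting in Example 1 is the last one: once the join becomes a map join, its traits are reset because the map-join output no longer guarantees any bucketing or sorting, while the reduce-sink count is carried forward and then pushed down to every child until the next ReduceSink. A minimal restatement of that step; the variable name resetTraits is illustrative and not taken from the Hive source:

// The converted map join has no bucket columns, no sort columns, and an
// unknown bucket count (-1); only the number of reduce sinks seen so far
// is preserved from the original join's traits.
OpTraits resetTraits = new OpTraits(null, -1, null, joinOp.getOpTraits().getNumReduceSinks());
mapJoinOp.setOpTraits(resetTraits);
// Propagate the reset traits to the children until the next ReduceSink.
for (Operator<? extends OperatorDesc> childOp : mapJoinOp.getChildOperators()) {
    setAllChildrenTraits(childOp, mapJoinOp.getOpTraits());
}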

Example 2 with OpTraits

use of org.apache.hadoop.hive.ql.plan.OpTraits in project hive by apache.

the class ConvertJoinMapJoin method convertJoinBucketMapJoin.

private boolean convertJoinBucketMapJoin(JoinOperator joinOp, OptimizeTezProcContext context, MapJoinConversion mapJoinConversion, TezBucketJoinProcCtx tezBucketJoinProcCtx) throws SemanticException {
    if (mapJoinConversion.getIsFullOuterJoin() && !mapJoinConversion.getIsFullOuterEnabledForMapJoin()) {
        return false;
    }
    final int bigTablePosition = mapJoinConversion.getBigTablePos();
    if (!checkConvertJoinBucketMapJoin(joinOp, bigTablePosition, tezBucketJoinProcCtx)) {
        LOG.info("Check conversion to bucket map join failed.");
        return false;
    }
    // In case the join has extra keys other than bucketed columns, partition keys need to be updated
    // on small table(s).
    ReduceSinkOperator bigTableRS = (ReduceSinkOperator) joinOp.getParentOperators().get(bigTablePosition);
    OpTraits opTraits = bigTableRS.getOpTraits();
    List<List<String>> listBucketCols = opTraits.getBucketColNames();
    List<ExprNodeDesc> bigTablePartitionCols = bigTableRS.getConf().getPartitionCols();
    boolean updatePartitionCols = false;
    List<Integer> positions = new ArrayList<>();
    if (listBucketCols.get(0).size() != bigTablePartitionCols.size()) {
        updatePartitionCols = true;
        // Prepare updated partition columns for small table(s).
        // Get the positions of bucketed columns
        int i = 0;
        Map<String, ExprNodeDesc> colExprMap = bigTableRS.getColumnExprMap();
        for (ExprNodeDesc bigTableExpr : bigTablePartitionCols) {
            // It is guaranteed there is only 1 list within listBucketCols.
            for (String colName : listBucketCols.get(0)) {
                if (colExprMap.get(colName).isSame(bigTableExpr)) {
                    positions.add(i++);
                }
            }
        }
    }
    MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversion, true);
    if (mapJoinOp == null) {
        LOG.debug("Conversion to bucket map join failed.");
        return false;
    }
    MapJoinDesc joinDesc = mapJoinOp.getConf();
    joinDesc.setBucketMapJoin(true);
    // we can set the traits for this join operator
    opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), tezBucketJoinProcCtx.getNumBuckets(), null, joinOp.getOpTraits().getNumReduceSinks());
    mapJoinOp.setOpTraits(opTraits);
    preserveOperatorInfos(mapJoinOp, joinOp, context);
    setNumberOfBucketsOnChildren(mapJoinOp);
    // Once the conversion is done, we can set the partitioner to bucket cols on the small table
    Map<String, Integer> bigTableBucketNumMapping = new HashMap<String, Integer>();
    bigTableBucketNumMapping.put(joinDesc.getBigTableAlias(), tezBucketJoinProcCtx.getNumBuckets());
    joinDesc.setBigTableBucketNumMapping(bigTableBucketNumMapping);
    // Update the partition columns in small table to ensure correct routing of hash tables.
    if (updatePartitionCols) {
        // on the small table side.
        for (Operator<?> op : mapJoinOp.getParentOperators()) {
            if (!(op instanceof ReduceSinkOperator)) {
                continue;
            }
            ReduceSinkOperator rsOp = (ReduceSinkOperator) op;
            List<ExprNodeDesc> newPartitionCols = new ArrayList<>();
            List<ExprNodeDesc> partitionCols = rsOp.getConf().getPartitionCols();
            for (Integer position : positions) {
                newPartitionCols.add(partitionCols.get(position));
            }
            rsOp.getConf().setPartitionCols(newPartitionCols);
        }
    }
    // Update the memory monitor info for LLAP.
    MemoryMonitorInfo memoryMonitorInfo = joinDesc.getMemoryMonitorInfo();
    if (memoryMonitorInfo.isLlap()) {
        memoryMonitorInfo.setHashTableInflationFactor(1);
        memoryMonitorInfo.setMemoryOverSubscriptionFactor(0);
    }
    return true;
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) OpTraits(org.apache.hadoop.hive.ql.plan.OpTraits) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MemoryMonitorInfo(org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) List(java.util.List) ArrayList(java.util.ArrayList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
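
In contrast to the reset in Example 1, the bucket map join path keeps bucketing information alive: the original join's bucket column names are reused, the bucket count comes from the TezBucketJoinProcCtx, and the big-table alias is mapped to that count so the small-table side can be partitioned to match. A condensed restatement of that step, using only names that appear in the snippet above:

// Bucket map join keeps the join's bucket columns; the bucket count is the
// one negotiated in the Tez bucket-join context. Sort columns are dropped.
opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(),
    tezBucketJoinProcCtx.getNumBuckets(), null,
    joinOp.getOpTraits().getNumReduceSinks());
mapJoinOp.setOpTraits(opTraits);
// Record how many buckets the big table has so the hash tables built from
// the small tables can be routed to the matching bucket.
Map<String, Integer> bigTableBucketNumMapping = new HashMap<String, Integer>();
bigTableBucketNumMapping.put(joinDesc.getBigTableAlias(), tezBucketJoinProcCtx.getNumBuckets());
joinDesc.setBigTableBucketNumMapping(bigTableBucketNumMapping);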

Example 3 with OpTraits

use of org.apache.hadoop.hive.ql.plan.OpTraits in project hive by apache.

the class ConvertJoinMapJoin method checkConvertJoinSMBJoin.

/*
   * This method tries to convert a join to an SMB. This is done based on
   * traits. If the sorted-by columns are the same as the join columns, then we
   * can convert the join to an SMB. Otherwise retain the bucket map join as it
   * is still more efficient than a regular join.
   */
private boolean checkConvertJoinSMBJoin(JoinOperator joinOp, OptimizeTezProcContext context, int bigTablePosition, TezBucketJoinProcCtx tezBucketJoinProcCtx) throws SemanticException {
    ReduceSinkOperator bigTableRS = (ReduceSinkOperator) joinOp.getParentOperators().get(bigTablePosition);
    int numBuckets = bigTableRS.getParentOperators().get(0).getOpTraits().getNumBuckets();
    int size = -1;
    boolean shouldCheckExternalTables = context.conf.getBoolVar(HiveConf.ConfVars.HIVE_DISABLE_UNSAFE_EXTERNALTABLE_OPERATIONS);
    StringBuilder sb = new StringBuilder();
    for (Operator<?> parentOp : joinOp.getParentOperators()) {
        if (shouldCheckExternalTables && hasExternalTableAncestor(parentOp, sb)) {
            LOG.debug("External table {} found in join - disabling SMB join.", sb.toString());
            return false;
        }
        // each side better have 0 or more RS. if either side is unbalanced, cannot convert.
        // This is a workaround for now. Right fix would be to refactor code in the
        // MapRecordProcessor and ReduceRecordProcessor with respect to the sources.
        Set<ReduceSinkOperator> set = OperatorUtils.findOperatorsUpstream(parentOp.getParentOperators(), ReduceSinkOperator.class);
        if (size < 0) {
            size = set.size();
            continue;
        }
        if (((size > 0) && (set.size() > 0)) || ((size == 0) && (set.size() == 0))) {
            continue;
        } else {
            return false;
        }
    }
    // transformation of the join operation
    for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) {
        if (!(parentOp instanceof ReduceSinkOperator)) {
            // could be mux/demux operators. Currently not supported
            LOG.debug("Found correlation optimizer operators. Cannot convert to SMB at this time.");
            return false;
        }
        ReduceSinkOperator rsOp = (ReduceSinkOperator) parentOp;
        List<ExprNodeDesc> keyCols = rsOp.getConf().getKeyCols();
        // For SMB, the key column(s) in RS should be the same as bucket column(s) and sort column(s)
        List<String> sortCols = rsOp.getOpTraits().getSortCols().get(0);
        List<String> bucketCols = rsOp.getOpTraits().getBucketColNames().get(0);
        if (sortCols.size() != keyCols.size() || bucketCols.size() != keyCols.size()) {
            return false;
        }
        // Check columns.
        for (int i = 0; i < sortCols.size(); i++) {
            ExprNodeDesc sortCol = rsOp.getColumnExprMap().get(sortCols.get(i));
            ExprNodeDesc bucketCol = rsOp.getColumnExprMap().get(bucketCols.get(i));
            if (!(sortCol.isSame(keyCols.get(i)) && bucketCol.isSame(keyCols.get(i)))) {
                return false;
            }
        }
        // check Parent's traits are same as rs
        OpTraits parentTraits = rsOp.getParentOperators().get(0).getOpTraits();
        if (null == parentTraits) {
            // programming error - shouldn't be null
            return false;
        }
        if (!checkColEquality(parentTraits.getSortCols(), rsOp.getOpTraits().getSortCols(), rsOp.getColumnExprMap(), false)) {
            LOG.info("We cannot convert to SMB because the sort column names do not match.");
            return false;
        }
        if (!checkColEquality(parentTraits.getBucketColNames(), rsOp.getOpTraits().getBucketColNames(), rsOp.getColumnExprMap(), true)) {
            LOG.info("We cannot convert to SMB because bucket column names do not match.");
            return false;
        }
    }
    if (numBuckets < 0) {
        numBuckets = bigTableRS.getConf().getNumReducers();
    }
    tezBucketJoinProcCtx.setNumBuckets(numBuckets);
    // Bucketing uses two different versions: version 1 for existing
    // tables and version 2 for new tables. All the inputs to the SMB must be
    // from the same version. This only applies to tables read directly and not
    // to intermediate outputs of joins/groupbys.
    int bucketingVersion = -1;
    for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) {
        // Check if the parent is coming from a table scan, if so, what is the version of it.
        assert parentOp.getParentOperators() != null && parentOp.getParentOperators().size() == 1;
        Operator<?> op = parentOp;
        while (op != null && !(op instanceof TableScanOperator || op instanceof ReduceSinkOperator || op instanceof CommonJoinOperator)) {
            // If op has parents it is guaranteed to be 1.
            List<Operator<?>> parents = op.getParentOperators();
            Preconditions.checkState(parents.size() == 0 || parents.size() == 1);
            op = parents.size() == 1 ? parents.get(0) : null;
        }
        if (op instanceof TableScanOperator) {
            int localVersion = ((TableScanOperator) op).getConf().getTableMetadata().getBucketingVersion();
            if (bucketingVersion == -1) {
                bucketingVersion = localVersion;
            } else if (bucketingVersion != localVersion) {
                // versions don't match, return false.
                LOG.debug("SMB Join can't be performed due to bucketing version mismatch");
                return false;
            }
        }
    }
    LOG.info("We can convert the join to an SMB join.");
    return true;
}
Also used : CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) MuxOperator(org.apache.hadoop.hive.ql.exec.MuxOperator) CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator) TezDummyStoreOperator(org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) OpTraits(org.apache.hadoop.hive.ql.plan.OpTraits) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
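
The per-ReduceSink check in Example 3 can be read as a small predicate: the RS key columns must match, position by position, both the sort columns and the bucket columns recorded in its traits. A standalone sketch of just that predicate, pulled out of the loop for readability; the method name and its static, standalone form are illustrative and not part of the Hive source, and the surrounding trait and bucketing-version checks are omitted:

// SMB precondition on a single ReduceSink: key columns must line up
// one-to-one with both the sort columns and the bucket columns.
private static boolean keysMatchSortAndBucketCols(ReduceSinkOperator rsOp) {
    List<ExprNodeDesc> keyCols = rsOp.getConf().getKeyCols();
    List<String> sortCols = rsOp.getOpTraits().getSortCols().get(0);
    List<String> bucketCols = rsOp.getOpTraits().getBucketColNames().get(0);
    if (sortCols.size() != keyCols.size() || bucketCols.size() != keyCols.size()) {
        return false;
    }
    for (int i = 0; i < sortCols.size(); i++) {
        ExprNodeDesc sortCol = rsOp.getColumnExprMap().get(sortCols.get(i));
        ExprNodeDesc bucketCol = rsOp.getColumnExprMap().get(bucketCols.get(i));
        if (!(sortCol.isSame(keyCols.get(i)) && bucketCol.isSame(keyCols.get(i)))) {
            return false;
        }
    }
    return true;
}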

Example 4 with OpTraits

use of org.apache.hadoop.hive.ql.plan.OpTraits in project hive by apache.

the class SparkMapJoinOptimizer method process.

/**
 * We should ideally not modify the tree we traverse. However,
 * since we need to walk the tree at any time when we modify the operator, we
 * might as well do it here.
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
    OptimizeSparkProcContext context = (OptimizeSparkProcContext) procCtx;
    HiveConf conf = context.getConf();
    JoinOperator joinOp = (JoinOperator) nd;
    if (!conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN)) {
        return null;
    }
    LOG.info("Check if operator " + joinOp + " can be converted to map join");
    long[] mapJoinInfo = getMapJoinConversionInfo(joinOp, context);
    int mapJoinConversionPos = (int) mapJoinInfo[0];
    if (mapJoinConversionPos < 0) {
        return null;
    }
    int numBuckets = -1;
    List<List<String>> bucketColNames = null;
    LOG.info("Convert to non-bucketed map join");
    MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversionPos);
    // when native vectorized map join is enabled, switch the key SerDe to
    // BinarySortableSerDe; changing the SerDe won't hurt correctness
    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED) && conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
        mapJoinOp.getConf().getKeyTblDesc().getProperties().setProperty(serdeConstants.SERIALIZATION_LIB, BinarySortableSerDe.class.getName());
    }
    if (conf.getBoolVar(HiveConf.ConfVars.HIVEOPTBUCKETMAPJOIN)) {
        LOG.info("Check if it can be converted to bucketed map join");
        numBuckets = convertJoinBucketMapJoin(joinOp, mapJoinOp, context, mapJoinConversionPos);
        if (numBuckets > 1) {
            LOG.info("Converted to map join with " + numBuckets + " buckets");
            bucketColNames = joinOp.getOpTraits().getBucketColNames();
            mapJoinInfo[2] /= numBuckets;
        } else {
            LOG.info("Can not convert to bucketed map join");
        }
    }
    // we can set the traits for this join operator
    OpTraits opTraits = new OpTraits(bucketColNames, numBuckets, null, joinOp.getOpTraits().getNumReduceSinks());
    mapJoinOp.setOpTraits(opTraits);
    mapJoinOp.setStatistics(joinOp.getStatistics());
    setNumberOfBucketsOnChildren(mapJoinOp);
    context.getMjOpSizes().put(mapJoinOp, mapJoinInfo[1] + mapJoinInfo[2]);
    return mapJoinOp;
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) BinarySortableSerDe(org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe) OpTraits(org.apache.hadoop.hive.ql.plan.OpTraits) OptimizeSparkProcContext(org.apache.hadoop.hive.ql.parse.spark.OptimizeSparkProcContext) HiveConf(org.apache.hadoop.hive.conf.HiveConf) ArrayList(java.util.ArrayList) List(java.util.List)
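
One detail of Example 4 that is easy to miss is the size bookkeeping at the end: when the bucketed conversion succeeds, the small-table size estimate is divided by the bucket count, on the reasoning that each task then loads only one bucket of the hash table. A sketch of that step; treating mapJoinInfo[1] as the big-table estimate and mapJoinInfo[2] as the small-table estimate is an assumption inferred from how they are used above, not from the getMapJoinConversionInfo contract:

// mapJoinInfo[1] and mapJoinInfo[2] are read here as the big-table and
// small-table size estimates (an assumption based on the usage above).
// With numBuckets > 1, each task only loads 1/numBuckets of the small table.
if (numBuckets > 1) {
    mapJoinInfo[2] /= numBuckets;
}
context.getMjOpSizes().put(mapJoinOp, mapJoinInfo[1] + mapJoinInfo[2]);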

Example 5 with OpTraits

use of org.apache.hadoop.hive.ql.plan.OpTraits in project hive by apache.

the class ConvertJoinMapJoin method convertJoinBucketMapJoin.

private boolean convertJoinBucketMapJoin(JoinOperator joinOp, OptimizeTezProcContext context, int bigTablePosition, TezBucketJoinProcCtx tezBucketJoinProcCtx) throws SemanticException {
    if (!checkConvertJoinBucketMapJoin(joinOp, bigTablePosition, tezBucketJoinProcCtx)) {
        LOG.info("Check conversion to bucket map join failed.");
        return false;
    }
    // In case the join has extra keys other than bucketed columns, partition keys need to be updated
    // on small table(s).
    ReduceSinkOperator bigTableRS = (ReduceSinkOperator) joinOp.getParentOperators().get(bigTablePosition);
    OpTraits opTraits = bigTableRS.getOpTraits();
    List<List<String>> listBucketCols = opTraits.getBucketColNames();
    ArrayList<ExprNodeDesc> bigTablePartitionCols = bigTableRS.getConf().getPartitionCols();
    boolean updatePartitionCols = false;
    List<Integer> positions = new ArrayList<>();
    if (listBucketCols.get(0).size() != bigTablePartitionCols.size()) {
        updatePartitionCols = true;
        // Prepare updated partition columns for small table(s).
        // Get the positions of bucketed columns
        int i = 0;
        Map<String, ExprNodeDesc> colExprMap = bigTableRS.getColumnExprMap();
        for (ExprNodeDesc bigTableExpr : bigTablePartitionCols) {
            // It is guaranteed there is only 1 list within listBucketCols.
            for (String colName : listBucketCols.get(0)) {
                if (colExprMap.get(colName).isSame(bigTableExpr)) {
                    positions.add(i++);
                }
            }
        }
    }
    MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, bigTablePosition, true);
    if (mapJoinOp == null) {
        LOG.debug("Conversion to bucket map join failed.");
        return false;
    }
    MapJoinDesc joinDesc = mapJoinOp.getConf();
    joinDesc.setBucketMapJoin(true);
    // we can set the traits for this join operator
    opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), tezBucketJoinProcCtx.getNumBuckets(), null, joinOp.getOpTraits().getNumReduceSinks());
    mapJoinOp.setOpTraits(opTraits);
    mapJoinOp.setStatistics(joinOp.getStatistics());
    setNumberOfBucketsOnChildren(mapJoinOp);
    // Once the conversion is done, we can set the partitioner to bucket cols on the small table
    Map<String, Integer> bigTableBucketNumMapping = new HashMap<String, Integer>();
    bigTableBucketNumMapping.put(joinDesc.getBigTableAlias(), tezBucketJoinProcCtx.getNumBuckets());
    joinDesc.setBigTableBucketNumMapping(bigTableBucketNumMapping);
    // Update the partition columns in small table to ensure correct routing of hash tables.
    if (updatePartitionCols) {
        // on the small table side.
        for (Operator<?> op : mapJoinOp.getParentOperators()) {
            if (!(op instanceof ReduceSinkOperator)) {
                continue;
            }
            ReduceSinkOperator rsOp = (ReduceSinkOperator) op;
            ArrayList<ExprNodeDesc> newPartitionCols = new ArrayList<>();
            ArrayList<ExprNodeDesc> partitionCols = rsOp.getConf().getPartitionCols();
            for (Integer position : positions) {
                newPartitionCols.add(partitionCols.get(position));
            }
            rsOp.getConf().setPartitionCols(newPartitionCols);
        }
    }
    // Update the memory monitor info for LLAP.
    MemoryMonitorInfo memoryMonitorInfo = joinDesc.getMemoryMonitorInfo();
    if (memoryMonitorInfo.isLlap()) {
        memoryMonitorInfo.setHashTableInflationFactor(1);
        memoryMonitorInfo.setMemoryOverSubscriptionFactor(0);
    }
    return true;
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) OpTraits(org.apache.hadoop.hive.ql.plan.OpTraits) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MemoryMonitorInfo(org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) List(java.util.List) ArrayList(java.util.ArrayList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
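
The fix-up near the end of Example 5 (and Example 2) is where the earlier positions list pays off: every small-table ReduceSink feeding the map join keeps only the partition expressions at the recorded positions, so hash-table rows are routed by the bucketed columns alone. Restated on its own, using only the variables from the snippet above:

// Rewrite the partition columns of each small-table ReduceSink so they
// contain only the expressions at the positions recorded from the big table.
for (Operator<?> op : mapJoinOp.getParentOperators()) {
    if (!(op instanceof ReduceSinkOperator)) {
        continue;
    }
    ReduceSinkOperator rsOp = (ReduceSinkOperator) op;
    ArrayList<ExprNodeDesc> partitionCols = rsOp.getConf().getPartitionCols();
    ArrayList<ExprNodeDesc> newPartitionCols = new ArrayList<>();
    for (Integer position : positions) {
        newPartitionCols.add(partitionCols.get(position));
    }
    rsOp.getConf().setPartitionCols(newPartitionCols);
}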

Aggregations

MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 9 uses
OpTraits (org.apache.hadoop.hive.ql.plan.OpTraits): 9 uses
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 7 uses
ArrayList (java.util.ArrayList): 5 uses
MapJoinDesc (org.apache.hadoop.hive.ql.plan.MapJoinDesc): 5 uses
List (java.util.List): 4 uses
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 4 uses
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 4 uses
CommonJoinOperator (org.apache.hadoop.hive.ql.exec.CommonJoinOperator): 3 uses
CommonMergeJoinOperator (org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator): 3 uses
MemoryMonitorInfo (org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo): 3 uses
Operator (org.apache.hadoop.hive.ql.exec.Operator): 3 uses
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 3 uses
HashMap (java.util.HashMap): 2 uses
AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator): 2 uses
DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator): 2 uses
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 2 uses
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 2 uses
MuxOperator (org.apache.hadoop.hive.ql.exec.MuxOperator): 2 uses
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 2 uses