
Example 1 with MemoryMonitorInfo

Use of org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo in project hive by apache.

From the class ConvertJoinMapJoin, method getMemoryMonitorInfo:

@VisibleForTesting
public MemoryMonitorInfo getMemoryMonitorInfo(final long maxSize, final HiveConf conf, LlapClusterStateForCompile llapInfo) {
    final double overSubscriptionFactor = conf.getFloatVar(ConfVars.LLAP_MAPJOIN_MEMORY_OVERSUBSCRIBE_FACTOR);
    final int maxSlotsPerQuery = conf.getIntVar(ConfVars.LLAP_MEMORY_OVERSUBSCRIPTION_MAX_EXECUTORS_PER_QUERY);
    final long memoryCheckInterval = conf.getLongVar(ConfVars.LLAP_MAPJOIN_MEMORY_MONITOR_CHECK_INTERVAL);
    final float inflationFactor = conf.getFloatVar(ConfVars.HIVE_HASH_TABLE_INFLATION_FACTOR);
    final MemoryMonitorInfo memoryMonitorInfo;
    if (llapInfo != null) {
        final int executorsPerNode;
        if (!llapInfo.hasClusterInfo()) {
            LOG.warn("LLAP cluster information not available. Falling back to getting #executors from hiveconf..");
            executorsPerNode = conf.getIntVar(ConfVars.LLAP_DAEMON_NUM_EXECUTORS);
        } else {
            final int numExecutorsPerNodeFromCluster = llapInfo.getNumExecutorsPerNode();
            if (numExecutorsPerNodeFromCluster == -1) {
                LOG.warn("Cannot determine executor count from LLAP cluster information. Falling back to getting #executors" + " from hiveconf..");
                executorsPerNode = conf.getIntVar(ConfVars.LLAP_DAEMON_NUM_EXECUTORS);
            } else {
                executorsPerNode = numExecutorsPerNodeFromCluster;
            }
        }
        // bounded by max executors
        final int slotsPerQuery = Math.min(maxSlotsPerQuery, executorsPerNode);
        final long llapMaxSize = (long) (maxSize + (maxSize * overSubscriptionFactor * slotsPerQuery));
        // prevents under subscription
        final long adjustedMaxSize = Math.max(maxSize, llapMaxSize);
        memoryMonitorInfo = new MemoryMonitorInfo(true, executorsPerNode, maxSlotsPerQuery, overSubscriptionFactor, maxSize, adjustedMaxSize, memoryCheckInterval, inflationFactor);
    } else {
        // for non-LLAP mode most of these are not relevant. Only noConditionalTaskSize is used by shared scan optimizer.
        memoryMonitorInfo = new MemoryMonitorInfo(false, 1, maxSlotsPerQuery, overSubscriptionFactor, maxSize, maxSize, memoryCheckInterval, inflationFactor);
    }
    if (LOG.isInfoEnabled()) {
        LOG.info("Memory monitor info set to : {}", memoryMonitorInfo);
    }
    return memoryMonitorInfo;
}
Also used: MemoryMonitorInfo (org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo), VisibleForTesting (com.google.common.annotations.VisibleForTesting)
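
The interesting part above is the oversubscription arithmetic: in LLAP mode the no-conditional task size is grown by overSubscriptionFactor for each executor slot the query may borrow, capped at maxSlotsPerQuery. A standalone sketch of that calculation, with made-up numbers rather than Hive defaults:

// Standalone sketch (not Hive code) of the oversubscription arithmetic above.
// All numbers are hypothetical, not Hive defaults.
public class OversubscriptionSketch {
    public static void main(String[] args) {
        final long maxSize = 256L * 1024 * 1024;   // no-conditional task size in bytes
        final double overSubscriptionFactor = 0.2; // stands in for LLAP_MAPJOIN_MEMORY_OVERSUBSCRIBE_FACTOR
        final int executorsPerNode = 4;            // stands in for LLAP_DAEMON_NUM_EXECUTORS
        final int maxSlotsPerQuery = 3;            // stands in for LLAP_MEMORY_OVERSUBSCRIPTION_MAX_EXECUTORS_PER_QUERY

        // bounded by max executors, exactly as in getMemoryMonitorInfo
        final int slotsPerQuery = Math.min(maxSlotsPerQuery, executorsPerNode);
        final long llapMaxSize = (long) (maxSize + (maxSize * overSubscriptionFactor * slotsPerQuery));
        // prevents under subscription
        final long adjustedMaxSize = Math.max(maxSize, llapMaxSize);

        // 256 MB * (1 + 0.2 * 3) = roughly 409.6 MB
        System.out.println("adjustedNoConditionalTaskSize = " + adjustedMaxSize + " bytes");
    }
}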

Example 2 with MemoryMonitorInfo

Use of org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo in project hive by apache.

From the class VectorMapJoinFastHashTableLoader, method load:

@Override
public void load(MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
    Map<Integer, String> parentToInput = desc.getParentToInput();
    Map<Integer, Long> parentKeyCounts = desc.getParentKeyCounts();
    MemoryMonitorInfo memoryMonitorInfo = desc.getMemoryMonitorInfo();
    boolean doMemCheck = false;
    long effectiveThreshold = 0;
    if (memoryMonitorInfo != null) {
        effectiveThreshold = memoryMonitorInfo.getEffectiveThreshold(desc.getMaxMemoryAvailable());
        // Flip the flag at runtime in case we are running outside of LLAP
        if (!LlapDaemonInfo.INSTANCE.isLlap()) {
            memoryMonitorInfo.setLlap(false);
        }
        if (memoryMonitorInfo.doMemoryMonitoring()) {
            doMemCheck = true;
            LOG.info("Memory monitoring for hash table loader enabled. {}", memoryMonitorInfo);
        }
    }
    if (!doMemCheck) {
        LOG.info("Not doing hash table memory monitoring. {}", memoryMonitorInfo);
    }
    for (int pos = 0; pos < mapJoinTables.length; pos++) {
        if (pos == desc.getPosBigTable()) {
            continue;
        }
        String inputName = parentToInput.get(pos);
        LogicalInput input = tezContext.getInput(inputName);
        try {
            input.start();
            tezContext.getTezProcessorContext().waitForAnyInputReady(Collections.<Input>singletonList(input));
        } catch (Exception e) {
            throw new HiveException(e);
        }
        try {
            KeyValueReader kvReader = (KeyValueReader) input.getReader();
            Long keyCountObj = parentKeyCounts.get(pos);
            long estKeyCount = (keyCountObj == null) ? -1 : keyCountObj;
            long inputRecords = -1;
            try {
                // TODO : Need to use class instead of string.
                // https://issues.apache.org/jira/browse/HIVE-23981
                inputRecords = ((AbstractLogicalInput) input).getContext().getCounters().findCounter("org.apache.tez.common.counters.TaskCounter", "APPROXIMATE_INPUT_RECORDS").getValue();
            } catch (Exception e) {
                LOG.debug("Failed to get value for counter APPROXIMATE_INPUT_RECORDS", e);
            }
            long keyCount = Math.max(estKeyCount, inputRecords);
            initHTLoadingService(keyCount);
            VectorMapJoinFastTableContainer tableContainer = new VectorMapJoinFastTableContainer(desc, hconf, keyCount, numLoadThreads);
            LOG.info("Loading hash table for input: {} cacheKey: {} tableContainer: {} smallTablePos: {} " + "estKeyCount : {} keyCount : {}", inputName, cacheKey, tableContainer.getClass().getSimpleName(), pos, estKeyCount, keyCount);
            // No SerDes here.
            tableContainer.setSerde(null, null);
            // Submit parallel loading Threads
            submitQueueDrainThreads(tableContainer);
            long receivedEntries = 0;
            long startTime = System.currentTimeMillis();
            while (kvReader.next()) {
                BytesWritable currentKey = (BytesWritable) kvReader.getCurrentKey();
                BytesWritable currentValue = (BytesWritable) kvReader.getCurrentValue();
                long hashCode = tableContainer.getHashCode(currentKey);
                // numLoadThreads divisor must be a power of 2!
                int partitionId = (int) ((numLoadThreads - 1) & hashCode);
                // call getBytes as copy is called later
                HashTableElement h = new HashTableElement(hashCode, currentValue.copyBytes(), currentKey.copyBytes());
                if (elementBatches[partitionId].addElement(h)) {
                    loadBatchQueues[partitionId].add(elementBatches[partitionId]);
                    elementBatches[partitionId] = batchPool.take();
                }
                receivedEntries++;
                if (doMemCheck && (receivedEntries % memoryMonitorInfo.getMemoryCheckInterval() == 0)) {
                    final long estMemUsage = tableContainer.getEstimatedMemorySize();
                    if (estMemUsage > effectiveThreshold) {
                        String msg = "Hash table loading exceeded memory limits for input: " + inputName + " numEntries: " + receivedEntries + " estimatedMemoryUsage: " + estMemUsage + " effectiveThreshold: " + effectiveThreshold + " memoryMonitorInfo: " + memoryMonitorInfo;
                        LOG.error(msg);
                        throw new MapJoinMemoryExhaustionError(msg);
                    } else {
                        LOG.info("Checking hash table loader memory usage for input: {} numEntries: {} " + "estimatedMemoryUsage: {} effectiveThreshold: {}", inputName, receivedEntries, estMemUsage, effectiveThreshold);
                    }
                }
            }
            LOG.info("Finished loading the queue for input: {} waiting {} minutes for TPool shutdown", inputName, 2);
            addQueueDoneSentinel();
            loadExecService.shutdown();
            loadExecService.awaitTermination(2, TimeUnit.MINUTES);
            batchPool.clear();
            LOG.info("Total received entries: {} Threads {} HT entries: {}", receivedEntries, numLoadThreads, totalEntries.get());
            long delta = System.currentTimeMillis() - startTime;
            htLoadCounter.increment(delta);
            tableContainer.seal();
            mapJoinTables[pos] = tableContainer;
            if (doMemCheck) {
                LOG.info("Finished loading hash table for input: {} cacheKey: {} numEntries: {} " + "estimatedMemoryUsage: {} Load Time : {} ", inputName, cacheKey, receivedEntries, tableContainer.getEstimatedMemorySize(), delta);
            } else {
                LOG.info("Finished loading hash table for input: {} cacheKey: {} numEntries: {} Load Time : {} ", inputName, cacheKey, receivedEntries, delta);
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new HiveException(e);
        } catch (IOException e) {
            throw new HiveException(e);
        } catch (SerDeException e) {
            throw new HiveException(e);
        } catch (Exception e) {
            throw new HiveException(e);
        }
    }
}
Also used: HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), AbstractLogicalInput (org.apache.tez.runtime.api.AbstractLogicalInput), KeyValueReader (org.apache.tez.runtime.library.api.KeyValueReader), MapJoinMemoryExhaustionError (org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError), BytesWritable (org.apache.hadoop.io.BytesWritable), IOException (java.io.IOException), SerDeException (org.apache.hadoop.hive.serde2.SerDeException), MemoryMonitorInfo (org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo), LogicalInput (org.apache.tez.runtime.api.LogicalInput)
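
Two details of the load loop above are easy to miss: keys are routed to loader threads with a bit mask, which only works because numLoadThreads is a power of two, and memory is only re-checked every memoryCheckInterval entries. A standalone sketch of both, with illustrative names and numbers (the real loader throws MapJoinMemoryExhaustionError instead of IllegalStateException):

// Standalone sketch (not Hive code) of the partitioning mask and the periodic memory check.
import java.util.Arrays;
import java.util.concurrent.ThreadLocalRandom;

public class LoadLoopSketch {
    public static void main(String[] args) {
        final int numLoadThreads = 4;                        // must be a power of 2 for the mask below
        final long memoryCheckInterval = 100_000L;           // hypothetical check interval, in entries
        final long effectiveThreshold = 512L * 1024 * 1024;  // hypothetical memory threshold, in bytes

        final int[] perPartition = new int[numLoadThreads];
        long receivedEntries = 0;
        long estMemUsage = 0;
        for (int i = 0; i < 1_000_000; i++) {
            long hashCode = ThreadLocalRandom.current().nextLong();
            // (numLoadThreads - 1) only works as a bit mask when numLoadThreads is a power of 2
            int partitionId = (int) ((numLoadThreads - 1) & hashCode);
            perPartition[partitionId]++;
            estMemUsage += 64;  // pretend each entry costs ~64 bytes
            receivedEntries++;
            if (receivedEntries % memoryCheckInterval == 0 && estMemUsage > effectiveThreshold) {
                // the real loader throws MapJoinMemoryExhaustionError here
                throw new IllegalStateException("hash table exceeded " + effectiveThreshold + " bytes");
            }
        }
        System.out.println("entries per loader thread: " + Arrays.toString(perPartition));
    }
}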

Example 3 with MemoryMonitorInfo

Use of org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo in project hive by apache.

From the class ConvertJoinMapJoin, method process:

/*
 * (non-Javadoc) We should ideally not modify the tree we traverse. However,
 * since we need to walk the tree at any time when we modify the operator, we
 * might as well do it here.
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
    OptimizeTezProcContext context = (OptimizeTezProcContext) procCtx;
    hashTableLoadFactor = context.conf.getFloatVar(ConfVars.HIVEHASHTABLELOADFACTOR);
    fastHashTableAvailable = context.conf.getBoolVar(ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED);
    JoinOperator joinOp = (JoinOperator) nd;
    // adjust noconditional task size threshold for LLAP
    LlapClusterStateForCompile llapInfo = null;
    if ("llap".equalsIgnoreCase(context.conf.getVar(ConfVars.HIVE_EXECUTION_MODE))) {
        llapInfo = LlapClusterStateForCompile.getClusterInfo(context.conf);
        llapInfo.initClusterInfo();
    }
    MemoryMonitorInfo memoryMonitorInfo = getMemoryMonitorInfo(context.conf, llapInfo);
    joinOp.getConf().setMemoryMonitorInfo(memoryMonitorInfo);
    maxJoinMemory = memoryMonitorInfo.getAdjustedNoConditionalTaskSize();
    LOG.info("maxJoinMemory: {}", maxJoinMemory);
    hashMapDataStructure = HashMapDataStructureType.of(joinOp.getConf());
    TezBucketJoinProcCtx tezBucketJoinProcCtx = new TezBucketJoinProcCtx(context.conf);
    boolean hiveConvertJoin = context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN) && !context.parseContext.getDisableMapJoin();
    if (!hiveConvertJoin) {
        // we are just converting to a common merge join operator. The shuffle
        // join in map-reduce case.
        Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx);
        if (retval == null) {
            return retval;
        } else {
            fallbackToReduceSideJoin(joinOp, context);
            return null;
        }
    }
    // if we have traits, and table info is present in the traits, we know the
    // exact number of buckets. Else choose the largest number of estimated
    // reducers from the parent operators.
    int numBuckets = -1;
    if (context.conf.getBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)) {
        numBuckets = estimateNumBuckets(joinOp, true);
    } else {
        numBuckets = 1;
    }
    LOG.info("Estimated number of buckets " + numBuckets);
    MapJoinConversion mapJoinConversion = getMapJoinConversion(joinOp, context, numBuckets, false, maxJoinMemory, true);
    if (mapJoinConversion == null) {
        Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx);
        if (retval == null) {
            return retval;
        } else {
            // only case is full outer join with SMB enabled which is not possible. Convert to regular
            // join.
            fallbackToReduceSideJoin(joinOp, context);
            return null;
        }
    }
    if (numBuckets > 1) {
        if (context.conf.getBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)) {
            // Check if we are in LLAP, if so it needs to be determined if we should use BMJ or DPHJ
            if (llapInfo != null) {
                if (selectJoinForLlap(context, joinOp, tezBucketJoinProcCtx, llapInfo, mapJoinConversion, numBuckets)) {
                    return null;
                }
            } else if (convertJoinBucketMapJoin(joinOp, context, mapJoinConversion, tezBucketJoinProcCtx)) {
                return null;
            }
        }
    }
    // check if we can convert to map join no bucket scaling.
    LOG.info("Convert to non-bucketed map join");
    if (numBuckets != 1) {
        mapJoinConversion = getMapJoinConversion(joinOp, context, 1, false, maxJoinMemory, true);
    }
    if (mapJoinConversion == null) {
        // we are just converting to a common merge join operator. The shuffle
        // join in map-reduce case.
        fallbackToReduceSideJoin(joinOp, context);
        return null;
    }
    // Currently, this is a MJ path and we don't support FULL OUTER MapJoin yet.
    if (mapJoinConversion.getIsFullOuterJoin() && !mapJoinConversion.getIsFullOuterEnabledForMapJoin()) {
        fallbackToReduceSideJoin(joinOp, context);
        return null;
    }
    MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversion, true);
    if (mapJoinOp == null) {
        fallbackToReduceSideJoin(joinOp, context);
        return null;
    }
    // map join operator by default has no bucket cols and num of reduce sinks
    // reduced by 1
    mapJoinOp.setOpTraits(new OpTraits(null, -1, null, joinOp.getOpTraits().getNumReduceSinks()));
    preserveOperatorInfos(mapJoinOp, joinOp, context);
    // propagate this change till the next RS
    for (Operator<? extends OperatorDesc> childOp : mapJoinOp.getChildOperators()) {
        setAllChildrenTraits(childOp, mapJoinOp.getOpTraits());
    }
    return null;
}
Also used: CommonMergeJoinOperator (org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator), MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator), CommonJoinOperator (org.apache.hadoop.hive.ql.exec.CommonJoinOperator), JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator), MemoryMonitorInfo (org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo), LlapClusterStateForCompile (org.apache.hadoop.hive.ql.optimizer.physical.LlapClusterStateForCompile), OpTraits (org.apache.hadoop.hive.ql.plan.OpTraits), OptimizeTezProcContext (org.apache.hadoop.hive.ql.parse.OptimizeTezProcContext)
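
The MemoryMonitorInfo built at compile time here is the same object the hash table loader in Example 2 consults at run time. A small sketch of that hand-off, assuming only the constructor and accessors visible in these examples and using hypothetical values:

// Standalone sketch of the compile-time / run-time hand-off; all numbers are made up.
import org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo;

public class MemoryMonitorInfoWiringSketch {
    public static void main(String[] args) {
        MemoryMonitorInfo info = new MemoryMonitorInfo(
                true,                  // isLlap
                4,                     // executorsPerNode
                3,                     // maxSlotsPerQuery
                0.2,                   // overSubscriptionFactor
                256L * 1024 * 1024,    // noConditionalTaskSize
                409L * 1024 * 1024,    // adjustedNoConditionalTaskSize
                100_000L,              // memoryCheckInterval
                2.0f);                 // hashTableInflationFactor

        // process() reads the map-join memory budget from the adjusted size.
        long maxJoinMemory = info.getAdjustedNoConditionalTaskSize();

        // VectorMapJoinFastHashTableLoader.load() later reads the monitoring knobs.
        if (info.doMemoryMonitoring()) {
            System.out.println("maxJoinMemory=" + maxJoinMemory
                    + " bytes, memory check every " + info.getMemoryCheckInterval() + " entries");
        }
    }
}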

Example 4 with MemoryMonitorInfo

Use of org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo in project hive by apache.

From the class ConvertJoinMapJoin, method convertJoinBucketMapJoin:

private boolean convertJoinBucketMapJoin(JoinOperator joinOp, OptimizeTezProcContext context, MapJoinConversion mapJoinConversion, TezBucketJoinProcCtx tezBucketJoinProcCtx) throws SemanticException {
    if (mapJoinConversion.getIsFullOuterJoin() && !mapJoinConversion.getIsFullOuterEnabledForMapJoin()) {
        return false;
    }
    final int bigTablePosition = mapJoinConversion.getBigTablePos();
    if (!checkConvertJoinBucketMapJoin(joinOp, bigTablePosition, tezBucketJoinProcCtx)) {
        LOG.info("Check conversion to bucket map join failed.");
        return false;
    }
    // In case the join has extra keys other than bucketed columns, partition keys need to be updated
    // on small table(s).
    ReduceSinkOperator bigTableRS = (ReduceSinkOperator) joinOp.getParentOperators().get(bigTablePosition);
    OpTraits opTraits = bigTableRS.getOpTraits();
    List<List<String>> listBucketCols = opTraits.getBucketColNames();
    List<ExprNodeDesc> bigTablePartitionCols = bigTableRS.getConf().getPartitionCols();
    boolean updatePartitionCols = false;
    List<Integer> positions = new ArrayList<>();
    if (listBucketCols.get(0).size() != bigTablePartitionCols.size()) {
        updatePartitionCols = true;
        // Prepare updated partition columns for small table(s).
        // Get the positions of bucketed columns
        int i = 0;
        Map<String, ExprNodeDesc> colExprMap = bigTableRS.getColumnExprMap();
        for (ExprNodeDesc bigTableExpr : bigTablePartitionCols) {
            // It is guaranteed there is only 1 list within listBucketCols.
            for (String colName : listBucketCols.get(0)) {
                if (colExprMap.get(colName).isSame(bigTableExpr)) {
                    positions.add(i++);
                }
            }
        }
    }
    MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversion, true);
    if (mapJoinOp == null) {
        LOG.debug("Conversion to bucket map join failed.");
        return false;
    }
    MapJoinDesc joinDesc = mapJoinOp.getConf();
    joinDesc.setBucketMapJoin(true);
    // we can set the traits for this join operator
    opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), tezBucketJoinProcCtx.getNumBuckets(), null, joinOp.getOpTraits().getNumReduceSinks());
    mapJoinOp.setOpTraits(opTraits);
    preserveOperatorInfos(mapJoinOp, joinOp, context);
    setNumberOfBucketsOnChildren(mapJoinOp);
    // Once the conversion is done, we can set the partitioner to bucket cols on the small table
    Map<String, Integer> bigTableBucketNumMapping = new HashMap<String, Integer>();
    bigTableBucketNumMapping.put(joinDesc.getBigTableAlias(), tezBucketJoinProcCtx.getNumBuckets());
    joinDesc.setBigTableBucketNumMapping(bigTableBucketNumMapping);
    // Update the partition columns in small table to ensure correct routing of hash tables.
    if (updatePartitionCols) {
        // on the small table side.
        for (Operator<?> op : mapJoinOp.getParentOperators()) {
            if (!(op instanceof ReduceSinkOperator)) {
                continue;
            }
            ReduceSinkOperator rsOp = (ReduceSinkOperator) op;
            List<ExprNodeDesc> newPartitionCols = new ArrayList<>();
            List<ExprNodeDesc> partitionCols = rsOp.getConf().getPartitionCols();
            for (Integer position : positions) {
                newPartitionCols.add(partitionCols.get(position));
            }
            rsOp.getConf().setPartitionCols(newPartitionCols);
        }
    }
    // Update the memory monitor info for LLAP.
    MemoryMonitorInfo memoryMonitorInfo = joinDesc.getMemoryMonitorInfo();
    if (memoryMonitorInfo.isLlap()) {
        memoryMonitorInfo.setHashTableInflationFactor(1);
        memoryMonitorInfo.setMemoryOverSubscriptionFactor(0);
    }
    return true;
}
Also used: MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator), MapJoinDesc (org.apache.hadoop.hive.ql.plan.MapJoinDesc), OpTraits (org.apache.hadoop.hive.ql.plan.OpTraits), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), MemoryMonitorInfo (org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo), ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), List (java.util.List), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
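
The tail of the method projects each small-table ReduceSink's partition columns down to the recorded positions of the bucketing keys, so hash tables are routed consistently with the big table's buckets. A standalone sketch of that projection step, with strings standing in for ExprNodeDesc and made-up column names:

// Standalone sketch (not Hive code) of the index projection at the end of the method.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class PartitionColProjectionSketch {
    public static void main(String[] args) {
        // Hypothetical small-table ReduceSink partition columns.
        List<String> partitionCols = Arrays.asList("bucket_key", "extra_join_key");
        // Positions recorded while matching the big table's partition columns against its bucketed columns.
        List<Integer> positions = Arrays.asList(0);

        // Same projection the loop above ends with: rsOp.getConf().setPartitionCols(newPartitionCols).
        List<String> newPartitionCols = new ArrayList<>();
        for (Integer position : positions) {
            newPartitionCols.add(partitionCols.get(position));
        }
        System.out.println(newPartitionCols);  // [bucket_key]: hash tables are routed by the bucketing key only
    }
}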

Example 5 with MemoryMonitorInfo

Use of org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo in project hive by apache.

From the class ConvertJoinMapJoin, method convertJoinBucketMapJoin (older variant that takes the big table position directly instead of a MapJoinConversion):

private boolean convertJoinBucketMapJoin(JoinOperator joinOp, OptimizeTezProcContext context, int bigTablePosition, TezBucketJoinProcCtx tezBucketJoinProcCtx) throws SemanticException {
    if (!checkConvertJoinBucketMapJoin(joinOp, bigTablePosition, tezBucketJoinProcCtx)) {
        LOG.info("Check conversion to bucket map join failed.");
        return false;
    }
    // In case the join has extra keys other than bucketed columns, partition keys need to be updated
    // on small table(s).
    ReduceSinkOperator bigTableRS = (ReduceSinkOperator) joinOp.getParentOperators().get(bigTablePosition);
    OpTraits opTraits = bigTableRS.getOpTraits();
    List<List<String>> listBucketCols = opTraits.getBucketColNames();
    ArrayList<ExprNodeDesc> bigTablePartitionCols = bigTableRS.getConf().getPartitionCols();
    boolean updatePartitionCols = false;
    List<Integer> positions = new ArrayList<>();
    if (listBucketCols.get(0).size() != bigTablePartitionCols.size()) {
        updatePartitionCols = true;
        // Prepare updated partition columns for small table(s).
        // Get the positions of bucketed columns
        int i = 0;
        Map<String, ExprNodeDesc> colExprMap = bigTableRS.getColumnExprMap();
        for (ExprNodeDesc bigTableExpr : bigTablePartitionCols) {
            // It is guaranteed there is only 1 list within listBucketCols.
            for (String colName : listBucketCols.get(0)) {
                if (colExprMap.get(colName).isSame(bigTableExpr)) {
                    positions.add(i++);
                }
            }
        }
    }
    MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, bigTablePosition, true);
    if (mapJoinOp == null) {
        LOG.debug("Conversion to bucket map join failed.");
        return false;
    }
    MapJoinDesc joinDesc = mapJoinOp.getConf();
    joinDesc.setBucketMapJoin(true);
    // we can set the traits for this join operator
    opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), tezBucketJoinProcCtx.getNumBuckets(), null, joinOp.getOpTraits().getNumReduceSinks());
    mapJoinOp.setOpTraits(opTraits);
    mapJoinOp.setStatistics(joinOp.getStatistics());
    setNumberOfBucketsOnChildren(mapJoinOp);
    // Once the conversion is done, we can set the partitioner to bucket cols on the small table
    Map<String, Integer> bigTableBucketNumMapping = new HashMap<String, Integer>();
    bigTableBucketNumMapping.put(joinDesc.getBigTableAlias(), tezBucketJoinProcCtx.getNumBuckets());
    joinDesc.setBigTableBucketNumMapping(bigTableBucketNumMapping);
    // Update the partition columns in small table to ensure correct routing of hash tables.
    if (updatePartitionCols) {
        // on the small table side.
        for (Operator<?> op : mapJoinOp.getParentOperators()) {
            if (!(op instanceof ReduceSinkOperator)) {
                continue;
            }
            ReduceSinkOperator rsOp = (ReduceSinkOperator) op;
            ArrayList<ExprNodeDesc> newPartitionCols = new ArrayList<>();
            ArrayList<ExprNodeDesc> partitionCols = rsOp.getConf().getPartitionCols();
            for (Integer position : positions) {
                newPartitionCols.add(partitionCols.get(position));
            }
            rsOp.getConf().setPartitionCols(newPartitionCols);
        }
    }
    // Update the memory monitor info for LLAP.
    MemoryMonitorInfo memoryMonitorInfo = joinDesc.getMemoryMonitorInfo();
    if (memoryMonitorInfo.isLlap()) {
        memoryMonitorInfo.setHashTableInflationFactor(1);
        memoryMonitorInfo.setMemoryOverSubscriptionFactor(0);
    }
    return true;
}
Also used: MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator), MapJoinDesc (org.apache.hadoop.hive.ql.plan.MapJoinDesc), OpTraits (org.apache.hadoop.hive.ql.plan.OpTraits), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), MemoryMonitorInfo (org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo), ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), List (java.util.List), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
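
Both variants end by relaxing the LLAP memory monitor once a bucket map join has been chosen: the hash table inflation factor is reset to 1 and the oversubscription factor to 0, dropping the headroom that getMemoryMonitorInfo added for the unbucketed case. A small sketch of that adjustment, assuming only the constructor and setters visible in these examples and using hypothetical values:

// Standalone sketch of the final LLAP adjustment; all numbers are made up.
import org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo;

public class BucketMapJoinMonitorSketch {
    public static void main(String[] args) {
        MemoryMonitorInfo info = new MemoryMonitorInfo(
                true, 4, 3, 0.2,
                256L * 1024 * 1024, 409L * 1024 * 1024,
                100_000L, 2.0f);

        // Same adjustment convertJoinBucketMapJoin applies once the bucket map join is chosen:
        // no inflation headroom and no executor oversubscription for the per-bucket hash tables.
        if (info.isLlap()) {
            info.setHashTableInflationFactor(1);
            info.setMemoryOverSubscriptionFactor(0);
        }
        // MemoryMonitorInfo is logged via its toString() elsewhere in these examples.
        System.out.println(info);
    }
}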

Aggregations

MemoryMonitorInfo (org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo): 8
ArrayList (java.util.ArrayList): 3
HashMap (java.util.HashMap): 3
List (java.util.List): 3
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 3
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 3
OpTraits (org.apache.hadoop.hive.ql.plan.OpTraits): 3
VisibleForTesting (com.google.common.annotations.VisibleForTesting): 2
IOException (java.io.IOException): 2
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 2
MapJoinMemoryExhaustionError (org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError): 2
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 2
MapJoinDesc (org.apache.hadoop.hive.ql.plan.MapJoinDesc): 2
AbstractLogicalInput (org.apache.tez.runtime.api.AbstractLogicalInput): 2
LogicalInput (org.apache.tez.runtime.api.LogicalInput): 2
KeyValueReader (org.apache.tez.runtime.library.api.KeyValueReader): 2
LinkedHashMap (java.util.LinkedHashMap): 1
Map (java.util.Map): 1
Path (org.apache.hadoop.fs.Path): 1
CommonJoinOperator (org.apache.hadoop.hive.ql.exec.CommonJoinOperator): 1