Use of org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo in project hive by apache.
The class ConvertJoinMapJoin, method getMemoryMonitorInfo.
@VisibleForTesting
public MemoryMonitorInfo getMemoryMonitorInfo(final long maxSize, final HiveConf conf,
    LlapClusterStateForCompile llapInfo) {
  final double overSubscriptionFactor = conf.getFloatVar(ConfVars.LLAP_MAPJOIN_MEMORY_OVERSUBSCRIBE_FACTOR);
  final int maxSlotsPerQuery = conf.getIntVar(ConfVars.LLAP_MEMORY_OVERSUBSCRIPTION_MAX_EXECUTORS_PER_QUERY);
  final long memoryCheckInterval = conf.getLongVar(ConfVars.LLAP_MAPJOIN_MEMORY_MONITOR_CHECK_INTERVAL);
  final float inflationFactor = conf.getFloatVar(ConfVars.HIVE_HASH_TABLE_INFLATION_FACTOR);
  final MemoryMonitorInfo memoryMonitorInfo;
  if (llapInfo != null) {
    final int executorsPerNode;
    if (!llapInfo.hasClusterInfo()) {
      LOG.warn("LLAP cluster information not available. Falling back to getting #executors from hiveconf..");
      executorsPerNode = conf.getIntVar(ConfVars.LLAP_DAEMON_NUM_EXECUTORS);
    } else {
      final int numExecutorsPerNodeFromCluster = llapInfo.getNumExecutorsPerNode();
      if (numExecutorsPerNodeFromCluster == -1) {
        LOG.warn("Cannot determine executor count from LLAP cluster information. Falling back to getting #executors"
            + " from hiveconf..");
        executorsPerNode = conf.getIntVar(ConfVars.LLAP_DAEMON_NUM_EXECUTORS);
      } else {
        executorsPerNode = numExecutorsPerNodeFromCluster;
      }
    }
    // bounded by max executors
    final int slotsPerQuery = Math.min(maxSlotsPerQuery, executorsPerNode);
    final long llapMaxSize = (long) (maxSize + (maxSize * overSubscriptionFactor * slotsPerQuery));
    // prevents under subscription
    final long adjustedMaxSize = Math.max(maxSize, llapMaxSize);
    memoryMonitorInfo = new MemoryMonitorInfo(true, executorsPerNode, maxSlotsPerQuery, overSubscriptionFactor,
        maxSize, adjustedMaxSize, memoryCheckInterval, inflationFactor);
  } else {
    // for non-LLAP mode most of these are not relevant. Only noConditionalTaskSize is used by shared scan optimizer.
    memoryMonitorInfo = new MemoryMonitorInfo(false, 1, maxSlotsPerQuery, overSubscriptionFactor, maxSize,
        maxSize, memoryCheckInterval, inflationFactor);
  }
  if (LOG.isInfoEnabled()) {
    LOG.info("Memory monitor info set to : {}", memoryMonitorInfo);
  }
  return memoryMonitorInfo;
}
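The LLAP sizing above is plain arithmetic: each executor slot that may be oversubscribed adds maxSize * overSubscriptionFactor bytes of headroom, and Math.max clamps the result so it never falls below maxSize. A minimal standalone sketch with illustrative values (all four inputs below are assumptions, not values taken from the snippet):

public class OverSubscriptionSketch {
  public static void main(String[] args) {
    final long maxSize = 100L * 1024 * 1024;   // assumed noconditional task size: 100 MB
    final double overSubscriptionFactor = 0.2; // assumed oversubscribe factor
    final int maxSlotsPerQuery = 3;            // assumed per-query executor cap
    final int executorsPerNode = 4;            // assumed LLAP executors per node

    // bounded by max executors
    final int slotsPerQuery = Math.min(maxSlotsPerQuery, executorsPerNode); // 3
    final long llapMaxSize = (long) (maxSize + (maxSize * overSubscriptionFactor * slotsPerQuery));
    // prevents under subscription
    final long adjustedMaxSize = Math.max(maxSize, llapMaxSize);

    // 100 MB + 100 MB * 0.2 * 3 = 160 MB
    System.out.println("adjustedMaxSize = " + adjustedMaxSize);
  }
}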
Use of org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo in project hive by apache.
The class VectorMapJoinFastHashTableLoader, method load.
@Override
public void load(MapJoinTableContainer[] mapJoinTables,
    MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
  Map<Integer, String> parentToInput = desc.getParentToInput();
  Map<Integer, Long> parentKeyCounts = desc.getParentKeyCounts();
  MemoryMonitorInfo memoryMonitorInfo = desc.getMemoryMonitorInfo();
  boolean doMemCheck = false;
  long effectiveThreshold = 0;
  if (memoryMonitorInfo != null) {
    effectiveThreshold = memoryMonitorInfo.getEffectiveThreshold(desc.getMaxMemoryAvailable());
    // Flip the flag at runtime in case we are running outside of LLAP
    if (!LlapDaemonInfo.INSTANCE.isLlap()) {
      memoryMonitorInfo.setLlap(false);
    }
    if (memoryMonitorInfo.doMemoryMonitoring()) {
      doMemCheck = true;
      LOG.info("Memory monitoring for hash table loader enabled. {}", memoryMonitorInfo);
    }
  }
  if (!doMemCheck) {
    LOG.info("Not doing hash table memory monitoring. {}", memoryMonitorInfo);
  }
  for (int pos = 0; pos < mapJoinTables.length; pos++) {
    if (pos == desc.getPosBigTable()) {
      continue;
    }
    String inputName = parentToInput.get(pos);
    LogicalInput input = tezContext.getInput(inputName);
    try {
      input.start();
      tezContext.getTezProcessorContext().waitForAnyInputReady(Collections.<Input>singletonList(input));
    } catch (Exception e) {
      throw new HiveException(e);
    }
    try {
      KeyValueReader kvReader = (KeyValueReader) input.getReader();
      Long keyCountObj = parentKeyCounts.get(pos);
      long estKeyCount = (keyCountObj == null) ? -1 : keyCountObj;
      long inputRecords = -1;
      try {
        // TODO : Need to use class instead of string.
        // https://issues.apache.org/jira/browse/HIVE-23981
        inputRecords = ((AbstractLogicalInput) input).getContext().getCounters()
            .findCounter("org.apache.tez.common.counters.TaskCounter", "APPROXIMATE_INPUT_RECORDS").getValue();
      } catch (Exception e) {
        LOG.debug("Failed to get value for counter APPROXIMATE_INPUT_RECORDS", e);
      }
      long keyCount = Math.max(estKeyCount, inputRecords);
      initHTLoadingService(keyCount);
      VectorMapJoinFastTableContainer tableContainer =
          new VectorMapJoinFastTableContainer(desc, hconf, keyCount, numLoadThreads);
      LOG.info("Loading hash table for input: {} cacheKey: {} tableContainer: {} smallTablePos: {} "
          + "estKeyCount : {} keyCount : {}", inputName, cacheKey,
          tableContainer.getClass().getSimpleName(), pos, estKeyCount, keyCount);
      // No SerDes here.
      tableContainer.setSerde(null, null);
      // Submit parallel loading Threads
      submitQueueDrainThreads(tableContainer);
      long receivedEntries = 0;
      long startTime = System.currentTimeMillis();
      while (kvReader.next()) {
        BytesWritable currentKey = (BytesWritable) kvReader.getCurrentKey();
        BytesWritable currentValue = (BytesWritable) kvReader.getCurrentValue();
        long hashCode = tableContainer.getHashCode(currentKey);
        // numLoadThreads divisor must be a power of 2!
        int partitionId = (int) ((numLoadThreads - 1) & hashCode);
        // call getBytes as copy is called later
        HashTableElement h = new HashTableElement(hashCode, currentValue.copyBytes(), currentKey.copyBytes());
        if (elementBatches[partitionId].addElement(h)) {
          loadBatchQueues[partitionId].add(elementBatches[partitionId]);
          elementBatches[partitionId] = batchPool.take();
        }
        receivedEntries++;
        if (doMemCheck && (receivedEntries % memoryMonitorInfo.getMemoryCheckInterval() == 0)) {
          final long estMemUsage = tableContainer.getEstimatedMemorySize();
          if (estMemUsage > effectiveThreshold) {
            String msg = "Hash table loading exceeded memory limits for input: " + inputName
                + " numEntries: " + receivedEntries + " estimatedMemoryUsage: " + estMemUsage
                + " effectiveThreshold: " + effectiveThreshold + " memoryMonitorInfo: " + memoryMonitorInfo;
            LOG.error(msg);
            throw new MapJoinMemoryExhaustionError(msg);
          } else {
            LOG.info("Checking hash table loader memory usage for input: {} numEntries: {} "
                + "estimatedMemoryUsage: {} effectiveThreshold: {}", inputName, receivedEntries,
                estMemUsage, effectiveThreshold);
          }
        }
      }
      LOG.info("Finished loading the queue for input: {} waiting {} minutes for TPool shutdown", inputName, 2);
      addQueueDoneSentinel();
      loadExecService.shutdown();
      loadExecService.awaitTermination(2, TimeUnit.MINUTES);
      batchPool.clear();
      LOG.info("Total received entries: {} Threads {} HT entries: {}", receivedEntries, numLoadThreads,
          totalEntries.get());
      long delta = System.currentTimeMillis() - startTime;
      htLoadCounter.increment(delta);
      tableContainer.seal();
      mapJoinTables[pos] = tableContainer;
      if (doMemCheck) {
        LOG.info("Finished loading hash table for input: {} cacheKey: {} numEntries: {} "
            + "estimatedMemoryUsage: {} Load Time : {} ", inputName, cacheKey, receivedEntries,
            tableContainer.getEstimatedMemorySize(), delta);
      } else {
        LOG.info("Finished loading hash table for input: {} cacheKey: {} numEntries: {} Load Time : {} ",
            inputName, cacheKey, receivedEntries, delta);
      }
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new HiveException(e);
    } catch (IOException e) {
      throw new HiveException(e);
    } catch (SerDeException e) {
      throw new HiveException(e);
    } catch (Exception e) {
      throw new HiveException(e);
    }
  }
}
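The partitionId computation in the loop relies on numLoadThreads being a power of two, as the inline comment warns: (numLoadThreads - 1) is then a mask of the low bits, so the result always lands in [0, numLoadThreads), even for negative hash codes. A standalone sketch (the thread count and hash codes are illustrative):

public class PartitionMaskSketch {
  public static void main(String[] args) {
    final int numLoadThreads = 4; // must be a power of two for the mask to be valid
    final long[] hashCodes = { 7L, 8L, -3L, 1234567890123L };
    for (long hashCode : hashCodes) {
      // With numLoadThreads = 4 the mask is 0b11: only the low two bits survive,
      // so every element maps to one of the four loader queues.
      int partitionId = (int) ((numLoadThreads - 1) & hashCode);
      System.out.println(hashCode + " -> partition " + partitionId);
    }
  }
}

Had numLoadThreads been 6, the mask 0b101 could only produce 0, 1, 4 and 5, leaving the drain threads for partitions 2 and 3 permanently idle.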
Use of org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo in project hive by apache.
The class ConvertJoinMapJoin, method process.
@Override
/*
 * (non-Javadoc) we should ideally not modify the tree we traverse. However,
 * since we need to walk the tree at any time when we modify the operator, we
 * might as well do it here.
 */
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
    throws SemanticException {
  OptimizeTezProcContext context = (OptimizeTezProcContext) procCtx;
  hashTableLoadFactor = context.conf.getFloatVar(ConfVars.HIVEHASHTABLELOADFACTOR);
  fastHashTableAvailable = context.conf.getBoolVar(ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED);
  JoinOperator joinOp = (JoinOperator) nd;
  // adjust noconditional task size threshold for LLAP
  LlapClusterStateForCompile llapInfo = null;
  if ("llap".equalsIgnoreCase(context.conf.getVar(ConfVars.HIVE_EXECUTION_MODE))) {
    llapInfo = LlapClusterStateForCompile.getClusterInfo(context.conf);
    llapInfo.initClusterInfo();
  }
  MemoryMonitorInfo memoryMonitorInfo = getMemoryMonitorInfo(context.conf, llapInfo);
  joinOp.getConf().setMemoryMonitorInfo(memoryMonitorInfo);
  maxJoinMemory = memoryMonitorInfo.getAdjustedNoConditionalTaskSize();
  LOG.info("maxJoinMemory: {}", maxJoinMemory);
  hashMapDataStructure = HashMapDataStructureType.of(joinOp.getConf());
  TezBucketJoinProcCtx tezBucketJoinProcCtx = new TezBucketJoinProcCtx(context.conf);
  boolean hiveConvertJoin = context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN)
      && !context.parseContext.getDisableMapJoin();
  if (!hiveConvertJoin) {
    // we are just converting to a common merge join operator. The shuffle
    // join in map-reduce case.
    Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx);
    if (retval == null) {
      return retval;
    } else {
      fallbackToReduceSideJoin(joinOp, context);
      return null;
    }
  }
  // if we have traits, and table info is present in the traits, we know the
  // exact number of buckets. Else choose the largest number of estimated
  // reducers from the parent operators.
  int numBuckets = -1;
  if (context.conf.getBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)) {
    numBuckets = estimateNumBuckets(joinOp, true);
  } else {
    numBuckets = 1;
  }
  LOG.info("Estimated number of buckets " + numBuckets);
  MapJoinConversion mapJoinConversion = getMapJoinConversion(joinOp, context, numBuckets, false, maxJoinMemory, true);
  if (mapJoinConversion == null) {
    Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx);
    if (retval == null) {
      return retval;
    } else {
      // only case is full outer join with SMB enabled which is not possible. Convert to regular
      // join.
      fallbackToReduceSideJoin(joinOp, context);
      return null;
    }
  }
  if (numBuckets > 1) {
    if (context.conf.getBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)) {
      // Check if we are in LLAP; if so, determine whether we should use BMJ or DPHJ
      if (llapInfo != null) {
        if (selectJoinForLlap(context, joinOp, tezBucketJoinProcCtx, llapInfo, mapJoinConversion, numBuckets)) {
          return null;
        }
      } else if (convertJoinBucketMapJoin(joinOp, context, mapJoinConversion, tezBucketJoinProcCtx)) {
        return null;
      }
    }
  }
  // check if we can convert to map join with no bucket scaling.
  LOG.info("Convert to non-bucketed map join");
  if (numBuckets != 1) {
    mapJoinConversion = getMapJoinConversion(joinOp, context, 1, false, maxJoinMemory, true);
  }
  if (mapJoinConversion == null) {
    // we are just converting to a common merge join operator. The shuffle
    // join in map-reduce case.
    fallbackToReduceSideJoin(joinOp, context);
    return null;
  }
  // Currently, this is a MJ path and we don't support FULL OUTER MapJoin yet.
  if (mapJoinConversion.getIsFullOuterJoin() && !mapJoinConversion.getIsFullOuterEnabledForMapJoin()) {
    fallbackToReduceSideJoin(joinOp, context);
    return null;
  }
  MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversion, true);
  if (mapJoinOp == null) {
    fallbackToReduceSideJoin(joinOp, context);
    return null;
  }
  // map join operator by default has no bucket cols and num of reduce sinks
  // reduced by 1
  mapJoinOp.setOpTraits(new OpTraits(null, -1, null, joinOp.getOpTraits().getNumReduceSinks()));
  preserveOperatorInfos(mapJoinOp, joinOp, context);
  // propagate this change till the next RS
  for (Operator<? extends OperatorDesc> childOp : mapJoinOp.getChildOperators()) {
    setAllChildrenTraits(childOp, mapJoinOp.getOpTraits());
  }
  return null;
}
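The decision at the top of process is configuration-driven: hive.execution.mode selects the LLAP sizing path, and HIVECONVERTJOIN gates the conversion entirely. The two-argument getMemoryMonitorInfo call in this snippet evidently derives maxSize from the configuration itself; the rough sketch below instead exercises the explicit three-argument overload shown in the first snippet. The no-arg ConvertJoinMapJoin constructor and the 100 MB threshold are assumptions for illustration, not confirmed API:

// Hypothetical driver for the sizing step; requires Hive's optimizer classes on the classpath.
HiveConf conf = new HiveConf();
conf.setVar(HiveConf.ConfVars.HIVE_EXECUTION_MODE, "llap");       // take the LLAP branch
LlapClusterStateForCompile llapInfo = LlapClusterStateForCompile.getClusterInfo(conf);
llapInfo.initClusterInfo();

ConvertJoinMapJoin optimizer = new ConvertJoinMapJoin();          // assumed no-arg constructor
long noConditionalTaskSize = 100L * 1024 * 1024;                  // assumed 100 MB threshold
MemoryMonitorInfo info = optimizer.getMemoryMonitorInfo(noConditionalTaskSize, conf, llapInfo);
long maxJoinMemory = info.getAdjustedNoConditionalTaskSize();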
Use of org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo in project hive by apache.
The class ConvertJoinMapJoin, method convertJoinBucketMapJoin.
private boolean convertJoinBucketMapJoin(JoinOperator joinOp, OptimizeTezProcContext context,
    MapJoinConversion mapJoinConversion, TezBucketJoinProcCtx tezBucketJoinProcCtx) throws SemanticException {
  if (mapJoinConversion.getIsFullOuterJoin() && !mapJoinConversion.getIsFullOuterEnabledForMapJoin()) {
    return false;
  }
  final int bigTablePosition = mapJoinConversion.getBigTablePos();
  if (!checkConvertJoinBucketMapJoin(joinOp, bigTablePosition, tezBucketJoinProcCtx)) {
    LOG.info("Check conversion to bucket map join failed.");
    return false;
  }
  // In case the join has extra keys other than bucketed columns, partition keys need to be
  // updated on small table(s).
  ReduceSinkOperator bigTableRS = (ReduceSinkOperator) joinOp.getParentOperators().get(bigTablePosition);
  OpTraits opTraits = bigTableRS.getOpTraits();
  List<List<String>> listBucketCols = opTraits.getBucketColNames();
  List<ExprNodeDesc> bigTablePartitionCols = bigTableRS.getConf().getPartitionCols();
  boolean updatePartitionCols = false;
  List<Integer> positions = new ArrayList<>();
  if (listBucketCols.get(0).size() != bigTablePartitionCols.size()) {
    updatePartitionCols = true;
    // Prepare updated partition columns for small table(s).
    // Get the positions of bucketed columns
    int i = 0;
    Map<String, ExprNodeDesc> colExprMap = bigTableRS.getColumnExprMap();
    for (ExprNodeDesc bigTableExpr : bigTablePartitionCols) {
      // It is guaranteed there is only 1 list within listBucketCols.
      for (String colName : listBucketCols.get(0)) {
        if (colExprMap.get(colName).isSame(bigTableExpr)) {
          positions.add(i++);
        }
      }
    }
  }
  MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversion, true);
  if (mapJoinOp == null) {
    LOG.debug("Conversion to bucket map join failed.");
    return false;
  }
  MapJoinDesc joinDesc = mapJoinOp.getConf();
  joinDesc.setBucketMapJoin(true);
  // we can set the traits for this join operator
  opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), tezBucketJoinProcCtx.getNumBuckets(),
      null, joinOp.getOpTraits().getNumReduceSinks());
  mapJoinOp.setOpTraits(opTraits);
  preserveOperatorInfos(mapJoinOp, joinOp, context);
  setNumberOfBucketsOnChildren(mapJoinOp);
  // Once the conversion is done, we can set the partitioner to bucket cols on the small table
  Map<String, Integer> bigTableBucketNumMapping = new HashMap<String, Integer>();
  bigTableBucketNumMapping.put(joinDesc.getBigTableAlias(), tezBucketJoinProcCtx.getNumBuckets());
  joinDesc.setBigTableBucketNumMapping(bigTableBucketNumMapping);
  // Update the partition columns on the small table side to ensure correct routing of hash tables.
  if (updatePartitionCols) {
    for (Operator<?> op : mapJoinOp.getParentOperators()) {
      if (!(op instanceof ReduceSinkOperator)) {
        continue;
      }
      ReduceSinkOperator rsOp = (ReduceSinkOperator) op;
      List<ExprNodeDesc> newPartitionCols = new ArrayList<>();
      List<ExprNodeDesc> partitionCols = rsOp.getConf().getPartitionCols();
      for (Integer position : positions) {
        newPartitionCols.add(partitionCols.get(position));
      }
      rsOp.getConf().setPartitionCols(newPartitionCols);
    }
  }
  // Update the memory monitor info for LLAP.
  MemoryMonitorInfo memoryMonitorInfo = joinDesc.getMemoryMonitorInfo();
  if (memoryMonitorInfo.isLlap()) {
    memoryMonitorInfo.setHashTableInflationFactor(1);
    memoryMonitorInfo.setMemoryOverSubscriptionFactor(0);
  }
  return true;
}
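When the join keys include more than the bucketed columns, positions records where each bucketed column's expression sits in the big table's partition-column list; the small-table ReduceSinkOperators are then re-partitioned on just those entries so every hash table routes to the task owning the matching bucket. A minimal sketch of that trimming step, with plain strings standing in for ExprNodeDesc (the column names are made up):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class TrimPartitionColsSketch {
  public static void main(String[] args) {
    List<String> partitionCols = Arrays.asList("userid", "ds", "hr"); // small-table RS partition cols
    List<Integer> positions = Arrays.asList(0, 2);                    // only userid and hr are bucketed

    // Mirrors the loop over rsOp.getConf().getPartitionCols() in the snippet above.
    List<String> newPartitionCols = new ArrayList<>();
    for (Integer position : positions) {
      newPartitionCols.add(partitionCols.get(position));
    }
    System.out.println(newPartitionCols); // prints [userid, hr]
  }
}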
Use of org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo in project hive by apache.
The class ConvertJoinMapJoin, method convertJoinBucketMapJoin (overload taking the big table position directly).
private boolean convertJoinBucketMapJoin(JoinOperator joinOp, OptimizeTezProcContext context,
    int bigTablePosition, TezBucketJoinProcCtx tezBucketJoinProcCtx) throws SemanticException {
  if (!checkConvertJoinBucketMapJoin(joinOp, bigTablePosition, tezBucketJoinProcCtx)) {
    LOG.info("Check conversion to bucket map join failed.");
    return false;
  }
  // In case the join has extra keys other than bucketed columns, partition keys need to be
  // updated on small table(s).
  ReduceSinkOperator bigTableRS = (ReduceSinkOperator) joinOp.getParentOperators().get(bigTablePosition);
  OpTraits opTraits = bigTableRS.getOpTraits();
  List<List<String>> listBucketCols = opTraits.getBucketColNames();
  ArrayList<ExprNodeDesc> bigTablePartitionCols = bigTableRS.getConf().getPartitionCols();
  boolean updatePartitionCols = false;
  List<Integer> positions = new ArrayList<>();
  if (listBucketCols.get(0).size() != bigTablePartitionCols.size()) {
    updatePartitionCols = true;
    // Prepare updated partition columns for small table(s).
    // Get the positions of bucketed columns
    int i = 0;
    Map<String, ExprNodeDesc> colExprMap = bigTableRS.getColumnExprMap();
    for (ExprNodeDesc bigTableExpr : bigTablePartitionCols) {
      // It is guaranteed there is only 1 list within listBucketCols.
      for (String colName : listBucketCols.get(0)) {
        if (colExprMap.get(colName).isSame(bigTableExpr)) {
          positions.add(i++);
        }
      }
    }
  }
  MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, bigTablePosition, true);
  if (mapJoinOp == null) {
    LOG.debug("Conversion to bucket map join failed.");
    return false;
  }
  MapJoinDesc joinDesc = mapJoinOp.getConf();
  joinDesc.setBucketMapJoin(true);
  // we can set the traits for this join operator
  opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), tezBucketJoinProcCtx.getNumBuckets(),
      null, joinOp.getOpTraits().getNumReduceSinks());
  mapJoinOp.setOpTraits(opTraits);
  mapJoinOp.setStatistics(joinOp.getStatistics());
  setNumberOfBucketsOnChildren(mapJoinOp);
  // Once the conversion is done, we can set the partitioner to bucket cols on the small table
  Map<String, Integer> bigTableBucketNumMapping = new HashMap<String, Integer>();
  bigTableBucketNumMapping.put(joinDesc.getBigTableAlias(), tezBucketJoinProcCtx.getNumBuckets());
  joinDesc.setBigTableBucketNumMapping(bigTableBucketNumMapping);
  // Update the partition columns on the small table side to ensure correct routing of hash tables.
  if (updatePartitionCols) {
    for (Operator<?> op : mapJoinOp.getParentOperators()) {
      if (!(op instanceof ReduceSinkOperator)) {
        continue;
      }
      ReduceSinkOperator rsOp = (ReduceSinkOperator) op;
      ArrayList<ExprNodeDesc> newPartitionCols = new ArrayList<>();
      ArrayList<ExprNodeDesc> partitionCols = rsOp.getConf().getPartitionCols();
      for (Integer position : positions) {
        newPartitionCols.add(partitionCols.get(position));
      }
      rsOp.getConf().setPartitionCols(newPartitionCols);
    }
  }
  // Update the memory monitor info for LLAP.
  MemoryMonitorInfo memoryMonitorInfo = joinDesc.getMemoryMonitorInfo();
  if (memoryMonitorInfo.isLlap()) {
    memoryMonitorInfo.setHashTableInflationFactor(1);
    memoryMonitorInfo.setMemoryOverSubscriptionFactor(0);
  }
  return true;
}