Use of org.apache.tez.runtime.api.AbstractLogicalInput in the Apache Hive project.
Class VectorMapJoinFastHashTableLoader, method load.
/**
 * Builds one {@link VectorMapJoinFastTableContainer} per small-table input and stores it in
 * {@code mapJoinTables} at that input's position; the big-table position is skipped.
 *
 * <p>For each input, rows are read from the input's key/value reader, hashed, and handed over in
 * batches to per-partition queues that are drained by the loader threads started through
 * {@code submitQueueDrainThreads}. When memory monitoring is enabled, the container's estimated
 * memory size is compared with the effective threshold every
 * {@code memoryMonitorInfo.getMemoryCheckInterval()} rows.
 *
 * @param mapJoinTables output array indexed by input position; filled in place
 * @param mapJoinTableSerdes not read by this loader (the container's serde is set to null)
 * @throws HiveException if an input cannot be started or read, if the calling thread is
 *     interrupted, or if the loader threads do not finish within the shutdown wait
 */
@Override
public void load(MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
  Map<Integer, String> parentToInput = desc.getParentToInput();
  Map<Integer, Long> parentKeyCounts = desc.getParentKeyCounts();
  MemoryMonitorInfo memoryMonitorInfo = desc.getMemoryMonitorInfo();
  boolean doMemCheck = false;
  long effectiveThreshold = 0;
  if (memoryMonitorInfo != null) {
    effectiveThreshold = memoryMonitorInfo.getEffectiveThreshold(desc.getMaxMemoryAvailable());
    // Flip the flag at runtime in case we are running outside of LLAP
    if (!LlapDaemonInfo.INSTANCE.isLlap()) {
      memoryMonitorInfo.setLlap(false);
    }
    if (memoryMonitorInfo.doMemoryMonitoring()) {
      doMemCheck = true;
      LOG.info("Memory monitoring for hash table loader enabled. {}", memoryMonitorInfo);
    }
  }
  if (!doMemCheck) {
    LOG.info("Not doing hash table memory monitoring. {}", memoryMonitorInfo);
  }
  // How long to wait for the loader threads to drain their queues once an input is exhausted.
  final long shutdownWaitMinutes = 2;
  for (int pos = 0; pos < mapJoinTables.length; pos++) {
    if (pos == desc.getPosBigTable()) {
      continue;
    }
    String inputName = parentToInput.get(pos);
    LogicalInput input = tezContext.getInput(inputName);
    try {
      input.start();
      tezContext.getTezProcessorContext().waitForAnyInputReady(Collections.<Input>singletonList(input));
    } catch (Exception e) {
      throw new HiveException(e);
    }
    try {
      KeyValueReader kvReader = (KeyValueReader) input.getReader();
      Long keyCountObj = parentKeyCounts.get(pos);
      long estKeyCount = (keyCountObj == null) ? -1 : keyCountObj;
      long inputRecords = -1;
      try {
        // TODO : Need to use class instead of string.
        // https://issues.apache.org/jira/browse/HIVE-23981
        inputRecords = ((AbstractLogicalInput) input).getContext().getCounters().findCounter("org.apache.tez.common.counters.TaskCounter", "APPROXIMATE_INPUT_RECORDS").getValue();
      } catch (Exception e) {
        // Best effort only: the counter just refines the size estimate, so fall back to -1.
        LOG.debug("Failed to get value for counter APPROXIMATE_INPUT_RECORDS", e);
      }
      // Size the table with the larger of the planner estimate and the runtime counter.
      long keyCount = Math.max(estKeyCount, inputRecords);
      initHTLoadingService(keyCount);
      VectorMapJoinFastTableContainer tableContainer = new VectorMapJoinFastTableContainer(desc, hconf, keyCount, numLoadThreads);
      LOG.info("Loading hash table for input: {} cacheKey: {} tableContainer: {} smallTablePos: {} " + "estKeyCount : {} keyCount : {}", inputName, cacheKey, tableContainer.getClass().getSimpleName(), pos, estKeyCount, keyCount);
      // No SerDes here.
      tableContainer.setSerde(null, null);
      // Submit parallel loading Threads
      submitQueueDrainThreads(tableContainer);
      long receivedEntries = 0;
      long startTime = System.currentTimeMillis();
      while (kvReader.next()) {
        BytesWritable currentKey = (BytesWritable) kvReader.getCurrentKey();
        BytesWritable currentValue = (BytesWritable) kvReader.getCurrentValue();
        long hashCode = tableContainer.getHashCode(currentKey);
        // numLoadThreads divisor must be a power of 2!
        int partitionId = (int) ((numLoadThreads - 1) & hashCode);
        // copyBytes() gives the queued element its own key/value arrays, so it does not share
        // the reader's Writable buffers (presumably reused across next() calls - confirm).
        HashTableElement h = new HashTableElement(hashCode, currentValue.copyBytes(), currentKey.copyBytes());
        if (elementBatches[partitionId].addElement(h)) {
          // addElement returned true: hand the batch to its partition queue and take a fresh one.
          loadBatchQueues[partitionId].add(elementBatches[partitionId]);
          elementBatches[partitionId] = batchPool.take();
        }
        receivedEntries++;
        if (doMemCheck && (receivedEntries % memoryMonitorInfo.getMemoryCheckInterval() == 0)) {
          final long estMemUsage = tableContainer.getEstimatedMemorySize();
          if (estMemUsage > effectiveThreshold) {
            String msg = "Hash table loading exceeded memory limits for input: " + inputName + " numEntries: " + receivedEntries + " estimatedMemoryUsage: " + estMemUsage + " effectiveThreshold: " + effectiveThreshold + " memoryMonitorInfo: " + memoryMonitorInfo;
            LOG.error(msg);
            throw new MapJoinMemoryExhaustionError(msg);
          } else {
            LOG.info("Checking hash table loader memory usage for input: {} numEntries: {} " + "estimatedMemoryUsage: {} effectiveThreshold: {}", inputName, receivedEntries, estMemUsage, effectiveThreshold);
          }
        }
      }
      LOG.info("Finished loading the queue for input: {} waiting {} minutes for TPool shutdown", inputName, shutdownWaitMinutes);
      addQueueDoneSentinel();
      loadExecService.shutdown();
      // awaitTermination returns false on timeout. Sealing and publishing the container while
      // loader threads are still inserting would expose an incomplete hash table, so fail instead.
      if (!loadExecService.awaitTermination(shutdownWaitMinutes, TimeUnit.MINUTES)) {
        throw new HiveException("Hash table loading for input: " + inputName + " did not finish within " + shutdownWaitMinutes + " minutes");
      }
      batchPool.clear();
      LOG.info("Total received entries: {} Threads {} HT entries: {}", receivedEntries, numLoadThreads, totalEntries.get());
      long delta = System.currentTimeMillis() - startTime;
      htLoadCounter.increment(delta);
      tableContainer.seal();
      mapJoinTables[pos] = tableContainer;
      if (doMemCheck) {
        LOG.info("Finished loading hash table for input: {} cacheKey: {} numEntries: {} " + "estimatedMemoryUsage: {} Load Time : {} ", inputName, cacheKey, receivedEntries, tableContainer.getEstimatedMemorySize(), delta);
      } else {
        LOG.info("Finished loading hash table for input: {} cacheKey: {} numEntries: {} Load Time : {} ", inputName, cacheKey, receivedEntries, delta);
      }
    } catch (InterruptedException e) {
      // Restore the interrupt status for callers before converting to the declared exception.
      Thread.currentThread().interrupt();
      throw new HiveException(e);
    } catch (HiveException e) {
      // Already the declared type (e.g. the shutdown timeout above); do not wrap it again.
      throw e;
    } catch (Exception e) {
      throw new HiveException(e);
    } finally {
      // On any failure path the loader threads may still be running or blocked on their queues;
      // stop them so they do not outlive this call. No-op after a clean shutdown.
      if (loadExecService != null && !loadExecService.isTerminated()) {
        LOG.warn("Hash table loader threads for input: {} did not terminate; forcing shutdown", inputName);
        loadExecService.shutdownNow();
      }
    }
  }
}
Use of org.apache.tez.runtime.api.AbstractLogicalInput in the Apache Hive project.
Class HashTableLoader, method load.
/**
 * Loads a hash table container for every small-table input of the map join and stores it in
 * {@code mapJoinTables} at that input's position; the big-table position is skipped.
 *
 * <p>The container type is chosen per input: {@link HashMapWrapper} when optimized tables are
 * not in use, {@link HybridHashTableContainer} for a hybrid grace hash join that is not a cross
 * product, and {@link MapJoinBytesTableContainer} otherwise. When memory monitoring is enabled,
 * the container's estimated memory size is compared with the effective threshold every
 * {@code memoryMonitorInfo.getMemoryCheckInterval()} rows.
 *
 * @param mapJoinTables output array indexed by input position; filled in place
 * @param mapJoinTableSerdes per-position serde holders supplying the key and value contexts
 * @throws HiveException if an input cannot be started or read, if a key type is unsupported
 *     for a later input after an optimized table was already chosen, or on any other failure
 *     while loading
 */
@Override
public void load(MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
  final Map<Integer, String> inputNameByPos = desc.getParentToInput();
  final Map<Integer, Long> keyCountByPos = desc.getParentKeyCounts();

  // A join whose first key list is empty is treated as a cross product.
  final List<ExprNodeDesc> firstKeyExprs = desc.getKeys().values().iterator().next();
  final boolean crossProduct = firstKeyExprs.size() == 0;

  boolean optimizedTables = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
  final boolean hybridGrace = desc.isHybridHashJoin();
  boolean firstKey = true;

  // Memory budget for the hash tables: memory manager value, else the configured threshold,
  // capped to a fraction of the heap when it exceeds the process maximum.
  long mapJoinMemory = desc.getMemoryNeeded();
  LOG.info("Memory manager allocates " + mapJoinMemory + " bytes for the loading hashtable.");
  if (mapJoinMemory <= 0) {
    mapJoinMemory = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
  }
  final long heapMax = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
  if (mapJoinMemory > heapMax) {
    float usageFraction = HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE);
    LOG.warn("totalMapJoinMemory value of " + mapJoinMemory + " is greater than the max memory size of " + heapMax);
    // Never ask for more than the heap can hold; the fraction comes from configuration.
    mapJoinMemory = (long) (heapMax * usageFraction);
  }

  // N-way hybrid grace hash join only: shared conf and per-table memory split.
  HybridHashTableConf nwayConf = null;
  Map<Integer, Long> memoryByPos = null;
  if (hybridGrace && mapJoinTables.length > 2) {
    nwayConf = new HybridHashTableConf();
    LOG.info("N-way join: " + (mapJoinTables.length - 1) + " small tables.");

    // Sum the small-table sizes and remember which small table is the largest.
    long combinedSize = 0;
    int largestPos = 0;
    long largestSize = Long.MIN_VALUE;
    for (int idx = 0; idx < mapJoinTables.length; idx++) {
      if (idx != desc.getPosBigTable()) {
        long tableSize = desc.getParentDataSizes().get(idx);
        combinedSize += tableSize;
        if (tableSize > largestSize) {
          largestSize = tableSize;
          largestPos = idx;
        }
      }
    }
    memoryByPos = divideHybridHashTableMemory(mapJoinTables, desc, combinedSize, mapJoinMemory);

    // The partition count for every small table is derived from the largest one.
    long largestMemory = memoryByPos.get(largestPos);
    final int partitionCount;
    try {
      partitionCount = HybridHashTableContainer.calcNumPartitions(largestMemory, largestSize, HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS), HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE));
    } catch (IOException e) {
      throw new HiveException(e);
    }
    nwayConf.setNumberOfPartitions(partitionCount);
  }

  // Memory monitoring setup.
  final MemoryMonitorInfo memoryMonitorInfo = desc.getMemoryMonitorInfo();
  boolean memCheckEnabled = false;
  long memThreshold = 0;
  if (memoryMonitorInfo != null) {
    memThreshold = memoryMonitorInfo.getEffectiveThreshold(desc.getMaxMemoryAvailable());
    // Outside of LLAP the flag is cleared at runtime before asking whether to monitor.
    if (!LlapDaemonInfo.INSTANCE.isLlap()) {
      memoryMonitorInfo.setLlap(false);
    }
    memCheckEnabled = memoryMonitorInfo.doMemoryMonitoring();
  }
  if (memCheckEnabled) {
    LOG.info("Memory monitoring for hash table loader enabled. {}", memoryMonitorInfo);
  } else {
    LOG.info("Not doing hash table memory monitoring. {}", memoryMonitorInfo);
  }

  for (int pos = 0; pos < mapJoinTables.length; pos++) {
    if (pos == desc.getPosBigTable()) {
      continue;
    }
    final String inputName = inputNameByPos.get(pos);
    final LogicalInput input = tezContext.getInput(inputName);
    try {
      input.start();
      tezContext.getTezProcessorContext().waitForAnyInputReady(Collections.<Input>singletonList(input));
    } catch (Exception e) {
      throw new HiveException(e);
    }

    try {
      KeyValueReader reader = (KeyValueReader) input.getReader();
      MapJoinObjectSerDeContext keyCtx = mapJoinTableSerdes[pos].getKeyContext();
      MapJoinObjectSerDeContext valCtx = mapJoinTableSerdes[pos].getValueContext();

      if (optimizedTables) {
        ObjectInspector keyOi = keyCtx.getSerDe().getObjectInspector();
        if (!MapJoinBytesTableContainer.isSupportedKey(keyOi)) {
          // Falling back is only possible before any table has been built.
          if (!firstKey) {
            throw new HiveException(describeOi("Only a subset of mapjoin keys is supported. Unsupported key: ", keyOi));
          }
          optimizedTables = false;
          LOG.info(describeOi("Not using optimized hash table. " + "Only a subset of mapjoin keys is supported. Unsupported key: ", keyOi));
        }
      }
      firstKey = false;

      // Planner estimate, or -1 when none was recorded for this position.
      Long plannedKeyCount = keyCountByPos.get(pos);
      long estKeyCount;
      if (plannedKeyCount == null) {
        estKeyCount = -1;
      } else {
        estKeyCount = plannedKeyCount;
      }

      long inputRecords = -1;
      try {
        // TODO : Need to use class instead of string.
        // https://issues.apache.org/jira/browse/HIVE-23981
        inputRecords = ((AbstractLogicalInput) input).getContext().getCounters().findCounter("org.apache.tez.common.counters.TaskCounter", "APPROXIMATE_INPUT_RECORDS").getValue();
      } catch (Exception e) {
        LOG.debug("Failed to get value for counter APPROXIMATE_INPUT_RECORDS", e);
      }
      final long keyCount = Math.max(estKeyCount, inputRecords);

      // Hybrid grace only: per-table share for n-way joins, the whole budget for a binary join.
      long tableMemory = 0;
      if (hybridGrace) {
        tableMemory = (mapJoinTables.length > 2) ? memoryByPos.get(pos) : mapJoinMemory;
      }

      final MapJoinTableContainer container;
      if (!optimizedTables) {
        container = new HashMapWrapper(hconf, keyCount);
      } else if (hybridGrace && !crossProduct) {
        container = new HybridHashTableContainer(hconf, keyCount, tableMemory, desc.getParentDataSizes().get(pos), nwayConf);
      } else {
        container = new MapJoinBytesTableContainer(hconf, valCtx, keyCount, 0);
      }
      LOG.info("Loading hash table for input: {} cacheKey: {} tableContainer: {} smallTablePos: {} " + "estKeyCount : {} keyCount : {}", inputName, cacheKey, container.getClass().getSimpleName(), pos, estKeyCount, keyCount);
      container.setSerde(keyCtx, valCtx);

      long rowsLoaded = 0;
      final long loadStart = System.currentTimeMillis();
      while (reader.next()) {
        container.putRow((Writable) reader.getCurrentKey(), (Writable) reader.getCurrentValue());
        rowsLoaded++;
        // Only inspect memory on every check-interval-th row, and only when monitoring is on.
        if (!memCheckEnabled || rowsLoaded % memoryMonitorInfo.getMemoryCheckInterval() != 0) {
          continue;
        }
        final long estMemUsage = container.getEstimatedMemorySize();
        if (estMemUsage > memThreshold) {
          String msg = "Hash table loading exceeded memory limits for input: " + inputName + " numEntries: " + rowsLoaded + " estimatedMemoryUsage: " + estMemUsage + " effectiveThreshold: " + memThreshold + " memoryMonitorInfo: " + memoryMonitorInfo;
          LOG.error(msg);
          throw new MapJoinMemoryExhaustionError(msg);
        }
        LOG.info("Checking hash table loader memory usage for input: {} numEntries: {} " + "estimatedMemoryUsage: {} effectiveThreshold: {}", inputName, rowsLoaded, estMemUsage, memThreshold);
      }

      final long loadMillis = System.currentTimeMillis() - loadStart;
      htLoadCounter.increment(loadMillis);
      container.seal();
      mapJoinTables[pos] = container;
      if (memCheckEnabled) {
        LOG.info("Finished loading hash table for input: {} cacheKey: {} numEntries: {} " + "estimatedMemoryUsage: {} Load Time : {} ", inputName, cacheKey, rowsLoaded, container.getEstimatedMemorySize(), loadMillis);
      } else {
        LOG.info("Finished loading hash table for input: {} cacheKey: {} numEntries: {} Load Time : {} ", inputName, cacheKey, rowsLoaded, loadMillis);
      }
    } catch (Exception e) {
      throw new HiveException(e);
    }
  }
}
Aggregations