Use of org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper in the Apache Hive project.
Class MapJoinTestConfig, method createMapJoin.
/**
 * Test helper that creates a map-join operator together with the small-table container and the
 * container SerDe it was built with.
 *
 * <p>The container is a {@code HashMapWrapper} when {@code isOriginalMapJoin} is true and a
 * {@code MapJoinBytesTableContainer} otherwise. The operator is a plain {@code MapJoinOperator}
 * when {@code isVectorMapJoin} is false; otherwise one of two vectorized operators is chosen
 * depending on whether the join is an outer join that has big-table filters.
 *
 * <p>NOTE(review): this snippet appears to have lost its closing braces and at least one
 * statement (the body of the scratch-column loop) during extraction, so the exact nesting of the
 * statements below cannot be determined from here. Confirm against the upstream source.
 *
 * @param testDesc test description; supplies the HiveConf, the big-table column names and the
 *     small-table value type infos
 * @param testData test data; its {@code smallTableKeyHashMap} size is used to size the bytes
 *     container, and it is passed to {@code loadTableContainerData}
 * @param mapJoinDesc map-join descriptor queried for keys, value expressions and filters
 * @param isVectorMapJoin whether a vectorized operator should be created
 * @param isOriginalMapJoin whether the small table is backed by a {@code HashMapWrapper}
 * @param shareMapJoinTableContainer not referenced anywhere in the visible body
 * @return a result object wrapping the operator, the table container and the container SerDe
 * @throws SerDeException declared by the signature; the raising call is not identifiable here
 * @throws IOException declared by the signature; the raising call is not identifiable here
 * @throws HiveException declared by the signature; the raising call is not identifiable here
 */
public static CreateMapJoinResult createMapJoin(MapJoinTestDescription testDesc, MapJoinTestData testData, MapJoinDesc mapJoinDesc, boolean isVectorMapJoin, boolean isOriginalMapJoin, MapJoinTableContainer shareMapJoinTableContainer) throws SerDeException, IOException, HiveException {
// Fixed position 0; in the visible code it is used only to look up the big-table filters.
final Byte bigTablePos = 0;
MapJoinTableContainerSerDe mapJoinTableContainerSerDe = MapJoinTestConfig.createMapJoinTableContainerSerDe(mapJoinDesc);
MapJoinObjectSerDeContext valCtx = mapJoinTableContainerSerDe.getValueContext();
// isOriginalMapJoin selects HashMapWrapper; otherwise a MapJoinBytesTableContainer sized from
// the number of entries in testData.smallTableKeyHashMap.
MapJoinTableContainer mapJoinTableContainer = (isOriginalMapJoin ? new HashMapWrapper(testDesc.hiveConf, -1) : new MapJoinBytesTableContainer(testDesc.hiveConf, valCtx, testData.smallTableKeyHashMap.size(), 0));
mapJoinTableContainer.setSerde(mapJoinTableContainerSerDe.getKeyContext(), mapJoinTableContainerSerDe.getValueContext());
// The container is handed to the loader helper before any operator is constructed.
loadTableContainerData(testDesc, testData, mapJoinTableContainer);
MapJoinOperator operator;
if (!isVectorMapJoin) {
operator = new MapJoinOperator(new CompilationOpContext());
} else {
VectorizationContext vContext = new VectorizationContext("test", testDesc.bigTableColumnNameList);
// UNDONE: Unclear whether this belongs in the input VectorizationContext...
// Create scratch columns to hold small table results.
// NOTE(review): the loop body is missing from this snippet; presumably it allocates one scratch
// column per small-table value type -- confirm against upstream.
for (int i = 0; i < testDesc.smallTableValueTypeInfos.length; i++) {
// This is what the Vectorizer class does.
VectorMapJoinDesc vectorMapJoinDesc = new VectorMapJoinDesc();
byte posBigTable = (byte) mapJoinDesc.getPosBigTable();
// The two expression arrays below are computed but not referenced again in the visible code.
VectorExpression[] allBigTableKeyExpressions = vContext.getVectorExpressions(mapJoinDesc.getKeys().get(posBigTable));
Map<Byte, List<ExprNodeDesc>> exprs = mapJoinDesc.getExprs();
VectorExpression[] allBigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));
// Filters are looked up with the fixed bigTablePos (0), not with posBigTable.
List<ExprNodeDesc> bigTableFilters = mapJoinDesc.getFilters().get(bigTablePos);
// True only for an outer join that has at least one big-table filter.
boolean isOuterAndFiltered = (!mapJoinDesc.isNoOuterJoin() && bigTableFilters.size() > 0);
if (!isOuterAndFiltered) {
operator = new VectorMapJoinOperator(new CompilationOpContext(), mapJoinDesc, vContext, vectorMapJoinDesc);
} else {
operator = new VectorMapJoinOuterFilteredOperator(new CompilationOpContext(), mapJoinDesc, vContext, vectorMapJoinDesc);
// Turns on the testing flag HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD in the test's HiveConf.
// NOTE(review): with the braces lost it is unclear whether this is unconditional -- confirm.
HiveConf.setBoolVar(testDesc.hiveConf, HiveConf.ConfVars.HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD, true);
return new CreateMapJoinResult(operator, mapJoinTableContainer, mapJoinTableContainerSerDe);
Use of org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper in the Apache Hive project.
Class MapJoinTestConfig, method createMapJoin.
/**
 * Test helper that creates a map-join operator, connects it to a collector operator and installs
 * a small-table container on it.
 *
 * <p>The container is a {@code HashMapWrapper} when {@code isOriginalMapJoin} is true and a
 * {@code MapJoinBytesTableContainer} otherwise. The operator is a plain {@code MapJoinOperator}
 * when {@code isVectorMapJoin} is false; otherwise one of two vectorized operators is chosen
 * depending on whether the join is an outer join that has big-table filters.
 *
 * <p>NOTE(review): this snippet appears to have lost its closing braces and at least one
 * statement (the body of the scratch-column loop) during extraction, so the exact nesting of the
 * statements below cannot be determined from here. Confirm against the upstream source.
 *
 * @param testDesc test description; supplies the HiveConf, the big-table column names and the
 *     small-table value type infos
 * @param collectorOperator operator passed to {@code MapJoinTestConfig.connectOperators} along
 *     with the new map-join operator
 * @param testData test data; its {@code smallTableKeyHashMap} size is used to size the bytes
 *     container, and it is passed to {@code loadTableContainerData}
 * @param mapJoinDesc map-join descriptor queried for keys, value expressions and filters
 * @param isVectorMapJoin whether a vectorized operator should be created
 * @param isOriginalMapJoin whether the small table is backed by a {@code HashMapWrapper}
 * @return the created operator
 * @throws SerDeException declared by the signature; the raising call is not identifiable here
 * @throws IOException declared by the signature; the raising call is not identifiable here
 * @throws HiveException declared by the signature; the raising call is not identifiable here
 */
public static MapJoinOperator createMapJoin(MapJoinTestDescription testDesc, Operator<? extends OperatorDesc> collectorOperator, MapJoinTestData testData, MapJoinDesc mapJoinDesc, boolean isVectorMapJoin, boolean isOriginalMapJoin) throws SerDeException, IOException, HiveException {
// Fixed position 0; in the visible code it is used only to look up the big-table filters.
final Byte bigTablePos = 0;
MapJoinTableContainerSerDe mapJoinTableContainerSerDe = MapJoinTestConfig.createMapJoinTableContainerSerDe(mapJoinDesc);
MapJoinObjectSerDeContext valCtx = mapJoinTableContainerSerDe.getValueContext();
// isOriginalMapJoin selects HashMapWrapper; otherwise a MapJoinBytesTableContainer sized from
// the number of entries in testData.smallTableKeyHashMap.
MapJoinTableContainer mapJoinTableContainer = (isOriginalMapJoin ? new HashMapWrapper(testDesc.hiveConf, -1) : new MapJoinBytesTableContainer(testDesc.hiveConf, valCtx, testData.smallTableKeyHashMap.size(), 0));
mapJoinTableContainer.setSerde(mapJoinTableContainerSerDe.getKeyContext(), mapJoinTableContainerSerDe.getValueContext());
// The container is handed to the loader helper before any operator is constructed.
loadTableContainerData(testDesc, testData, mapJoinTableContainer);
MapJoinOperator operator;
if (!isVectorMapJoin) {
operator = new MapJoinOperator(new CompilationOpContext());
} else {
VectorizationContext vContext = new VectorizationContext("test", testDesc.bigTableColumnNamesList);
// Create scratch columns to hold small table results.
// NOTE(review): the loop body is missing from this snippet; presumably it allocates one scratch
// column per small-table value type -- confirm against upstream.
for (int i = 0; i < testDesc.smallTableValueTypeInfos.length; i++) {
// This is what the Vectorizer class does.
VectorMapJoinDesc vectorMapJoinDesc = new VectorMapJoinDesc();
byte posBigTable = (byte) mapJoinDesc.getPosBigTable();
// The two expression arrays below are computed but not referenced again in the visible code.
VectorExpression[] allBigTableKeyExpressions = vContext.getVectorExpressions(mapJoinDesc.getKeys().get(posBigTable));
Map<Byte, List<ExprNodeDesc>> exprs = mapJoinDesc.getExprs();
VectorExpression[] allBigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));
// Filters are looked up with the fixed bigTablePos (0), not with posBigTable.
List<ExprNodeDesc> bigTableFilters = mapJoinDesc.getFilters().get(bigTablePos);
// True only for an outer join that has at least one big-table filter.
boolean isOuterAndFiltered = (!mapJoinDesc.isNoOuterJoin() && bigTableFilters.size() > 0);
if (!isOuterAndFiltered) {
operator = new VectorMapJoinOperator(new CompilationOpContext(), mapJoinDesc, vContext, vectorMapJoinDesc);
} else {
operator = new VectorMapJoinOuterFilteredOperator(new CompilationOpContext(), mapJoinDesc, vContext, vectorMapJoinDesc);
// Wire the new operator to the collector, then install the container under index 1.
// NOTE(review): presumably index 1 is the small-table position (big table is 0) -- confirm.
MapJoinTestConfig.connectOperators(testDesc, operator, collectorOperator);
operator.setTestMapJoinTableContainer(1, mapJoinTableContainer, mapJoinTableContainerSerDe);
return operator;
Use of org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper in the Apache Hive project.
Class HashTableSinkOperator, method initializeOp.
/**
 * Initializes this hash-table sink operator from its descriptor ({@code conf}) and the supplied
 * configuration.
 *
 * <p>In the visible code this sets up the console logger and memory-exhaustion checker, builds
 * the join key / value / filter evaluators and their object inspectors, and creates one
 * {@code HashMapWrapper} plus one {@code MapJoinTableContainerSerDe} per position in
 * {@code order}.
 *
 * <p>NOTE(review): this snippet appears to have lost its closing braces and the statements that
 * followed several {@code if (...) {} headers during extraction. Confirm the control flow
 * against the upstream source.
 *
 * @param hconf configuration read for session and hash-table settings; also stored in
 *     {@code this.hconf}
 * @throws HiveException wraps any {@code SerDeException} raised while creating the SerDes
 */
protected void initializeOp(Configuration hconf) throws HiveException {
boolean isSilent = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVESESSIONSILENT);
console = new LogHelper(LOG, isSilent);
memoryExhaustionChecker = MemoryExhaustionCheckerFactory.getChecker(console, hconf, conf);
// for small tables only; so get the big table position first
posBigTableAlias = conf.getPosBigTable();
order = conf.getTagOrder();
// initialize some variables, which used to be initialized in CommonJoinOperator
this.hconf = hconf;
filterMaps = conf.getFilterMap();
int tagLen = conf.getTagLength();
// process join keys
joinKeys = new List[tagLen];
JoinUtil.populateJoinKeyValue(joinKeys, conf.getKeys(), posBigTableAlias, hconf);
joinKeysObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinKeys, inputObjInspectors, posBigTableAlias, tagLen);
// process join values
joinValues = new List[tagLen];
JoinUtil.populateJoinKeyValue(joinValues, conf.getExprs(), posBigTableAlias, hconf);
joinValuesObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinValues, inputObjInspectors, posBigTableAlias, tagLen);
// process join filters
joinFilters = new List[tagLen];
JoinUtil.populateJoinKeyValue(joinFilters, conf.getFilters(), posBigTableAlias, hconf);
joinFilterObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinFilters, inputObjInspectors, posBigTableAlias, tagLen);
// Outer joins only: walk every alias and, where a filter map exists, copy its value inspectors.
if (!conf.isNoOuterJoin()) {
for (Byte alias : order) {
// NOTE(review): the statement guarded by this check is missing from the snippet; presumably
// the big-table alias and aliases without values are skipped -- confirm.
if (alias == posBigTableAlias || joinValues[alias] == null) {
List<ObjectInspector> rcOIs = joinValuesObjectInspectors[alias];
if (filterMaps != null && filterMaps[alias] != null) {
// for each alias, add object inspector for filter tag as the last element
// NOTE(review): only the defensive copy is visible; the statement that appends the
// filter-tag inspector is missing from the snippet.
rcOIs = new ArrayList<ObjectInspector>(rcOIs);
mapJoinTables = new MapJoinPersistableTableContainer[tagLen];
mapJoinTableSerdes = new MapJoinTableContainerSerDe[tagLen];
// A non-positive configured scale is replaced by 1.
hashTableScale = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVEHASHTABLESCALE);
if (hashTableScale <= 0) {
hashTableScale = 1;
try {
// A single key SerDe/context is created once and shared by every table's container SerDe.
TableDesc keyTableDesc = conf.getKeyTblDesc();
AbstractSerDe keySerDe = (AbstractSerDe) ReflectionUtils.newInstance(keyTableDesc.getSerDeClass(), null);
keySerDe.initialize(null, keyTableDesc.getProperties(), null);
MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerDe, false);
for (Byte pos : order) {
// NOTE(review): the statement guarded by this check is missing from the snippet; presumably
// the big-table position is skipped so containers are built for small tables only -- confirm.
if (pos == posBigTableAlias) {
mapJoinTables[pos] = new HashMapWrapper(hconf, -1);
// The value SerDe comes from the filtered value table descriptor for this position.
TableDesc valueTableDesc = conf.getValueTblFilteredDescs().get(pos);
AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtils.newInstance(valueTableDesc.getSerDeClass(), null);
valueSerDe.initialize(null, valueTableDesc.getProperties(), null);
mapJoinTableSerdes[pos] = new MapJoinTableContainerSerDe(keyContext, new MapJoinObjectSerDeContext(valueSerDe, hasFilter(pos)));
} catch (SerDeException e) {
// Rethrown as HiveException with the original exception kept as the cause.
throw new HiveException(e);
Use of org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper in the Apache Hive project.
Class HashTableLoader, method load.
/**
 * Builds one hash-table container per input position and stores it in {@code mapJoinTables}.
 *
 * <p>In the visible code the method:
 * <ol>
 *   <li>works out a memory budget ({@code totalMapJoinMemory}) from the descriptor, falling back
 *       to a HiveConf threshold and limiting it relative to the JVM's maximum heap;</li>
 *   <li>for hybrid grace hash join with more than two tables, sizes per-table memory and a
 *       partition count based on the largest table;</li>
 *   <li>decides whether memory monitoring applies;</li>
 *   <li>for each position, chooses a container type ({@code MapJoinBytesTableContainer},
 *       {@code HybridHashTableContainer} or {@code HashMapWrapper}), feeds it rows from a
 *       {@code KeyValueReader}, and assigns it to {@code mapJoinTables[pos]}.</li>
 * </ol>
 *
 * <p>NOTE(review): this snippet is visibly damaged by extraction. Closing braces are gone,
 * several logging calls have lost their {@code LOG.xxx(} prefix, the {@code while} condition is
 * missing, one {@code try} block is empty, and the statements guarded by the
 * {@code pos == desc.getPosBigTable()} checks are absent. The comments below describe only what
 * is visible; confirm control flow against the upstream source.
 *
 * @param mapJoinTables output array; each processed position receives its loaded container
 * @param mapJoinTableSerdes per-position SerDes supplying the key and value contexts
 * @throws HiveException wraps exceptions raised while sizing partitions, preparing the input or
 *     loading rows; also thrown directly for an unsupported key type after the first key
 */
public void load(MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
Map<Integer, String> parentToInput = desc.getParentToInput();
Map<Integer, Long> parentKeyCounts = desc.getParentKeyCounts();
// A join whose first key-expression list is empty is treated as a cross product.
boolean isCrossProduct = false;
List<ExprNodeDesc> joinExprs = desc.getKeys().values().iterator().next();
if (joinExprs.size() == 0) {
isCrossProduct = true;
boolean useOptimizedTables = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
boolean useHybridGraceHashJoin = desc.isHybridHashJoin();
boolean isFirstKey = true;
// Get the total available memory from memory manager
// NOTE(review): the trailing string on the next line looks like the remains of a logging call.
long totalMapJoinMemory = desc.getMemoryNeeded();"Memory manager allocates " + totalMapJoinMemory + " bytes for the loading hashtable.");
// Fall back to the configured no-conditional-task threshold when no memory figure is supplied.
if (totalMapJoinMemory <= 0) {
totalMapJoinMemory = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
long processMaxMemory = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
if (totalMapJoinMemory > processMaxMemory) {
float hashtableMemoryUsage = HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE);
LOG.warn("totalMapJoinMemory value of " + totalMapJoinMemory + " is greater than the max memory size of " + processMaxMemory);
// Don't want to attempt to grab more memory than we have available .. percentage is a bit arbitrary
totalMapJoinMemory = (long) (processMaxMemory * hashtableMemoryUsage);
// Only applicable to n-way Hybrid Grace Hash Join
HybridHashTableConf nwayConf = null;
long totalSize = 0;
// position of the biggest small table
int biggest = 0;
Map<Integer, Long> tableMemorySizes = null;
// "n-way" here means hybrid grace hash join with more than two tables in mapJoinTables.
if (useHybridGraceHashJoin && mapJoinTables.length > 2) {
// Create a Conf for n-way HybridHashTableContainers
nwayConf = new HybridHashTableConf();"N-way join: " + (mapJoinTables.length - 1) + " small tables.");
// Find the biggest small table; also calculate total data size of all small tables
// the size of the biggest small table
long maxSize = Long.MIN_VALUE;
for (int pos = 0; pos < mapJoinTables.length; pos++) {
// NOTE(review): the statement guarded by this check is missing from the snippet; presumably
// the big-table position is skipped -- confirm.
if (pos == desc.getPosBigTable()) {
long smallTableSize = desc.getParentDataSizes().get(pos);
totalSize += smallTableSize;
if (maxSize < smallTableSize) {
maxSize = smallTableSize;
biggest = pos;
tableMemorySizes = divideHybridHashTableMemory(mapJoinTables, desc, totalSize, totalMapJoinMemory);
// Using biggest small table, calculate number of partitions to create for each small table
long memory = tableMemorySizes.get(biggest);
int numPartitions = 0;
try {
numPartitions = HybridHashTableContainer.calcNumPartitions(memory, maxSize, HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS), HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE));
} catch (IOException e) {
throw new HiveException(e);
// NOTE(review): numPartitions is not used again in the visible code; upstream presumably
// stores it on nwayConf -- confirm.
MemoryMonitorInfo memoryMonitorInfo = desc.getMemoryMonitorInfo();
boolean doMemCheck = false;
long effectiveThreshold = 0;
if (memoryMonitorInfo != null) {
effectiveThreshold = memoryMonitorInfo.getEffectiveThreshold(desc.getMaxMemoryAvailable());
// Flip the flag at runtime in case if we are running outside of LLAP
if (!LlapDaemonInfo.INSTANCE.isLlap()) {
if (memoryMonitorInfo.doMemoryMonitoring()) {
doMemCheck = true;"Memory monitoring for hash table loader enabled. {}", memoryMonitorInfo);
if (!doMemCheck) {"Not doing hash table memory monitoring. {}", memoryMonitorInfo);
// Main loading loop: one iteration per table position.
for (int pos = 0; pos < mapJoinTables.length; pos++) {
// NOTE(review): the statement guarded by this check is missing from the snippet; presumably
// the big-table position is skipped -- confirm.
if (pos == desc.getPosBigTable()) {
long numEntries = 0;
String inputName = parentToInput.get(pos);
LogicalInput input = tezContext.getInput(inputName);
// NOTE(review): this try block is empty in the snippet; its body (presumably starting the
// input and waiting for it to be ready) was lost -- confirm.
try {
} catch (Exception e) {
throw new HiveException(e);
try {
KeyValueReader kvReader = (KeyValueReader) input.getReader();
MapJoinObjectSerDeContext keyCtx = mapJoinTableSerdes[pos].getKeyContext(), valCtx = mapJoinTableSerdes[pos].getValueContext();
// Optimized tables need a supported key type. An unsupported key on the first table turns the
// optimization off; on a later table it raises a HiveException.
if (useOptimizedTables) {
ObjectInspector keyOi = keyCtx.getSerDe().getObjectInspector();
if (!MapJoinBytesTableContainer.isSupportedKey(keyOi)) {
if (isFirstKey) {
useOptimizedTables = false;"Not using optimized hash table. " + "Only a subset of mapjoin keys is supported. Unsupported key: ", keyOi));
} else {
throw new HiveException(describeOi("Only a subset of mapjoin keys is supported. Unsupported key: ", keyOi));
isFirstKey = false;
// Key-count estimate: the larger of the descriptor's estimate (-1 when absent) and the
// APPROXIMATE_INPUT_RECORDS counter (-1 when it cannot be read).
Long keyCountObj = parentKeyCounts.get(pos);
long estKeyCount = (keyCountObj == null) ? -1 : keyCountObj;
long inputRecords = -1;
try {
// TODO : Need to use class instead of string.
inputRecords = ((AbstractLogicalInput) input).getContext().getCounters().findCounter("org.apache.tez.common.counters.TaskCounter", "APPROXIMATE_INPUT_RECORDS").getValue();
} catch (Exception e) {
// Best effort: a failed counter lookup is logged at debug level and inputRecords stays -1.
LOG.debug("Failed to get value for counter APPROXIMATE_INPUT_RECORDS", e);
long keyCount = Math.max(estKeyCount, inputRecords);
// Memory for a hybrid container: the per-table share for n-way joins, the whole budget otherwise.
long memory = 0;
if (useHybridGraceHashJoin) {
if (mapJoinTables.length > 2) {
memory = tableMemorySizes.get(pos);
} else {
// binary join
memory = totalMapJoinMemory;
// Container choice:
//   optimized and (not hybrid, or cross product) -> MapJoinBytesTableContainer
//   optimized and hybrid                         -> HybridHashTableContainer
//   not optimized                                -> HashMapWrapper
MapJoinTableContainer tableContainer;
if (useOptimizedTables) {
if (!useHybridGraceHashJoin || isCrossProduct) {
tableContainer = new MapJoinBytesTableContainer(hconf, valCtx, keyCount, 0);
} else {
tableContainer = new HybridHashTableContainer(hconf, keyCount, memory, desc.getParentDataSizes().get(pos), nwayConf);
} else {
tableContainer = new HashMapWrapper(hconf, keyCount);
}"Loading hash table for input: {} cacheKey: {} tableContainer: {} smallTablePos: {} " + "estKeyCount : {} keyCount : {}", inputName, cacheKey, tableContainer.getClass().getSimpleName(), pos, estKeyCount, keyCount);
tableContainer.setSerde(keyCtx, valCtx);
long startTime = System.currentTimeMillis();
// NOTE(review): the loop condition was lost in extraction; presumably it advances kvReader.
// No increment of numEntries is visible either -- confirm against upstream.
while ( {
tableContainer.putRow((Writable) kvReader.getCurrentKey(), (Writable) kvReader.getCurrentValue());
// When monitoring is on, check estimated memory whenever numEntries is a multiple of the
// configured check interval, and abort loading once the effective threshold is exceeded.
if (doMemCheck && (numEntries % memoryMonitorInfo.getMemoryCheckInterval() == 0)) {
final long estMemUsage = tableContainer.getEstimatedMemorySize();
if (estMemUsage > effectiveThreshold) {
String msg = "Hash table loading exceeded memory limits for input: " + inputName + " numEntries: " + numEntries + " estimatedMemoryUsage: " + estMemUsage + " effectiveThreshold: " + effectiveThreshold + " memoryMonitorInfo: " + memoryMonitorInfo;
throw new MapJoinMemoryExhaustionError(msg);
} else {"Checking hash table loader memory usage for input: {} numEntries: {} " + "estimatedMemoryUsage: {} effectiveThreshold: {}", inputName, numEntries, estMemUsage, effectiveThreshold);
// Elapsed wall-clock load time in milliseconds.
long delta = System.currentTimeMillis() - startTime;
// Store the container in the caller's array.
mapJoinTables[pos] = tableContainer;
if (doMemCheck) {"Finished loading hash table for input: {} cacheKey: {} numEntries: {} " + "estimatedMemoryUsage: {} Load Time : {} ", inputName, cacheKey, numEntries, tableContainer.getEstimatedMemorySize(), delta);
} else {"Finished loading hash table for input: {} cacheKey: {} numEntries: {} Load Time : {} ", inputName, cacheKey, numEntries, delta);
} catch (Exception e) {
// NOTE(review): this catch-all would also wrap any HiveException thrown inside the try above.
throw new HiveException(e);