Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer in project hive by apache.
The class MapJoinOperator, method reloadHashTable.
/**
 * Reload the hashtable from a hash partition.
 * It can have two steps:
 * 1) Deserialize a serialized hash table, and
 * 2) Merge every key/value pair from the small table container into the hash table.
 * @param pos position of the small table
 * @param partitionId the partition of the small table to be reloaded from
 * @throws IOException
 * @throws HiveException
 * @throws SerDeException
 */
protected void reloadHashTable(byte pos, int partitionId)
    throws IOException, HiveException, SerDeException, ClassNotFoundException {
  HybridHashTableContainer container = (HybridHashTableContainer) mapJoinTables[pos];
  HashPartition partition = container.getHashPartitions()[partitionId];

  // Merge the sidefile into the newly created hash table
  // This is where the spilling may happen again
  LOG.info("Going to restore sidefile...");
  KeyValueContainer kvContainer = partition.getSidefileKVContainer();
  int rowCount = kvContainer.size();
  LOG.info("Hybrid Grace Hash Join: Number of rows restored from KeyValueContainer: " + kvContainer.size());

  // We're sure this part is smaller than memory limit
  if (rowCount <= 0) {
    // Since rowCount is used later to instantiate a BytesBytesMultiHashMap as the
    // initialCapacity, which cannot be 0, we provide a reasonable positive number here.
    rowCount = 1024 * 1024;
  }
  LOG.info("Going to restore hashmap...");
  BytesBytesMultiHashMap restoredHashMap = partition.getHashMapFromDisk(rowCount);
  rowCount += restoredHashMap.getNumValues();
  LOG.info("Hybrid Grace Hash Join: Deserializing spilled hash partition...");
  LOG.info("Hybrid Grace Hash Join: Number of rows in hashmap: " + rowCount);

  // The size of the deserialized partition shouldn't exceed half of the memory limit
  if (rowCount * container.getTableRowSize() >= container.getMemoryThreshold() / 2) {
    LOG.warn("Hybrid Grace Hash Join: Hash table cannot be reloaded since it"
        + " will be greater than memory limit. Recursive spilling is currently not supported");
  }

  KeyValueHelper writeHelper = container.getWriteHelper();
  while (kvContainer.hasNext()) {
    ObjectPair<HiveKey, BytesWritable> pair = kvContainer.next();
    Writable key = pair.getFirst();
    Writable val = pair.getSecond();
    writeHelper.setKeyValue(key, val);
    restoredHashMap.put(writeHelper, -1);
  }
  container.setTotalInMemRowCount(container.getTotalInMemRowCount() + restoredHashMap.getNumValues());
  kvContainer.clear();

  spilledMapJoinTables[pos] = new MapJoinBytesTableContainer(restoredHashMap);
  spilledMapJoinTables[pos].setInternalValueOi(container.getInternalValueOi());
  spilledMapJoinTables[pos].setSortableSortOrders(container.getSortableSortOrders());
  spilledMapJoinTables[pos].setNullMarkers(container.getNullMarkers());
  spilledMapJoinTables[pos].setNotNullMarkers(container.getNotNullMarkers());
}
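To make the two reload steps from the Javadoc concrete without the surrounding Hive machinery, the following is a minimal, self-contained sketch: deserialize a spilled hash table, then merge the side-file key/value pairs into it. The class name ReloadSketch and the helper readHashMapFromDisk are illustrative stand-ins invented for this example; the real code works with HashPartition, KeyValueContainer, and BytesBytesMultiHashMap instead of plain Java collections.

// Hypothetical, simplified sketch of the deserialize-then-merge reload described above.
// Not Hive's API; readHashMapFromDisk stands in for HashPartition.getHashMapFromDisk.
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ReloadSketch {

  // Step 1 stand-in: pretend this deserializes a spilled hash table from disk.
  static Map<String, List<String>> readHashMapFromDisk(int initialCapacity) {
    return new HashMap<>(Math.max(initialCapacity, 16));
  }

  public static void main(String[] args) {
    // Side-file rows that were spilled after the partition itself was written out.
    List<String[]> sideFile = new ArrayList<>();
    sideFile.add(new String[] {"k1", "v1"});
    sideFile.add(new String[] {"k1", "v2"});
    sideFile.add(new String[] {"k2", "v3"});

    // Step 1: deserialize the on-disk hash table, sizing it from the side-file row count
    // (never zero, mirroring the 1024 * 1024 fallback in reloadHashTable).
    int rowCount = sideFile.isEmpty() ? 1024 * 1024 : sideFile.size();
    Map<String, List<String>> restored = readHashMapFromDisk(rowCount);

    // Step 2: merge every side-file key/value pair into the restored table.
    for (String[] pair : sideFile) {
      restored.computeIfAbsent(pair[0], k -> new ArrayList<>()).add(pair[1]);
    }
    System.out.println("Keys in reloaded table: " + restored.size());
  }
}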
Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer in project hive by apache.
The class HashTableLoader, method load.
@Override
public void load(MapJoinTableContainer[] mapJoinTables,
    MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
  Map<Integer, String> parentToInput = desc.getParentToInput();
  Map<Integer, Long> parentKeyCounts = desc.getParentKeyCounts();

  boolean isCrossProduct = false;
  List<ExprNodeDesc> joinExprs = desc.getKeys().values().iterator().next();
  if (joinExprs.size() == 0) {
    isCrossProduct = true;
  }

  boolean useOptimizedTables = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
  boolean useHybridGraceHashJoin = desc.isHybridHashJoin();
  boolean isFirstKey = true;

  // Get the total available memory from memory manager
  long totalMapJoinMemory = desc.getMemoryNeeded();
  LOG.info("Memory manager allocates " + totalMapJoinMemory + " bytes for the loading hashtable.");
  if (totalMapJoinMemory <= 0) {
    totalMapJoinMemory = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
  }

  long processMaxMemory = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
  if (totalMapJoinMemory > processMaxMemory) {
    float hashtableMemoryUsage = HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE);
    LOG.warn("totalMapJoinMemory value of " + totalMapJoinMemory
        + " is greater than the max memory size of " + processMaxMemory);
    // Don't want to attempt to grab more memory than we have available .. percentage is a bit arbitrary
    totalMapJoinMemory = (long) (processMaxMemory * hashtableMemoryUsage);
  }

  // Only applicable to n-way Hybrid Grace Hash Join
  HybridHashTableConf nwayConf = null;
  long totalSize = 0;
  // position of the biggest small table
  int biggest = 0;
  Map<Integer, Long> tableMemorySizes = null;
  if (useHybridGraceHashJoin && mapJoinTables.length > 2) {
    // Create a Conf for n-way HybridHashTableContainers
    nwayConf = new HybridHashTableConf();
    LOG.info("N-way join: " + (mapJoinTables.length - 1) + " small tables.");

    // Find the biggest small table; also calculate total data size of all small tables
    long maxSize = Long.MIN_VALUE; // the size of the biggest small table
    for (int pos = 0; pos < mapJoinTables.length; pos++) {
      if (pos == desc.getPosBigTable()) {
        continue;
      }
      long smallTableSize = desc.getParentDataSizes().get(pos);
      totalSize += smallTableSize;
      if (maxSize < smallTableSize) {
        maxSize = smallTableSize;
        biggest = pos;
      }
    }

    tableMemorySizes = divideHybridHashTableMemory(mapJoinTables, desc, totalSize, totalMapJoinMemory);
    // Using biggest small table, calculate number of partitions to create for each small table
    long memory = tableMemorySizes.get(biggest);
    int numPartitions = 0;
    try {
      numPartitions = HybridHashTableContainer.calcNumPartitions(memory, maxSize,
          HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS),
          HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE));
    } catch (IOException e) {
      throw new HiveException(e);
    }
    nwayConf.setNumberOfPartitions(numPartitions);
  }

  for (int pos = 0; pos < mapJoinTables.length; pos++) {
    if (pos == desc.getPosBigTable()) {
      continue;
    }
    String inputName = parentToInput.get(pos);
    LogicalInput input = tezContext.getInput(inputName);
    try {
      input.start();
      tezContext.getTezProcessorContext().waitForAnyInputReady(Collections.<Input>singletonList(input));
    } catch (Exception e) {
      throw new HiveException(e);
    }
    try {
      KeyValueReader kvReader = (KeyValueReader) input.getReader();
      MapJoinObjectSerDeContext keyCtx = mapJoinTableSerdes[pos].getKeyContext(),
          valCtx = mapJoinTableSerdes[pos].getValueContext();
      if (useOptimizedTables) {
        ObjectInspector keyOi = keyCtx.getSerDe().getObjectInspector();
        if (!MapJoinBytesTableContainer.isSupportedKey(keyOi)) {
          if (isFirstKey) {
            useOptimizedTables = false;
            LOG.info(describeOi("Not using optimized hash table. "
                + "Only a subset of mapjoin keys is supported. Unsupported key: ", keyOi));
          } else {
            throw new HiveException(describeOi(
                "Only a subset of mapjoin keys is supported. Unsupported key: ", keyOi));
          }
        }
      }
      isFirstKey = false;
      Long keyCountObj = parentKeyCounts.get(pos);
      long keyCount = (keyCountObj == null) ? -1 : keyCountObj.longValue();

      long memory = 0;
      if (useHybridGraceHashJoin) {
        if (mapJoinTables.length > 2) {
          memory = tableMemorySizes.get(pos);
        } else {
          // binary join
          memory = totalMapJoinMemory;
        }
      }

      MapJoinTableContainer tableContainer;
      if (useOptimizedTables) {
        if (!useHybridGraceHashJoin || isCrossProduct) {
          tableContainer = new MapJoinBytesTableContainer(hconf, valCtx, keyCount, 0);
        } else {
          tableContainer = new HybridHashTableContainer(hconf, keyCount, memory,
              desc.getParentDataSizes().get(pos), nwayConf);
        }
      } else {
        tableContainer = new HashMapWrapper(hconf, keyCount);
      }

      LOG.info("Using tableContainer " + tableContainer.getClass().getSimpleName());
      tableContainer.setSerde(keyCtx, valCtx);
      while (kvReader.next()) {
        tableContainer.putRow((Writable) kvReader.getCurrentKey(), (Writable) kvReader.getCurrentValue());
      }
      tableContainer.seal();
      LOG.info("Finished loading hashtable using " + tableContainer.getClass()
          + ". Small table position: " + pos);
      mapJoinTables[pos] = tableContainer;
    } catch (Exception e) {
      throw new HiveException(e);
    }
  }
}
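The load method relies on divideHybridHashTableMemory to split the total mapjoin memory across the small tables, but that helper is not shown in this snippet. Below is a plausible, simplified sketch assuming the split is proportional to each small table's estimated data size; the class and method names are hypothetical and the actual Hive implementation may differ.

// Hypothetical sketch: divide the total mapjoin memory among small tables
// in proportion to their data sizes. Not the actual Hive helper.
import java.util.HashMap;
import java.util.Map;

public class MemoryDivisionSketch {

  static Map<Integer, Long> divideMemoryProportionally(Map<Integer, Long> smallTableSizes,
      long totalSize, long totalMapJoinMemory) {
    Map<Integer, Long> memorySizes = new HashMap<>();
    for (Map.Entry<Integer, Long> e : smallTableSizes.entrySet()) {
      double share = totalSize == 0 ? 0d : (double) e.getValue() / totalSize;
      memorySizes.put(e.getKey(), (long) (share * totalMapJoinMemory));
    }
    return memorySizes;
  }

  public static void main(String[] args) {
    Map<Integer, Long> sizes = new HashMap<>();
    sizes.put(0, 40L * 1024 * 1024); // small table at position 0: 40 MB
    sizes.put(2, 10L * 1024 * 1024); // small table at position 2: 10 MB
    long totalSize = 50L * 1024 * 1024;
    long totalMapJoinMemory = 100L * 1024 * 1024;
    // Prints {0=83886080, 2=20971520}, i.e. 80 MB and 20 MB respectively.
    System.out.println(divideMemoryProportionally(sizes, totalSize, totalMapJoinMemory));
  }
}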