Search in sources:

Example 1 with MapJoinBytesTableContainer

Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer in project hive by apache.

From the class MapJoinOperator, the method reloadHashTable:

/**
   * Reload hashtable from the hash partition.
   * It can have two steps:
   * 1) Deserialize a serialized hash table, and
   * 2) Merge every key/value pair from small table container into the hash table
   * @param pos position of small table
   * @param partitionId the partition of the small table to reload from
   * @throws IOException
   * @throws HiveException
   * @throws SerDeException
   * @throws ClassNotFoundException
   */
protected void reloadHashTable(byte pos, int partitionId) throws IOException, HiveException, SerDeException, ClassNotFoundException {
    HybridHashTableContainer container = (HybridHashTableContainer) mapJoinTables[pos];
    HashPartition partition = container.getHashPartitions()[partitionId];
    // Merge the sidefile into the newly created hash table
    // This is where the spilling may happen again
    LOG.info("Going to restore sidefile...");
    KeyValueContainer kvContainer = partition.getSidefileKVContainer();
    int rowCount = kvContainer.size();
    LOG.info("Hybrid Grace Hash Join: Number of rows restored from KeyValueContainer: " + kvContainer.size());
    // We're sure this part is smaller than the memory limit
    if (rowCount <= 0) {
        // Since rowCount is used later to instantiate a BytesBytesMultiHashMap
        // as the initialCapacity, which cannot be 0, we provide a reasonable
        // positive number here
        rowCount = 1024 * 1024;
    }
    LOG.info("Going to restore hashmap...");
    BytesBytesMultiHashMap restoredHashMap = partition.getHashMapFromDisk(rowCount);
    rowCount += restoredHashMap.getNumValues();
    LOG.info("Hybrid Grace Hash Join: Deserializing spilled hash partition...");
    LOG.info("Hybrid Grace Hash Join: Number of rows in hashmap: " + rowCount);
    // The size of deserialized partition shouldn't exceed half of memory limit
    if (rowCount * container.getTableRowSize() >= container.getMemoryThreshold() / 2) {
        LOG.warn("Hybrid Grace Hash Join: Hash table cannot be reloaded since it" + " will be greater than memory limit. Recursive spilling is currently not supported");
    }
    KeyValueHelper writeHelper = container.getWriteHelper();
    while (kvContainer.hasNext()) {
        ObjectPair<HiveKey, BytesWritable> pair = kvContainer.next();
        Writable key = pair.getFirst();
        Writable val = pair.getSecond();
        writeHelper.setKeyValue(key, val);
        restoredHashMap.put(writeHelper, -1);
    }
    container.setTotalInMemRowCount(container.getTotalInMemRowCount() + restoredHashMap.getNumValues());
    kvContainer.clear();
    spilledMapJoinTables[pos] = new MapJoinBytesTableContainer(restoredHashMap);
    spilledMapJoinTables[pos].setInternalValueOi(container.getInternalValueOi());
    spilledMapJoinTables[pos].setSortableSortOrders(container.getSortableSortOrders());
    spilledMapJoinTables[pos].setNullMarkers(container.getNullMarkers());
    spilledMapJoinTables[pos].setNotNullMarkers(container.getNotNullMarkers());
}
Also used: HiveKey (org.apache.hadoop.hive.ql.io.HiveKey), KeyValueHelper (org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer.KeyValueHelper), MapJoinBytesTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer), Writable (org.apache.hadoop.io.Writable), BytesWritable (org.apache.hadoop.io.BytesWritable), HashPartition (org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer.HashPartition), HybridHashTableContainer (org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer), KeyValueContainer (org.apache.hadoop.hive.ql.exec.persistence.KeyValueContainer), BytesBytesMultiHashMap (org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap)
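For context, a minimal sketch of how a caller might drive this reload path over the spilled partitions of each small table is shown below. The loop shape and the big-table replay step are assumptions about the surrounding call site rather than verbatim Hive code; isHashMapOnDisk() is assumed to report whether a partition's hash map was spilled to disk, and posBigTable is an assumed field holding the big table's position.

// Illustrative driver loop (assumed call-site shape, not verbatim Hive code).
// For every small table, reload each partition whose hash map spilled to disk,
// then re-process the big-table rows that were spilled against that partition.
// Handling of reloadHashTable's checked exceptions is elided for brevity.
for (byte pos = 0; pos < mapJoinTables.length; pos++) {
    if (pos == posBigTable) { // posBigTable: position of the big table (assumed field)
        continue;
    }
    HybridHashTableContainer container = (HybridHashTableContainer) mapJoinTables[pos];
    HashPartition[] partitions = container.getHashPartitions();
    for (int partitionId = 0; partitionId < partitions.length; partitionId++) {
        if (partitions[partitionId].isHashMapOnDisk()) { // assumed spill check
            reloadHashTable(pos, partitionId); // deserialize hash map + merge sidefile
            // ... replay the matching spilled big-table rows against
            // spilledMapJoinTables[pos] here ...
        }
    }
}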

Example 2 with MapJoinBytesTableContainer

Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer in project hive by apache.

From the class HashTableLoader, the method load:

@Override
public void load(MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
    Map<Integer, String> parentToInput = desc.getParentToInput();
    Map<Integer, Long> parentKeyCounts = desc.getParentKeyCounts();
    // A map join with no key expressions degenerates to a cross product
    List<ExprNodeDesc> joinExprs = desc.getKeys().values().iterator().next();
    boolean isCrossProduct = joinExprs.isEmpty();
    boolean useOptimizedTables = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
    boolean useHybridGraceHashJoin = desc.isHybridHashJoin();
    boolean isFirstKey = true;
    // Get the total available memory from memory manager
    long totalMapJoinMemory = desc.getMemoryNeeded();
    LOG.info("Memory manager allocates " + totalMapJoinMemory + " bytes for the loading hashtable.");
    if (totalMapJoinMemory <= 0) {
        totalMapJoinMemory = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
    }
    long processMaxMemory = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
    if (totalMapJoinMemory > processMaxMemory) {
        float hashtableMemoryUsage = HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE);
        LOG.warn("totalMapJoinMemory value of " + totalMapJoinMemory + " is greater than the max memory size of " + processMaxMemory);
        // Don't want to attempt to grab more memory than we have available; the percentage is a bit arbitrary
        totalMapJoinMemory = (long) (processMaxMemory * hashtableMemoryUsage);
    }
    // Only applicable to n-way Hybrid Grace Hash Join
    HybridHashTableConf nwayConf = null;
    long totalSize = 0;
    // position of the biggest small table
    int biggest = 0;
    Map<Integer, Long> tableMemorySizes = null;
    if (useHybridGraceHashJoin && mapJoinTables.length > 2) {
        // Create a Conf for n-way HybridHashTableContainers
        nwayConf = new HybridHashTableConf();
        LOG.info("N-way join: " + (mapJoinTables.length - 1) + " small tables.");
        // Find the biggest small table; also calculate total data size of all small tables
        // the size of the biggest small table
        long maxSize = Long.MIN_VALUE;
        for (int pos = 0; pos < mapJoinTables.length; pos++) {
            if (pos == desc.getPosBigTable()) {
                continue;
            }
            long smallTableSize = desc.getParentDataSizes().get(pos);
            totalSize += smallTableSize;
            if (maxSize < smallTableSize) {
                maxSize = smallTableSize;
                biggest = pos;
            }
        }
        tableMemorySizes = divideHybridHashTableMemory(mapJoinTables, desc, totalSize, totalMapJoinMemory);
        // Using biggest small table, calculate number of partitions to create for each small table
        long memory = tableMemorySizes.get(biggest);
        int numPartitions = 0;
        try {
            numPartitions = HybridHashTableContainer.calcNumPartitions(memory, maxSize, HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS), HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE));
        } catch (IOException e) {
            throw new HiveException(e);
        }
        nwayConf.setNumberOfPartitions(numPartitions);
    }
    for (int pos = 0; pos < mapJoinTables.length; pos++) {
        if (pos == desc.getPosBigTable()) {
            continue;
        }
        String inputName = parentToInput.get(pos);
        LogicalInput input = tezContext.getInput(inputName);
        try {
            input.start();
            tezContext.getTezProcessorContext().waitForAnyInputReady(Collections.<Input>singletonList(input));
        } catch (Exception e) {
            throw new HiveException(e);
        }
        try {
            KeyValueReader kvReader = (KeyValueReader) input.getReader();
            MapJoinObjectSerDeContext keyCtx = mapJoinTableSerdes[pos].getKeyContext();
            MapJoinObjectSerDeContext valCtx = mapJoinTableSerdes[pos].getValueContext();
            if (useOptimizedTables) {
                ObjectInspector keyOi = keyCtx.getSerDe().getObjectInspector();
                if (!MapJoinBytesTableContainer.isSupportedKey(keyOi)) {
                    if (isFirstKey) {
                        useOptimizedTables = false;
                        LOG.info(describeOi("Not using optimized hash table. " + "Only a subset of mapjoin keys is supported. Unsupported key: ", keyOi));
                    } else {
                        throw new HiveException(describeOi("Only a subset of mapjoin keys is supported. Unsupported key: ", keyOi));
                    }
                }
            }
            isFirstKey = false;
            Long keyCountObj = parentKeyCounts.get(pos);
            long keyCount = (keyCountObj == null) ? -1 : keyCountObj.longValue();
            long memory = 0;
            if (useHybridGraceHashJoin) {
                if (mapJoinTables.length > 2) {
                    memory = tableMemorySizes.get(pos);
                } else {
                    // binary join
                    memory = totalMapJoinMemory;
                }
            }
            MapJoinTableContainer tableContainer;
            if (useOptimizedTables) {
                if (!useHybridGraceHashJoin || isCrossProduct) {
                    tableContainer = new MapJoinBytesTableContainer(hconf, valCtx, keyCount, 0);
                } else {
                    tableContainer = new HybridHashTableContainer(hconf, keyCount, memory, desc.getParentDataSizes().get(pos), nwayConf);
                }
            } else {
                tableContainer = new HashMapWrapper(hconf, keyCount);
            }
            LOG.info("Using tableContainer " + tableContainer.getClass().getSimpleName());
            tableContainer.setSerde(keyCtx, valCtx);
            while (kvReader.next()) {
                tableContainer.putRow((Writable) kvReader.getCurrentKey(), (Writable) kvReader.getCurrentValue());
            }
            tableContainer.seal();
            LOG.info("Finished loading hashtable using " + tableContainer.getClass() + ". Small table position: " + pos);
            mapJoinTables[pos] = tableContainer;
        } catch (Exception e) {
            throw new HiveException(e);
        }
    }
}
Also used: PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), KeyValueReader (org.apache.tez.runtime.library.api.KeyValueReader), MapJoinBytesTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer), IOException (java.io.IOException), HashMapWrapper (org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper), HybridHashTableConf (org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableConf), MapJoinObjectSerDeContext (org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext), LogicalInput (org.apache.tez.runtime.api.LogicalInput), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), MapJoinTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer), HybridHashTableContainer (org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer)
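The helper divideHybridHashTableMemory called in load splits the total map-join memory among the small tables. Below is a minimal sketch of a proportional split matching the call site's signature, assuming desc is a MapJoinDesc. This is an illustrative reconstruction, not the verbatim Hive helper, which may differ in rounding and edge-case handling; it requires java.util.HashMap and java.util.Map.

// Sketch (assumption, not the verbatim Hive implementation): give each small
// table a share of totalMapJoinMemory proportional to its estimated data size.
private static Map<Integer, Long> divideHybridHashTableMemory(MapJoinTableContainer[] mapJoinTables, MapJoinDesc desc, long totalSize, long totalMapJoinMemory) {
    Map<Integer, Long> tableMemorySizes = new HashMap<Integer, Long>();
    for (int pos = 0; pos < mapJoinTables.length; pos++) {
        if (pos == desc.getPosBigTable()) {
            continue; // the big table is streamed, not loaded into memory
        }
        long smallTableSize = desc.getParentDataSizes().get(pos);
        double share = totalSize > 0 ? (double) smallTableSize / totalSize : 1.0;
        tableMemorySizes.put(pos, (long) (share * totalMapJoinMemory));
    }
    return tableMemorySizes;
}

In the call in load above, totalSize is the sum of the small tables' estimated data sizes, so each table receives memory in proportion to its share of the total.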

Aggregations

HybridHashTableContainer (org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer): 2
MapJoinBytesTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer): 2
IOException (java.io.IOException): 1
BytesBytesMultiHashMap (org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap): 1
HashMapWrapper (org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper): 1
HybridHashTableConf (org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableConf): 1
HashPartition (org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer.HashPartition): 1
KeyValueContainer (org.apache.hadoop.hive.ql.exec.persistence.KeyValueContainer): 1
KeyValueHelper (org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer.KeyValueHelper): 1
MapJoinObjectSerDeContext (org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext): 1
MapJoinTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer): 1
HiveKey (org.apache.hadoop.hive.ql.io.HiveKey): 1
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 1
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 1
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 1
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 1
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 1
BytesWritable (org.apache.hadoop.io.BytesWritable): 1
Writable (org.apache.hadoop.io.Writable): 1
LogicalInput (org.apache.tez.runtime.api.LogicalInput): 1
KeyValueReader (org.apache.tez.runtime.library.api.KeyValueReader): 1