Search in sources :

Example 1 with MapJoinMemoryExhaustionError

use of org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError in project hive by apache.

the class VectorMapJoinFastHashTableLoader method load.

@Override
public void load(MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
    Map<Integer, String> parentToInput = desc.getParentToInput();
    Map<Integer, Long> parentKeyCounts = desc.getParentKeyCounts();
    MemoryMonitorInfo memoryMonitorInfo = desc.getMemoryMonitorInfo();
    boolean doMemCheck = false;
    long effectiveThreshold = 0;
    if (memoryMonitorInfo != null) {
        effectiveThreshold = memoryMonitorInfo.getEffectiveThreshold(desc.getMaxMemoryAvailable());
        // Flip the flag at runtime in case if we are running outside of LLAP
        if (!LlapDaemonInfo.INSTANCE.isLlap()) {
            memoryMonitorInfo.setLlap(false);
        }
        if (memoryMonitorInfo.doMemoryMonitoring()) {
            doMemCheck = true;
            if (LOG.isInfoEnabled()) {
                LOG.info("Memory monitoring for hash table loader enabled. {}", memoryMonitorInfo);
            }
        }
    }
    if (!doMemCheck) {
        if (LOG.isInfoEnabled()) {
            LOG.info("Not doing hash table memory monitoring. {}", memoryMonitorInfo);
        }
    }
    for (int pos = 0; pos < mapJoinTables.length; pos++) {
        if (pos == desc.getPosBigTable()) {
            continue;
        }
        long numEntries = 0;
        String inputName = parentToInput.get(pos);
        LogicalInput input = tezContext.getInput(inputName);
        try {
            input.start();
            tezContext.getTezProcessorContext().waitForAnyInputReady(Collections.<Input>singletonList(input));
        } catch (Exception e) {
            throw new HiveException(e);
        }
        try {
            KeyValueReader kvReader = (KeyValueReader) input.getReader();
            Long keyCountObj = parentKeyCounts.get(pos);
            long keyCount = (keyCountObj == null) ? -1 : keyCountObj.longValue();
            VectorMapJoinFastTableContainer vectorMapJoinFastTableContainer = new VectorMapJoinFastTableContainer(desc, hconf, keyCount);
            LOG.info("Loading hash table for input: {} cacheKey: {} tableContainer: {} smallTablePos: {}", inputName, cacheKey, vectorMapJoinFastTableContainer.getClass().getSimpleName(), pos);
            // No SerDes here.
            vectorMapJoinFastTableContainer.setSerde(null, null);
            while (kvReader.next()) {
                vectorMapJoinFastTableContainer.putRow((BytesWritable) kvReader.getCurrentKey(), (BytesWritable) kvReader.getCurrentValue());
                numEntries++;
                if (doMemCheck && (numEntries % memoryMonitorInfo.getMemoryCheckInterval() == 0)) {
                    final long estMemUsage = vectorMapJoinFastTableContainer.getEstimatedMemorySize();
                    if (estMemUsage > effectiveThreshold) {
                        String msg = "Hash table loading exceeded memory limits for input: " + inputName + " numEntries: " + numEntries + " estimatedMemoryUsage: " + estMemUsage + " effectiveThreshold: " + effectiveThreshold + " memoryMonitorInfo: " + memoryMonitorInfo;
                        LOG.error(msg);
                        throw new MapJoinMemoryExhaustionError(msg);
                    } else {
                        if (LOG.isInfoEnabled()) {
                            LOG.info("Checking hash table loader memory usage for input: {} numEntries: {} " + "estimatedMemoryUsage: {} effectiveThreshold: {}", inputName, numEntries, estMemUsage, effectiveThreshold);
                        }
                    }
                }
            }
            vectorMapJoinFastTableContainer.seal();
            mapJoinTables[pos] = vectorMapJoinFastTableContainer;
            if (doMemCheck) {
                LOG.info("Finished loading hash table for input: {} cacheKey: {} numEntries: {} " + "estimatedMemoryUsage: {}", inputName, cacheKey, numEntries, vectorMapJoinFastTableContainer.getEstimatedMemorySize());
            } else {
                LOG.info("Finished loading hash table for input: {} cacheKey: {} numEntries: {}", inputName, cacheKey, numEntries);
            }
        } catch (IOException e) {
            throw new HiveException(e);
        } catch (SerDeException e) {
            throw new HiveException(e);
        } catch (Exception e) {
            throw new HiveException(e);
        }
    }
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) KeyValueReader(org.apache.tez.runtime.library.api.KeyValueReader) MapJoinMemoryExhaustionError(org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError) IOException(java.io.IOException) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) MemoryMonitorInfo(org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo) LogicalInput(org.apache.tez.runtime.api.LogicalInput) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Example 2 with MapJoinMemoryExhaustionError

use of org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError in project hive by apache.

the class HashTableLoader method load.

@Override
public void load(MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
    Map<Integer, String> parentToInput = desc.getParentToInput();
    Map<Integer, Long> parentKeyCounts = desc.getParentKeyCounts();
    boolean isCrossProduct = false;
    List<ExprNodeDesc> joinExprs = desc.getKeys().values().iterator().next();
    if (joinExprs.size() == 0) {
        isCrossProduct = true;
    }
    boolean useOptimizedTables = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
    boolean useHybridGraceHashJoin = desc.isHybridHashJoin();
    boolean isFirstKey = true;
    // Get the total available memory from memory manager
    long totalMapJoinMemory = desc.getMemoryNeeded();
    LOG.info("Memory manager allocates " + totalMapJoinMemory + " bytes for the loading hashtable.");
    if (totalMapJoinMemory <= 0) {
        totalMapJoinMemory = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
    }
    long processMaxMemory = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
    if (totalMapJoinMemory > processMaxMemory) {
        float hashtableMemoryUsage = HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE);
        LOG.warn("totalMapJoinMemory value of " + totalMapJoinMemory + " is greater than the max memory size of " + processMaxMemory);
        // Don't want to attempt to grab more memory than we have available .. percentage is a bit arbitrary
        totalMapJoinMemory = (long) (processMaxMemory * hashtableMemoryUsage);
    }
    // Only applicable to n-way Hybrid Grace Hash Join
    HybridHashTableConf nwayConf = null;
    long totalSize = 0;
    // position of the biggest small table
    int biggest = 0;
    Map<Integer, Long> tableMemorySizes = null;
    if (useHybridGraceHashJoin && mapJoinTables.length > 2) {
        // Create a Conf for n-way HybridHashTableContainers
        nwayConf = new HybridHashTableConf();
        LOG.info("N-way join: " + (mapJoinTables.length - 1) + " small tables.");
        // Find the biggest small table; also calculate total data size of all small tables
        // the size of the biggest small table
        long maxSize = Long.MIN_VALUE;
        for (int pos = 0; pos < mapJoinTables.length; pos++) {
            if (pos == desc.getPosBigTable()) {
                continue;
            }
            long smallTableSize = desc.getParentDataSizes().get(pos);
            totalSize += smallTableSize;
            if (maxSize < smallTableSize) {
                maxSize = smallTableSize;
                biggest = pos;
            }
        }
        tableMemorySizes = divideHybridHashTableMemory(mapJoinTables, desc, totalSize, totalMapJoinMemory);
        // Using biggest small table, calculate number of partitions to create for each small table
        long memory = tableMemorySizes.get(biggest);
        int numPartitions = 0;
        try {
            numPartitions = HybridHashTableContainer.calcNumPartitions(memory, maxSize, HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS), HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE));
        } catch (IOException e) {
            throw new HiveException(e);
        }
        nwayConf.setNumberOfPartitions(numPartitions);
    }
    MemoryMonitorInfo memoryMonitorInfo = desc.getMemoryMonitorInfo();
    boolean doMemCheck = false;
    long effectiveThreshold = 0;
    if (memoryMonitorInfo != null) {
        effectiveThreshold = memoryMonitorInfo.getEffectiveThreshold(desc.getMaxMemoryAvailable());
        // Flip the flag at runtime in case if we are running outside of LLAP
        if (!LlapDaemonInfo.INSTANCE.isLlap()) {
            memoryMonitorInfo.setLlap(false);
        }
        if (memoryMonitorInfo.doMemoryMonitoring()) {
            doMemCheck = true;
            if (LOG.isInfoEnabled()) {
                LOG.info("Memory monitoring for hash table loader enabled. {}", memoryMonitorInfo);
            }
        }
    }
    if (!doMemCheck) {
        if (LOG.isInfoEnabled()) {
            LOG.info("Not doing hash table memory monitoring. {}", memoryMonitorInfo);
        }
    }
    for (int pos = 0; pos < mapJoinTables.length; pos++) {
        if (pos == desc.getPosBigTable()) {
            continue;
        }
        long numEntries = 0;
        String inputName = parentToInput.get(pos);
        LogicalInput input = tezContext.getInput(inputName);
        try {
            input.start();
            tezContext.getTezProcessorContext().waitForAnyInputReady(Collections.<Input>singletonList(input));
        } catch (Exception e) {
            throw new HiveException(e);
        }
        try {
            KeyValueReader kvReader = (KeyValueReader) input.getReader();
            MapJoinObjectSerDeContext keyCtx = mapJoinTableSerdes[pos].getKeyContext(), valCtx = mapJoinTableSerdes[pos].getValueContext();
            if (useOptimizedTables) {
                ObjectInspector keyOi = keyCtx.getSerDe().getObjectInspector();
                if (!MapJoinBytesTableContainer.isSupportedKey(keyOi)) {
                    if (isFirstKey) {
                        useOptimizedTables = false;
                        LOG.info(describeOi("Not using optimized hash table. " + "Only a subset of mapjoin keys is supported. Unsupported key: ", keyOi));
                    } else {
                        throw new HiveException(describeOi("Only a subset of mapjoin keys is supported. Unsupported key: ", keyOi));
                    }
                }
            }
            isFirstKey = false;
            Long keyCountObj = parentKeyCounts.get(pos);
            long keyCount = (keyCountObj == null) ? -1 : keyCountObj.longValue();
            long memory = 0;
            if (useHybridGraceHashJoin) {
                if (mapJoinTables.length > 2) {
                    memory = tableMemorySizes.get(pos);
                } else {
                    // binary join
                    memory = totalMapJoinMemory;
                }
            }
            MapJoinTableContainer tableContainer;
            if (useOptimizedTables) {
                if (!useHybridGraceHashJoin || isCrossProduct) {
                    tableContainer = new MapJoinBytesTableContainer(hconf, valCtx, keyCount, 0);
                } else {
                    tableContainer = new HybridHashTableContainer(hconf, keyCount, memory, desc.getParentDataSizes().get(pos), nwayConf);
                }
            } else {
                tableContainer = new HashMapWrapper(hconf, keyCount);
            }
            LOG.info("Loading hash table for input: {} cacheKey: {} tableContainer: {} smallTablePos: {}", inputName, cacheKey, tableContainer.getClass().getSimpleName(), pos);
            tableContainer.setSerde(keyCtx, valCtx);
            while (kvReader.next()) {
                tableContainer.putRow((Writable) kvReader.getCurrentKey(), (Writable) kvReader.getCurrentValue());
                numEntries++;
                if (doMemCheck && (numEntries % memoryMonitorInfo.getMemoryCheckInterval() == 0)) {
                    final long estMemUsage = tableContainer.getEstimatedMemorySize();
                    if (estMemUsage > effectiveThreshold) {
                        String msg = "Hash table loading exceeded memory limits for input: " + inputName + " numEntries: " + numEntries + " estimatedMemoryUsage: " + estMemUsage + " effectiveThreshold: " + effectiveThreshold + " memoryMonitorInfo: " + memoryMonitorInfo;
                        LOG.error(msg);
                        throw new MapJoinMemoryExhaustionError(msg);
                    } else {
                        if (LOG.isInfoEnabled()) {
                            LOG.info("Checking hash table loader memory usage for input: {} numEntries: {} " + "estimatedMemoryUsage: {} effectiveThreshold: {}", inputName, numEntries, estMemUsage, effectiveThreshold);
                        }
                    }
                }
            }
            tableContainer.seal();
            mapJoinTables[pos] = tableContainer;
            if (doMemCheck) {
                LOG.info("Finished loading hash table for input: {} cacheKey: {} numEntries: {} estimatedMemoryUsage: {}", inputName, cacheKey, numEntries, tableContainer.getEstimatedMemorySize());
            } else {
                LOG.info("Finished loading hash table for input: {} cacheKey: {} numEntries: {}", inputName, cacheKey, numEntries);
            }
        } catch (Exception e) {
            throw new HiveException(e);
        }
    }
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) KeyValueReader(org.apache.tez.runtime.library.api.KeyValueReader) MapJoinBytesTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer) MapJoinMemoryExhaustionError(org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError) MemoryMonitorInfo(org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) IOException(java.io.IOException) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HashMapWrapper(org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper) HybridHashTableConf(org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableConf) MapJoinObjectSerDeContext(org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext) LogicalInput(org.apache.tez.runtime.api.LogicalInput) HybridHashTableContainer(org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer)

Example 3 with MapJoinMemoryExhaustionError

use of org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError in project hive by apache.

the class TestVectorMapJoinFastBytesHashMap method testOutOfBounds.

@Test
public void testOutOfBounds() throws Exception {
    random = new Random(42662);
    int HIGHEST_INT_POWER_OF_2 = 1073741824;
    boolean error = false;
    try {
        // The c'tor should throw the error
        VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, HIGHEST_INT_POWER_OF_2, LOAD_FACTOR, MODERATE_WB_SIZE, -1);
    } catch (MapJoinMemoryExhaustionError e) {
        error = true;
    }
    assert error;
}
Also used : Random(java.util.Random) MapJoinMemoryExhaustionError(org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError) Test(org.junit.Test)

Example 4 with MapJoinMemoryExhaustionError

use of org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError in project hive by apache.

the class TestVectorMapJoinFastLongHashMap method testOutOfBounds.

@Test
public void testOutOfBounds() throws Exception {
    random = new Random(42662);
    int HIGHEST_INT_POWER_OF_2 = 1073741824;
    boolean error = false;
    try {
        // The c'tor should throw the error
        VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, HIGHEST_INT_POWER_OF_2, LOAD_FACTOR, MODERATE_WB_SIZE, -1);
    } catch (MapJoinMemoryExhaustionError e) {
        error = true;
    }
    assert error;
}
Also used : Random(java.util.Random) MapJoinMemoryExhaustionError(org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError) Test(org.junit.Test)

Aggregations

MapJoinMemoryExhaustionError (org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError)4 IOException (java.io.IOException)2 Random (java.util.Random)2 MemoryMonitorInfo (org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo)2 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)2 LogicalInput (org.apache.tez.runtime.api.LogicalInput)2 KeyValueReader (org.apache.tez.runtime.library.api.KeyValueReader)2 Test (org.junit.Test)2 HashMapWrapper (org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper)1 HybridHashTableConf (org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableConf)1 HybridHashTableContainer (org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer)1 MapJoinBytesTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer)1 MapJoinObjectSerDeContext (org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext)1 MapJoinTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer)1 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)1 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)1 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)1 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)1 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)1