
Example 1 with AbstractLogicalInput

Use of org.apache.tez.runtime.api.AbstractLogicalInput in the Apache Hive project.

Class VectorMapJoinFastHashTableLoader, method load:

@Override
public void load(MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
    Map<Integer, String> parentToInput = desc.getParentToInput();
    Map<Integer, Long> parentKeyCounts = desc.getParentKeyCounts();
    MemoryMonitorInfo memoryMonitorInfo = desc.getMemoryMonitorInfo();
    boolean doMemCheck = false;
    long effectiveThreshold = 0;
    if (memoryMonitorInfo != null) {
        effectiveThreshold = memoryMonitorInfo.getEffectiveThreshold(desc.getMaxMemoryAvailable());
        // Flip the flag at runtime in case we are running outside of LLAP
        if (!LlapDaemonInfo.INSTANCE.isLlap()) {
            memoryMonitorInfo.setLlap(false);
        }
        if (memoryMonitorInfo.doMemoryMonitoring()) {
            doMemCheck = true;
            LOG.info("Memory monitoring for hash table loader enabled. {}", memoryMonitorInfo);
        }
    }
    if (!doMemCheck) {
        LOG.info("Not doing hash table memory monitoring. {}", memoryMonitorInfo);
    }
    for (int pos = 0; pos < mapJoinTables.length; pos++) {
        if (pos == desc.getPosBigTable()) {
            continue;
        }
        String inputName = parentToInput.get(pos);
        LogicalInput input = tezContext.getInput(inputName);
        try {
            input.start();
            tezContext.getTezProcessorContext().waitForAnyInputReady(Collections.<Input>singletonList(input));
        } catch (Exception e) {
            throw new HiveException(e);
        }
        try {
            KeyValueReader kvReader = (KeyValueReader) input.getReader();
            Long keyCountObj = parentKeyCounts.get(pos);
            long estKeyCount = (keyCountObj == null) ? -1 : keyCountObj;
            long inputRecords = -1;
            try {
                // TODO : Need to use class instead of string.
                // https://issues.apache.org/jira/browse/HIVE-23981
                inputRecords = ((AbstractLogicalInput) input).getContext().getCounters().findCounter("org.apache.tez.common.counters.TaskCounter", "APPROXIMATE_INPUT_RECORDS").getValue();
            } catch (Exception e) {
                LOG.debug("Failed to get value for counter APPROXIMATE_INPUT_RECORDS", e);
            }
            long keyCount = Math.max(estKeyCount, inputRecords);
            initHTLoadingService(keyCount);
            VectorMapJoinFastTableContainer tableContainer = new VectorMapJoinFastTableContainer(desc, hconf, keyCount, numLoadThreads);
            LOG.info("Loading hash table for input: {} cacheKey: {} tableContainer: {} smallTablePos: {} " + "estKeyCount : {} keyCount : {}", inputName, cacheKey, tableContainer.getClass().getSimpleName(), pos, estKeyCount, keyCount);
            // No SerDes here.
            tableContainer.setSerde(null, null);
            // Submit parallel loading Threads
            submitQueueDrainThreads(tableContainer);
            long receivedEntries = 0;
            long startTime = System.currentTimeMillis();
            while (kvReader.next()) {
                BytesWritable currentKey = (BytesWritable) kvReader.getCurrentKey();
                BytesWritable currentValue = (BytesWritable) kvReader.getCurrentValue();
                long hashCode = tableContainer.getHashCode(currentKey);
                // numLoadThreads must be a power of 2 for this mask to yield a valid partition index!
                int partitionId = (int) ((numLoadThreads - 1) & hashCode);
                // copyBytes() snapshots the buffers, since the reader reuses its key/value writables on next()
                HashTableElement h = new HashTableElement(hashCode, currentValue.copyBytes(), currentKey.copyBytes());
                if (elementBatches[partitionId].addElement(h)) {
                    loadBatchQueues[partitionId].add(elementBatches[partitionId]);
                    elementBatches[partitionId] = batchPool.take();
                }
                receivedEntries++;
                if (doMemCheck && (receivedEntries % memoryMonitorInfo.getMemoryCheckInterval() == 0)) {
                    final long estMemUsage = tableContainer.getEstimatedMemorySize();
                    if (estMemUsage > effectiveThreshold) {
                        String msg = "Hash table loading exceeded memory limits for input: " + inputName + " numEntries: " + receivedEntries + " estimatedMemoryUsage: " + estMemUsage + " effectiveThreshold: " + effectiveThreshold + " memoryMonitorInfo: " + memoryMonitorInfo;
                        LOG.error(msg);
                        throw new MapJoinMemoryExhaustionError(msg);
                    } else {
                        LOG.info("Checking hash table loader memory usage for input: {} numEntries: {} " + "estimatedMemoryUsage: {} effectiveThreshold: {}", inputName, receivedEntries, estMemUsage, effectiveThreshold);
                    }
                }
            }
            LOG.info("Finished loading the queue for input: {} waiting {} minutes for TPool shutdown", inputName, 2);
            addQueueDoneSentinel();
            loadExecService.shutdown();
            loadExecService.awaitTermination(2, TimeUnit.MINUTES);
            batchPool.clear();
            LOG.info("Total received entries: {} Threads {} HT entries: {}", receivedEntries, numLoadThreads, totalEntries.get());
            long delta = System.currentTimeMillis() - startTime;
            htLoadCounter.increment(delta);
            tableContainer.seal();
            mapJoinTables[pos] = tableContainer;
            if (doMemCheck) {
                LOG.info("Finished loading hash table for input: {} cacheKey: {} numEntries: {} " + "estimatedMemoryUsage: {} Load Time : {} ", inputName, cacheKey, receivedEntries, tableContainer.getEstimatedMemorySize(), delta);
            } else {
                LOG.info("Finished loading hash table for input: {} cacheKey: {} numEntries: {} Load Time : {} ", inputName, cacheKey, receivedEntries, delta);
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag before wrapping.
            Thread.currentThread().interrupt();
            throw new HiveException(e);
        } catch (Exception e) {
            // IOException, SerDeException, and anything else are wrapped uniformly.
            throw new HiveException(e);
        }
    }
}
Also used:

import java.io.IOException;
import org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo;
import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.tez.runtime.api.AbstractLogicalInput;
import org.apache.tez.runtime.api.LogicalInput;
import org.apache.tez.runtime.library.api.KeyValueReader;
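
The snippet reads the APPROXIMATE_INPUT_RECORDS counter through a string-based findCounter lookup, which the TODO (HIVE-23981) flags. Below is a minimal sketch of the enum-based alternative that TODO points at, assuming the Tez version on the classpath defines TaskCounter.APPROXIMATE_INPUT_RECORDS and that TezCounters exposes findCounter(Enum<?>) as in Hadoop's counter API:

import org.apache.tez.common.counters.TaskCounter;
import org.apache.tez.common.counters.TezCounters;

// Hedged sketch, not Hive code: enum-based counter lookup as the TODO suggests.
public class CounterLookupSketch {

    static long approximateInputRecords(TezCounters counters) {
        // An enum key cannot be misspelled and survives refactoring, unlike
        // findCounter("org.apache.tez.common.counters.TaskCounter", "APPROXIMATE_INPUT_RECORDS").
        // Assumes TaskCounter.APPROXIMATE_INPUT_RECORDS exists in this Tez version.
        return counters.findCounter(TaskCounter.APPROXIMATE_INPUT_RECORDS).getValue();
    }
}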
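
The parallel loading loop fans rows out to numLoadThreads queues by masking the key's hash with numLoadThreads - 1, which only yields a uniform, in-range partition index when numLoadThreads is a power of two. A self-contained demonstration of the trick (class name and sample values are illustrative only):

// Illustrative sketch of the power-of-two mask partitioning used above; not Hive code.
public class PartitionMaskDemo {

    public static void main(String[] args) {
        int numLoadThreads = 4; // must be a power of two: 4 - 1 = 0b11 is a contiguous low-bit mask
        long[] sampleHashes = { 17L, -3L, 1024L, 7654321L };
        for (long hashCode : sampleHashes) {
            // Masking keeps only the low bits, so the result is always in
            // [0, numLoadThreads), even for negative hash codes.
            int partitionId = (int) ((numLoadThreads - 1) & hashCode);
            System.out.println("hash " + hashCode + " -> partition " + partitionId);
        }
    }
}

With a non-power-of-two thread count such as 6, the mask would be 0b101 and partitions 2 and 3 could never be selected, which is what the "power of 2" comment guards against.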

Example 2 with AbstractLogicalInput

Use of org.apache.tez.runtime.api.AbstractLogicalInput in the Apache Hive project.

Class HashTableLoader, method load:

@Override
public void load(MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
    Map<Integer, String> parentToInput = desc.getParentToInput();
    Map<Integer, Long> parentKeyCounts = desc.getParentKeyCounts();
    List<ExprNodeDesc> joinExprs = desc.getKeys().values().iterator().next();
    boolean isCrossProduct = joinExprs.isEmpty();
    boolean useOptimizedTables = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
    boolean useHybridGraceHashJoin = desc.isHybridHashJoin();
    boolean isFirstKey = true;
    // Get the total available memory from memory manager
    long totalMapJoinMemory = desc.getMemoryNeeded();
    LOG.info("Memory manager allocates " + totalMapJoinMemory + " bytes for the loading hashtable.");
    if (totalMapJoinMemory <= 0) {
        totalMapJoinMemory = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
    }
    long processMaxMemory = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
    if (totalMapJoinMemory > processMaxMemory) {
        float hashtableMemoryUsage = HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE);
        LOG.warn("totalMapJoinMemory value of " + totalMapJoinMemory + " is greater than the max memory size of " + processMaxMemory);
        // Don't grab more memory than is actually available; the percentage is somewhat arbitrary
        totalMapJoinMemory = (long) (processMaxMemory * hashtableMemoryUsage);
    }
    // Only applicable to n-way Hybrid Grace Hash Join
    HybridHashTableConf nwayConf = null;
    long totalSize = 0;
    // position of the biggest small table
    int biggest = 0;
    Map<Integer, Long> tableMemorySizes = null;
    if (useHybridGraceHashJoin && mapJoinTables.length > 2) {
        // Create a Conf for n-way HybridHashTableContainers
        nwayConf = new HybridHashTableConf();
        LOG.info("N-way join: " + (mapJoinTables.length - 1) + " small tables.");
        // Find the biggest small table; also calculate total data size of all small tables
        // the size of the biggest small table
        long maxSize = Long.MIN_VALUE;
        for (int pos = 0; pos < mapJoinTables.length; pos++) {
            if (pos == desc.getPosBigTable()) {
                continue;
            }
            long smallTableSize = desc.getParentDataSizes().get(pos);
            totalSize += smallTableSize;
            if (maxSize < smallTableSize) {
                maxSize = smallTableSize;
                biggest = pos;
            }
        }
        tableMemorySizes = divideHybridHashTableMemory(mapJoinTables, desc, totalSize, totalMapJoinMemory);
        // Using biggest small table, calculate number of partitions to create for each small table
        long memory = tableMemorySizes.get(biggest);
        int numPartitions = 0;
        try {
            numPartitions = HybridHashTableContainer.calcNumPartitions(memory, maxSize, HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS), HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE));
        } catch (IOException e) {
            throw new HiveException(e);
        }
        nwayConf.setNumberOfPartitions(numPartitions);
    }
    MemoryMonitorInfo memoryMonitorInfo = desc.getMemoryMonitorInfo();
    boolean doMemCheck = false;
    long effectiveThreshold = 0;
    if (memoryMonitorInfo != null) {
        effectiveThreshold = memoryMonitorInfo.getEffectiveThreshold(desc.getMaxMemoryAvailable());
        // Flip the flag at runtime in case we are running outside of LLAP
        if (!LlapDaemonInfo.INSTANCE.isLlap()) {
            memoryMonitorInfo.setLlap(false);
        }
        if (memoryMonitorInfo.doMemoryMonitoring()) {
            doMemCheck = true;
            LOG.info("Memory monitoring for hash table loader enabled. {}", memoryMonitorInfo);
        }
    }
    if (!doMemCheck) {
        LOG.info("Not doing hash table memory monitoring. {}", memoryMonitorInfo);
    }
    for (int pos = 0; pos < mapJoinTables.length; pos++) {
        if (pos == desc.getPosBigTable()) {
            continue;
        }
        long numEntries = 0;
        String inputName = parentToInput.get(pos);
        LogicalInput input = tezContext.getInput(inputName);
        try {
            input.start();
            tezContext.getTezProcessorContext().waitForAnyInputReady(Collections.<Input>singletonList(input));
        } catch (Exception e) {
            throw new HiveException(e);
        }
        try {
            KeyValueReader kvReader = (KeyValueReader) input.getReader();
            MapJoinObjectSerDeContext keyCtx = mapJoinTableSerdes[pos].getKeyContext();
            MapJoinObjectSerDeContext valCtx = mapJoinTableSerdes[pos].getValueContext();
            if (useOptimizedTables) {
                ObjectInspector keyOi = keyCtx.getSerDe().getObjectInspector();
                if (!MapJoinBytesTableContainer.isSupportedKey(keyOi)) {
                    if (isFirstKey) {
                        useOptimizedTables = false;
                        LOG.info(describeOi("Not using optimized hash table. " + "Only a subset of mapjoin keys is supported. Unsupported key: ", keyOi));
                    } else {
                        throw new HiveException(describeOi("Only a subset of mapjoin keys is supported. Unsupported key: ", keyOi));
                    }
                }
            }
            isFirstKey = false;
            Long keyCountObj = parentKeyCounts.get(pos);
            long estKeyCount = (keyCountObj == null) ? -1 : keyCountObj;
            long inputRecords = -1;
            try {
                // TODO : Need to use class instead of string.
                // https://issues.apache.org/jira/browse/HIVE-23981
                inputRecords = ((AbstractLogicalInput) input).getContext().getCounters().findCounter("org.apache.tez.common.counters.TaskCounter", "APPROXIMATE_INPUT_RECORDS").getValue();
            } catch (Exception e) {
                LOG.debug("Failed to get value for counter APPROXIMATE_INPUT_RECORDS", e);
            }
            long keyCount = Math.max(estKeyCount, inputRecords);
            long memory = 0;
            if (useHybridGraceHashJoin) {
                if (mapJoinTables.length > 2) {
                    memory = tableMemorySizes.get(pos);
                } else {
                    // binary join
                    memory = totalMapJoinMemory;
                }
            }
            MapJoinTableContainer tableContainer;
            if (useOptimizedTables) {
                if (!useHybridGraceHashJoin || isCrossProduct) {
                    tableContainer = new MapJoinBytesTableContainer(hconf, valCtx, keyCount, 0);
                } else {
                    tableContainer = new HybridHashTableContainer(hconf, keyCount, memory, desc.getParentDataSizes().get(pos), nwayConf);
                }
            } else {
                tableContainer = new HashMapWrapper(hconf, keyCount);
            }
            LOG.info("Loading hash table for input: {} cacheKey: {} tableContainer: {} smallTablePos: {} " + "estKeyCount : {} keyCount : {}", inputName, cacheKey, tableContainer.getClass().getSimpleName(), pos, estKeyCount, keyCount);
            tableContainer.setSerde(keyCtx, valCtx);
            long startTime = System.currentTimeMillis();
            while (kvReader.next()) {
                tableContainer.putRow((Writable) kvReader.getCurrentKey(), (Writable) kvReader.getCurrentValue());
                numEntries++;
                if (doMemCheck && (numEntries % memoryMonitorInfo.getMemoryCheckInterval() == 0)) {
                    final long estMemUsage = tableContainer.getEstimatedMemorySize();
                    if (estMemUsage > effectiveThreshold) {
                        String msg = "Hash table loading exceeded memory limits for input: " + inputName + " numEntries: " + numEntries + " estimatedMemoryUsage: " + estMemUsage + " effectiveThreshold: " + effectiveThreshold + " memoryMonitorInfo: " + memoryMonitorInfo;
                        LOG.error(msg);
                        throw new MapJoinMemoryExhaustionError(msg);
                    } else {
                        LOG.info("Checking hash table loader memory usage for input: {} numEntries: {} " + "estimatedMemoryUsage: {} effectiveThreshold: {}", inputName, numEntries, estMemUsage, effectiveThreshold);
                    }
                }
            }
            long delta = System.currentTimeMillis() - startTime;
            htLoadCounter.increment(delta);
            tableContainer.seal();
            mapJoinTables[pos] = tableContainer;
            if (doMemCheck) {
                LOG.info("Finished loading hash table for input: {} cacheKey: {} numEntries: {} " + "estimatedMemoryUsage: {} Load Time : {} ", inputName, cacheKey, numEntries, tableContainer.getEstimatedMemorySize(), delta);
            } else {
                LOG.info("Finished loading hash table for input: {} cacheKey: {} numEntries: {} Load Time : {} ", inputName, cacheKey, numEntries, delta);
            }
        } catch (Exception e) {
            throw new HiveException(e);
        }
    }
}
Also used:

import java.io.IOException;
import org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo;
import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError;
import org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper;
import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableConf;
import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.tez.runtime.api.AbstractLogicalInput;
import org.apache.tez.runtime.api.LogicalInput;
import org.apache.tez.runtime.library.api.KeyValueReader;
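
Example 2 splits the total map-join memory budget across the small tables via divideHybridHashTableMemory, whose body is not shown here. Below is a minimal sketch under the assumption that memory is apportioned in proportion to each table's estimated data size; the helper and its names are hypothetical, and Hive's actual policy may differ:

import java.util.HashMap;
import java.util.Map;

// Hypothetical sketch of proportional memory division across small tables; not Hive code.
public class MemoryDivisionSketch {

    // dataSizes maps small-table position -> estimated data size in bytes.
    static Map<Integer, Long> divideProportionally(Map<Integer, Long> dataSizes, long totalSize, long totalMemory) {
        Map<Integer, Long> shares = new HashMap<>();
        for (Map.Entry<Integer, Long> e : dataSizes.entrySet()) {
            // Each table receives memory in proportion to its share of the total data.
            shares.put(e.getKey(), (long) (totalMemory * ((double) e.getValue() / totalSize)));
        }
        return shares;
    }

    public static void main(String[] args) {
        Map<Integer, Long> sizes = new HashMap<>();
        sizes.put(0, 100L << 20); // 100 MB small table at position 0
        sizes.put(2, 300L << 20); // 300 MB small table at position 2
        long totalSize = (100L + 300L) << 20;
        // A 1 GB budget splits as 256 MB / 768 MB between the two tables.
        System.out.println(divideProportionally(sizes, totalSize, 1L << 30));
    }
}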

Aggregations

IOException (java.io.IOException): 2
MemoryMonitorInfo (org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo): 2
MapJoinMemoryExhaustionError (org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError): 2
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 2
AbstractLogicalInput (org.apache.tez.runtime.api.AbstractLogicalInput): 2
LogicalInput (org.apache.tez.runtime.api.LogicalInput): 2
KeyValueReader (org.apache.tez.runtime.library.api.KeyValueReader): 2
HashMapWrapper (org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper): 1
HybridHashTableConf (org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableConf): 1
HybridHashTableContainer (org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer): 1
MapJoinBytesTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer): 1
MapJoinObjectSerDeContext (org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext): 1
MapJoinTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer): 1
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 1
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 1
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 1
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 1
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 1
BytesWritable (org.apache.hadoop.io.BytesWritable): 1