Search in sources :

Example 6 with LogicalInput

use of org.apache.tez.runtime.api.LogicalInput in project hive by apache.

the class HashTableLoader method load.

/**
 * Builds one map-join table container for every position of {@code mapJoinTables}
 * other than {@code desc.getPosBigTable()}, fills it from the Tez input mapped to
 * that position, seals it and stores it back into {@code mapJoinTables}.
 *
 * <p>The memory budget comes from {@code desc.getMemoryNeeded()}; a non-positive value
 * falls back to the configured no-conditional-task threshold, and a value above the JVM
 * max heap is scaled down to a configured fraction of that heap. When
 * {@code desc.isHybridHashJoin()} is true and there are more than two tables, the budget
 * is split per table via {@code divideHybridHashTableMemory} and a shared
 * {@link HybridHashTableConf} carries the partition count derived from the largest
 * small table.
 *
 * @param mapJoinTables       array that receives the loaded container at each small-table position
 * @param mapJoinTableSerdes  per-position source of the key and value serde contexts
 * @throws HiveException if the partition count calculation raises an {@code IOException},
 *         or if anything fails while starting, waiting on or reading an input; an
 *         unsupported key on a table after the first one is also reported this way
 *         (it is raised inside the loading try block, so it arrives wrapped)
 */
@Override
public void load(MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
    final Map<Integer, String> inputNameByParent = desc.getParentToInput();
    final Map<Integer, Long> keyCountByParent = desc.getParentKeyCounts();
    // No join key expressions at all means the join is a cross product.
    final List<ExprNodeDesc> firstKeyExprs = desc.getKeys().values().iterator().next();
    final boolean crossProduct = firstKeyExprs.size() == 0;
    boolean optimizedTables = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
    final boolean hybridGrace = desc.isHybridHashJoin();
    // True until the first small table has been examined.
    boolean firstKey = true;

    // Start from what the memory manager granted, then apply the fallback and the heap cap.
    long memoryBudget = desc.getMemoryNeeded();
    LOG.info("Memory manager allocates " + memoryBudget + " bytes for the loading hashtable.");
    if (memoryBudget <= 0) {
        memoryBudget = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
    }
    final long heapMax = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
    if (heapMax < memoryBudget) {
        final float usageFraction = HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE);
        LOG.warn("totalMapJoinMemory value of " + memoryBudget + " is greater than the max memory size of " + heapMax);
        // Never ask for more than the process can have; the fraction itself is somewhat arbitrary.
        memoryBudget = (long) (heapMax * usageFraction);
    }

    // Both stay null unless this is an n-way hybrid grace hash join.
    HybridHashTableConf nwayConf = null;
    Map<Integer, Long> memoryByTable = null;
    final int tableCount = mapJoinTables.length;
    if (hybridGrace && tableCount > 2) {
        nwayConf = new HybridHashTableConf();
        LOG.info("N-way join: " + (tableCount - 1) + " small tables.");
        // Single pass: sum the small-table sizes and remember the largest one.
        long combinedSize = 0;
        long largestSize = Long.MIN_VALUE;
        int largestPos = 0;
        for (int i = 0; i < tableCount; i++) {
            if (i != desc.getPosBigTable()) {
                final long size = desc.getParentDataSizes().get(i);
                combinedSize += size;
                if (size > largestSize) {
                    largestSize = size;
                    largestPos = i;
                }
            }
        }
        memoryByTable = divideHybridHashTableMemory(mapJoinTables, desc, combinedSize, memoryBudget);
        // The largest small table determines the partition count used for every small table.
        final long largestMemory = memoryByTable.get(largestPos);
        try {
            nwayConf.setNumberOfPartitions(HybridHashTableContainer.calcNumPartitions(largestMemory, largestSize, HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS), HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE)));
        } catch (IOException e) {
            throw new HiveException(e);
        }
    }

    for (int pos = 0; pos < tableCount; pos++) {
        if (pos == desc.getPosBigTable()) {
            continue;
        }
        final LogicalInput input = tezContext.getInput(inputNameByParent.get(pos));
        // Start the input and block until it reports ready before reading from it.
        try {
            input.start();
            tezContext.getTezProcessorContext().waitForAnyInputReady(Collections.<Input>singletonList(input));
        } catch (Exception e) {
            throw new HiveException(e);
        }
        try {
            final KeyValueReader reader = (KeyValueReader) input.getReader();
            final MapJoinObjectSerDeContext keyCtx = mapJoinTableSerdes[pos].getKeyContext();
            final MapJoinObjectSerDeContext valCtx = mapJoinTableSerdes[pos].getValueContext();
            if (optimizedTables) {
                final ObjectInspector keyOi = keyCtx.getSerDe().getObjectInspector();
                if (!MapJoinBytesTableContainer.isSupportedKey(keyOi)) {
                    if (!firstKey) {
                        // An earlier table already used the optimized container; switching now is not allowed.
                        throw new HiveException(describeOi("Only a subset of mapjoin keys is supported. Unsupported key: ", keyOi));
                    }
                    optimizedTables = false;
                    LOG.info(describeOi("Not using optimized hash table. " + "Only a subset of mapjoin keys is supported. Unsupported key: ", keyOi));
                }
            }
            firstKey = false;

            // -1 stands for "no key count recorded for this position".
            final Long boxedKeyCount = keyCountByParent.get(pos);
            final long keyCount = (boxedKeyCount != null) ? boxedKeyCount.longValue() : -1;

            long memory = 0;
            if (hybridGrace) {
                // n-way joins use the per-table share; a binary join gets the whole budget.
                memory = (tableCount > 2) ? memoryByTable.get(pos) : memoryBudget;
            }

            final MapJoinTableContainer container;
            if (!optimizedTables) {
                container = new HashMapWrapper(hconf, keyCount);
            } else if (hybridGrace && !crossProduct) {
                container = new HybridHashTableContainer(hconf, keyCount, memory, desc.getParentDataSizes().get(pos), nwayConf);
            } else {
                container = new MapJoinBytesTableContainer(hconf, valCtx, keyCount, 0);
            }
            LOG.info("Using tableContainer " + container.getClass().getSimpleName());
            container.setSerde(keyCtx, valCtx);
            // Drain the reader into the container, then seal it.
            while (reader.next()) {
                container.putRow((Writable) reader.getCurrentKey(), (Writable) reader.getCurrentValue());
            }
            container.seal();
            LOG.info("Finished loading hashtable using " + container.getClass() + ". Small table position: " + pos);
            mapJoinTables[pos] = container;
        } catch (Exception e) {
            throw new HiveException(e);
        }
    }
}
Also used : PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) KeyValueReader(org.apache.tez.runtime.library.api.KeyValueReader) MapJoinBytesTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer) IOException(java.io.IOException) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HashMapWrapper(org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper) HybridHashTableConf(org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableConf) MapJoinObjectSerDeContext(org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext) LogicalInput(org.apache.tez.runtime.api.LogicalInput) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer) HybridHashTableContainer(org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer)

Example 7 with LogicalInput

use of org.apache.tez.runtime.api.LogicalInput in project hive by apache.

the class HivePreWarmProcessor method run.

/**
 * Pre-warms the container once: starts every input and output, touches a handful of
 * lazily-initialised singletons, and loads selected classes out of the local exec jar.
 * Subsequent calls return immediately once {@code prewarmed} has been set.
 *
 * @param inputs  logical inputs by name; each one is started
 * @param outputs logical outputs by name; each one is started
 * @throws Exception if starting an input/output, opening the jar, or loading a class fails;
 *         in that case {@code prewarmed} is left unset
 */
@Override
public void run(Map<String, LogicalInput> inputs, Map<String, LogicalOutput> outputs) throws Exception {
    if (prewarmed) {
        // Nothing to do when the container is being reused.
        return;
    }
    for (LogicalInput in : inputs.values()) {
        in.start();
    }
    for (LogicalOutput out : outputs.values()) {
        out.start();
    }
    // These calls exist only to trigger the singleton initialization most queries go through;
    // their return values are not needed.
    FileSystem.get(conf);
    Mac.getInstance("HmacSHA1");
    ReadaheadPool.getInstance();
    ShimLoader.getHadoopShims();

    final String execJarUrl = "jar:" + DagUtils.getInstance().getExecJarPathLocal() + "!/";
    final JarURLConnection connection = (JarURLConnection) new URL(execJarUrl).openConnection();
    final JarFile execJar = connection.getJarFile();
    try {
        for (Enumeration<JarEntry> entries = execJar.entries(); entries.hasMoreElements(); ) {
            final String entryName = entries.nextElement().getName();
            if (!entryName.endsWith(".class")) {
                continue;
            }
            // Turn the jar entry path into a dotted class name.
            final String className = entryName.replace(".class", "").replace('/', '.');
            // Several hive classes depend on the metastore APIs, which are not included in
            // hive-exec.jar. Operators and io classes are the relatively safe ones to load.
            final boolean execOrIo = className.contains("ql.exec") || className.contains("ql.io");
            final boolean vectorOrOperator = className.contains("vector") || className.contains("Operator");
            if (execOrIo && vectorOrOperator) {
                JavaUtils.loadClass(className);
            }
        }
    } finally {
        execJar.close();
    }
    prewarmed = true;
}
Also used : LogicalOutput(org.apache.tez.runtime.api.LogicalOutput) ReadaheadPool(org.apache.hadoop.io.ReadaheadPool) JarURLConnection(java.net.JarURLConnection) FileSystem(org.apache.hadoop.fs.FileSystem) LogicalInput(org.apache.tez.runtime.api.LogicalInput) JarFile(java.util.jar.JarFile) JarEntry(java.util.jar.JarEntry) Mac(javax.crypto.Mac) URL(java.net.URL)

Aggregations

LogicalInput (org.apache.tez.runtime.api.LogicalInput)7 Input (org.apache.tez.runtime.api.Input)4 IOException (java.io.IOException)3 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)3 KeyValueReader (org.apache.tez.runtime.library.api.KeyValueReader)3 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)2 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)2 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)2 MRInputLegacy (org.apache.tez.mapreduce.input.MRInputLegacy)2 JarURLConnection (java.net.JarURLConnection)1 URL (java.net.URL)1 ArrayList (java.util.ArrayList)1 HashSet (java.util.HashSet)1 LinkedList (java.util.LinkedList)1 JarEntry (java.util.jar.JarEntry)1 JarFile (java.util.jar.JarFile)1 Mac (javax.crypto.Mac)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator)1 ExprNodeEvaluator (org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator)1