Search in sources :

Example 6 with MapJoinTableContainer

use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer in project hive by apache.

the class HashTableLoader method loadDirectly.

private void loadDirectly(MapJoinTableContainer[] mapJoinTables, String inputFileName) throws Exception {
    MapredLocalWork localWork = context.getLocalWork();
    List<Operator<?>> directWorks = localWork.getDirectFetchOp().get(joinOp);
    if (directWorks == null || directWorks.isEmpty()) {
        return;
    }
    JobConf job = new JobConf(hconf);
    MapredLocalTask localTask = new MapredLocalTask(localWork, job, false);
    HashTableSinkOperator sink = new TemporaryHashSinkOperator(new CompilationOpContext(), desc);
    sink.setParentOperators(new ArrayList<Operator<? extends OperatorDesc>>(directWorks));
    for (Operator<?> operator : directWorks) {
        if (operator != null) {
            operator.setChildOperators(Arrays.<Operator<? extends OperatorDesc>>asList(sink));
        }
    }
    localTask.setExecContext(context);
    localTask.startForward(inputFileName);
    MapJoinTableContainer[] tables = sink.getMapJoinTables();
    for (int i = 0; i < sink.getNumParent(); i++) {
        if (sink.getParentOperators().get(i) != null) {
            mapJoinTables[i] = tables[i];
        }
    }
    Arrays.fill(tables, null);
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) HashTableSinkOperator(org.apache.hadoop.hive.ql.exec.HashTableSinkOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) TemporaryHashSinkOperator(org.apache.hadoop.hive.ql.exec.TemporaryHashSinkOperator) MapredLocalTask(org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask) HashTableSinkOperator(org.apache.hadoop.hive.ql.exec.HashTableSinkOperator) TemporaryHashSinkOperator(org.apache.hadoop.hive.ql.exec.TemporaryHashSinkOperator) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork) MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer) JobConf(org.apache.hadoop.mapred.JobConf) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Example 7 with MapJoinTableContainer

use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer in project hive by apache.

the class HashTableLoader method load.

private MapJoinTableContainer load(FileSystem fs, Path path, MapJoinTableContainerSerDe mapJoinTableSerde) throws HiveException {
    LOG.info("\tLoad back all hashtable files from tmp folder uri:" + path);
    if (!SparkUtilities.isDedicatedCluster(hconf)) {
        return useFastContainer ? mapJoinTableSerde.loadFastContainer(desc, fs, path, hconf) : mapJoinTableSerde.load(fs, path, hconf);
    }
    MapJoinTableContainer mapJoinTable = SmallTableCache.get(path);
    if (mapJoinTable == null) {
        synchronized (path.toString().intern()) {
            mapJoinTable = SmallTableCache.get(path);
            if (mapJoinTable == null) {
                mapJoinTable = useFastContainer ? mapJoinTableSerde.loadFastContainer(desc, fs, path, hconf) : mapJoinTableSerde.load(fs, path, hconf);
                SmallTableCache.cache(path, mapJoinTable);
            }
        }
    }
    return mapJoinTable;
}
Also used : MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer)

Example 8 with MapJoinTableContainer

use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer in project hive by apache.

the class VectorMapJoinGenerateResultOperator method reloadHashTable.

@Override
protected void reloadHashTable(byte pos, int partitionId) throws IOException, HiveException, SerDeException, ClassNotFoundException {
    // The super method will reload a hash table partition of one of the small tables.
    // Currently, for native vector map join it will only be one small table.
    super.reloadHashTable(pos, partitionId);
    MapJoinTableContainer smallTable = spilledMapJoinTables[pos];
    vectorMapJoinHashTable = VectorMapJoinOptimizedCreateHashTable.createHashTable(conf, smallTable);
    needHashTableSetup = true;
    LOG.info("Created " + vectorMapJoinHashTable.getClass().getSimpleName() + " from " + this.getClass().getSimpleName());
    if (isLogDebugEnabled) {
        LOG.debug(CLASS_NAME + " reloadHashTable!");
    }
}
Also used : MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer)

Example 9 with MapJoinTableContainer

use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer in project hive by apache.

the class HashTableLoader method load.

@Override
public void load(MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
    Map<Integer, String> parentToInput = desc.getParentToInput();
    Map<Integer, Long> parentKeyCounts = desc.getParentKeyCounts();
    boolean isCrossProduct = false;
    List<ExprNodeDesc> joinExprs = desc.getKeys().values().iterator().next();
    if (joinExprs.size() == 0) {
        isCrossProduct = true;
    }
    boolean useOptimizedTables = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
    boolean useHybridGraceHashJoin = desc.isHybridHashJoin();
    boolean isFirstKey = true;
    // Get the total available memory from memory manager
    long totalMapJoinMemory = desc.getMemoryNeeded();
    LOG.info("Memory manager allocates " + totalMapJoinMemory + " bytes for the loading hashtable.");
    if (totalMapJoinMemory <= 0) {
        totalMapJoinMemory = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
    }
    long processMaxMemory = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
    if (totalMapJoinMemory > processMaxMemory) {
        float hashtableMemoryUsage = HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE);
        LOG.warn("totalMapJoinMemory value of " + totalMapJoinMemory + " is greater than the max memory size of " + processMaxMemory);
        // Don't want to attempt to grab more memory than we have available .. percentage is a bit arbitrary
        totalMapJoinMemory = (long) (processMaxMemory * hashtableMemoryUsage);
    }
    // Only applicable to n-way Hybrid Grace Hash Join
    HybridHashTableConf nwayConf = null;
    long totalSize = 0;
    // position of the biggest small table
    int biggest = 0;
    Map<Integer, Long> tableMemorySizes = null;
    if (useHybridGraceHashJoin && mapJoinTables.length > 2) {
        // Create a Conf for n-way HybridHashTableContainers
        nwayConf = new HybridHashTableConf();
        LOG.info("N-way join: " + (mapJoinTables.length - 1) + " small tables.");
        // Find the biggest small table; also calculate total data size of all small tables
        // the size of the biggest small table
        long maxSize = Long.MIN_VALUE;
        for (int pos = 0; pos < mapJoinTables.length; pos++) {
            if (pos == desc.getPosBigTable()) {
                continue;
            }
            long smallTableSize = desc.getParentDataSizes().get(pos);
            totalSize += smallTableSize;
            if (maxSize < smallTableSize) {
                maxSize = smallTableSize;
                biggest = pos;
            }
        }
        tableMemorySizes = divideHybridHashTableMemory(mapJoinTables, desc, totalSize, totalMapJoinMemory);
        // Using biggest small table, calculate number of partitions to create for each small table
        long memory = tableMemorySizes.get(biggest);
        int numPartitions = 0;
        try {
            numPartitions = HybridHashTableContainer.calcNumPartitions(memory, maxSize, HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS), HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE));
        } catch (IOException e) {
            throw new HiveException(e);
        }
        nwayConf.setNumberOfPartitions(numPartitions);
    }
    for (int pos = 0; pos < mapJoinTables.length; pos++) {
        if (pos == desc.getPosBigTable()) {
            continue;
        }
        String inputName = parentToInput.get(pos);
        LogicalInput input = tezContext.getInput(inputName);
        try {
            input.start();
            tezContext.getTezProcessorContext().waitForAnyInputReady(Collections.<Input>singletonList(input));
        } catch (Exception e) {
            throw new HiveException(e);
        }
        try {
            KeyValueReader kvReader = (KeyValueReader) input.getReader();
            MapJoinObjectSerDeContext keyCtx = mapJoinTableSerdes[pos].getKeyContext(), valCtx = mapJoinTableSerdes[pos].getValueContext();
            if (useOptimizedTables) {
                ObjectInspector keyOi = keyCtx.getSerDe().getObjectInspector();
                if (!MapJoinBytesTableContainer.isSupportedKey(keyOi)) {
                    if (isFirstKey) {
                        useOptimizedTables = false;
                        LOG.info(describeOi("Not using optimized hash table. " + "Only a subset of mapjoin keys is supported. Unsupported key: ", keyOi));
                    } else {
                        throw new HiveException(describeOi("Only a subset of mapjoin keys is supported. Unsupported key: ", keyOi));
                    }
                }
            }
            isFirstKey = false;
            Long keyCountObj = parentKeyCounts.get(pos);
            long keyCount = (keyCountObj == null) ? -1 : keyCountObj.longValue();
            long memory = 0;
            if (useHybridGraceHashJoin) {
                if (mapJoinTables.length > 2) {
                    memory = tableMemorySizes.get(pos);
                } else {
                    // binary join
                    memory = totalMapJoinMemory;
                }
            }
            MapJoinTableContainer tableContainer;
            if (useOptimizedTables) {
                if (!useHybridGraceHashJoin || isCrossProduct) {
                    tableContainer = new MapJoinBytesTableContainer(hconf, valCtx, keyCount, 0);
                } else {
                    tableContainer = new HybridHashTableContainer(hconf, keyCount, memory, desc.getParentDataSizes().get(pos), nwayConf);
                }
            } else {
                tableContainer = new HashMapWrapper(hconf, keyCount);
            }
            LOG.info("Using tableContainer " + tableContainer.getClass().getSimpleName());
            tableContainer.setSerde(keyCtx, valCtx);
            while (kvReader.next()) {
                tableContainer.putRow((Writable) kvReader.getCurrentKey(), (Writable) kvReader.getCurrentValue());
            }
            tableContainer.seal();
            LOG.info("Finished loading hashtable using " + tableContainer.getClass() + ". Small table position: " + pos);
            mapJoinTables[pos] = tableContainer;
        } catch (Exception e) {
            throw new HiveException(e);
        }
    }
}
Also used : PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) KeyValueReader(org.apache.tez.runtime.library.api.KeyValueReader) MapJoinBytesTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer) IOException(java.io.IOException) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HashMapWrapper(org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper) HybridHashTableConf(org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableConf) MapJoinObjectSerDeContext(org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext) LogicalInput(org.apache.tez.runtime.api.LogicalInput) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer) HybridHashTableContainer(org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer)

Aggregations

MapJoinTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer)9 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)3 IOException (java.io.IOException)2 ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair)2 Pair (org.apache.commons.lang3.tuple.Pair)2 ObjectPair (org.apache.hadoop.hive.common.ObjectPair)2 CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext)2 HashTableSinkOperator (org.apache.hadoop.hive.ql.exec.HashTableSinkOperator)2 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)2 Operator (org.apache.hadoop.hive.ql.exec.Operator)2 TemporaryHashSinkOperator (org.apache.hadoop.hive.ql.exec.TemporaryHashSinkOperator)2 HybridHashTableContainer (org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer)2 MapJoinTableContainerSerDe (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe)2 MapredLocalWork (org.apache.hadoop.hive.ql.plan.MapredLocalWork)2 OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)2 JobConf (org.apache.hadoop.mapred.JobConf)2 KryoException (com.esotericsoftware.kryo.KryoException)1 ExecMapperContext (org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext)1 MapredLocalTask (org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask)1 HashMapWrapper (org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper)1