Example 1 with MapJoinObjectSerDeContext

Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext in project hive by apache.

From the class MapJoinOperator, method generateMapMetaData:

public void generateMapMetaData() throws HiveException {
    try {
        // Join keys are serialized the same way for every table, so a single key
        // context is shared across all small tables; keys never carry a filter
        // tag, hence the hard-coded 'false'.
        TableDesc keyTableDesc = conf.getKeyTblDesc();
        AbstractSerDe keySerializer = (AbstractSerDe) ReflectionUtil.newInstance(keyTableDesc.getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(keySerializer, null, keyTableDesc.getProperties(), null);
        MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerializer, false);
        for (int pos = 0; pos < order.length; pos++) {
            if (pos == posBigTable) {
                continue;
            }
            TableDesc valueTableDesc;
            if (conf.getNoOuterJoin()) {
                valueTableDesc = conf.getValueTblDescs().get(pos);
            } else {
                // Outer joins append a filter tag to each value row, so the
                // filtered value descriptors are used instead.
                valueTableDesc = conf.getValueFilteredTblDescs().get(pos);
            }
            AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtil.newInstance(valueTableDesc.getDeserializerClass(), null);
            SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null);
            MapJoinObjectSerDeContext valueContext = new MapJoinObjectSerDeContext(valueSerDe, hasFilter(pos));
            mapJoinTableSerdes[pos] = new MapJoinTableContainerSerDe(keyContext, valueContext);
        }
    } catch (SerDeException e) {
        throw new HiveException(e);
    }
}
Also used: MapJoinTableContainerSerDe (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), MapJoinObjectSerDeContext (org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext), TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc), AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe), SerDeException (org.apache.hadoop.hive.serde2.SerDeException)
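The pattern worth noting is the asymmetry between the two contexts: one key context, built once with the filter flag hard-coded to false, is paired with a fresh value context per small table whose flag comes from hasFilter(pos). A minimal sketch of that pairing for one small table, assuming keyDesc and valueDesc are hypothetical TableDesc locals standing in for conf.getKeyTblDesc() and the per-position value descriptor, inside the same try/catch for SerDeException as above:

// Sketch only: keyDesc/valueDesc are hypothetical stand-ins for the descriptors
// fetched from conf in the example; checked-exception handling is elided.
AbstractSerDe keySerDe = (AbstractSerDe) ReflectionUtil.newInstance(keyDesc.getDeserializerClass(), null);
SerDeUtils.initializeSerDe(keySerDe, null, keyDesc.getProperties(), null);
// Key context: keys never carry a filter tag.
MapJoinObjectSerDeContext keyCtx = new MapJoinObjectSerDeContext(keySerDe, false);

AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtil.newInstance(valueDesc.getDeserializerClass(), null);
SerDeUtils.initializeSerDe(valueSerDe, null, valueDesc.getProperties(), null);
// Value context: 'true' here assumes this table's rows carry a filter tag.
MapJoinObjectSerDeContext valueCtx = new MapJoinObjectSerDeContext(valueSerDe, true);

MapJoinTableContainerSerDe containerSerDe = new MapJoinTableContainerSerDe(keyCtx, valueCtx);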

Example 2 with MapJoinObjectSerDeContext

Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext in project hive by apache.

From the class HashTableLoader, method load (the Spark variant, as the SparkBucketMapJoinContext cast below shows):

@Override
public void load(MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
    // Note: it's possible that a MJ operator is in a ReduceWork, in which case the
    // currentInputPath will be null. But, since currentInputPath is only interesting
    // for bucket join case, and for bucket join the MJ operator will always be in
    // a MapWork, this should be OK.
    String currentInputPath = context.getCurrentInputPath() == null ? null : context.getCurrentInputPath().toString();
    LOG.info("******* Load from HashTable for input file: " + currentInputPath);
    MapredLocalWork localWork = context.getLocalWork();
    try {
        if (localWork.getDirectFetchOp() != null) {
            loadDirectly(mapJoinTables, currentInputPath);
        }
        // All HashTables share the same base dir,
        // which is passed in as the tmp path
        Path baseDir = localWork.getTmpPath();
        if (baseDir == null) {
            return;
        }
        FileSystem fs = FileSystem.get(baseDir.toUri(), hconf);
        BucketMapJoinContext mapJoinCtx = localWork.getBucketMapjoinContext();
        boolean firstContainer = true;
        boolean useOptimizedContainer = !useFastContainer && HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
        for (int pos = 0; pos < mapJoinTables.length; pos++) {
            if (pos == desc.getPosBigTable() || mapJoinTables[pos] != null) {
                continue;
            }
            if (useOptimizedContainer) {
                MapJoinObjectSerDeContext keyCtx = mapJoinTableSerdes[pos].getKeyContext();
                ObjectInspector keyOI = keyCtx.getSerDe().getObjectInspector();
                if (!MapJoinBytesTableContainer.isSupportedKey(keyOI)) {
                    if (firstContainer) {
                        LOG.warn("Not using optimized table container." + "Only a subset of mapjoin keys is supported.");
                        useOptimizedContainer = false;
                        HiveConf.setBoolVar(hconf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE, false);
                    } else {
                        throw new HiveException("Only a subset of mapjoin keys is supported.");
                    }
                }
            }
            firstContainer = false;
            String bigInputPath = currentInputPath;
            if (currentInputPath != null && mapJoinCtx != null) {
                if (!desc.isBucketMapJoin()) {
                    bigInputPath = null;
                } else {
                    Set<String> aliases = ((SparkBucketMapJoinContext) mapJoinCtx).getPosToAliasMap().get(pos);
                    String alias = aliases.iterator().next();
                    // Any one small table input path
                    String smallInputPath = mapJoinCtx.getAliasBucketFileNameMapping().get(alias).get(bigInputPath).get(0);
                    bigInputPath = mapJoinCtx.getMappingBigFile(alias, smallInputPath);
                }
            }
            String fileName = localWork.getBucketFileName(bigInputPath);
            Path path = Utilities.generatePath(baseDir, desc.getDumpFilePrefix(), (byte) pos, fileName);
            mapJoinTables[pos] = load(fs, path, mapJoinTableSerdes[pos]);
        }
    } catch (Exception e) {
        throw new HiveException(e);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), FileSystem (org.apache.hadoop.fs.FileSystem), SparkBucketMapJoinContext (org.apache.hadoop.hive.ql.plan.SparkBucketMapJoinContext), BucketMapJoinContext (org.apache.hadoop.hive.ql.plan.BucketMapJoinContext), MapJoinObjectSerDeContext (org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext), MapredLocalWork (org.apache.hadoop.hive.ql.plan.MapredLocalWork)
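The optimized-container check here is deliberately one-sided: the first small table may quietly downgrade the whole query to the non-optimized path, but once any container has been built, a later unsupported key must fail, since container types cannot be mixed. Condensed to its core (names exactly as in the code above), the idiom is:

// Sketch of the one-time fallback idiom from the loader above.
if (useOptimizedContainer && !MapJoinBytesTableContainer.isSupportedKey(keyOI)) {
    if (firstContainer) {
        // Nothing loaded yet: downgrade everything to the plain hash map path.
        useOptimizedContainer = false;
        HiveConf.setBoolVar(hconf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE, false);
    } else {
        // Too late to downgrade: earlier tables were already built optimized.
        throw new HiveException("Only a subset of mapjoin keys is supported.");
    }
}

The other detail worth noting is the dump-file lookup: each small table's serialized hash table is found under the shared tmp directory from the dump-file prefix, the table position, and, for bucket map joins, the big-table bucket file recovered through the alias bucket-file mapping.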

Example 3 with MapJoinObjectSerDeContext

Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext in project hive by apache.

From the class HashTableSinkOperator, method initializeOp:

@Override
@SuppressWarnings("unchecked")
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    boolean isSilent = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVESESSIONSILENT);
    console = new LogHelper(LOG, isSilent);
    memoryExhaustionHandler = new MapJoinMemoryExhaustionHandler(console, conf.getHashtableMemoryUsage());
    emptyRowContainer.addRow(emptyObjectArray);
    // for small tables only; so get the big table position first
    posBigTableAlias = conf.getPosBigTable();
    order = conf.getTagOrder();
    // initialize some variables, which used to be initialized in CommonJoinOperator
    this.hconf = hconf;
    filterMaps = conf.getFilterMap();
    int tagLen = conf.getTagLength();
    // process join keys
    joinKeys = new List[tagLen];
    JoinUtil.populateJoinKeyValue(joinKeys, conf.getKeys(), posBigTableAlias, hconf);
    joinKeysObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinKeys, inputObjInspectors, posBigTableAlias, tagLen);
    // process join values
    joinValues = new List[tagLen];
    JoinUtil.populateJoinKeyValue(joinValues, conf.getExprs(), posBigTableAlias, hconf);
    joinValuesObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinValues, inputObjInspectors, posBigTableAlias, tagLen);
    // process join filters
    joinFilters = new List[tagLen];
    JoinUtil.populateJoinKeyValue(joinFilters, conf.getFilters(), posBigTableAlias, hconf);
    joinFilterObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinFilters, inputObjInspectors, posBigTableAlias, tagLen);
    if (!conf.isNoOuterJoin()) {
        for (Byte alias : order) {
            if (alias == posBigTableAlias || joinValues[alias] == null) {
                continue;
            }
            List<ObjectInspector> rcOIs = joinValuesObjectInspectors[alias];
            if (filterMaps != null && filterMaps[alias] != null) {
                // for each alias, add object inspector for filter tag as the last element
                rcOIs = new ArrayList<ObjectInspector>(rcOIs);
                rcOIs.add(PrimitiveObjectInspectorFactory.writableShortObjectInspector);
            }
        }
    }
    mapJoinTables = new MapJoinPersistableTableContainer[tagLen];
    mapJoinTableSerdes = new MapJoinTableContainerSerDe[tagLen];
    hashTableScale = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVEHASHTABLESCALE);
    if (hashTableScale <= 0) {
        hashTableScale = 1;
    }
    try {
        TableDesc keyTableDesc = conf.getKeyTblDesc();
        AbstractSerDe keySerde = (AbstractSerDe) ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(keySerde, null, keyTableDesc.getProperties(), null);
        MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerde, false);
        for (Byte pos : order) {
            if (pos == posBigTableAlias) {
                continue;
            }
            mapJoinTables[pos] = new HashMapWrapper(hconf, -1);
            TableDesc valueTableDesc = conf.getValueTblFilteredDescs().get(pos);
            AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null);
            SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null);
            mapJoinTableSerdes[pos] = new MapJoinTableContainerSerDe(keyContext, new MapJoinObjectSerDeContext(valueSerDe, hasFilter(pos)));
        }
    } catch (SerDeException e) {
        throw new HiveException(e);
    }
}
Also used: ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), LogHelper (org.apache.hadoop.hive.ql.session.SessionState.LogHelper), AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe), MapJoinTableContainerSerDe (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe), HashMapWrapper (org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper), MapJoinMemoryExhaustionHandler (org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionHandler), MapJoinObjectSerDeContext (org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext), TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc), SerDeException (org.apache.hadoop.hive.serde2.SerDeException)
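The layout that falls out of this initialization is a pair of arrays indexed by join tag, with the big table's slot left empty because its rows are streamed rather than hashed. A minimal sketch of that skeleton, assuming tagLen, order, posBigTableAlias, hconf, and keyContext as in the example; valueContextFor is a hypothetical helper wrapping the per-table value serde construction shown above:

// Sketch: one container and one serde per join tag, big-table slot skipped.
MapJoinPersistableTableContainer[] tables = new MapJoinPersistableTableContainer[tagLen];
MapJoinTableContainerSerDe[] serdes = new MapJoinTableContainerSerDe[tagLen];
for (Byte pos : order) {
    if (pos == posBigTableAlias) {
        continue; // big-table rows are streamed, never materialized in a hash table
    }
    tables[pos] = new HashMapWrapper(hconf, -1); // -1: no key-count estimate available
    // valueContextFor(pos) is hypothetical; see the serde construction above.
    serdes[pos] = new MapJoinTableContainerSerDe(keyContext, valueContextFor(pos));
}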

Example 4 with MapJoinObjectSerDeContext

Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext in project hive by apache.

From the class HashTableLoader, method load (the Tez variant, as the tezContext and LogicalInput usage below shows):

@Override
public void load(MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
    Map<Integer, String> parentToInput = desc.getParentToInput();
    Map<Integer, Long> parentKeyCounts = desc.getParentKeyCounts();
    // An empty key-expression list means every row matches: a cross product.
    List<ExprNodeDesc> joinExprs = desc.getKeys().values().iterator().next();
    boolean isCrossProduct = joinExprs.isEmpty();
    boolean useOptimizedTables = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
    boolean useHybridGraceHashJoin = desc.isHybridHashJoin();
    boolean isFirstKey = true;
    // Get the total available memory from memory manager
    long totalMapJoinMemory = desc.getMemoryNeeded();
    LOG.info("Memory manager allocates " + totalMapJoinMemory + " bytes for the loading hashtable.");
    if (totalMapJoinMemory <= 0) {
        totalMapJoinMemory = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
    }
    long processMaxMemory = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
    if (totalMapJoinMemory > processMaxMemory) {
        float hashtableMemoryUsage = HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE);
        LOG.warn("totalMapJoinMemory value of " + totalMapJoinMemory + " is greater than the max memory size of " + processMaxMemory);
        // Don't want to attempt to grab more memory than we have available .. percentage is a bit arbitrary
        totalMapJoinMemory = (long) (processMaxMemory * hashtableMemoryUsage);
    }
    // Only applicable to n-way Hybrid Grace Hash Join
    HybridHashTableConf nwayConf = null;
    long totalSize = 0;
    // position of the biggest small table
    int biggest = 0;
    Map<Integer, Long> tableMemorySizes = null;
    if (useHybridGraceHashJoin && mapJoinTables.length > 2) {
        // Create a Conf for n-way HybridHashTableContainers
        nwayConf = new HybridHashTableConf();
        LOG.info("N-way join: " + (mapJoinTables.length - 1) + " small tables.");
        // Find the biggest small table; also calculate total data size of all small tables
        // the size of the biggest small table
        long maxSize = Long.MIN_VALUE;
        for (int pos = 0; pos < mapJoinTables.length; pos++) {
            if (pos == desc.getPosBigTable()) {
                continue;
            }
            long smallTableSize = desc.getParentDataSizes().get(pos);
            totalSize += smallTableSize;
            if (maxSize < smallTableSize) {
                maxSize = smallTableSize;
                biggest = pos;
            }
        }
        tableMemorySizes = divideHybridHashTableMemory(mapJoinTables, desc, totalSize, totalMapJoinMemory);
        // Using biggest small table, calculate number of partitions to create for each small table
        long memory = tableMemorySizes.get(biggest);
        int numPartitions = 0;
        try {
            numPartitions = HybridHashTableContainer.calcNumPartitions(memory, maxSize, HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS), HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE));
        } catch (IOException e) {
            throw new HiveException(e);
        }
        nwayConf.setNumberOfPartitions(numPartitions);
    }
    for (int pos = 0; pos < mapJoinTables.length; pos++) {
        if (pos == desc.getPosBigTable()) {
            continue;
        }
        String inputName = parentToInput.get(pos);
        LogicalInput input = tezContext.getInput(inputName);
        try {
            input.start();
            tezContext.getTezProcessorContext().waitForAnyInputReady(Collections.<Input>singletonList(input));
        } catch (Exception e) {
            throw new HiveException(e);
        }
        try {
            KeyValueReader kvReader = (KeyValueReader) input.getReader();
            MapJoinObjectSerDeContext keyCtx = mapJoinTableSerdes[pos].getKeyContext(), valCtx = mapJoinTableSerdes[pos].getValueContext();
            if (useOptimizedTables) {
                ObjectInspector keyOi = keyCtx.getSerDe().getObjectInspector();
                if (!MapJoinBytesTableContainer.isSupportedKey(keyOi)) {
                    if (isFirstKey) {
                        useOptimizedTables = false;
                        LOG.info(describeOi("Not using optimized hash table. " + "Only a subset of mapjoin keys is supported. Unsupported key: ", keyOi));
                    } else {
                        throw new HiveException(describeOi("Only a subset of mapjoin keys is supported. Unsupported key: ", keyOi));
                    }
                }
            }
            isFirstKey = false;
            Long keyCountObj = parentKeyCounts.get(pos);
            long keyCount = (keyCountObj == null) ? -1 : keyCountObj.longValue();
            long memory = 0;
            if (useHybridGraceHashJoin) {
                if (mapJoinTables.length > 2) {
                    memory = tableMemorySizes.get(pos);
                } else {
                    // binary join
                    memory = totalMapJoinMemory;
                }
            }
            MapJoinTableContainer tableContainer;
            if (useOptimizedTables) {
                if (!useHybridGraceHashJoin || isCrossProduct) {
                    tableContainer = new MapJoinBytesTableContainer(hconf, valCtx, keyCount, 0);
                } else {
                    tableContainer = new HybridHashTableContainer(hconf, keyCount, memory, desc.getParentDataSizes().get(pos), nwayConf);
                }
            } else {
                tableContainer = new HashMapWrapper(hconf, keyCount);
            }
            LOG.info("Using tableContainer " + tableContainer.getClass().getSimpleName());
            tableContainer.setSerde(keyCtx, valCtx);
            while (kvReader.next()) {
                tableContainer.putRow((Writable) kvReader.getCurrentKey(), (Writable) kvReader.getCurrentValue());
            }
            tableContainer.seal();
            LOG.info("Finished loading hashtable using " + tableContainer.getClass() + ". Small table position: " + pos);
            mapJoinTables[pos] = tableContainer;
        } catch (Exception e) {
            throw new HiveException(e);
        }
    }
}
Also used: PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), KeyValueReader (org.apache.tez.runtime.library.api.KeyValueReader), MapJoinBytesTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer), IOException (java.io.IOException), HashMapWrapper (org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper), HybridHashTableConf (org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableConf), MapJoinObjectSerDeContext (org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext), LogicalInput (org.apache.tez.runtime.api.LogicalInput), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), MapJoinTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer), HybridHashTableContainer (org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer)
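Stripped of the memory bookkeeping, the loader's core is a choice among three container implementations followed by a load-and-seal lifecycle. A condensed sketch, with hconf, keyCtx, valCtx, keyCount, memory, the two flags, and kvReader computed exactly as in the code above (checked exceptions elided; the original wraps all of this in try/catch):

// Sketch of the container choice and load lifecycle from the Tez loader above.
MapJoinTableContainer container;
if (useOptimizedTables) {
    if (!useHybridGraceHashJoin || isCrossProduct) {
        // Flat, byte-serialized table: compact, but must fit entirely in memory.
        container = new MapJoinBytesTableContainer(hconf, valCtx, keyCount, 0);
    } else {
        // Hybrid grace hash join: partitions can spill to disk under memory pressure.
        container = new HybridHashTableContainer(hconf, keyCount, memory,
            desc.getParentDataSizes().get(pos), nwayConf);
    }
} else {
    // Fallback for key types the optimized tables cannot handle.
    container = new HashMapWrapper(hconf, keyCount);
}
container.setSerde(keyCtx, valCtx);
while (kvReader.next()) {
    container.putRow((Writable) kvReader.getCurrentKey(), (Writable) kvReader.getCurrentValue());
}
container.seal();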

Aggregations

MapJoinObjectSerDeContext (org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext): 4 examples
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 4 examples
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 3 examples
HashMapWrapper (org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper): 2 examples
MapJoinTableContainerSerDe (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe): 2 examples
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 2 examples
AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe): 2 examples
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 2 examples
IOException (java.io.IOException): 1 example
FileSystem (org.apache.hadoop.fs.FileSystem): 1 example
Path (org.apache.hadoop.fs.Path): 1 example
MapJoinMemoryExhaustionHandler (org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionHandler): 1 example
HybridHashTableConf (org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableConf): 1 example
HybridHashTableContainer (org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer): 1 example
MapJoinBytesTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer): 1 example
MapJoinTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer): 1 example
BucketMapJoinContext (org.apache.hadoop.hive.ql.plan.BucketMapJoinContext): 1 example
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 1 example
MapredLocalWork (org.apache.hadoop.hive.ql.plan.MapredLocalWork): 1 example
SparkBucketMapJoinContext (org.apache.hadoop.hive.ql.plan.SparkBucketMapJoinContext): 1 example