
Example 6 with MapJoinTableContainerSerDe

Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe in project hive by apache.

From the class MapJoinTestConfig, method createNativeVectorMapJoin.

public static MapJoinOperator createNativeVectorMapJoin(MapJoinTestDescription testDesc, Operator<? extends OperatorDesc> collectorOperator, MapJoinTestData testData, MapJoinDesc mapJoinDesc, HashTableImplementationType hashTableImplementationType) throws SerDeException, IOException, HiveException {
    VectorMapJoinDesc vectorDesc = MapJoinTestConfig.createVectorMapJoinDesc(testDesc);
    // UNDONE
    mapJoinDesc.setVectorDesc(vectorDesc);
    vectorDesc.setHashTableImplementationType(hashTableImplementationType);
    VectorMapJoinInfo vectorMapJoinInfo = vectorDesc.getVectorMapJoinInfo();
    MapJoinTableContainer mapJoinTableContainer;
    switch (vectorDesc.getHashTableImplementationType()) {
        case OPTIMIZED:
            // The bytes-backed container serializes rows, so it needs explicit key/value serde contexts.
            mapJoinTableContainer = new MapJoinBytesTableContainer(testDesc.hiveConf, null, testData.smallTableKeyHashMap.size(), 0);
            MapJoinTableContainerSerDe mapJoinTableContainerSerDe = MapJoinTestConfig.createMapJoinTableContainerSerDe(mapJoinDesc);
            mapJoinTableContainer.setSerde(mapJoinTableContainerSerDe.getKeyContext(), mapJoinTableContainerSerDe.getValueContext());
            break;
        case FAST:
            // The fast container manages its own serialization; no setSerde call is needed.
            mapJoinTableContainer = new VectorMapJoinFastTableContainer(mapJoinDesc, testDesc.hiveConf, testData.smallTableKeyHashMap.size());
            break;
        default:
            throw new RuntimeException("Unexpected hash table implementation type " + vectorDesc.getHashTableImplementationType());
    }
    loadTableContainerData(testDesc, testData, mapJoinTableContainer);
    VectorizationContext vContext = MapJoinTestConfig.createVectorizationContext(testDesc);
    byte posBigTable = (byte) mapJoinDesc.getPosBigTable();
    VectorExpression[] slimmedBigTableKeyExpressions = vContext.getVectorExpressions(mapJoinDesc.getKeys().get(posBigTable));
    vectorMapJoinInfo.setSlimmedBigTableKeyExpressions(slimmedBigTableKeyExpressions);
    Map<Byte, List<ExprNodeDesc>> exprs = mapJoinDesc.getExprs();
    VectorExpression[] slimmedBigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));
    vectorMapJoinInfo.setSlimmedBigTableValueExpressions(slimmedBigTableValueExpressions);
    VectorMapJoinCommonOperator operator = MapJoinTestConfig.createNativeVectorMapJoinOperator(testDesc.vectorMapJoinVariation, mapJoinDesc, vectorDesc, vContext);
    MapJoinTestConfig.connectOperators(testDesc, operator, collectorOperator);
    operator.setTestMapJoinTableContainer(1, mapJoinTableContainer, null);
    return operator;
}
Also used: VectorMapJoinDesc (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc), MapJoinBytesTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer), VectorMapJoinInfo (org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo), VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext), MapJoinTableContainerSerDe (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe), VectorMapJoinFastTableContainer (org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer), VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression), List (java.util.List), ArrayList (java.util.ArrayList), MapJoinTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer)
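
The helper MapJoinTestConfig.createMapJoinTableContainerSerDe called in the OPTIMIZED branch is not part of this listing. A minimal sketch of what such a helper can look like, following the construction pattern of MapJoinOperator.generateMapMetaData (Example 7); the helper name, the single-position parameter, and the hasFilter = false simplification on the value side are ours, not confirmed Hive source:

// Hedged sketch, not the actual Hive helper: builds a container serde for
// one small table at position pos, mirroring Example 7 below.
static MapJoinTableContainerSerDe buildContainerSerDe(MapJoinDesc mapJoinDesc, int pos) throws SerDeException {
    TableDesc keyTableDesc = mapJoinDesc.getKeyTblDesc();
    AbstractSerDe keySerDe = (AbstractSerDe) ReflectionUtil.newInstance(keyTableDesc.getSerDeClass(), null);
    keySerDe.initialize(null, keyTableDesc.getProperties(), null);
    // Keys never carry a filter tag, hence hasFilter = false.
    MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerDe, false);
    TableDesc valueTableDesc = mapJoinDesc.getValueTblDescs().get(pos);
    AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtil.newInstance(valueTableDesc.getSerDeClass(), null);
    valueSerDe.initialize(null, valueTableDesc.getProperties(), null);
    // Simplification: assumes no value filter tag; real callers would pass hasFilter(pos) here.
    MapJoinObjectSerDeContext valueContext = new MapJoinObjectSerDeContext(valueSerDe, false);
    return new MapJoinTableContainerSerDe(keyContext, valueContext);
}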

Example 7 with MapJoinTableContainerSerDe

Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe in project hive by apache.

From the class MapJoinOperator, method generateMapMetaData.

public void generateMapMetaData() throws HiveException {
    try {
        // One key SerDe context is shared by all small tables; keys never carry a filter tag.
        TableDesc keyTableDesc = conf.getKeyTblDesc();
        AbstractSerDe keySerDe = (AbstractSerDe) ReflectionUtil.newInstance(keyTableDesc.getSerDeClass(), null);
        keySerDe.initialize(null, keyTableDesc.getProperties(), null);
        MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerDe, false);
        for (int pos = 0; pos < order.length; pos++) {
            // The big table is streamed rather than hashed, so it needs no container serde.
            if (pos == posBigTable) {
                continue;
            }
            TableDesc valueTableDesc;
            if (conf.getNoOuterJoin()) {
                valueTableDesc = conf.getValueTblDescs().get(pos);
            } else {
                // Outer joins keep filter information with the values, so use the filtered descriptors.
                valueTableDesc = conf.getValueFilteredTblDescs().get(pos);
            }
            AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtil.newInstance(valueTableDesc.getSerDeClass(), null);
            valueSerDe.initialize(null, valueTableDesc.getProperties(), null);
            MapJoinObjectSerDeContext valueContext = new MapJoinObjectSerDeContext(valueSerDe, hasFilter(pos));
            mapJoinTableSerdes[pos] = new MapJoinTableContainerSerDe(keyContext, valueContext);
        }
    } catch (SerDeException e) {
        throw new HiveException(e);
    }
}
Also used: MapJoinTableContainerSerDe (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), MapJoinObjectSerDeContext (org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext), TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc), AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe), SerDeException (org.apache.hadoop.hive.serde2.SerDeException)
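
hasFilter(pos) is used above but its body does not appear in this listing. Judging from the filterMaps handling in HashTableSinkOperator.initializeOp (Example 9), a plausible implementation is a null check on the per-alias filter map; treat this as an assumption rather than confirmed source:

// Assumed implementation, consistent with the filterMaps checks in Example 9:
// a position carries a filter tag iff a filter map was generated for it.
protected boolean hasFilter(int pos) {
    return filterMaps != null && filterMaps[pos] != null;
}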

Example 8 with MapJoinTableContainerSerDe

Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe in project hive by apache.

From the class MapJoinOperator, method initializeOp.

@Override
protected void initializeOp(Configuration hconf) throws HiveException {
    this.hconf = hconf;
    unwrapContainer = new UnwrapRowContainer[conf.getTagLength()];
    super.initializeOp(hconf);
    int tagLen = conf.getTagLength();
    // On Tez only: The hash map might already be cached in the container we run
    // the task in. On MR: The cache is a no-op.
    String queryId = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVEQUERYID);
    // The cacheKey may have already been defined in the MapJoin conf spec
    // as part of the Shared Work Optimization if it can be reused among
    // multiple mapjoin operators. In that case, we take that key from conf
    // and append this.getClass().getName() to disambiguate between different
    // classes that may be using the same source data, e.g.
    // VectorMapJoinInnerGenerateResultOperator and VectorMapJoinLeftSemiLongOperator.
    // If the cacheKey is not defined in the conf, then we generate it.
    cacheKey = conf.getCacheKey() == null ? MapJoinDesc.generateCacheKey(this.getOperatorId()) : conf.getCacheKey() + "_" + this.getClass().getName();
    cache = ObjectCacheFactory.getCache(hconf, queryId, false);
    loader = getHashTableLoader(hconf);
    bucketId = hconf.getInt(Constants.LLAP_BUCKET_ID, -1);
    numBuckets = hconf.getInt(Constants.LLAP_NUM_BUCKETS, -1);
    hashMapRowGetters = null;
    mapJoinTables = new MapJoinTableContainer[tagLen];
    mapJoinTableSerdes = new MapJoinTableContainerSerDe[tagLen];
    hashTblInitedOnce = false;
    // Reset grace hashjoin context so that there is no state maintained when operator/work is
    // retrieved from object cache
    hybridMapJoinLeftover = false;
    firstSmallTable = null;
    doFullOuterMapJoinInit();
    generateMapMetaData();
    isTestingNoHashTableLoad = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD);
    if (isTestingNoHashTableLoad) {
        return;
    }
    final ExecMapperContext mapContext = getExecContext();
    final MapredContext mrContext = MapredContext.get();
    if (!conf.isBucketMapJoin() && !conf.isDynamicPartitionHashJoin()) {
        /*
       * The issue with caching in case of bucket map join is that different tasks
       * process different buckets and if the container is reused to join a different bucket,
       * join results can be incorrect. The cache is keyed on operator id and for bucket map join
       * the operator does not change but data needed is different. For a proper fix, this
       * requires changes in the Tez API with regard to finding bucket id and
       * also ability to schedule tasks to re-use containers that have cached the specific bucket.
       */
        LOG.debug("This is not bucket map join, so cache");
        Future<Pair<MapJoinTableContainer[], MapJoinTableContainerSerDe[]>> future = cache.retrieveAsync(cacheKey, () -> loadHashTable(mapContext, mrContext));
        asyncInitOperations.add(future);
    } else if (!isInputFileChangeSensitive(mapContext)) {
        loadHashTable(mapContext, mrContext);
        hashTblInitedOnce = true;
    }
}
Also used: ExecMapperContext (org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext), MapJoinTableContainerSerDe (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe), MapJoinTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer), Pair (org.apache.commons.lang3.tuple.Pair), ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair)
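
The cacheKey ternary above compresses the two cases described in the comment into one expression. Spelled out as an if/else, purely for illustration:

// Illustrative expansion of the cacheKey expression above; not a code change.
String confKey = conf.getCacheKey();
String cacheKey;
if (confKey == null) {
    // No shared key was planned: derive one from this operator's id.
    cacheKey = MapJoinDesc.generateCacheKey(this.getOperatorId());
} else {
    // Shared Work Optimization key: append the class name so that, for example,
    // VectorMapJoinInnerGenerateResultOperator and VectorMapJoinLeftSemiLongOperator
    // reading the same source data get distinct cache entries.
    cacheKey = confKey + "_" + this.getClass().getName();
}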

Example 9 with MapJoinTableContainerSerDe

Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe in project hive by apache.

From the class HashTableSinkOperator, method initializeOp.

@Override
@SuppressWarnings("unchecked")
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    boolean isSilent = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVESESSIONSILENT);
    console = new LogHelper(LOG, isSilent);
    memoryExhaustionChecker = MemoryExhaustionCheckerFactory.getChecker(console, hconf, conf);
    emptyRowContainer.addRow(emptyObjectArray);
    // this operator handles the small tables only, so get the big table position first in order to skip it
    posBigTableAlias = conf.getPosBigTable();
    order = conf.getTagOrder();
    // initialize some variables, which used to be initialized in CommonJoinOperator
    this.hconf = hconf;
    filterMaps = conf.getFilterMap();
    int tagLen = conf.getTagLength();
    // process join keys
    joinKeys = new List[tagLen];
    JoinUtil.populateJoinKeyValue(joinKeys, conf.getKeys(), posBigTableAlias, hconf);
    joinKeysObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinKeys, inputObjInspectors, posBigTableAlias, tagLen);
    // process join values
    joinValues = new List[tagLen];
    JoinUtil.populateJoinKeyValue(joinValues, conf.getExprs(), posBigTableAlias, hconf);
    joinValuesObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinValues, inputObjInspectors, posBigTableAlias, tagLen);
    // process join filters
    joinFilters = new List[tagLen];
    JoinUtil.populateJoinKeyValue(joinFilters, conf.getFilters(), posBigTableAlias, hconf);
    joinFilterObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinFilters, inputObjInspectors, posBigTableAlias, tagLen);
    if (!conf.isNoOuterJoin()) {
        for (Byte alias : order) {
            if (alias == posBigTableAlias || joinValues[alias] == null) {
                continue;
            }
            List<ObjectInspector> rcOIs = joinValuesObjectInspectors[alias];
            if (filterMaps != null && filterMaps[alias] != null) {
                // for each alias, add object inspector for filter tag as the last element
                rcOIs = new ArrayList<ObjectInspector>(rcOIs);
                rcOIs.add(PrimitiveObjectInspectorFactory.writableShortObjectInspector);
            }
        }
    }
    mapJoinTables = new MapJoinPersistableTableContainer[tagLen];
    mapJoinTableSerdes = new MapJoinTableContainerSerDe[tagLen];
    hashTableScale = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVEHASHTABLESCALE);
    if (hashTableScale <= 0) {
        hashTableScale = 1;
    }
    try {
        TableDesc keyTableDesc = conf.getKeyTblDesc();
        AbstractSerDe keySerDe = (AbstractSerDe) ReflectionUtils.newInstance(keyTableDesc.getSerDeClass(), null);
        keySerDe.initialize(null, keyTableDesc.getProperties(), null);
        MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerDe, false);
        for (Byte pos : order) {
            if (pos == posBigTableAlias) {
                continue;
            }
            mapJoinTables[pos] = new HashMapWrapper(hconf, -1);
            TableDesc valueTableDesc = conf.getValueTblFilteredDescs().get(pos);
            AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtils.newInstance(valueTableDesc.getSerDeClass(), null);
            valueSerDe.initialize(null, valueTableDesc.getProperties(), null);
            mapJoinTableSerdes[pos] = new MapJoinTableContainerSerDe(keyContext, new MapJoinObjectSerDeContext(valueSerDe, hasFilter(pos)));
        }
    } catch (SerDeException e) {
        throw new HiveException(e);
    }
}
Also used: ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), LogHelper (org.apache.hadoop.hive.ql.session.SessionState.LogHelper), AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe), MapJoinTableContainerSerDe (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe), HashMapWrapper (org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper), MapJoinObjectSerDeContext (org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext), TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc), SerDeException (org.apache.hadoop.hive.serde2.SerDeException)
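
The rcOIs loop above is the read-side mirror of the hasFilter flag passed to MapJoinObjectSerDeContext: when an alias has join filters, each serialized value row ends with a short filter tag, so the value ObjectInspector list gets one extra entry. A condensed restatement of that pattern, with a local variable name of our choosing:

// Condensed restatement of the filter-tag handling above.
List<ObjectInspector> valueOIs = new ArrayList<ObjectInspector>(joinValuesObjectInspectors[alias]);
if (filterMaps != null && filterMaps[alias] != null) {
    // The trailing short ObjectInspector corresponds to the filter tag
    // appended to each serialized small-table value row.
    valueOIs.add(PrimitiveObjectInspectorFactory.writableShortObjectInspector);
}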

Example 10 with MapJoinTableContainerSerDe

Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe in project hive by apache.

From the class MapJoinTestConfig, an overload of method createNativeVectorMapJoin returning CreateMapJoinResult.

public static CreateMapJoinResult createNativeVectorMapJoin(MapJoinTestDescription testDesc, MapJoinTestData testData, MapJoinDesc mapJoinDesc, HashTableImplementationType hashTableImplementationType, MapJoinTableContainer shareMapJoinTableContainer) throws SerDeException, IOException, HiveException {
    VectorMapJoinDesc vectorDesc = MapJoinTestConfig.createVectorMapJoinDesc(testDesc);
    mapJoinDesc.setVectorDesc(vectorDesc);
    vectorDesc.setHashTableImplementationType(hashTableImplementationType);
    VectorMapJoinInfo vectorMapJoinInfo = vectorDesc.getVectorMapJoinInfo();
    MapJoinTableContainer mapJoinTableContainer;
    MapJoinTableContainerSerDe mapJoinTableContainerSerDe = null;
    switch (vectorDesc.getHashTableImplementationType()) {
        case OPTIMIZED:
            // The bytes-backed container serializes rows, so it needs explicit key/value serde contexts.
            mapJoinTableContainer = new MapJoinBytesTableContainer(testDesc.hiveConf, null, testData.smallTableKeyHashMap.size(), 0);
            mapJoinTableContainerSerDe = MapJoinTestConfig.createMapJoinTableContainerSerDe(mapJoinDesc);
            mapJoinTableContainer.setSerde(mapJoinTableContainerSerDe.getKeyContext(), mapJoinTableContainerSerDe.getValueContext());
            break;
        case FAST:
            // The fast container manages its own serialization; no setSerde call is needed.
            mapJoinTableContainer = new VectorMapJoinFastTableContainer(mapJoinDesc, testDesc.hiveConf, testData.smallTableKeyHashMap.size(), 1);
            break;
        default:
            throw new RuntimeException("Unexpected hash table implementation type " + vectorDesc.getHashTableImplementationType());
    }
    // if (shareMapJoinTableContainer == null) {
    loadTableContainerData(testDesc, testData, mapJoinTableContainer);
    // } else {
    // setTableContainerData(mapJoinTableContainer, shareMapJoinTableContainer);
    // }
    VectorizationContext vContext = MapJoinTestConfig.createVectorizationContext(testDesc);
    byte posBigTable = (byte) mapJoinDesc.getPosBigTable();
    VectorExpression[] slimmedBigTableKeyExpressions = vContext.getVectorExpressions(mapJoinDesc.getKeys().get(posBigTable));
    vectorMapJoinInfo.setSlimmedBigTableKeyExpressions(slimmedBigTableKeyExpressions);
    Map<Byte, List<ExprNodeDesc>> exprs = mapJoinDesc.getExprs();
    VectorExpression[] slimmedBigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));
    vectorMapJoinInfo.setSlimmedBigTableValueExpressions(slimmedBigTableValueExpressions);
    VectorMapJoinCommonOperator operator = MapJoinTestConfig.createNativeVectorMapJoinOperator(testDesc.vectorMapJoinVariation, mapJoinDesc, vectorDesc, vContext);
    HiveConf.setBoolVar(testDesc.hiveConf, HiveConf.ConfVars.HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD, true);
    return new CreateMapJoinResult(operator, mapJoinTableContainer, mapJoinTableContainerSerDe);
}
Also used: VectorMapJoinDesc (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc), MapJoinBytesTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer), VectorMapJoinInfo (org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo), VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext), MapJoinTableContainerSerDe (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe), VectorMapJoinFastTableContainer (org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer), VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression), List (java.util.List), ArrayList (java.util.ArrayList), MapJoinTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer)
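
Unlike Example 6, this overload does not install the container into the operator itself: it sets HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD, which makes MapJoinOperator.initializeOp (Example 8) return before scheduling any hash-table load, and hands the pre-loaded container and serde back to the caller. A hedged sketch of how a test might wire the result in, reusing setTestMapJoinTableContainer from Example 6; the CreateMapJoinResult field names are assumptions, since the class body is not shown here:

// Hedged sketch: installing the pre-built container into the operator under
// test. The three result fields are assumed names; only
// setTestMapJoinTableContainer appears verbatim in Example 6.
CreateMapJoinResult result = MapJoinTestConfig.createNativeVectorMapJoin(
    testDesc, testData, mapJoinDesc, HashTableImplementationType.FAST,
    /* shareMapJoinTableContainer */ null);
MapJoinOperator operator = result.mapJoinOperator;  // assumed field
operator.setTestMapJoinTableContainer(1, result.mapJoinTableContainer, result.mapJoinTableContainerSerDe);  // assumed fields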

Aggregations

MapJoinTableContainerSerDe (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe): 12 usages
MapJoinTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer): 8 usages
VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext): 6 usages
VectorMapJoinDesc (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc): 6 usages
ArrayList (java.util.ArrayList): 5 usages
List (java.util.List): 5 usages
MapJoinObjectSerDeContext (org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext): 5 usages
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 4 usages
MapJoinBytesTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer): 4 usages
VectorMapJoinOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator): 4 usages
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 4 usages
HashMapWrapper (org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper): 3 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 3 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 3 usages
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 3 usages
AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe): 3 usages
ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair): 2 usages
Pair (org.apache.commons.lang3.tuple.Pair): 2 usages
CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 2 usages
CountCollectorTestOperator (org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator): 2 usages