Search in sources :

Example 6 with VectorMapJoinDesc

use of org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc in project hive by apache.

the class VectorMapJoinFastTableContainer method createHashTable.

private VectorMapJoinFastHashTable createHashTable(int newThreshold) {
    boolean isOuterJoin = !desc.isNoOuterJoin();
    VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc();
    HashTableImplementationType hashTableImplementationType = vectorDesc.hashTableImplementationType();
    HashTableKind hashTableKind = vectorDesc.hashTableKind();
    HashTableKeyType hashTableKeyType = vectorDesc.hashTableKeyType();
    boolean minMaxEnabled = vectorDesc.minMaxEnabled();
    int writeBufferSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEWBSIZE);
    VectorMapJoinFastHashTable hashTable = null;
    switch(hashTableKeyType) {
        case BOOLEAN:
        case BYTE:
        case SHORT:
        case INT:
        case LONG:
            switch(hashTableKind) {
                case HASH_MAP:
                    hashTable = new VectorMapJoinFastLongHashMap(minMaxEnabled, isOuterJoin, hashTableKeyType, newThreshold, loadFactor, writeBufferSize, estimatedKeyCount);
                    break;
                case HASH_MULTISET:
                    hashTable = new VectorMapJoinFastLongHashMultiSet(minMaxEnabled, isOuterJoin, hashTableKeyType, newThreshold, loadFactor, writeBufferSize, estimatedKeyCount);
                    break;
                case HASH_SET:
                    hashTable = new VectorMapJoinFastLongHashSet(minMaxEnabled, isOuterJoin, hashTableKeyType, newThreshold, loadFactor, writeBufferSize, estimatedKeyCount);
                    break;
            }
            break;
        case STRING:
            switch(hashTableKind) {
                case HASH_MAP:
                    hashTable = new VectorMapJoinFastStringHashMap(isOuterJoin, newThreshold, loadFactor, writeBufferSize, estimatedKeyCount);
                    break;
                case HASH_MULTISET:
                    hashTable = new VectorMapJoinFastStringHashMultiSet(isOuterJoin, newThreshold, loadFactor, writeBufferSize, estimatedKeyCount);
                    break;
                case HASH_SET:
                    hashTable = new VectorMapJoinFastStringHashSet(isOuterJoin, newThreshold, loadFactor, writeBufferSize, estimatedKeyCount);
                    break;
            }
            break;
        case MULTI_KEY:
            switch(hashTableKind) {
                case HASH_MAP:
                    hashTable = new VectorMapJoinFastMultiKeyHashMap(isOuterJoin, newThreshold, loadFactor, writeBufferSize, estimatedKeyCount);
                    break;
                case HASH_MULTISET:
                    hashTable = new VectorMapJoinFastMultiKeyHashMultiSet(isOuterJoin, newThreshold, loadFactor, writeBufferSize, estimatedKeyCount);
                    break;
                case HASH_SET:
                    hashTable = new VectorMapJoinFastMultiKeyHashSet(isOuterJoin, newThreshold, loadFactor, writeBufferSize, estimatedKeyCount);
                    break;
            }
            break;
    }
    return hashTable;
}
Also used : VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) HashTableKeyType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType) HashTableImplementationType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType) HashTableKind(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind)

Example 7 with VectorMapJoinDesc

use of org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc in project hive by apache.

the class Vectorizer method specializeMapJoinOperator.

Operator<? extends OperatorDesc> specializeMapJoinOperator(Operator<? extends OperatorDesc> op, VectorizationContext vContext, MapJoinDesc desc, VectorMapJoinInfo vectorMapJoinInfo) throws HiveException {
    Operator<? extends OperatorDesc> vectorOp = null;
    Class<? extends Operator<?>> opClass = null;
    VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc();
    HashTableImplementationType hashTableImplementationType = HashTableImplementationType.NONE;
    HashTableKind hashTableKind = HashTableKind.NONE;
    HashTableKeyType hashTableKeyType = HashTableKeyType.NONE;
    OperatorVariation operatorVariation = OperatorVariation.NONE;
    if (vectorDesc.getIsFastHashTableEnabled()) {
        hashTableImplementationType = HashTableImplementationType.FAST;
    } else {
        hashTableImplementationType = HashTableImplementationType.OPTIMIZED;
    }
    int joinType = desc.getConds()[0].getType();
    boolean isInnerBigOnly = false;
    if (joinType == JoinDesc.INNER_JOIN && isBigTableOnlyResults(desc)) {
        isInnerBigOnly = true;
    }
    // By default, we can always use the multi-key class.
    hashTableKeyType = HashTableKeyType.MULTI_KEY;
    if (!HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MULTIKEY_ONLY_ENABLED)) {
        // Look for single column optimization.
        byte posBigTable = (byte) desc.getPosBigTable();
        Map<Byte, List<ExprNodeDesc>> keyExprs = desc.getKeys();
        List<ExprNodeDesc> bigTableKeyExprs = keyExprs.get(posBigTable);
        if (bigTableKeyExprs.size() == 1) {
            TypeInfo typeInfo = bigTableKeyExprs.get(0).getTypeInfo();
            LOG.info("Vectorizer vectorizeOperator map join typeName " + typeInfo.getTypeName());
            switch(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) {
                case BOOLEAN:
                    hashTableKeyType = HashTableKeyType.BOOLEAN;
                    break;
                case BYTE:
                    hashTableKeyType = HashTableKeyType.BYTE;
                    break;
                case SHORT:
                    hashTableKeyType = HashTableKeyType.SHORT;
                    break;
                case INT:
                    hashTableKeyType = HashTableKeyType.INT;
                    break;
                case LONG:
                    hashTableKeyType = HashTableKeyType.LONG;
                    break;
                case STRING:
                case CHAR:
                case VARCHAR:
                case BINARY:
                    hashTableKeyType = HashTableKeyType.STRING;
                default:
            }
        }
    }
    switch(joinType) {
        case JoinDesc.INNER_JOIN:
            if (!isInnerBigOnly) {
                operatorVariation = OperatorVariation.INNER;
                hashTableKind = HashTableKind.HASH_MAP;
            } else {
                operatorVariation = OperatorVariation.INNER_BIG_ONLY;
                hashTableKind = HashTableKind.HASH_MULTISET;
            }
            break;
        case JoinDesc.LEFT_OUTER_JOIN:
        case JoinDesc.RIGHT_OUTER_JOIN:
            operatorVariation = OperatorVariation.OUTER;
            hashTableKind = HashTableKind.HASH_MAP;
            break;
        case JoinDesc.LEFT_SEMI_JOIN:
            operatorVariation = OperatorVariation.LEFT_SEMI;
            hashTableKind = HashTableKind.HASH_SET;
            break;
        default:
            throw new HiveException("Unknown join type " + joinType);
    }
    LOG.info("Vectorizer vectorizeOperator map join hashTableKind " + hashTableKind.name() + " hashTableKeyType " + hashTableKeyType.name());
    switch(hashTableKeyType) {
        case BOOLEAN:
        case BYTE:
        case SHORT:
        case INT:
        case LONG:
            switch(operatorVariation) {
                case INNER:
                    opClass = VectorMapJoinInnerLongOperator.class;
                    break;
                case INNER_BIG_ONLY:
                    opClass = VectorMapJoinInnerBigOnlyLongOperator.class;
                    break;
                case LEFT_SEMI:
                    opClass = VectorMapJoinLeftSemiLongOperator.class;
                    break;
                case OUTER:
                    opClass = VectorMapJoinOuterLongOperator.class;
                    break;
                default:
                    throw new HiveException("Unknown operator variation " + operatorVariation);
            }
            break;
        case STRING:
            switch(operatorVariation) {
                case INNER:
                    opClass = VectorMapJoinInnerStringOperator.class;
                    break;
                case INNER_BIG_ONLY:
                    opClass = VectorMapJoinInnerBigOnlyStringOperator.class;
                    break;
                case LEFT_SEMI:
                    opClass = VectorMapJoinLeftSemiStringOperator.class;
                    break;
                case OUTER:
                    opClass = VectorMapJoinOuterStringOperator.class;
                    break;
                default:
                    throw new HiveException("Unknown operator variation " + operatorVariation);
            }
            break;
        case MULTI_KEY:
            switch(operatorVariation) {
                case INNER:
                    opClass = VectorMapJoinInnerMultiKeyOperator.class;
                    break;
                case INNER_BIG_ONLY:
                    opClass = VectorMapJoinInnerBigOnlyMultiKeyOperator.class;
                    break;
                case LEFT_SEMI:
                    opClass = VectorMapJoinLeftSemiMultiKeyOperator.class;
                    break;
                case OUTER:
                    opClass = VectorMapJoinOuterMultiKeyOperator.class;
                    break;
                default:
                    throw new HiveException("Unknown operator variation " + operatorVariation);
            }
            break;
        default:
            throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name());
    }
    boolean minMaxEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED);
    vectorDesc.setHashTableImplementationType(hashTableImplementationType);
    vectorDesc.setHashTableKind(hashTableKind);
    vectorDesc.setHashTableKeyType(hashTableKeyType);
    vectorDesc.setOperatorVariation(operatorVariation);
    vectorDesc.setMinMaxEnabled(minMaxEnabled);
    vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo);
    vectorOp = OperatorFactory.getVectorOperator(opClass, op.getCompilationOpContext(), op.getConf(), vContext);
    LOG.info("Vectorizer vectorizeOperator map join class " + vectorOp.getClass().getSimpleName());
    return vectorOp;
}
Also used : VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) OperatorVariation(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.OperatorVariation) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) HashTableKeyType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType) HashTableImplementationType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType) HashTableKind(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind) UDFToByte(org.apache.hadoop.hive.ql.udf.UDFToByte) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Aggregations

VectorMapJoinDesc (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc)7 HashTableImplementationType (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType)4 HashTableKeyType (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType)3 HashTableKind (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind)3 ArrayList (java.util.ArrayList)2 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)2 UDFToByte (org.apache.hadoop.hive.ql.udf.UDFToByte)2 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)2 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)2 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)2 HashSet (java.util.HashSet)1 List (java.util.List)1 HashTableLoader (org.apache.hadoop.hive.ql.exec.HashTableLoader)1 MapJoinKey (org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey)1 ReusableGetAdaptor (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor)1 VectorColumnOutputMapping (org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping)1 VectorColumnSourceMapping (org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping)1 VectorMapJoinOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator)1 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)1 VectorMapJoinFastHashTableLoader (org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastHashTableLoader)1