Search in sources :

Example 6 with VectorMapJoinVariation

use of org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation in project hive by apache.

the class TestMapJoinOperator method testMultiKey3.

/**
 * Runs {@code doTestMultiKey3} against every {@link VectorMapJoinVariation} using the
 * {@code DYNAMIC_PARTITION_HASH_JOIN} plan variation, once per hive conf variation index.
 *
 * <p>Each pass over the variations uses the next seed and the next conf variation index.
 * Looping stops once the helper's return value for the last variation of a pass is
 * {@code true}.
 */
@Test
public void testMultiKey3() throws Exception {
    boolean finished = false;
    long currentSeed = 87543;
    // Seed and conf index advance together, one step per completed pass.
    for (int confIndex = 0; !finished; confIndex++, currentSeed++) {
        for (VectorMapJoinVariation variation : VectorMapJoinVariation.values()) {
            // Only the result of the final variation in a pass decides termination.
            finished = doTestMultiKey3(
                currentSeed,
                confIndex,
                variation,
                MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN);
        }
    }
}
Also used : VectorMapJoinVariation(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation) Test(org.junit.Test)

Example 7 with VectorMapJoinVariation

use of org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation in project hive by apache.

the class TestMapJoinOperator method testLong3.

/**
 * Runs {@code doTestLong3} with a fixed row count of 10 against every
 * {@link VectorMapJoinVariation} using the {@code DYNAMIC_PARTITION_HASH_JOIN} plan
 * variation, once per hive conf variation index.
 *
 * <p>Each pass over the variations uses the next seed and the next conf variation index.
 * Looping stops once the helper's return value for the last variation of a pass is
 * {@code true}.
 */
@Test
public void testLong3() throws Exception {
    final int rows = 10;
    boolean finished = false;
    long currentSeed = 9934;
    // Seed and conf index advance together, one step per completed pass.
    for (int confIndex = 0; !finished; confIndex++, currentSeed++) {
        for (VectorMapJoinVariation variation : VectorMapJoinVariation.values()) {
            // Only the result of the final variation in a pass decides termination.
            finished = doTestLong3(
                currentSeed,
                rows,
                confIndex,
                variation,
                MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN);
        }
    }
}
Also used : VectorMapJoinVariation(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation) Test(org.junit.Test)

Example 8 with VectorMapJoinVariation

use of org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation in project hive by apache.

the class Vectorizer method specializeMapJoinOperator.

/**
 * Chooses a specialized vector map-join operator class for the given map join, records the
 * choices on {@code vectorDesc}, and creates the operator through {@code OperatorFactory}.
 *
 * <p>Three settings are derived here:
 * <ul>
 *   <li>hash table implementation: {@code FAST} when {@code vectorDesc} reports the fast hash
 *       table as enabled, otherwise {@code OPTIMIZED};</li>
 *   <li>hash table key type: {@code MULTI_KEY} unless the multi-key-only setting is off and the
 *       big table has exactly one key expression of a recognized primitive category;</li>
 *   <li>join variation and hash table kind: derived from the type of the first join
 *       condition.</li>
 * </ul>
 *
 * @param op operator whose compilation context and conf are passed to the operator factory
 * @param vContext vectorization context passed to the operator factory
 * @param desc map-join descriptor providing the join conditions, big-table position and keys
 * @param vectorDesc vector map-join descriptor; mutated with the selected settings
 * @return the operator returned by {@code OperatorFactory.getVectorOperator}
 * @throws HiveException if the join type or the derived join variation is not handled
 * @throws RuntimeException if the derived hash table key type is not handled
 */
Operator<? extends OperatorDesc> specializeMapJoinOperator(Operator<? extends OperatorDesc> op, VectorizationContext vContext, MapJoinDesc desc, VectorMapJoinDesc vectorDesc) throws HiveException {
    VectorMapJoinInfo vectorMapJoinInfo = vectorDesc.getVectorMapJoinInfo();

    HashTableImplementationType hashTableImplementationType;
    if (vectorDesc.getIsFastHashTableEnabled()) {
        hashTableImplementationType = HashTableImplementationType.FAST;
    } else {
        hashTableImplementationType = HashTableImplementationType.OPTIMIZED;
    }

    // Only the first join condition is consulted for the join type.
    int joinType = desc.getConds()[0].getType();
    boolean isInnerBigOnly = joinType == JoinDesc.INNER_JOIN && isBigTableOnlyResults(desc);

    // By default, we can always use the multi-key class.
    HashTableKeyType hashTableKeyType = HashTableKeyType.MULTI_KEY;
    if (!HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MULTIKEY_ONLY_ENABLED)) {
        // Look for single column optimization.
        byte posBigTable = (byte) desc.getPosBigTable();
        Map<Byte, List<ExprNodeDesc>> keyExprs = desc.getKeys();
        List<ExprNodeDesc> bigTableKeyExprs = keyExprs.get(posBigTable);
        if (bigTableKeyExprs.size() == 1) {
            TypeInfo typeInfo = bigTableKeyExprs.get(0).getTypeInfo();
            LOG.info("Vectorizer vectorizeOperator map join typeName " + typeInfo.getTypeName());
            // NOTE(review): this cast throws ClassCastException for a non-primitive key type;
            // presumably callers only reach here with primitive keys -- confirm.
            switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) {
                case BOOLEAN:
                    hashTableKeyType = HashTableKeyType.BOOLEAN;
                    break;
                case BYTE:
                    hashTableKeyType = HashTableKeyType.BYTE;
                    break;
                case SHORT:
                    hashTableKeyType = HashTableKeyType.SHORT;
                    break;
                case INT:
                    hashTableKeyType = HashTableKeyType.INT;
                    break;
                case DATE:
                    hashTableKeyType = HashTableKeyType.DATE;
                    break;
                case LONG:
                    hashTableKeyType = HashTableKeyType.LONG;
                    break;
                case STRING:
                case CHAR:
                case VARCHAR:
                case BINARY:
                    hashTableKeyType = HashTableKeyType.STRING;
                    break;
                default:
                    // Any other category keeps the MULTI_KEY default.
                    break;
            }
        }
    }

    // Map the join type onto a join variation and the hash table kind it uses.
    VectorMapJoinVariation vectorMapJoinVariation;
    HashTableKind hashTableKind;
    switch (joinType) {
        case JoinDesc.INNER_JOIN:
            if (!isInnerBigOnly) {
                vectorMapJoinVariation = VectorMapJoinVariation.INNER;
                hashTableKind = HashTableKind.HASH_MAP;
            } else {
                vectorMapJoinVariation = VectorMapJoinVariation.INNER_BIG_ONLY;
                hashTableKind = HashTableKind.HASH_MULTISET;
            }
            break;
        case JoinDesc.LEFT_OUTER_JOIN:
        case JoinDesc.RIGHT_OUTER_JOIN:
            vectorMapJoinVariation = VectorMapJoinVariation.OUTER;
            hashTableKind = HashTableKind.HASH_MAP;
            break;
        case JoinDesc.FULL_OUTER_JOIN:
            vectorMapJoinVariation = VectorMapJoinVariation.FULL_OUTER;
            hashTableKind = HashTableKind.HASH_MAP;
            break;
        case JoinDesc.LEFT_SEMI_JOIN:
            vectorMapJoinVariation = VectorMapJoinVariation.LEFT_SEMI;
            hashTableKind = HashTableKind.HASH_SET;
            break;
        case JoinDesc.ANTI_JOIN:
            vectorMapJoinVariation = VectorMapJoinVariation.LEFT_ANTI;
            hashTableKind = HashTableKind.HASH_SET;
            break;
        default:
            throw new HiveException("Unknown join type " + joinType);
    }
    LOG.info("Vectorizer vectorizeOperator map join hashTableKind " + hashTableKind.name() + " hashTableKeyType " + hashTableKeyType.name());

    // Select the operator class: key-type family first, then join variation within it.
    Class<? extends Operator<?>> opClass;
    switch (hashTableKeyType) {
        case BOOLEAN:
        case BYTE:
        case SHORT:
        case INT:
        case DATE:
        case LONG:
            // All of these single-column key types share the *Long* operator family.
            switch (vectorMapJoinVariation) {
                case INNER:
                    opClass = VectorMapJoinInnerLongOperator.class;
                    break;
                case INNER_BIG_ONLY:
                    opClass = VectorMapJoinInnerBigOnlyLongOperator.class;
                    break;
                case LEFT_SEMI:
                    opClass = VectorMapJoinLeftSemiLongOperator.class;
                    break;
                case LEFT_ANTI:
                    opClass = VectorMapJoinAntiJoinLongOperator.class;
                    break;
                case OUTER:
                    opClass = VectorMapJoinOuterLongOperator.class;
                    break;
                case FULL_OUTER:
                    opClass = VectorMapJoinFullOuterLongOperator.class;
                    break;
                default:
                    throw new HiveException("Unknown operator variation " + vectorMapJoinVariation);
            }
            break;
        case STRING:
            switch (vectorMapJoinVariation) {
                case INNER:
                    opClass = VectorMapJoinInnerStringOperator.class;
                    break;
                case INNER_BIG_ONLY:
                    opClass = VectorMapJoinInnerBigOnlyStringOperator.class;
                    break;
                case LEFT_SEMI:
                    opClass = VectorMapJoinLeftSemiStringOperator.class;
                    break;
                case LEFT_ANTI:
                    opClass = VectorMapJoinAntiJoinStringOperator.class;
                    break;
                case OUTER:
                    opClass = VectorMapJoinOuterStringOperator.class;
                    break;
                case FULL_OUTER:
                    opClass = VectorMapJoinFullOuterStringOperator.class;
                    break;
                default:
                    throw new HiveException("Unknown operator variation " + vectorMapJoinVariation);
            }
            break;
        case MULTI_KEY:
            switch (vectorMapJoinVariation) {
                case INNER:
                    opClass = VectorMapJoinInnerMultiKeyOperator.class;
                    break;
                case INNER_BIG_ONLY:
                    opClass = VectorMapJoinInnerBigOnlyMultiKeyOperator.class;
                    break;
                case LEFT_SEMI:
                    opClass = VectorMapJoinLeftSemiMultiKeyOperator.class;
                    break;
                case LEFT_ANTI:
                    opClass = VectorMapJoinAntiJoinMultiKeyOperator.class;
                    break;
                case OUTER:
                    opClass = VectorMapJoinOuterMultiKeyOperator.class;
                    break;
                case FULL_OUTER:
                    opClass = VectorMapJoinFullOuterMultiKeyOperator.class;
                    break;
                default:
                    throw new HiveException("Unknown operator variation " + vectorMapJoinVariation);
            }
            break;
        default:
            throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name());
    }

    boolean minMaxEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED);

    // Record every decision on the descriptor before the operator is created from it.
    vectorDesc.setHashTableImplementationType(hashTableImplementationType);
    vectorDesc.setHashTableKind(hashTableKind);
    vectorDesc.setHashTableKeyType(hashTableKeyType);
    vectorDesc.setVectorMapJoinVariation(vectorMapJoinVariation);
    if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) {
        vectorDesc.setIsFullOuter(true);
    }
    vectorDesc.setMinMaxEnabled(minMaxEnabled);
    vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo);

    Operator<? extends OperatorDesc> vectorOp = OperatorFactory.getVectorOperator(opClass, op.getCompilationOpContext(), op.getConf(), vContext, vectorDesc);
    LOG.info("Vectorizer vectorizeOperator map join class " + vectorOp.getClass().getSimpleName());
    return vectorOp;
}
Also used : VectorMapJoinVariation(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorMapJoinInfo(org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) HashTableKeyType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType) HashTableImplementationType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType) HashTableKind(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind) UDFToByte(org.apache.hadoop.hive.ql.udf.UDFToByte) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 9 with VectorMapJoinVariation

use of org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation in project hive by apache.

the class TestMapJoinOperator method testMultiKey0.

/**
 * Runs {@code doTestMultiKey0} against every {@link VectorMapJoinVariation} using the
 * {@code DYNAMIC_PARTITION_HASH_JOIN} plan variation, once per hive conf variation index.
 *
 * <p>Each pass over the variations uses the next seed and the next conf variation index.
 * Looping stops once the helper's return value for the last variation of a pass is
 * {@code true}.
 */
@Test
public void testMultiKey0() throws Exception {
    boolean finished = false;
    long currentSeed = 28322;
    // Seed and conf index advance together, one step per completed pass.
    for (int confIndex = 0; !finished; confIndex++, currentSeed++) {
        for (VectorMapJoinVariation variation : VectorMapJoinVariation.values()) {
            // Only the result of the final variation in a pass decides termination.
            finished = doTestMultiKey0(
                currentSeed,
                confIndex,
                variation,
                MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN);
        }
    }
}
Also used : VectorMapJoinVariation(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation) Test(org.junit.Test)

Example 10 with VectorMapJoinVariation

use of org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation in project hive by apache.

the class TestMapJoinOperator method testLong4.

/**
 * Runs {@code doTestLong4} with a fixed row count of 10 against every
 * {@link VectorMapJoinVariation} using the {@code DYNAMIC_PARTITION_HASH_JOIN} plan
 * variation, once per hive conf variation index.
 *
 * <p>Each pass over the variations uses the next seed and the next conf variation index.
 * Looping stops once the helper's return value for the last variation of a pass is
 * {@code true}.
 */
@Test
public void testLong4() throws Exception {
    final int rows = 10;
    boolean finished = false;
    long currentSeed = 3982;
    // Seed and conf index advance together, one step per completed pass.
    for (int confIndex = 0; !finished; confIndex++, currentSeed++) {
        for (VectorMapJoinVariation variation : VectorMapJoinVariation.values()) {
            // Only the result of the final variation in a pass decides termination.
            finished = doTestLong4(
                currentSeed,
                rows,
                confIndex,
                variation,
                MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN);
        }
    }
}
Also used : VectorMapJoinVariation(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation) Test(org.junit.Test)

Aggregations

VectorMapJoinVariation (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation)17 Test (org.junit.Test)16 Ignore (org.junit.Ignore)3 ArrayList (java.util.ArrayList)1 List (java.util.List)1 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)1 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)1 HashTableImplementationType (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType)1 HashTableKeyType (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType)1 HashTableKind (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind)1 VectorMapJoinInfo (org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo)1 UDFToByte (org.apache.hadoop.hive.ql.udf.UDFToByte)1 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)1 ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo)1 MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo)1 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)1 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)1 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)1