Search in sources :

Example 1 with VectorMapJoinInfo

use of org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo in project hive by apache.

the class Vectorizer method specializeMapJoinOperator.

Operator<? extends OperatorDesc> specializeMapJoinOperator(Operator<? extends OperatorDesc> op, VectorizationContext vContext, MapJoinDesc desc, VectorMapJoinDesc vectorDesc) throws HiveException {
    Operator<? extends OperatorDesc> vectorOp = null;
    Class<? extends Operator<?>> opClass = null;
    VectorMapJoinInfo vectorMapJoinInfo = vectorDesc.getVectorMapJoinInfo();
    HashTableImplementationType hashTableImplementationType = HashTableImplementationType.NONE;
    HashTableKind hashTableKind = HashTableKind.NONE;
    HashTableKeyType hashTableKeyType = HashTableKeyType.NONE;
    VectorMapJoinVariation vectorMapJoinVariation = null;
    if (vectorDesc.getIsFastHashTableEnabled()) {
        hashTableImplementationType = HashTableImplementationType.FAST;
    } else {
        hashTableImplementationType = HashTableImplementationType.OPTIMIZED;
    }
    int joinType = desc.getConds()[0].getType();
    boolean isInnerBigOnly = false;
    if (joinType == JoinDesc.INNER_JOIN && isBigTableOnlyResults(desc)) {
        isInnerBigOnly = true;
    }
    // By default, we can always use the multi-key class.
    hashTableKeyType = HashTableKeyType.MULTI_KEY;
    if (!HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MULTIKEY_ONLY_ENABLED)) {
        // Look for single column optimization.
        byte posBigTable = (byte) desc.getPosBigTable();
        Map<Byte, List<ExprNodeDesc>> keyExprs = desc.getKeys();
        List<ExprNodeDesc> bigTableKeyExprs = keyExprs.get(posBigTable);
        if (bigTableKeyExprs.size() == 1) {
            TypeInfo typeInfo = bigTableKeyExprs.get(0).getTypeInfo();
            LOG.info("Vectorizer vectorizeOperator map join typeName " + typeInfo.getTypeName());
            switch(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) {
                case BOOLEAN:
                    hashTableKeyType = HashTableKeyType.BOOLEAN;
                    break;
                case BYTE:
                    hashTableKeyType = HashTableKeyType.BYTE;
                    break;
                case SHORT:
                    hashTableKeyType = HashTableKeyType.SHORT;
                    break;
                case INT:
                    hashTableKeyType = HashTableKeyType.INT;
                    break;
                case DATE:
                    hashTableKeyType = HashTableKeyType.DATE;
                    break;
                case LONG:
                    hashTableKeyType = HashTableKeyType.LONG;
                    break;
                case STRING:
                case CHAR:
                case VARCHAR:
                case BINARY:
                    hashTableKeyType = HashTableKeyType.STRING;
                default:
            }
        }
    }
    switch(joinType) {
        case JoinDesc.INNER_JOIN:
            if (!isInnerBigOnly) {
                vectorMapJoinVariation = VectorMapJoinVariation.INNER;
                hashTableKind = HashTableKind.HASH_MAP;
            } else {
                vectorMapJoinVariation = VectorMapJoinVariation.INNER_BIG_ONLY;
                hashTableKind = HashTableKind.HASH_MULTISET;
            }
            break;
        case JoinDesc.LEFT_OUTER_JOIN:
        case JoinDesc.RIGHT_OUTER_JOIN:
            vectorMapJoinVariation = VectorMapJoinVariation.OUTER;
            hashTableKind = HashTableKind.HASH_MAP;
            break;
        case JoinDesc.FULL_OUTER_JOIN:
            vectorMapJoinVariation = VectorMapJoinVariation.FULL_OUTER;
            hashTableKind = HashTableKind.HASH_MAP;
            break;
        case JoinDesc.LEFT_SEMI_JOIN:
            vectorMapJoinVariation = VectorMapJoinVariation.LEFT_SEMI;
            hashTableKind = HashTableKind.HASH_SET;
            break;
        case JoinDesc.ANTI_JOIN:
            vectorMapJoinVariation = VectorMapJoinVariation.LEFT_ANTI;
            hashTableKind = HashTableKind.HASH_SET;
            break;
        default:
            throw new HiveException("Unknown join type " + joinType);
    }
    LOG.info("Vectorizer vectorizeOperator map join hashTableKind " + hashTableKind.name() + " hashTableKeyType " + hashTableKeyType.name());
    switch(hashTableKeyType) {
        case BOOLEAN:
        case BYTE:
        case SHORT:
        case INT:
        case DATE:
        case LONG:
            switch(vectorMapJoinVariation) {
                case INNER:
                    opClass = VectorMapJoinInnerLongOperator.class;
                    break;
                case INNER_BIG_ONLY:
                    opClass = VectorMapJoinInnerBigOnlyLongOperator.class;
                    break;
                case LEFT_SEMI:
                    opClass = VectorMapJoinLeftSemiLongOperator.class;
                    break;
                case LEFT_ANTI:
                    opClass = VectorMapJoinAntiJoinLongOperator.class;
                    break;
                case OUTER:
                    opClass = VectorMapJoinOuterLongOperator.class;
                    break;
                case FULL_OUTER:
                    opClass = VectorMapJoinFullOuterLongOperator.class;
                    break;
                default:
                    throw new HiveException("Unknown operator variation " + vectorMapJoinVariation);
            }
            break;
        case STRING:
            switch(vectorMapJoinVariation) {
                case INNER:
                    opClass = VectorMapJoinInnerStringOperator.class;
                    break;
                case INNER_BIG_ONLY:
                    opClass = VectorMapJoinInnerBigOnlyStringOperator.class;
                    break;
                case LEFT_SEMI:
                    opClass = VectorMapJoinLeftSemiStringOperator.class;
                    break;
                case LEFT_ANTI:
                    opClass = VectorMapJoinAntiJoinStringOperator.class;
                    break;
                case OUTER:
                    opClass = VectorMapJoinOuterStringOperator.class;
                    break;
                case FULL_OUTER:
                    opClass = VectorMapJoinFullOuterStringOperator.class;
                    break;
                default:
                    throw new HiveException("Unknown operator variation " + vectorMapJoinVariation);
            }
            break;
        case MULTI_KEY:
            switch(vectorMapJoinVariation) {
                case INNER:
                    opClass = VectorMapJoinInnerMultiKeyOperator.class;
                    break;
                case INNER_BIG_ONLY:
                    opClass = VectorMapJoinInnerBigOnlyMultiKeyOperator.class;
                    break;
                case LEFT_SEMI:
                    opClass = VectorMapJoinLeftSemiMultiKeyOperator.class;
                    break;
                case LEFT_ANTI:
                    opClass = VectorMapJoinAntiJoinMultiKeyOperator.class;
                    break;
                case OUTER:
                    opClass = VectorMapJoinOuterMultiKeyOperator.class;
                    break;
                case FULL_OUTER:
                    opClass = VectorMapJoinFullOuterMultiKeyOperator.class;
                    break;
                default:
                    throw new HiveException("Unknown operator variation " + vectorMapJoinVariation);
            }
            break;
        default:
            throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name());
    }
    boolean minMaxEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED);
    vectorDesc.setHashTableImplementationType(hashTableImplementationType);
    vectorDesc.setHashTableKind(hashTableKind);
    vectorDesc.setHashTableKeyType(hashTableKeyType);
    vectorDesc.setVectorMapJoinVariation(vectorMapJoinVariation);
    if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) {
        vectorDesc.setIsFullOuter(true);
    }
    vectorDesc.setMinMaxEnabled(minMaxEnabled);
    vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo);
    vectorOp = OperatorFactory.getVectorOperator(opClass, op.getCompilationOpContext(), op.getConf(), vContext, vectorDesc);
    LOG.info("Vectorizer vectorizeOperator map join class " + vectorOp.getClass().getSimpleName());
    return vectorOp;
}
Also used : VectorMapJoinVariation(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorMapJoinInfo(org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) HashTableKeyType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType) HashTableImplementationType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType) HashTableKind(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind) UDFToByte(org.apache.hadoop.hive.ql.udf.UDFToByte) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 2 with VectorMapJoinInfo

use of org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo in project hive by apache.

the class MapJoinTestConfig method createVectorMapJoinDesc.

public static VectorMapJoinDesc createVectorMapJoinDesc(MapJoinTestDescription testDesc) {
    VectorMapJoinDesc vectorDesc = new VectorMapJoinDesc();
    vectorDesc.setHashTableImplementationType(HashTableImplementationType.FAST);
    HashTableKind hashTableKind;
    switch(testDesc.vectorMapJoinVariation) {
        case INNER:
            hashTableKind = HashTableKind.HASH_MAP;
            break;
        case INNER_BIG_ONLY:
            hashTableKind = HashTableKind.HASH_MULTISET;
            break;
        case LEFT_SEMI:
        case LEFT_ANTI:
            hashTableKind = HashTableKind.HASH_SET;
            break;
        case OUTER:
        case FULL_OUTER:
            hashTableKind = HashTableKind.HASH_MAP;
            break;
        default:
            throw new RuntimeException("unknown operator variation " + testDesc.vectorMapJoinVariation);
    }
    vectorDesc.setHashTableKind(hashTableKind);
    // Assume.
    HashTableKeyType hashTableKeyType = HashTableKeyType.MULTI_KEY;
    if (testDesc.bigTableKeyTypeInfos.length == 1) {
        switch(((PrimitiveTypeInfo) testDesc.bigTableKeyTypeInfos[0]).getPrimitiveCategory()) {
            case BOOLEAN:
                hashTableKeyType = HashTableKeyType.BOOLEAN;
                break;
            case BYTE:
                hashTableKeyType = HashTableKeyType.BYTE;
                break;
            case SHORT:
                hashTableKeyType = HashTableKeyType.SHORT;
                break;
            case INT:
                hashTableKeyType = HashTableKeyType.INT;
                break;
            case LONG:
                hashTableKeyType = HashTableKeyType.LONG;
                break;
            case STRING:
                hashTableKeyType = HashTableKeyType.STRING;
                break;
            default:
        }
    }
    vectorDesc.setHashTableKeyType(hashTableKeyType);
    vectorDesc.setVectorMapJoinVariation(testDesc.vectorMapJoinVariation);
    vectorDesc.setMinMaxEnabled(false);
    VectorMapJoinInfo vectorMapJoinInfo = new VectorMapJoinInfo();
    vectorMapJoinInfo.setBigTableKeyColumnMap(testDesc.bigTableKeyColumnNums);
    vectorMapJoinInfo.setBigTableKeyColumnNames(testDesc.bigTableKeyColumnNames);
    vectorMapJoinInfo.setBigTableKeyTypeInfos(testDesc.bigTableKeyTypeInfos);
    vectorMapJoinInfo.setSlimmedBigTableKeyExpressions(null);
    vectorDesc.setAllBigTableKeyExpressions(null);
    vectorMapJoinInfo.setBigTableValueColumnMap(testDesc.bigTableColumnNums);
    vectorMapJoinInfo.setBigTableValueColumnNames(testDesc.bigTableColumnNames);
    vectorMapJoinInfo.setBigTableValueTypeInfos(testDesc.bigTableTypeInfos);
    vectorMapJoinInfo.setSlimmedBigTableValueExpressions(null);
    vectorDesc.setAllBigTableValueExpressions(null);
    vectorMapJoinInfo.setBigTableFilterExpressions(new VectorExpression[0]);
    /*
     * Column mapping.
     */
    VectorColumnOutputMapping bigTableRetainMapping = new VectorColumnOutputMapping("Big Table Retain Mapping");
    VectorColumnOutputMapping nonOuterSmallTableKeyMapping = new VectorColumnOutputMapping("Non Outer Small Table Key Key Mapping");
    VectorColumnOutputMapping outerSmallTableKeyMapping = new VectorColumnOutputMapping("Outer Small Table Key Mapping");
    VectorColumnSourceMapping fullOuterSmallTableKeyMapping = new VectorColumnSourceMapping("Full Outer Small Table Key Mapping");
    VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping");
    int nextOutputColumn = 0;
    final int bigTableRetainedSize = testDesc.bigTableRetainColumnNums.length;
    for (int i = 0; i < bigTableRetainedSize; i++) {
        final int batchColumnIndex = testDesc.bigTableRetainColumnNums[i];
        TypeInfo typeInfo = testDesc.bigTableTypeInfos[i];
        projectionMapping.add(nextOutputColumn, batchColumnIndex, typeInfo);
        // Collect columns we copy from the big table batch to the overflow batch.
        if (!bigTableRetainMapping.containsOutputColumn(batchColumnIndex)) {
            // Tolerate repeated use of a big table column.
            bigTableRetainMapping.add(batchColumnIndex, batchColumnIndex, typeInfo);
        }
        nextOutputColumn++;
    }
    boolean isOuterJoin = (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER || testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER);
    int emulateScratchColumn = testDesc.bigTableTypeInfos.length;
    VectorColumnOutputMapping smallTableKeyOutputMapping = new VectorColumnOutputMapping("Small Table Key Output Mapping");
    final int smallTableKeyRetainSize = testDesc.smallTableRetainKeyColumnNums.length;
    for (int i = 0; i < testDesc.smallTableRetainKeyColumnNums.length; i++) {
        final int smallTableKeyColumnNum = testDesc.smallTableRetainKeyColumnNums[i];
        final int bigTableKeyColumnNum = testDesc.bigTableKeyColumnNums[smallTableKeyColumnNum];
        TypeInfo keyTypeInfo = testDesc.smallTableKeyTypeInfos[smallTableKeyColumnNum];
        if (!isOuterJoin) {
            // Project the big table key into the small table result "area".
            projectionMapping.add(nextOutputColumn, bigTableKeyColumnNum, keyTypeInfo);
            if (!bigTableRetainMapping.containsOutputColumn(bigTableKeyColumnNum)) {
                nonOuterSmallTableKeyMapping.add(bigTableKeyColumnNum, bigTableKeyColumnNum, keyTypeInfo);
            }
        } else {
            outerSmallTableKeyMapping.add(bigTableKeyColumnNum, emulateScratchColumn, keyTypeInfo);
            projectionMapping.add(nextOutputColumn, emulateScratchColumn, keyTypeInfo);
            // For FULL OUTER MapJoin, we need to be able to deserialize a Small Table key
            // into the output result.
            fullOuterSmallTableKeyMapping.add(smallTableKeyColumnNum, emulateScratchColumn, keyTypeInfo);
            emulateScratchColumn++;
        }
        nextOutputColumn++;
    }
    // The order of the fields in the LazyBinary small table value must be used, so
    // we use the source ordering flavor for the mapping.
    VectorColumnSourceMapping smallTableValueMapping = new VectorColumnSourceMapping("Small Table Value Mapping");
    for (int i = 0; i < testDesc.smallTableValueTypeInfos.length; i++) {
        smallTableValueMapping.add(i, emulateScratchColumn, testDesc.smallTableValueTypeInfos[i]);
        projectionMapping.add(nextOutputColumn, emulateScratchColumn, testDesc.smallTableValueTypeInfos[i]);
        emulateScratchColumn++;
        nextOutputColumn++;
    }
    // Convert dynamic arrays and maps to simple arrays.
    bigTableRetainMapping.finalize();
    vectorMapJoinInfo.setBigTableRetainColumnMap(bigTableRetainMapping.getOutputColumns());
    vectorMapJoinInfo.setBigTableRetainTypeInfos(bigTableRetainMapping.getTypeInfos());
    nonOuterSmallTableKeyMapping.finalize();
    vectorMapJoinInfo.setNonOuterSmallTableKeyColumnMap(nonOuterSmallTableKeyMapping.getOutputColumns());
    vectorMapJoinInfo.setNonOuterSmallTableKeyTypeInfos(nonOuterSmallTableKeyMapping.getTypeInfos());
    outerSmallTableKeyMapping.finalize();
    fullOuterSmallTableKeyMapping.finalize();
    vectorMapJoinInfo.setOuterSmallTableKeyMapping(outerSmallTableKeyMapping);
    vectorMapJoinInfo.setFullOuterSmallTableKeyMapping(fullOuterSmallTableKeyMapping);
    smallTableValueMapping.finalize();
    vectorMapJoinInfo.setSmallTableValueMapping(smallTableValueMapping);
    projectionMapping.finalize();
    // Verify we added an entry for each output.
    assert projectionMapping.isSourceSequenceGood();
    vectorMapJoinInfo.setProjectionMapping(projectionMapping);
    if (projectionMapping.getCount() != testDesc.outputColumnNames.length) {
        throw new RuntimeException();
    }
    ;
    vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo);
    return vectorDesc;
}
Also used : HashTableKeyType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType) VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) HashTableKind(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind) VectorMapJoinInfo(org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo) VectorColumnOutputMapping(org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) VectorColumnSourceMapping(org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping)

Example 3 with VectorMapJoinInfo

use of org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo in project hive by apache.

the class Vectorizer method vectorizeOperator.

public Operator<? extends OperatorDesc> vectorizeOperator(Operator<? extends OperatorDesc> op, VectorizationContext vContext, boolean isTezOrSpark, VectorTaskColumnInfo vectorTaskColumnInfo) throws HiveException {
    Operator<? extends OperatorDesc> vectorOp = null;
    boolean isNative;
    switch(op.getType()) {
        case TABLESCAN:
            vectorOp = vectorizeTableScanOperator(op, vContext);
            isNative = true;
            break;
        case MAPJOIN:
            {
                if (op instanceof MapJoinOperator) {
                    VectorMapJoinInfo vectorMapJoinInfo = new VectorMapJoinInfo();
                    MapJoinDesc desc = (MapJoinDesc) op.getConf();
                    boolean specialize = canSpecializeMapJoin(op, desc, isTezOrSpark, vContext, vectorMapJoinInfo);
                    if (!specialize) {
                        Class<? extends Operator<?>> opClass = null;
                        // *NON-NATIVE* vector map differences for LEFT OUTER JOIN and Filtered...
                        List<ExprNodeDesc> bigTableFilters = desc.getFilters().get((byte) desc.getPosBigTable());
                        boolean isOuterAndFiltered = (!desc.isNoOuterJoin() && bigTableFilters.size() > 0);
                        if (!isOuterAndFiltered) {
                            opClass = VectorMapJoinOperator.class;
                        } else {
                            opClass = VectorMapJoinOuterFilteredOperator.class;
                        }
                        vectorOp = OperatorFactory.getVectorOperator(opClass, op.getCompilationOpContext(), op.getConf(), vContext);
                        isNative = false;
                    } else {
                        // TEMPORARY Until Native Vector Map Join with Hybrid passes tests...
                        // HiveConf.setBoolVar(physicalContext.getConf(),
                        //    HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN, false);
                        vectorOp = specializeMapJoinOperator(op, vContext, desc, vectorMapJoinInfo);
                        isNative = true;
                        if (vectorTaskColumnInfo != null) {
                            if (usesVectorUDFAdaptor(vectorMapJoinInfo.getBigTableKeyExpressions())) {
                                vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                            }
                            if (usesVectorUDFAdaptor(vectorMapJoinInfo.getBigTableValueExpressions())) {
                                vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                            }
                        }
                    }
                } else {
                    Preconditions.checkState(op instanceof SMBMapJoinOperator);
                    SMBJoinDesc smbJoinSinkDesc = (SMBJoinDesc) op.getConf();
                    VectorSMBJoinDesc vectorSMBJoinDesc = new VectorSMBJoinDesc();
                    smbJoinSinkDesc.setVectorDesc(vectorSMBJoinDesc);
                    vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), smbJoinSinkDesc, vContext);
                    isNative = false;
                }
            }
            break;
        case REDUCESINK:
            {
                VectorReduceSinkInfo vectorReduceSinkInfo = new VectorReduceSinkInfo();
                ReduceSinkDesc desc = (ReduceSinkDesc) op.getConf();
                boolean specialize = canSpecializeReduceSink(desc, isTezOrSpark, vContext, vectorReduceSinkInfo);
                if (!specialize) {
                    vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), op.getConf(), vContext);
                    isNative = false;
                } else {
                    vectorOp = specializeReduceSinkOperator(op, vContext, desc, vectorReduceSinkInfo);
                    isNative = true;
                    if (vectorTaskColumnInfo != null) {
                        if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkKeyExpressions())) {
                            vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                        }
                        if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkValueExpressions())) {
                            vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                        }
                    }
                }
            }
            break;
        case FILTER:
            {
                vectorOp = vectorizeFilterOperator(op, vContext);
                isNative = true;
                if (vectorTaskColumnInfo != null) {
                    VectorFilterDesc vectorFilterDesc = (VectorFilterDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc();
                    VectorExpression vectorPredicateExpr = vectorFilterDesc.getPredicateExpression();
                    if (usesVectorUDFAdaptor(vectorPredicateExpr)) {
                        vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                    }
                }
            }
            break;
        case SELECT:
            {
                vectorOp = vectorizeSelectOperator(op, vContext);
                isNative = true;
                if (vectorTaskColumnInfo != null) {
                    VectorSelectDesc vectorSelectDesc = (VectorSelectDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc();
                    VectorExpression[] vectorSelectExprs = vectorSelectDesc.getSelectExpressions();
                    if (usesVectorUDFAdaptor(vectorSelectExprs)) {
                        vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                    }
                }
            }
            break;
        case GROUPBY:
            {
                vectorOp = vectorizeGroupByOperator(op, vContext);
                isNative = false;
                if (vectorTaskColumnInfo != null) {
                    VectorGroupByDesc vectorGroupByDesc = (VectorGroupByDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc();
                    if (!vectorGroupByDesc.isVectorOutput()) {
                        vectorTaskColumnInfo.setGroupByVectorOutput(false);
                    }
                    VectorExpression[] vecKeyExpressions = vectorGroupByDesc.getKeyExpressions();
                    if (usesVectorUDFAdaptor(vecKeyExpressions)) {
                        vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                    }
                    VectorAggregateExpression[] vecAggregators = vectorGroupByDesc.getAggregators();
                    for (VectorAggregateExpression vecAggr : vecAggregators) {
                        if (usesVectorUDFAdaptor(vecAggr.inputExpression())) {
                            vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                        }
                    }
                }
            }
            break;
        case FILESINK:
            {
                FileSinkDesc fileSinkDesc = (FileSinkDesc) op.getConf();
                VectorFileSinkDesc vectorFileSinkDesc = new VectorFileSinkDesc();
                fileSinkDesc.setVectorDesc(vectorFileSinkDesc);
                vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), fileSinkDesc, vContext);
                isNative = false;
            }
            break;
        case LIMIT:
            {
                LimitDesc limitDesc = (LimitDesc) op.getConf();
                VectorLimitDesc vectorLimitDesc = new VectorLimitDesc();
                limitDesc.setVectorDesc(vectorLimitDesc);
                vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), limitDesc, vContext);
                isNative = true;
            }
            break;
        case EVENT:
            {
                AppMasterEventDesc eventDesc = (AppMasterEventDesc) op.getConf();
                VectorAppMasterEventDesc vectorEventDesc = new VectorAppMasterEventDesc();
                eventDesc.setVectorDesc(vectorEventDesc);
                vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), eventDesc, vContext);
                isNative = true;
            }
            break;
        case HASHTABLESINK:
            {
                SparkHashTableSinkDesc sparkHashTableSinkDesc = (SparkHashTableSinkDesc) op.getConf();
                VectorSparkHashTableSinkDesc vectorSparkHashTableSinkDesc = new VectorSparkHashTableSinkDesc();
                sparkHashTableSinkDesc.setVectorDesc(vectorSparkHashTableSinkDesc);
                vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), sparkHashTableSinkDesc, vContext);
                isNative = true;
            }
            break;
        case SPARKPRUNINGSINK:
            {
                SparkPartitionPruningSinkDesc sparkPartitionPruningSinkDesc = (SparkPartitionPruningSinkDesc) op.getConf();
                VectorSparkPartitionPruningSinkDesc vectorSparkPartitionPruningSinkDesc = new VectorSparkPartitionPruningSinkDesc();
                sparkPartitionPruningSinkDesc.setVectorDesc(vectorSparkPartitionPruningSinkDesc);
                vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), sparkPartitionPruningSinkDesc, vContext);
                isNative = true;
            }
            break;
        default:
            // These are children of GROUP BY operators with non-vector outputs.
            isNative = false;
            vectorOp = op;
            break;
    }
    Preconditions.checkState(vectorOp != null);
    if (vectorTaskColumnInfo != null && !isNative) {
        vectorTaskColumnInfo.setAllNative(false);
    }
    LOG.debug("vectorizeOperator " + vectorOp.getClass().getName());
    LOG.debug("vectorizeOperator " + vectorOp.getConf().getClass().getName());
    if (vectorOp != op) {
        fixupParentChildOperators(op, vectorOp);
        ((AbstractOperatorDesc) vectorOp.getConf()).setVectorMode(true);
    }
    return vectorOp;
}
Also used : VectorMapJoinInnerStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerStringOperator) VectorReduceSinkLongOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkLongOperator) VectorMapJoinOuterLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterLongOperator) VectorReduceSinkStringOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkStringOperator) VectorMapJoinInnerBigOnlyMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyMultiKeyOperator) VectorMapJoinLeftSemiMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiMultiKeyOperator) VectorMapJoinLeftSemiStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiStringOperator) VectorMapJoinLeftSemiLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiLongOperator) VectorReduceSinkMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkMultiKeyOperator) VectorMapJoinOuterFilteredOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator) VectorMapJoinInnerBigOnlyLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyLongOperator) VectorMapJoinInnerBigOnlyStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyStringOperator) VectorMapJoinInnerMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerMultiKeyOperator) VectorMapJoinOuterStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOperator) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorMapJoinInnerLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerLongOperator) VectorMapJoinOuterMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterMultiKeyOperator) AppMasterEventDesc(org.apache.hadoop.hive.ql.plan.AppMasterEventDesc) VectorAppMasterEventDesc(org.apache.hadoop.hive.ql.plan.VectorAppMasterEventDesc) SMBJoinDesc(org.apache.hadoop.hive.ql.plan.SMBJoinDesc) VectorSMBJoinDesc(org.apache.hadoop.hive.ql.plan.VectorSMBJoinDesc) VectorFileSinkDesc(org.apache.hadoop.hive.ql.plan.VectorFileSinkDesc) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorFileSinkDesc(org.apache.hadoop.hive.ql.plan.VectorFileSinkDesc) VectorReduceSinkInfo(org.apache.hadoop.hive.ql.plan.VectorReduceSinkInfo) VectorSparkPartitionPruningSinkDesc(org.apache.hadoop.hive.ql.plan.VectorSparkPartitionPruningSinkDesc) SparkPartitionPruningSinkDesc(org.apache.hadoop.hive.ql.optimizer.spark.SparkPartitionPruningSinkDesc) VectorAppMasterEventDesc(org.apache.hadoop.hive.ql.plan.VectorAppMasterEventDesc) ArrayList(java.util.ArrayList) List(java.util.List) VectorSelectDesc(org.apache.hadoop.hive.ql.plan.VectorSelectDesc) VectorReduceSinkDesc(org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorFilterDesc(org.apache.hadoop.hive.ql.plan.VectorFilterDesc) SparkHashTableSinkDesc(org.apache.hadoop.hive.ql.plan.SparkHashTableSinkDesc) VectorSparkHashTableSinkDesc(org.apache.hadoop.hive.ql.plan.VectorSparkHashTableSinkDesc) AbstractOperatorDesc(org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) VectorSparkHashTableSinkDesc(org.apache.hadoop.hive.ql.plan.VectorSparkHashTableSinkDesc) VectorSparkPartitionPruningSinkDesc(org.apache.hadoop.hive.ql.plan.VectorSparkPartitionPruningSinkDesc) VectorMapJoinInfo(org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo) VectorSMBJoinDesc(org.apache.hadoop.hive.ql.plan.VectorSMBJoinDesc) VectorAggregateExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression) VectorLimitDesc(org.apache.hadoop.hive.ql.plan.VectorLimitDesc) LimitDesc(org.apache.hadoop.hive.ql.plan.LimitDesc) VectorLimitDesc(org.apache.hadoop.hive.ql.plan.VectorLimitDesc) VectorMapJoinOuterFilteredOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 4 with VectorMapJoinInfo

use of org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo in project hive by apache.

the class MapJoinTestConfig method createNativeVectorMapJoin.

public static MapJoinOperator createNativeVectorMapJoin(MapJoinTestDescription testDesc, Operator<? extends OperatorDesc> collectorOperator, MapJoinTestData testData, MapJoinDesc mapJoinDesc, HashTableImplementationType hashTableImplementationType) throws SerDeException, IOException, HiveException {
    VectorMapJoinDesc vectorDesc = MapJoinTestConfig.createVectorMapJoinDesc(testDesc);
    // UNDONE
    mapJoinDesc.setVectorDesc(vectorDesc);
    vectorDesc.setHashTableImplementationType(hashTableImplementationType);
    VectorMapJoinInfo vectorMapJoinInfo = vectorDesc.getVectorMapJoinInfo();
    MapJoinTableContainer mapJoinTableContainer;
    switch(vectorDesc.getHashTableImplementationType()) {
        case OPTIMIZED:
            mapJoinTableContainer = new MapJoinBytesTableContainer(testDesc.hiveConf, null, testData.smallTableKeyHashMap.size(), 0);
            MapJoinTableContainerSerDe mapJoinTableContainerSerDe = MapJoinTestConfig.createMapJoinTableContainerSerDe(mapJoinDesc);
            mapJoinTableContainer.setSerde(mapJoinTableContainerSerDe.getKeyContext(), mapJoinTableContainerSerDe.getValueContext());
            break;
        case FAST:
            mapJoinTableContainer = new VectorMapJoinFastTableContainer(mapJoinDesc, testDesc.hiveConf, testData.smallTableKeyHashMap.size());
            break;
        default:
            throw new RuntimeException("Unexpected hash table implementation type " + vectorDesc.getHashTableImplementationType());
    }
    loadTableContainerData(testDesc, testData, mapJoinTableContainer);
    VectorizationContext vContext = MapJoinTestConfig.createVectorizationContext(testDesc);
    byte posBigTable = (byte) mapJoinDesc.getPosBigTable();
    VectorExpression[] slimmedBigTableKeyExpressions = vContext.getVectorExpressions(mapJoinDesc.getKeys().get(posBigTable));
    vectorMapJoinInfo.setSlimmedBigTableKeyExpressions(slimmedBigTableKeyExpressions);
    Map<Byte, List<ExprNodeDesc>> exprs = mapJoinDesc.getExprs();
    VectorExpression[] slimmedBigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));
    vectorMapJoinInfo.setSlimmedBigTableValueExpressions(slimmedBigTableValueExpressions);
    VectorMapJoinCommonOperator operator = MapJoinTestConfig.createNativeVectorMapJoinOperator(testDesc.vectorMapJoinVariation, mapJoinDesc, vectorDesc, vContext);
    MapJoinTestConfig.connectOperators(testDesc, operator, collectorOperator);
    operator.setTestMapJoinTableContainer(1, mapJoinTableContainer, null);
    return operator;
}
Also used : VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) MapJoinBytesTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer) VectorMapJoinInfo(org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) MapJoinTableContainerSerDe(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe) VectorMapJoinFastTableContainer(org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) List(java.util.List) ArrayList(java.util.ArrayList) MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer)

Example 5 with VectorMapJoinInfo

use of org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo in project hive by apache.

the class Vectorizer method canSpecializeMapJoin.

private boolean canSpecializeMapJoin(Operator<? extends OperatorDesc> op, MapJoinDesc desc, boolean isTezOrSpark, VectorizationContext vContext, VectorMapJoinDesc vectorDesc) throws HiveException {
    Preconditions.checkState(op instanceof MapJoinOperator);
    VectorMapJoinInfo vectorMapJoinInfo = new VectorMapJoinInfo();
    boolean isVectorizationMapJoinNativeEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED);
    String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
    boolean oneMapJoinCondition = (desc.getConds().length == 1);
    boolean hasNullSafes = onExpressionHasNullSafes(desc);
    byte posBigTable = (byte) desc.getPosBigTable();
    // Since we want to display all the met and not met conditions in EXPLAIN, we determine all
    // information first....
    List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable);
    boolean outerJoinHasNoKeys = (!desc.isNoOuterJoin() && keyDesc.size() == 0);
    // For now, we don't support joins on or using DECIMAL_64.
    VectorExpression[] allBigTableKeyExpressions = vContext.getVectorExpressionsUpConvertDecimal64(keyDesc);
    final int allBigTableKeyExpressionsLength = allBigTableKeyExpressions.length;
    // Assume.
    boolean supportsKeyTypes = true;
    HashSet<String> notSupportedKeyTypes = new HashSet<String>();
    // Since a key expression can be a calculation and the key will go into a scratch column,
    // we need the mapping and type information.
    int[] bigTableKeyColumnMap = new int[allBigTableKeyExpressionsLength];
    String[] bigTableKeyColumnNames = new String[allBigTableKeyExpressionsLength];
    TypeInfo[] bigTableKeyTypeInfos = new TypeInfo[allBigTableKeyExpressionsLength];
    ArrayList<VectorExpression> bigTableKeyExpressionsList = new ArrayList<VectorExpression>();
    VectorExpression[] slimmedBigTableKeyExpressions;
    for (int i = 0; i < allBigTableKeyExpressionsLength; i++) {
        VectorExpression ve = allBigTableKeyExpressions[i];
        if (!IdentityExpression.isColumnOnly(ve)) {
            bigTableKeyExpressionsList.add(ve);
        }
        bigTableKeyColumnMap[i] = ve.getOutputColumnNum();
        ExprNodeDesc exprNode = keyDesc.get(i);
        bigTableKeyColumnNames[i] = exprNode.toString();
        TypeInfo typeInfo = exprNode.getTypeInfo();
        // same check used in HashTableLoader.
        if (!MapJoinKey.isSupportedField(typeInfo)) {
            supportsKeyTypes = false;
            Category category = typeInfo.getCategory();
            notSupportedKeyTypes.add((category != Category.PRIMITIVE ? category.toString() : ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory().toString()));
        }
        bigTableKeyTypeInfos[i] = typeInfo;
    }
    if (bigTableKeyExpressionsList.size() == 0) {
        slimmedBigTableKeyExpressions = null;
    } else {
        slimmedBigTableKeyExpressions = bigTableKeyExpressionsList.toArray(new VectorExpression[0]);
    }
    List<ExprNodeDesc> bigTableExprs = desc.getExprs().get(posBigTable);
    // For now, we don't support joins on or using DECIMAL_64.
    VectorExpression[] allBigTableValueExpressions = vContext.getVectorExpressions(bigTableExprs);
    boolean isFastHashTableEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED);
    // Especially since LLAP is prone to turn it off in the MapJoinDesc in later
    // physical optimizer stages...
    boolean isHybridHashJoin = desc.isHybridHashJoin();
    /*
     * Populate vectorMapJoininfo.
     */
    /*
     * Similarly, we need a mapping since a value expression can be a calculation and the value
     * will go into a scratch column.
     *
     * Value expressions include keys? YES.
     */
    // Assume.
    boolean supportsValueTypes = true;
    HashSet<String> notSupportedValueTypes = new HashSet<String>();
    int[] bigTableValueColumnMap = new int[allBigTableValueExpressions.length];
    String[] bigTableValueColumnNames = new String[allBigTableValueExpressions.length];
    TypeInfo[] bigTableValueTypeInfos = new TypeInfo[allBigTableValueExpressions.length];
    ArrayList<VectorExpression> bigTableValueExpressionsList = new ArrayList<VectorExpression>();
    VectorExpression[] slimmedBigTableValueExpressions;
    for (int i = 0; i < bigTableValueColumnMap.length; i++) {
        VectorExpression ve = allBigTableValueExpressions[i];
        if (!IdentityExpression.isColumnOnly(ve)) {
            bigTableValueExpressionsList.add(ve);
        }
        bigTableValueColumnMap[i] = ve.getOutputColumnNum();
        ExprNodeDesc exprNode = bigTableExprs.get(i);
        bigTableValueColumnNames[i] = exprNode.toString();
        TypeInfo typeInfo = exprNode.getTypeInfo();
        if (!(typeInfo instanceof PrimitiveTypeInfo)) {
            supportsValueTypes = false;
            Category category = typeInfo.getCategory();
            notSupportedValueTypes.add(category.toString());
        }
        bigTableValueTypeInfos[i] = typeInfo;
    }
    if (bigTableValueExpressionsList.size() == 0) {
        slimmedBigTableValueExpressions = null;
    } else {
        slimmedBigTableValueExpressions = bigTableValueExpressionsList.toArray(new VectorExpression[0]);
    }
    vectorMapJoinInfo.setBigTableKeyColumnMap(bigTableKeyColumnMap);
    vectorMapJoinInfo.setBigTableKeyColumnNames(bigTableKeyColumnNames);
    vectorMapJoinInfo.setBigTableKeyTypeInfos(bigTableKeyTypeInfos);
    vectorMapJoinInfo.setSlimmedBigTableKeyExpressions(slimmedBigTableKeyExpressions);
    vectorDesc.setAllBigTableKeyExpressions(allBigTableKeyExpressions);
    vectorMapJoinInfo.setBigTableValueColumnMap(bigTableValueColumnMap);
    vectorMapJoinInfo.setBigTableValueColumnNames(bigTableValueColumnNames);
    vectorMapJoinInfo.setBigTableValueTypeInfos(bigTableValueTypeInfos);
    vectorMapJoinInfo.setSlimmedBigTableValueExpressions(slimmedBigTableValueExpressions);
    vectorDesc.setAllBigTableValueExpressions(allBigTableValueExpressions);
    /*
     * Column mapping.
     */
    VectorColumnOutputMapping bigTableRetainMapping = new VectorColumnOutputMapping("Big Table Retain Mapping");
    VectorColumnOutputMapping nonOuterSmallTableKeyMapping = new VectorColumnOutputMapping("Non Outer Small Table Key Key Mapping");
    VectorColumnOutputMapping outerSmallTableKeyMapping = new VectorColumnOutputMapping("Outer Small Table Key Mapping");
    VectorColumnSourceMapping fullOuterSmallTableKeyMapping = new VectorColumnSourceMapping("Full Outer Small Table Key Mapping");
    // The order of the fields in the LazyBinary small table value must be used, so
    // we use the source ordering flavor for the mapping.
    VectorColumnSourceMapping smallTableValueMapping = new VectorColumnSourceMapping("Small Table Value Mapping");
    Byte[] order = desc.getTagOrder();
    Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]);
    boolean isOuterJoin = !desc.getNoOuterJoin();
    /*
     * Gather up big and small table output result information from the MapJoinDesc.
     */
    List<Integer> bigTableRetainList = desc.getRetainList().get(posBigTable);
    int[] smallTableIndices;
    int smallTableIndicesSize;
    List<ExprNodeDesc> smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable);
    if (desc.getValueIndices() != null && desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) {
        smallTableIndices = desc.getValueIndices().get(posSingleVectorMapJoinSmallTable);
        smallTableIndicesSize = smallTableIndices.length;
    } else {
        smallTableIndices = null;
        smallTableIndicesSize = 0;
    }
    List<Integer> smallTableRetainList = desc.getRetainList().get(posSingleVectorMapJoinSmallTable);
    int smallTableRetainSize = smallTableRetainList.size();
    int smallTableResultSize = 0;
    if (smallTableIndicesSize > 0) {
        smallTableResultSize = smallTableIndicesSize;
    } else if (smallTableRetainSize > 0) {
        smallTableResultSize = smallTableRetainSize;
    }
    /*
     * Determine the big table retained mapping first so we can optimize out (with
     * projection) copying inner join big table keys in the subsequent small table results section.
     */
    // We use a mapping object here so we can build the projection in any order and
    // get the ordered by 0 to n-1 output columns at the end.
    // 
    // Also, to avoid copying a big table key into the small table result area for inner joins,
    // we reference it with the projection so there can be duplicate output columns
    // in the projection.
    VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping");
    int nextOutputColumn = (order[0] == posBigTable ? 0 : smallTableResultSize);
    final int bigTableRetainSize = bigTableRetainList.size();
    for (int i = 0; i < bigTableRetainSize; i++) {
        // Since bigTableValueExpressions may do a calculation and produce a scratch column, we
        // need to map to the right batch column.
        int retainColumn = bigTableRetainList.get(i);
        int batchColumnIndex = bigTableValueColumnMap[retainColumn];
        TypeInfo typeInfo = bigTableValueTypeInfos[i];
        // With this map we project the big table batch to make it look like an output batch.
        projectionMapping.add(nextOutputColumn, batchColumnIndex, typeInfo);
        // Collect columns we copy from the big table batch to the overflow batch.
        if (!bigTableRetainMapping.containsOutputColumn(batchColumnIndex)) {
            // Tolerate repeated use of a big table column.
            bigTableRetainMapping.add(batchColumnIndex, batchColumnIndex, typeInfo);
        }
        nextOutputColumn++;
    }
    /*
     * Now determine the small table results.
     */
    boolean smallTableExprVectorizes = true;
    int firstSmallTableOutputColumn;
    firstSmallTableOutputColumn = (order[0] == posBigTable ? bigTableRetainSize : 0);
    nextOutputColumn = firstSmallTableOutputColumn;
    // Small table indices has more information (i.e. keys) than retain, so use it if it exists...
    if (smallTableIndicesSize > 0) {
        for (int i = 0; i < smallTableIndicesSize; i++) {
            if (smallTableIndices[i] >= 0) {
                // Zero and above numbers indicate a big table key is needed for
                // small table result "area".
                int keyIndex = smallTableIndices[i];
                // Since bigTableKeyExpressions may do a calculation and produce a scratch column, we
                // need to map the right column.
                int bigTableKeyColumn = bigTableKeyColumnMap[keyIndex];
                TypeInfo typeInfo = bigTableKeyTypeInfos[keyIndex];
                if (!isOuterJoin) {
                    // Optimize inner join keys of small table results.
                    // Project the big table key into the small table result "area".
                    projectionMapping.add(nextOutputColumn, bigTableKeyColumn, typeInfo);
                    if (!bigTableRetainMapping.containsOutputColumn(bigTableKeyColumn)) {
                        // When the Big Key is not retained in the output result, we do need to copy the
                        // Big Table key into the overflow batch so the projection of it (Big Table key) to
                        // the Small Table key will work properly...
                        // 
                        nonOuterSmallTableKeyMapping.add(bigTableKeyColumn, bigTableKeyColumn, typeInfo);
                    }
                } else {
                    // For outer joins, since the small table key can be null when there for NOMATCH,
                    // we must have a physical (scratch) column for those keys.  We cannot use the
                    // projection optimization used by non-[FULL} OUTER joins above.
                    int scratchColumn = vContext.allocateScratchColumn(typeInfo);
                    projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
                    outerSmallTableKeyMapping.add(bigTableKeyColumn, scratchColumn, typeInfo);
                    // For FULL OUTER MapJoin, we need to be able to deserialize a Small Table key
                    // into the output result.
                    fullOuterSmallTableKeyMapping.add(keyIndex, scratchColumn, typeInfo);
                }
            } else {
                // Negative numbers indicate a column to be (deserialize) read from the small table's
                // LazyBinary value row.
                int smallTableValueIndex = -smallTableIndices[i] - 1;
                ExprNodeDesc smallTableExprNode = smallTableExprs.get(i);
                if (!validateExprNodeDesc(smallTableExprNode, "Small Table")) {
                    clearNotVectorizedReason();
                    smallTableExprVectorizes = false;
                }
                TypeInfo typeInfo = smallTableExprNode.getTypeInfo();
                // Make a new big table scratch column for the small table value.
                int scratchColumn = vContext.allocateScratchColumn(typeInfo);
                projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
                smallTableValueMapping.add(smallTableValueIndex, scratchColumn, typeInfo);
            }
            nextOutputColumn++;
        }
    } else if (smallTableRetainSize > 0) {
        for (int i = 0; i < smallTableRetainSize; i++) {
            int smallTableValueIndex = smallTableRetainList.get(i);
            ExprNodeDesc smallTableExprNode = smallTableExprs.get(i);
            if (!validateExprNodeDesc(smallTableExprNode, "Small Table")) {
                clearNotVectorizedReason();
                smallTableExprVectorizes = false;
            }
            // Make a new big table scratch column for the small table value.
            TypeInfo typeInfo = smallTableExprNode.getTypeInfo();
            int scratchColumn = vContext.allocateScratchColumn(typeInfo);
            projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
            smallTableValueMapping.add(smallTableValueIndex, scratchColumn, typeInfo);
            nextOutputColumn++;
        }
    }
    Map<Byte, List<ExprNodeDesc>> filterExpressions = desc.getFilters();
    VectorExpression[] bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get(posBigTable), VectorExpressionDescriptor.Mode.FILTER);
    vectorMapJoinInfo.setBigTableFilterExpressions(bigTableFilterExpressions);
    boolean useOptimizedTable = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
    // Remember the condition variables for EXPLAIN regardless of whether we specialize or not.
    vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo);
    vectorDesc.setUseOptimizedTable(useOptimizedTable);
    vectorDesc.setIsVectorizationMapJoinNativeEnabled(isVectorizationMapJoinNativeEnabled);
    vectorDesc.setEngine(engine);
    vectorDesc.setOneMapJoinCondition(oneMapJoinCondition);
    vectorDesc.setHasNullSafes(hasNullSafes);
    vectorDesc.setSmallTableExprVectorizes(smallTableExprVectorizes);
    vectorDesc.setOuterJoinHasNoKeys(outerJoinHasNoKeys);
    vectorDesc.setIsFastHashTableEnabled(isFastHashTableEnabled);
    vectorDesc.setIsHybridHashJoin(isHybridHashJoin);
    vectorDesc.setSupportsKeyTypes(supportsKeyTypes);
    if (!supportsKeyTypes) {
        vectorDesc.setNotSupportedKeyTypes(new ArrayList<>(notSupportedKeyTypes));
    }
    vectorDesc.setSupportsValueTypes(supportsValueTypes);
    if (!supportsValueTypes) {
        vectorDesc.setNotSupportedValueTypes(new ArrayList<>(notSupportedValueTypes));
    }
    // Check common conditions for both Optimized and Fast Hash Tables.
    // Assume.
    boolean result = true;
    if (!useOptimizedTable || !isVectorizationMapJoinNativeEnabled || !isTezOrSpark || !oneMapJoinCondition || hasNullSafes || !smallTableExprVectorizes || outerJoinHasNoKeys || !supportsValueTypes) {
        result = false;
    }
    if (!isFastHashTableEnabled) {
        // Check optimized-only hash table restrictions.
        if (!supportsKeyTypes) {
            result = false;
        }
    } else {
        if (isHybridHashJoin) {
            result = false;
        }
    }
    // Convert dynamic arrays and maps to simple arrays.
    bigTableRetainMapping.finalize();
    vectorMapJoinInfo.setBigTableRetainColumnMap(bigTableRetainMapping.getOutputColumns());
    vectorMapJoinInfo.setBigTableRetainTypeInfos(bigTableRetainMapping.getTypeInfos());
    nonOuterSmallTableKeyMapping.finalize();
    vectorMapJoinInfo.setNonOuterSmallTableKeyColumnMap(nonOuterSmallTableKeyMapping.getOutputColumns());
    vectorMapJoinInfo.setNonOuterSmallTableKeyTypeInfos(nonOuterSmallTableKeyMapping.getTypeInfos());
    outerSmallTableKeyMapping.finalize();
    fullOuterSmallTableKeyMapping.finalize();
    vectorMapJoinInfo.setOuterSmallTableKeyMapping(outerSmallTableKeyMapping);
    vectorMapJoinInfo.setFullOuterSmallTableKeyMapping(fullOuterSmallTableKeyMapping);
    smallTableValueMapping.finalize();
    vectorMapJoinInfo.setSmallTableValueMapping(smallTableValueMapping);
    projectionMapping.finalize();
    // Verify we added an entry for each output.
    assert projectionMapping.isSourceSequenceGood();
    vectorMapJoinInfo.setProjectionMapping(projectionMapping);
    return result;
}
Also used : PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) ArrayList(java.util.ArrayList) VectorColumnOutputMapping(org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) VectorColumnSourceMapping(org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) HashSet(java.util.HashSet) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorMapJoinInfo(org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) UDFToInteger(org.apache.hadoop.hive.ql.udf.UDFToInteger) UDFToByte(org.apache.hadoop.hive.ql.udf.UDFToByte) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Aggregations

VectorMapJoinInfo (org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo)6 ArrayList (java.util.ArrayList)5 List (java.util.List)5 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)4 VectorMapJoinDesc (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc)4 MapJoinBytesTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer)2 MapJoinTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer)2 MapJoinTableContainerSerDe (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe)2 VectorColumnOutputMapping (org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping)2 VectorColumnSourceMapping (org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping)2 VectorMapJoinOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator)2 VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext)2 VectorMapJoinFastTableContainer (org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer)2 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)2 HashTableKeyType (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType)2 HashTableKind (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind)2 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)2 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)2 HashSet (java.util.HashSet)1 VectorMapJoinOuterFilteredOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator)1