Examples with VectorMapJoinDesc - org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc

Example 1 with VectorMapJoinDesc

use of org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc in project hive by apache.

the class Vectorizer method canSpecializeMapJoin.

/**
 * Determines whether a MapJoin operator can be specialized to the native vectorized
 * implementation and gathers the information that specialization needs.
 *
 * <p>A fresh {@link VectorMapJoinDesc} is always attached to {@code desc} and filled with the
 * individual condition variables, whether or not the method ends up returning {@code true}.
 * The big table key/value column maps, the small table mapping, the big table retained /
 * outer key mappings and the projection mapping are all stored into
 * {@code vectorMapJoinInfo}.
 *
 * @param op the operator being examined; must be a {@code MapJoinOperator}
 * @param desc the MapJoin descriptor that is read and that receives the new vector descriptor
 * @param isTezOrSpark one of the required conditions for specialization
 * @param vContext vectorization context used to vectorize expressions and allocate scratch
 *     columns
 * @param vectorMapJoinInfo output holder populated with column maps and mappings
 * @return {@code true} only if every checked condition for native specialization holds
 * @throws HiveException declared by the signature; presumably raised while vectorizing the
 *     key/value expressions (confirm against {@code VectorizationContext})
 */
private boolean canSpecializeMapJoin(Operator<? extends OperatorDesc> op, MapJoinDesc desc, boolean isTezOrSpark, VectorizationContext vContext, VectorMapJoinInfo vectorMapJoinInfo) throws HiveException {
    Preconditions.checkState(op instanceof MapJoinOperator);
    // Allocate a VectorMapJoinDesc up front and attach it to the MapJoinDesc so that the
    // condition variables recorded below are remembered (for EXPLAIN) even when the operator
    // ends up not being specialized.
    VectorMapJoinDesc vectorDesc = new VectorMapJoinDesc();
    desc.setVectorDesc(vectorDesc);
    boolean isVectorizationMapJoinNativeEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED);
    String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
    boolean oneMapJoinCondition = (desc.getConds().length == 1);
    boolean hasNullSafes = onExpressionHasNullSafes(desc);
    byte posBigTable = (byte) desc.getPosBigTable();
    // Since we want to display all the met and not met conditions in EXPLAIN, we determine all
    // information first....
    List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable);
    VectorExpression[] allBigTableKeyExpressions = vContext.getVectorExpressions(keyDesc);
    final int allBigTableKeyExpressionsLength = allBigTableKeyExpressions.length;
    // Assume all key types are supported until a key proves otherwise.
    boolean supportsKeyTypes = true;
    HashSet<String> notSupportedKeyTypes = new HashSet<String>();
    // Since a key expression can be a calculation and the key will go into a scratch column,
    // we need the mapping and type information.
    int[] bigTableKeyColumnMap = new int[allBigTableKeyExpressionsLength];
    String[] bigTableKeyColumnNames = new String[allBigTableKeyExpressionsLength];
    TypeInfo[] bigTableKeyTypeInfos = new TypeInfo[allBigTableKeyExpressionsLength];
    ArrayList<VectorExpression> bigTableKeyExpressionsList = new ArrayList<VectorExpression>();
    VectorExpression[] bigTableKeyExpressions;
    for (int i = 0; i < allBigTableKeyExpressionsLength; i++) {
        VectorExpression ve = allBigTableKeyExpressions[i];
        // Only expressions that are more than a plain column reference need to be kept.
        if (!IdentityExpression.isColumnOnly(ve)) {
            bigTableKeyExpressionsList.add(ve);
        }
        bigTableKeyColumnMap[i] = ve.getOutputColumn();
        ExprNodeDesc exprNode = keyDesc.get(i);
        bigTableKeyColumnNames[i] = exprNode.toString();
        TypeInfo typeInfo = exprNode.getTypeInfo();
        // same check used in HashTableLoader.
        if (!MapJoinKey.isSupportedField(typeInfo)) {
            supportsKeyTypes = false;
            // Record the primitive category name for primitives, the category name otherwise.
            Category category = typeInfo.getCategory();
            notSupportedKeyTypes.add((category != Category.PRIMITIVE ? category.toString() : ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory().toString()));
        }
        bigTableKeyTypeInfos[i] = typeInfo;
    }
    // A null array (rather than an empty one) signals "no key expressions to evaluate".
    if (bigTableKeyExpressionsList.isEmpty()) {
        bigTableKeyExpressions = null;
    } else {
        bigTableKeyExpressions = bigTableKeyExpressionsList.toArray(new VectorExpression[0]);
    }
    List<ExprNodeDesc> bigTableExprs = desc.getExprs().get(posBigTable);
    VectorExpression[] allBigTableValueExpressions = vContext.getVectorExpressions(bigTableExprs);
    boolean isFastHashTableEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED);
    // Especially since LLAP is prone to turn it off in the MapJoinDesc in later
    // physical optimizer stages...
    boolean isHybridHashJoin = desc.isHybridHashJoin();
    /*
     * Populate vectorMapJoininfo.
     */
    /*
     * Similarly, we need a mapping since a value expression can be a calculation and the value
     * will go into a scratch column.
     */
    int[] bigTableValueColumnMap = new int[allBigTableValueExpressions.length];
    String[] bigTableValueColumnNames = new String[allBigTableValueExpressions.length];
    TypeInfo[] bigTableValueTypeInfos = new TypeInfo[allBigTableValueExpressions.length];
    ArrayList<VectorExpression> bigTableValueExpressionsList = new ArrayList<VectorExpression>();
    VectorExpression[] bigTableValueExpressions;
    for (int i = 0; i < bigTableValueColumnMap.length; i++) {
        VectorExpression ve = allBigTableValueExpressions[i];
        if (!IdentityExpression.isColumnOnly(ve)) {
            bigTableValueExpressionsList.add(ve);
        }
        bigTableValueColumnMap[i] = ve.getOutputColumn();
        ExprNodeDesc exprNode = bigTableExprs.get(i);
        bigTableValueColumnNames[i] = exprNode.toString();
        bigTableValueTypeInfos[i] = exprNode.getTypeInfo();
    }
    if (bigTableValueExpressionsList.isEmpty()) {
        bigTableValueExpressions = null;
    } else {
        bigTableValueExpressions = bigTableValueExpressionsList.toArray(new VectorExpression[0]);
    }
    vectorMapJoinInfo.setBigTableKeyColumnMap(bigTableKeyColumnMap);
    vectorMapJoinInfo.setBigTableKeyColumnNames(bigTableKeyColumnNames);
    vectorMapJoinInfo.setBigTableKeyTypeInfos(bigTableKeyTypeInfos);
    vectorMapJoinInfo.setBigTableKeyExpressions(bigTableKeyExpressions);
    vectorMapJoinInfo.setBigTableValueColumnMap(bigTableValueColumnMap);
    vectorMapJoinInfo.setBigTableValueColumnNames(bigTableValueColumnNames);
    vectorMapJoinInfo.setBigTableValueTypeInfos(bigTableValueTypeInfos);
    vectorMapJoinInfo.setBigTableValueExpressions(bigTableValueExpressions);
    /*
     * Small table information.
     */
    VectorColumnOutputMapping bigTableRetainedMapping = new VectorColumnOutputMapping("Big Table Retained Mapping");
    VectorColumnOutputMapping bigTableOuterKeyMapping = new VectorColumnOutputMapping("Big Table Outer Key Mapping");
    // The order of the fields in the LazyBinary small table value must be used, so
    // we use the source ordering flavor for the mapping.
    VectorColumnSourceMapping smallTableMapping = new VectorColumnSourceMapping("Small Table Mapping");
    Byte[] order = desc.getTagOrder();
    // The single small table is whichever of the first two tags is not the big table.
    Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]);
    boolean isOuterJoin = !desc.getNoOuterJoin();
    /*
     * Gather up big and small table output result information from the MapJoinDesc.
     */
    List<Integer> bigTableRetainList = desc.getRetainList().get(posBigTable);
    int bigTableRetainSize = bigTableRetainList.size();
    int[] smallTableIndices;
    int smallTableIndicesSize;
    List<ExprNodeDesc> smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable);
    if (desc.getValueIndices() != null && desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) {
        smallTableIndices = desc.getValueIndices().get(posSingleVectorMapJoinSmallTable);
        smallTableIndicesSize = smallTableIndices.length;
    } else {
        smallTableIndices = null;
        smallTableIndicesSize = 0;
    }
    List<Integer> smallTableRetainList = desc.getRetainList().get(posSingleVectorMapJoinSmallTable);
    int smallTableRetainSize = smallTableRetainList.size();
    // Value indices take precedence over the retain list when sizing the small table result.
    int smallTableResultSize = 0;
    if (smallTableIndicesSize > 0) {
        smallTableResultSize = smallTableIndicesSize;
    } else if (smallTableRetainSize > 0) {
        smallTableResultSize = smallTableRetainSize;
    }
    /*
     * Determine the big table retained mapping first so we can optimize out (with
     * projection) copying inner join big table keys in the subsequent small table results section.
     */
    // We use a mapping object here so we can build the projection in any order and
    // get the ordered by 0 to n-1 output columns at the end.
    //
    // Also, to avoid copying a big table key into the small table result area for inner joins,
    // we reference it with the projection so there can be duplicate output columns
    // in the projection.
    VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping");
    // Big table outputs start at 0 when the big table comes first, otherwise after the
    // small table results.
    int nextOutputColumn = (order[0] == posBigTable ? 0 : smallTableResultSize);
    for (int i = 0; i < bigTableRetainSize; i++) {
        // Since bigTableValueExpressions may do a calculation and produce a scratch column, we
        // need to map to the right batch column.
        int retainColumn = bigTableRetainList.get(i);
        int batchColumnIndex = bigTableValueColumnMap[retainColumn];
        // NOTE(review): the type is looked up by loop index i while the column is looked up by
        // retainColumn; these agree only when the retain list is the identity -- TODO confirm.
        TypeInfo typeInfo = bigTableValueTypeInfos[i];
        // With this map we project the big table batch to make it look like an output batch.
        projectionMapping.add(nextOutputColumn, batchColumnIndex, typeInfo);
        // Collect columns we copy from the big table batch to the overflow batch.
        if (!bigTableRetainedMapping.containsOutputColumn(batchColumnIndex)) {
            // Tolerate repeated use of a big table column.
            bigTableRetainedMapping.add(batchColumnIndex, batchColumnIndex, typeInfo);
        }
        nextOutputColumn++;
    }
    /*
     * Now determine the small table results.
     */
    boolean smallTableExprVectorizes = true;
    int firstSmallTableOutputColumn;
    firstSmallTableOutputColumn = (order[0] == posBigTable ? bigTableRetainSize : 0);
    int smallTableOutputCount = 0;
    nextOutputColumn = firstSmallTableOutputColumn;
    // Small table indices has more information (i.e. keys) than retain, so use it if it exists...
    // NOTE(review): bigTableRetainedNames is filled in below but never read in this method.
    String[] bigTableRetainedNames;
    if (smallTableIndicesSize > 0) {
        smallTableOutputCount = smallTableIndicesSize;
        bigTableRetainedNames = new String[smallTableOutputCount];
        for (int i = 0; i < smallTableIndicesSize; i++) {
            if (smallTableIndices[i] >= 0) {
                // Zero and above numbers indicate a big table key is needed for
                // small table result "area".
                int keyIndex = smallTableIndices[i];
                // Since bigTableKeyExpressions may do a calculation and produce a scratch column, we
                // need to map the right column.
                int batchKeyColumn = bigTableKeyColumnMap[keyIndex];
                bigTableRetainedNames[i] = bigTableKeyColumnNames[keyIndex];
                TypeInfo typeInfo = bigTableKeyTypeInfos[keyIndex];
                if (!isOuterJoin) {
                    // Optimize inner join keys of small table results.
                    // Project the big table key into the small table result "area".
                    projectionMapping.add(nextOutputColumn, batchKeyColumn, typeInfo);
                    if (!bigTableRetainedMapping.containsOutputColumn(batchKeyColumn)) {
                        // If necessary, copy the big table key into the overflow batch's small table
                        // result "area".
                        bigTableRetainedMapping.add(batchKeyColumn, batchKeyColumn, typeInfo);
                    }
                } else {
                    // For outer joins, since the small table key can be null when there is no match,
                    // we must have a physical (scratch) column for those keys.  We cannot use the
                    // projection optimization used by inner joins above.
                    int scratchColumn = vContext.allocateScratchColumn(typeInfo);
                    projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
                    bigTableRetainedMapping.add(batchKeyColumn, scratchColumn, typeInfo);
                    bigTableOuterKeyMapping.add(batchKeyColumn, scratchColumn, typeInfo);
                }
            } else {
                // Negative numbers indicate a column to be (deserialize) read from the small table's
                // LazyBinary value row.
                int smallTableValueIndex = -smallTableIndices[i] - 1;
                ExprNodeDesc smallTableExprNode = smallTableExprs.get(i);
                if (!validateExprNodeDesc(smallTableExprNode, "Small Table")) {
                    // The failure is recorded as a condition flag, so the reason is cleared here.
                    clearNotVectorizedReason();
                    smallTableExprVectorizes = false;
                }
                bigTableRetainedNames[i] = smallTableExprNode.toString();
                TypeInfo typeInfo = smallTableExprNode.getTypeInfo();
                // Make a new big table scratch column for the small table value.
                int scratchColumn = vContext.allocateScratchColumn(typeInfo);
                projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
                smallTableMapping.add(smallTableValueIndex, scratchColumn, typeInfo);
            }
            nextOutputColumn++;
        }
    } else if (smallTableRetainSize > 0) {
        smallTableOutputCount = smallTableRetainSize;
        bigTableRetainedNames = new String[smallTableOutputCount];
        for (int i = 0; i < smallTableRetainSize; i++) {
            int smallTableValueIndex = smallTableRetainList.get(i);
            ExprNodeDesc smallTableExprNode = smallTableExprs.get(i);
            if (!validateExprNodeDesc(smallTableExprNode, "Small Table")) {
                clearNotVectorizedReason();
                smallTableExprVectorizes = false;
            }
            bigTableRetainedNames[i] = smallTableExprNode.toString();
            // Make a new big table scratch column for the small table value.
            TypeInfo typeInfo = smallTableExprNode.getTypeInfo();
            int scratchColumn = vContext.allocateScratchColumn(typeInfo);
            projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
            smallTableMapping.add(smallTableValueIndex, scratchColumn, typeInfo);
            nextOutputColumn++;
        }
    } else {
        bigTableRetainedNames = new String[0];
    }
    boolean useOptimizedTable = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
    // Remember the condition variables for EXPLAIN regardless of whether we specialize or not.
    vectorDesc.setUseOptimizedTable(useOptimizedTable);
    vectorDesc.setIsVectorizationMapJoinNativeEnabled(isVectorizationMapJoinNativeEnabled);
    vectorDesc.setEngine(engine);
    vectorDesc.setOneMapJoinCondition(oneMapJoinCondition);
    vectorDesc.setHasNullSafes(hasNullSafes);
    vectorDesc.setSmallTableExprVectorizes(smallTableExprVectorizes);
    vectorDesc.setIsFastHashTableEnabled(isFastHashTableEnabled);
    vectorDesc.setIsHybridHashJoin(isHybridHashJoin);
    vectorDesc.setSupportsKeyTypes(supportsKeyTypes);
    if (!supportsKeyTypes) {
        // Typed copy (not a raw ArrayList) so no unchecked conversion is needed.
        vectorDesc.setNotSupportedKeyTypes(new ArrayList<String>(notSupportedKeyTypes));
    }
    // Check common conditions for both Optimized and Fast Hash Tables.
    // Assume specialization is possible until a condition fails.
    boolean result = true;
    if (!useOptimizedTable || !isVectorizationMapJoinNativeEnabled || !isTezOrSpark || !oneMapJoinCondition || hasNullSafes || !smallTableExprVectorizes) {
        result = false;
    }
    if (!isFastHashTableEnabled) {
        // Check optimized-only hash table restrictions.
        if (!supportsKeyTypes) {
            result = false;
        }
    } else {
        // With the fast hash table enabled, a hybrid hash join rules out specialization.
        if (isHybridHashJoin) {
            result = false;
        }
    }
    // Convert dynamic arrays and maps to simple arrays.
    bigTableRetainedMapping.finalize();
    bigTableOuterKeyMapping.finalize();
    smallTableMapping.finalize();
    vectorMapJoinInfo.setBigTableRetainedMapping(bigTableRetainedMapping);
    vectorMapJoinInfo.setBigTableOuterKeyMapping(bigTableOuterKeyMapping);
    vectorMapJoinInfo.setSmallTableMapping(smallTableMapping);
    projectionMapping.finalize();
    // Verify we added an entry for each output.
    assert projectionMapping.isSourceSequenceGood();
    vectorMapJoinInfo.setProjectionMapping(projectionMapping);
    return result;
}

Also used : VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) ArrayList(java.util.ArrayList) VectorColumnOutputMapping(org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping) UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) VectorColumnSourceMapping(org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) HashSet(java.util.HashSet) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UDFToInteger(org.apache.hadoop.hive.ql.udf.UDFToInteger) UDFToByte(org.apache.hadoop.hive.ql.udf.UDFToByte) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 2 with VectorMapJoinDesc

use of org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc in project hive by apache.

the class HashTableLoader method init.

/**
 * Stores the supplied context objects on this loader and, when the join runs in vector mode
 * with the native fast hash table enabled, decides whether the fast container is used.
 *
 * <p>{@code useFastContainer} is only assigned when the join operator is a
 * {@code VectorizationOperator}; it becomes {@code true} only if that operator's vector
 * descriptor is present and reports the FAST hash table implementation type.
 *
 * @param context execution mapper context to remember
 * @param mrContext not read by this method
 * @param hconf configuration to remember and to consult for the fast hash table flag
 * @param joinOp the MapJoin operator whose descriptor is loaded
 */
@Override
public void init(ExecMapperContext context, MapredContext mrContext, Configuration hconf, MapJoinOperator joinOp) {
    this.context = context;
    this.hconf = hconf;
    this.joinOp = joinOp;
    this.desc = joinOp.getConf();

    // Nothing more to decide unless the join is vectorized...
    if (!desc.getVectorMode()) {
        return;
    }
    // ...and the native fast hash table is switched on.
    if (!HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED)) {
        return;
    }
    if (!(joinOp instanceof VectorizationOperator)) {
        return;
    }

    VectorizationOperator vectorizedJoinOp = (VectorizationOperator) joinOp;
    VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) vectorizedJoinOp.getVectorDesc();
    if (vectorDesc == null) {
        useFastContainer = false;
    } else {
        useFastContainer = vectorDesc.getHashTableImplementationType() == VectorMapJoinDesc.HashTableImplementationType.FAST;
    }
}

Also used : VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) VectorizationOperator(org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator)

Example 3 with VectorMapJoinDesc

use of org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc in project hive by apache.

the class VectorMapJoinFastTableContainer method createHashTables.

/**
 * Creates the fast hash table container that matches the key type and hash table kind
 * recorded in this container's {@link VectorMapJoinDesc}.
 *
 * @param newThreshold threshold value handed through to the created container
 * @return the container for the (key type, kind) combination, or {@code null} when the
 *     combination is not covered by any case below
 */
private VectorMapJoinFastHashTableContainerBase createHashTables(int newThreshold) {
    VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc();
    HashTableKind kind = vectorDesc.getHashTableKind();
    HashTableKeyType keyType = vectorDesc.getHashTableKeyType();
    boolean fullOuter = vectorDesc.getIsFullOuter();
    boolean minMax = vectorDesc.getMinMaxEnabled();
    int wbSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEWBSIZE);

    switch (keyType) {
        // All of these key types share the long-keyed containers.
        case BOOLEAN:
        case BYTE:
        case SHORT:
        case INT:
        case DATE:
        case LONG:
            switch (kind) {
                case HASH_MAP:
                    return new VectorMapJoinFastLongHashMapContainer(fullOuter, minMax, keyType, newThreshold, loadFactor, wbSize, estimatedKeyCount, desc.getKeyTblDesc(), numHTs);
                case HASH_MULTISET:
                    return new VectorMapJoinFastLongHashMultiSetContainer(fullOuter, minMax, keyType, newThreshold, loadFactor, wbSize, estimatedKeyCount, desc.getKeyTblDesc(), numHTs);
                case HASH_SET:
                    return new VectorMapJoinFastLongHashSetContainer(fullOuter, minMax, keyType, newThreshold, loadFactor, wbSize, estimatedKeyCount, desc.getKeyTblDesc(), numHTs);
            }
            break;
        case STRING:
            switch (kind) {
                case HASH_MAP:
                    return new VectorMapJoinFastStringHashMapContainer(fullOuter, newThreshold, loadFactor, wbSize, estimatedKeyCount, desc.getKeyTblDesc(), numHTs);
                case HASH_MULTISET:
                    return new VectorMapJoinFastStringHashMultiSetContainer(fullOuter, newThreshold, loadFactor, wbSize, estimatedKeyCount, desc.getKeyTblDesc(), numHTs);
                case HASH_SET:
                    return new VectorMapJoinFastStringHashSetContainer(fullOuter, newThreshold, loadFactor, wbSize, estimatedKeyCount, desc.getKeyTblDesc(), numHTs);
            }
            break;
        case MULTI_KEY:
            switch (kind) {
                case HASH_MAP:
                    return new VectorMapJoinFastMultiKeyHashMapContainer(fullOuter, newThreshold, loadFactor, wbSize, estimatedKeyCount, numHTs);
                case HASH_MULTISET:
                    return new VectorMapJoinFastMultiKeyHashMultiSetContainer(fullOuter, newThreshold, loadFactor, wbSize, estimatedKeyCount, numHTs);
                case HASH_SET:
                    return new VectorMapJoinFastMultiKeyHashSetContainer(fullOuter, newThreshold, loadFactor, wbSize, estimatedKeyCount, numHTs);
            }
            break;
    }
    // No case matched the key type / kind combination.
    return null;
}

Also used : VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) HashTableKeyType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType) HashTableKind(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind)

Example 4 with VectorMapJoinDesc

use of org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc in project hive by apache.

the class MapJoinTestConfig method createVectorMapJoinDesc.

/**
 * Builds a {@link VectorMapJoinDesc} (with its {@link VectorMapJoinInfo}) from a MapJoin test
 * description, using the FAST hash table implementation type.
 *
 * <p>The hash table kind is derived from the test's join variation and the hash table key
 * type from the single big table key's primitive category (MULTI_KEY otherwise). The column
 * mappings are assembled in output order: retained big table columns first, then retained
 * small table keys, then small table values.
 *
 * @param testDesc the test description supplying join variation, column numbers, names and
 *     type infos
 * @return the populated vector MapJoin descriptor
 * @throws RuntimeException if the join variation is unknown, or if the number of projected
 *     columns differs from {@code testDesc.outputColumnNames.length}
 */
public static VectorMapJoinDesc createVectorMapJoinDesc(MapJoinTestDescription testDesc) {
    VectorMapJoinDesc vectorDesc = new VectorMapJoinDesc();
    vectorDesc.setHashTableImplementationType(HashTableImplementationType.FAST);
    HashTableKind hashTableKind;
    switch(testDesc.vectorMapJoinVariation) {
        case INNER:
            hashTableKind = HashTableKind.HASH_MAP;
            break;
        case INNER_BIG_ONLY:
            hashTableKind = HashTableKind.HASH_MULTISET;
            break;
        case LEFT_SEMI:
        case LEFT_ANTI:
            hashTableKind = HashTableKind.HASH_SET;
            break;
        case OUTER:
        case FULL_OUTER:
            hashTableKind = HashTableKind.HASH_MAP;
            break;
        default:
            throw new RuntimeException("unknown operator variation " + testDesc.vectorMapJoinVariation);
    }
    vectorDesc.setHashTableKind(hashTableKind);
    // Assume a multi-key table; narrow it only for a single key of a recognized primitive type.
    HashTableKeyType hashTableKeyType = HashTableKeyType.MULTI_KEY;
    if (testDesc.bigTableKeyTypeInfos.length == 1) {
        switch(((PrimitiveTypeInfo) testDesc.bigTableKeyTypeInfos[0]).getPrimitiveCategory()) {
            case BOOLEAN:
                hashTableKeyType = HashTableKeyType.BOOLEAN;
                break;
            case BYTE:
                hashTableKeyType = HashTableKeyType.BYTE;
                break;
            case SHORT:
                hashTableKeyType = HashTableKeyType.SHORT;
                break;
            case INT:
                hashTableKeyType = HashTableKeyType.INT;
                break;
            case LONG:
                hashTableKeyType = HashTableKeyType.LONG;
                break;
            case STRING:
                hashTableKeyType = HashTableKeyType.STRING;
                break;
            default:
                // Any other single-key category keeps MULTI_KEY.
                break;
        }
    }
    vectorDesc.setHashTableKeyType(hashTableKeyType);
    vectorDesc.setVectorMapJoinVariation(testDesc.vectorMapJoinVariation);
    vectorDesc.setMinMaxEnabled(false);
    VectorMapJoinInfo vectorMapJoinInfo = new VectorMapJoinInfo();
    vectorMapJoinInfo.setBigTableKeyColumnMap(testDesc.bigTableKeyColumnNums);
    vectorMapJoinInfo.setBigTableKeyColumnNames(testDesc.bigTableKeyColumnNames);
    vectorMapJoinInfo.setBigTableKeyTypeInfos(testDesc.bigTableKeyTypeInfos);
    vectorMapJoinInfo.setSlimmedBigTableKeyExpressions(null);
    vectorDesc.setAllBigTableKeyExpressions(null);
    vectorMapJoinInfo.setBigTableValueColumnMap(testDesc.bigTableColumnNums);
    vectorMapJoinInfo.setBigTableValueColumnNames(testDesc.bigTableColumnNames);
    vectorMapJoinInfo.setBigTableValueTypeInfos(testDesc.bigTableTypeInfos);
    vectorMapJoinInfo.setSlimmedBigTableValueExpressions(null);
    vectorDesc.setAllBigTableValueExpressions(null);
    vectorMapJoinInfo.setBigTableFilterExpressions(new VectorExpression[0]);
    /*
     * Column mapping.
     */
    VectorColumnOutputMapping bigTableRetainMapping = new VectorColumnOutputMapping("Big Table Retain Mapping");
    VectorColumnOutputMapping nonOuterSmallTableKeyMapping = new VectorColumnOutputMapping("Non Outer Small Table Key Key Mapping");
    VectorColumnOutputMapping outerSmallTableKeyMapping = new VectorColumnOutputMapping("Outer Small Table Key Mapping");
    VectorColumnSourceMapping fullOuterSmallTableKeyMapping = new VectorColumnSourceMapping("Full Outer Small Table Key Mapping");
    VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping");
    int nextOutputColumn = 0;
    final int bigTableRetainedSize = testDesc.bigTableRetainColumnNums.length;
    for (int i = 0; i < bigTableRetainedSize; i++) {
        final int batchColumnIndex = testDesc.bigTableRetainColumnNums[i];
        // NOTE(review): the type is looked up by loop index i, not by batchColumnIndex; these
        // agree only when the retained column numbers are 0..n-1 -- TODO confirm.
        TypeInfo typeInfo = testDesc.bigTableTypeInfos[i];
        projectionMapping.add(nextOutputColumn, batchColumnIndex, typeInfo);
        // Collect columns we copy from the big table batch to the overflow batch.
        if (!bigTableRetainMapping.containsOutputColumn(batchColumnIndex)) {
            // Tolerate repeated use of a big table column.
            bigTableRetainMapping.add(batchColumnIndex, batchColumnIndex, typeInfo);
        }
        nextOutputColumn++;
    }
    boolean isOuterJoin = (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER || testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER);
    // Emulated scratch columns are numbered right after the big table's own columns.
    int emulateScratchColumn = testDesc.bigTableTypeInfos.length;
    final int smallTableKeyRetainSize = testDesc.smallTableRetainKeyColumnNums.length;
    for (int i = 0; i < smallTableKeyRetainSize; i++) {
        final int smallTableKeyColumnNum = testDesc.smallTableRetainKeyColumnNums[i];
        final int bigTableKeyColumnNum = testDesc.bigTableKeyColumnNums[smallTableKeyColumnNum];
        TypeInfo keyTypeInfo = testDesc.smallTableKeyTypeInfos[smallTableKeyColumnNum];
        if (!isOuterJoin) {
            // Project the big table key into the small table result "area".
            projectionMapping.add(nextOutputColumn, bigTableKeyColumnNum, keyTypeInfo);
            if (!bigTableRetainMapping.containsOutputColumn(bigTableKeyColumnNum)) {
                nonOuterSmallTableKeyMapping.add(bigTableKeyColumnNum, bigTableKeyColumnNum, keyTypeInfo);
            }
        } else {
            // Outer joins get a separate (emulated) scratch column for each small table key.
            outerSmallTableKeyMapping.add(bigTableKeyColumnNum, emulateScratchColumn, keyTypeInfo);
            projectionMapping.add(nextOutputColumn, emulateScratchColumn, keyTypeInfo);
            // For FULL OUTER MapJoin, we need to be able to deserialize a Small Table key
            // into the output result.
            fullOuterSmallTableKeyMapping.add(smallTableKeyColumnNum, emulateScratchColumn, keyTypeInfo);
            emulateScratchColumn++;
        }
        nextOutputColumn++;
    }
    // The order of the fields in the LazyBinary small table value must be used, so
    // we use the source ordering flavor for the mapping.
    VectorColumnSourceMapping smallTableValueMapping = new VectorColumnSourceMapping("Small Table Value Mapping");
    for (int i = 0; i < testDesc.smallTableValueTypeInfos.length; i++) {
        smallTableValueMapping.add(i, emulateScratchColumn, testDesc.smallTableValueTypeInfos[i]);
        projectionMapping.add(nextOutputColumn, emulateScratchColumn, testDesc.smallTableValueTypeInfos[i]);
        emulateScratchColumn++;
        nextOutputColumn++;
    }
    // Convert dynamic arrays and maps to simple arrays.
    bigTableRetainMapping.finalize();
    vectorMapJoinInfo.setBigTableRetainColumnMap(bigTableRetainMapping.getOutputColumns());
    vectorMapJoinInfo.setBigTableRetainTypeInfos(bigTableRetainMapping.getTypeInfos());
    nonOuterSmallTableKeyMapping.finalize();
    vectorMapJoinInfo.setNonOuterSmallTableKeyColumnMap(nonOuterSmallTableKeyMapping.getOutputColumns());
    vectorMapJoinInfo.setNonOuterSmallTableKeyTypeInfos(nonOuterSmallTableKeyMapping.getTypeInfos());
    outerSmallTableKeyMapping.finalize();
    fullOuterSmallTableKeyMapping.finalize();
    vectorMapJoinInfo.setOuterSmallTableKeyMapping(outerSmallTableKeyMapping);
    vectorMapJoinInfo.setFullOuterSmallTableKeyMapping(fullOuterSmallTableKeyMapping);
    smallTableValueMapping.finalize();
    vectorMapJoinInfo.setSmallTableValueMapping(smallTableValueMapping);
    projectionMapping.finalize();
    // Verify we added an entry for each output.
    assert projectionMapping.isSourceSequenceGood();
    vectorMapJoinInfo.setProjectionMapping(projectionMapping);
    // Fail with a diagnosable message when the projection does not cover every output column.
    if (projectionMapping.getCount() != testDesc.outputColumnNames.length) {
        throw new RuntimeException("projection mapping count " + projectionMapping.getCount() + " does not match output column count " + testDesc.outputColumnNames.length);
    }
    vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo);
    return vectorDesc;
}

Also used : HashTableKeyType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType) VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) HashTableKind(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind) VectorMapJoinInfo(org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo) VectorColumnOutputMapping(org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) VectorColumnSourceMapping(org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping)

Example 5 with VectorMapJoinDesc

use of org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc in project hive by apache.

the class MapJoinTestConfig method createMapJoin.

/**
 * Builds the map join operator to test, plus the small-table container that backs it.
 *
 * <p>The container is a {@code HashMapWrapper} when {@code isOriginalMapJoin} is set and a
 * {@code MapJoinBytesTableContainer} otherwise; it is given the key/value SerDe contexts and
 * filled through {@code loadTableContainerData}. The operator is a row-mode
 * {@code MapJoinOperator} unless {@code isVectorMapJoin} is set, in which case a
 * {@code VectorMapJoinOperator} is built, or a {@code VectorMapJoinOuterFilteredOperator}
 * when the join is an outer join and filters are registered under tag 0.
 *
 * <p>Side effect: sets {@code HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD} to {@code true} on
 * {@code testDesc.hiveConf}.
 *
 * @param testDesc test description; supplies the HiveConf and the big table column names
 * @param testData test data; its small table key hash map size sizes the bytes container
 * @param mapJoinDesc descriptor the operator is configured from
 * @param isVectorMapJoin whether to build a vectorized operator instead of the row-mode one
 * @param isOriginalMapJoin whether to back the join with a {@code HashMapWrapper}
 * @param shareMapJoinTableContainer not read by this method
 * @return the operator together with its table container and container SerDe
 * @throws SerDeException declared by this method
 * @throws IOException declared by this method
 * @throws HiveException declared by this method
 */
public static CreateMapJoinResult createMapJoin(MapJoinTestDescription testDesc, MapJoinTestData testData, MapJoinDesc mapJoinDesc, boolean isVectorMapJoin, boolean isOriginalMapJoin, MapJoinTableContainer shareMapJoinTableContainer) throws SerDeException, IOException, HiveException {
    // Filters are always looked up under tag 0, independent of mapJoinDesc.getPosBigTable().
    final Byte bigTableTag = 0;

    MapJoinTableContainerSerDe containerSerDe = MapJoinTestConfig.createMapJoinTableContainerSerDe(mapJoinDesc);
    MapJoinObjectSerDeContext valueContext = containerSerDe.getValueContext();

    // Pick the small-table container implementation.
    MapJoinTableContainer tableContainer;
    if (isOriginalMapJoin) {
        tableContainer = new HashMapWrapper(testDesc.hiveConf, -1);
    } else {
        tableContainer = new MapJoinBytesTableContainer(testDesc.hiveConf, valueContext, testData.smallTableKeyHashMap.size(), 0);
    }
    tableContainer.setSerde(containerSerDe.getKeyContext(), containerSerDe.getValueContext());
    loadTableContainerData(testDesc, testData, tableContainer);

    MapJoinOperator joinOperator;
    if (isVectorMapJoin) {
        VectorizationContext vectorizationContext = new VectorizationContext("test", testDesc.bigTableColumnNameList);
        /*
      // UNDONE: Unclear this belongs in the input VectorizationContext...
      // Create scratch columns to hold small table results.
      for (int i = 0; i < testDesc.smallTableValueTypeInfos.length; i++) {
        vContext.allocateScratchColumn(testDesc.smallTableValueTypeInfos[i]);
      }
      */
        // Mirror what the Vectorizer class does: vectorize the big table key and value
        // expressions and record them on the vector descriptor.
        VectorMapJoinDesc vectorDesc = new VectorMapJoinDesc();
        byte bigTablePosition = (byte) mapJoinDesc.getPosBigTable();

        List<ExprNodeDesc> bigTableKeyExprs = mapJoinDesc.getKeys().get(bigTablePosition);
        VectorExpression[] keyVectorExpressions = vectorizationContext.getVectorExpressions(bigTableKeyExprs);
        vectorDesc.setAllBigTableKeyExpressions(keyVectorExpressions);

        Map<Byte, List<ExprNodeDesc>> valueExprsByTag = mapJoinDesc.getExprs();
        List<ExprNodeDesc> bigTableValueExprs = valueExprsByTag.get(bigTablePosition);
        VectorExpression[] valueVectorExpressions = vectorizationContext.getVectorExpressions(bigTableValueExprs);
        vectorDesc.setAllBigTableValueExpressions(valueVectorExpressions);

        List<ExprNodeDesc> filtersForBigTable = mapJoinDesc.getFilters().get(bigTableTag);
        // The filter list is only inspected for outer joins (short-circuit preserved).
        boolean outerJoinWithFilters = !mapJoinDesc.isNoOuterJoin() && filtersForBigTable.size() > 0;
        if (outerJoinWithFilters) {
            joinOperator = new VectorMapJoinOuterFilteredOperator(new CompilationOpContext(), mapJoinDesc, vectorizationContext, vectorDesc);
        } else {
            joinOperator = new VectorMapJoinOperator(new CompilationOpContext(), mapJoinDesc, vectorizationContext, vectorDesc);
        }
    } else {
        // Row-mode operator: configured directly from the descriptor.
        joinOperator = new MapJoinOperator(new CompilationOpContext());
        joinOperator.setConf(mapJoinDesc);
    }

    HiveConf.setBoolVar(testDesc.hiveConf, HiveConf.ConfVars.HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD, true);
    return new CreateMapJoinResult(joinOperator, tableContainer, containerSerDe);
}

Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) MapJoinBytesTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) MapJoinTableContainerSerDe(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe) HashMapWrapper(org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) VectorMapJoinOuterFilteredOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator) MapJoinObjectSerDeContext(org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) List(java.util.List) ArrayList(java.util.ArrayList) MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Aggregations

VectorMapJoinDesc (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc)13 ArrayList (java.util.ArrayList)7 List (java.util.List)6 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)6 HashTableKeyType (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType)5 HashTableKind (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind)5 MapJoinBytesTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer)4 MapJoinTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer)4 MapJoinTableContainerSerDe (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe)4 VectorMapJoinOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator)4 VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext)4 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)4 VectorMapJoinOuterFilteredOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator)3 VectorMapJoinFastTableContainer (org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer)3 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)3 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)3 CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext)2 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)2 HashMapWrapper (org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper)2 MapJoinObjectSerDeContext (org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext)2