Search in sources :

Example 11 with VectorMapJoinDesc

use of org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc in project hive by apache.

In class VectorMapJoinOptimizedCreateHashTable, method createHashTable:

/**
 * Builds the optimized vector map-join hash table wrapper matching the key type and hash
 * table kind recorded in the vectorization descriptor attached to {@code desc}.
 *
 * <p>BOOLEAN, BYTE, SHORT, INT and LONG keys select the "Long" implementations, STRING keys
 * select the "String" implementations, and MULTI_KEY selects the "MultiKey" implementations.
 * Within each family the concrete class is picked by the hash table kind
 * (HASH_MAP, HASH_MULTISET or HASH_SET).
 *
 * @param desc map-join descriptor; its vector descriptor is cast to {@link VectorMapJoinDesc}
 * @param mapJoinTableContainer container that supplies the reference key and the row getter
 * @return the matching hash table, or {@code null} when the key type / kind combination is
 *         not covered by any case below
 */
public static VectorMapJoinOptimizedHashTable createHashTable(MapJoinDesc desc, MapJoinTableContainer mapJoinTableContainer) {
    // The getter is created from whatever key getAnyKey() hands back and is passed to the wrapper.
    MapJoinKey anyKey = mapJoinTableContainer.getAnyKey();
    ReusableGetAdaptor rowGetter = mapJoinTableContainer.createGetter(anyKey);
    boolean outerJoin = !desc.isNoOuterJoin();

    // NOTE(review): the original listing carried an unexplained "UNDONE" marker at this point.
    VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc();
    HashTableKind kind = vectorDesc.getHashTableKind();
    HashTableKeyType keyType = vectorDesc.getHashTableKeyType();
    boolean useMinMax = vectorDesc.getMinMaxEnabled();

    switch (keyType) {
        case BOOLEAN:
        case BYTE:
        case SHORT:
        case INT:
        case LONG:
            // Only the long-keyed variants take the min/max flag and the key type.
            switch (kind) {
                case HASH_MAP:
                    return new VectorMapJoinOptimizedLongHashMap(useMinMax, outerJoin, keyType, mapJoinTableContainer, rowGetter, desc.getKeyTblDesc());
                case HASH_MULTISET:
                    return new VectorMapJoinOptimizedLongHashMultiSet(useMinMax, outerJoin, keyType, mapJoinTableContainer, rowGetter, desc.getKeyTblDesc());
                case HASH_SET:
                    return new VectorMapJoinOptimizedLongHashSet(useMinMax, outerJoin, keyType, mapJoinTableContainer, rowGetter, desc.getKeyTblDesc());
            }
            break;
        case STRING:
            switch (kind) {
                case HASH_MAP:
                    return new VectorMapJoinOptimizedStringHashMap(outerJoin, mapJoinTableContainer, rowGetter, desc.getKeyTblDesc());
                case HASH_MULTISET:
                    return new VectorMapJoinOptimizedStringHashMultiSet(outerJoin, mapJoinTableContainer, rowGetter, desc.getKeyTblDesc());
                case HASH_SET:
                    return new VectorMapJoinOptimizedStringHashSet(outerJoin, mapJoinTableContainer, rowGetter, desc.getKeyTblDesc());
            }
            break;
        case MULTI_KEY:
            // Multi-key variants do not receive the key table descriptor.
            switch (kind) {
                case HASH_MAP:
                    return new VectorMapJoinOptimizedMultiKeyHashMap(outerJoin, mapJoinTableContainer, rowGetter);
                case HASH_MULTISET:
                    return new VectorMapJoinOptimizedMultiKeyHashMultiSet(outerJoin, mapJoinTableContainer, rowGetter);
                case HASH_SET:
                    return new VectorMapJoinOptimizedMultiKeyHashSet(outerJoin, mapJoinTableContainer, rowGetter);
            }
            break;
    }
    // No case matched: the caller receives null, exactly as before.
    return null;
}
Also used : VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) MapJoinKey(org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey) HashTableKeyType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType) HashTableKind(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind) ReusableGetAdaptor(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor)

Example 12 with VectorMapJoinDesc

use of org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc in project hive by apache.

In class TestVectorMapJoinFastHashTable, method runEstimationCheck:

/**
 * Loads {@code keyCount} long key/value rows into a FAST hash-map table container and
 * cross-checks three memory-size figures against each other: the value returned by
 * {@code getObjectSize(container)}, the container's own
 * {@code getEstimatedMemorySize()}, and the figure produced by {@link ConvertJoinMapJoin}
 * for the requested key type.
 *
 * <p>A compiler-side figure is computed only for MULTI_KEY and LONG; for any other key type
 * it stays {@code null} and is handed to {@code checkRelativeError} as such.
 *
 * @param l hash table key type to configure the container with
 */
private void runEstimationCheck(HashTableKeyType l) throws SerDeException, IOException, HiveException {
    // Describe a FAST, HASH_MAP-kind table keyed by the requested key type.
    MapJoinDesc joinDesc = new MapJoinDesc();
    VectorMapJoinDesc vectorDesc = new VectorMapJoinDesc();
    vectorDesc.setHashTableKeyType(l);
    vectorDesc.setIsFastHashTableEnabled(true);
    vectorDesc.setHashTableImplementationType(HashTableImplementationType.FAST);
    vectorDesc.setHashTableKind(HashTableKind.HASH_MAP);
    joinDesc.setVectorDesc(vectorDesc);

    TableDesc keyTableDesc = new TableDesc();
    keyTableDesc.setProperties(new Properties());
    joinDesc.setKeyTblDesc(keyTableDesc);

    Configuration conf = new HiveConf();
    VectorMapJoinFastTableContainer container = new VectorMapJoinFastTableContainer(joinDesc, conf, keyCount, 1);
    container.setSerde(null, null);

    // Insert rows (k -> 2 * k), tallying 8 bytes for each key and 8 for each value.
    long rawBytes = 0;
    BinarySortableSerializeWrite writer = new BinarySortableSerializeWrite(1);
    Output buffer = new Output();
    for (int row = 0; row < keyCount; row++) {
        writer.set(buffer);
        writer.writeLong(row);
        // NOTE(review): the key wraps buffer.getData(); if BytesWritable does not copy and
        // set() reuses the same array, the value write below may overwrite the key bytes
        // before putRow sees them - confirm.
        BytesWritable key = new BytesWritable(buffer.getData(), buffer.getLength());

        writer.set(buffer);
        writer.writeLong(row * 2);
        BytesWritable value = new BytesWritable(buffer.getData(), buffer.getLength());

        container.putRow(key, value);
        rawBytes += 8 + 8;
    }

    Statistics stat = new Statistics(keyCount, rawBytes, 0, 0);
    Long measuredSize = getObjectSize(container);
    Long runtimeEstimate = container.getEstimatedMemorySize();

    ConvertJoinMapJoin converter = new ConvertJoinMapJoin();
    converter.hashTableLoadFactor = .75f;
    Long plannerEstimate = null;
    switch (l) {
        case MULTI_KEY:
            plannerEstimate = converter.computeOnlineDataSizeFastCompositeKeyed(stat);
            break;
        case LONG:
            plannerEstimate = converter.computeOnlineDataSizeFastLongKeyed(stat);
            break;
    }

    LOG.info("stats: {}", stat);
    LOG.info("realObjectSize: {}", measuredSize);
    LOG.info("executionEstimate : {}", runtimeEstimate);
    LOG.info("compilerEstimate: {}", plannerEstimate);

    // Each pair is checked with .05 as the third argument (presumably a 5% tolerance - confirm).
    checkRelativeError(measuredSize, runtimeEstimate, .05);
    checkRelativeError(measuredSize, plannerEstimate, .05);
    checkRelativeError(plannerEstimate, runtimeEstimate, .05);
}
Also used : VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) Configuration(org.apache.hadoop.conf.Configuration) BytesWritable(org.apache.hadoop.io.BytesWritable) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) Properties(java.util.Properties) Statistics(org.apache.hadoop.hive.ql.plan.Statistics) ConvertJoinMapJoin(org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) HiveConf(org.apache.hadoop.hive.conf.HiveConf) VectorMapJoinFastTableContainer(org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc)

Example 13 with VectorMapJoinDesc

use of org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc in project hive by apache.

In class MapJoinTestConfig, method createNativeVectorMapJoin:

/**
 * Assembles a native vector map-join operator for a test, together with a small-table
 * container loaded from {@code testData}.
 *
 * <p>Steps, in order: create and attach a {@link VectorMapJoinDesc}, build the table container
 * for the requested implementation type (OPTIMIZED also gets a SerDe; FAST does not), load the
 * test data into it, derive the big-table key and value vector expressions, create the
 * operator, and finally set {@code HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD} to true on the
 * test configuration.
 *
 * @param testDesc test description providing the Hive configuration and join variation
 * @param testData test data; the size of its small-table key map is passed to the container
 *        constructors
 * @param mapJoinDesc map-join descriptor that receives the vector descriptor
 * @param hashTableImplementationType OPTIMIZED or FAST
 * @param shareMapJoinTableContainer currently unused: data is always loaded into a newly
 *        created container rather than taken from a shared one
 * @return the operator, the loaded container, and the SerDe ({@code null} for FAST)
 * @throws RuntimeException if the implementation type is neither OPTIMIZED nor FAST
 */
public static CreateMapJoinResult createNativeVectorMapJoin(MapJoinTestDescription testDesc, MapJoinTestData testData, MapJoinDesc mapJoinDesc, HashTableImplementationType hashTableImplementationType, MapJoinTableContainer shareMapJoinTableContainer) throws SerDeException, IOException, HiveException {
    VectorMapJoinDesc vectorDesc = MapJoinTestConfig.createVectorMapJoinDesc(testDesc);
    mapJoinDesc.setVectorDesc(vectorDesc);
    vectorDesc.setHashTableImplementationType(hashTableImplementationType);
    VectorMapJoinInfo joinInfo = vectorDesc.getVectorMapJoinInfo();

    MapJoinTableContainer tableContainer;
    MapJoinTableContainerSerDe containerSerDe = null;
    switch (vectorDesc.getHashTableImplementationType()) {
        case OPTIMIZED:
            tableContainer = new MapJoinBytesTableContainer(testDesc.hiveConf, null, testData.smallTableKeyHashMap.size(), 0);
            containerSerDe = MapJoinTestConfig.createMapJoinTableContainerSerDe(mapJoinDesc);
            tableContainer.setSerde(containerSerDe.getKeyContext(), containerSerDe.getValueContext());
            break;
        case FAST:
            tableContainer = new VectorMapJoinFastTableContainer(mapJoinDesc, testDesc.hiveConf, testData.smallTableKeyHashMap.size(), 1);
            break;
        default:
            throw new RuntimeException("Unexpected hash table implementation type " + vectorDesc.getHashTableImplementationType());
    }

    // The container is always populated from the test data; no shared-container path is active.
    loadTableContainerData(testDesc, testData, tableContainer);

    VectorizationContext vectorContext = MapJoinTestConfig.createVectorizationContext(testDesc);
    byte bigTablePos = (byte) mapJoinDesc.getPosBigTable();

    // Big-table key expressions.
    VectorExpression[] bigTableKeyExprs = vectorContext.getVectorExpressions(mapJoinDesc.getKeys().get(bigTablePos));
    joinInfo.setSlimmedBigTableKeyExpressions(bigTableKeyExprs);

    // Big-table value expressions.
    Map<Byte, List<ExprNodeDesc>> exprsByAlias = mapJoinDesc.getExprs();
    VectorExpression[] bigTableValueExprs = vectorContext.getVectorExpressions(exprsByAlias.get(bigTablePos));
    joinInfo.setSlimmedBigTableValueExpressions(bigTableValueExprs);

    VectorMapJoinCommonOperator joinOperator = MapJoinTestConfig.createNativeVectorMapJoinOperator(testDesc.vectorMapJoinVariation, mapJoinDesc, vectorDesc, vectorContext);

    // Set only after the operator has been created, matching the original statement order.
    HiveConf.setBoolVar(testDesc.hiveConf, HiveConf.ConfVars.HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD, true);
    return new CreateMapJoinResult(joinOperator, tableContainer, containerSerDe);
}
Also used : VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) MapJoinBytesTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer) VectorMapJoinInfo(org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) MapJoinTableContainerSerDe(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe) VectorMapJoinFastTableContainer(org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) List(java.util.List) ArrayList(java.util.ArrayList) MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer)

Aggregations

VectorMapJoinDesc (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc)13 ArrayList (java.util.ArrayList)7 List (java.util.List)6 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)6 HashTableKeyType (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType)5 HashTableKind (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind)5 MapJoinBytesTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer)4 MapJoinTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer)4 MapJoinTableContainerSerDe (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe)4 VectorMapJoinOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator)4 VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext)4 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)4 VectorMapJoinOuterFilteredOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator)3 VectorMapJoinFastTableContainer (org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer)3 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)3 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)3 CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext)2 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)2 HashMapWrapper (org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper)2 MapJoinObjectSerDeContext (org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext)2