Search in sources :

Example 1 with MapJoinKey

Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey in the Apache Hive project.

From the class MapJoinOperator, method process.

/**
 * Processes one input row belonging to the input identified by {@code tag}.
 *
 * <p>For every position other than the row's own, the row's join key is set on that position's
 * {@code ReusableGetAdaptor}, the matching rows (if any) are placed into {@code storage[pos]},
 * and, when {@code joinNeeded} ends up {@code true}, the row itself is added to
 * {@code storage[alias]} and {@code checkAndGenObject()} is called. Storage is reset before
 * returning normally.
 *
 * @param row the incoming row; handed to {@code setMapJoinKey}, {@code spillBigTableRow} and
 *            {@code getFilteredValue}
 * @param tag position of the input the row came from; narrowed to a byte and stored in
 *            {@code alias}
 * @throws HiveException wrapping any exception thrown while the row is processed; the original
 *                       exception is logged and kept as the cause
 */
@Override
public void process(Object row, int tag) throws HiveException {
    try {
        // Record which input this row came from; that position is skipped in every loop below.
        alias = (byte) tag;
        // Lazily build one getter per non-alias position the first time a row arrives.
        // NOTE(review): the array is sized by mapJoinTables.length while the loops are bounded by
        // order.length - presumably the two are equal; confirm.
        if (hashMapRowGetters == null) {
            hashMapRowGetters = new ReusableGetAdaptor[mapJoinTables.length];
            MapJoinKey refKey = getRefKey(alias);
            for (byte pos = 0; pos < order.length; pos++) {
                if (pos != alias) {
                    hashMapRowGetters[pos] = mapJoinTables[pos].createGetter(refKey);
                }
            }
        }
        // coming from the spilled matchfile. We need to recreate hashMapRowGetter against new hashtables
        // Only positions that actually have a spilled table get their getter replaced.
        if (hybridMapJoinLeftover) {
            MapJoinKey refKey = getRefKey(alias);
            for (byte pos = 0; pos < order.length; pos++) {
                if (pos != alias && spilledMapJoinTables[pos] != null) {
                    hashMapRowGetters[pos] = spilledMapJoinTables[pos].createGetter(refKey);
                }
            }
        }
        // compute keys and values as StandardObjects
        // firstSetKey: the first adaptor whose key was set from the row; later adaptors copy from it.
        ReusableGetAdaptor firstSetKey = null;
        int fieldCount = joinKeys[alias].size();
        // joinNeeded: whether output should be generated for this row after the loop.
        boolean joinNeeded = false;
        // bigTableRowSpilled: ensures spillBigTableRow is called at most once per row.
        boolean bigTableRowSpilled = false;
        for (byte pos = 0; pos < order.length; pos++) {
            if (pos != alias) {
                JoinUtil.JoinResult joinResult;
                ReusableGetAdaptor adaptor;
                if (firstSetKey == null) {
                    // First non-alias position: set the key on its adaptor directly from the row.
                    adaptor = firstSetKey = hashMapRowGetters[pos];
                    joinResult = setMapJoinKey(firstSetKey, row, alias);
                } else {
                    // Keys for all tables are the same, so only the first has to deserialize them.
                    adaptor = hashMapRowGetters[pos];
                    joinResult = adaptor.setFromOther(firstSetKey);
                }
                MapJoinRowContainer rowContainer = adaptor.getCurrentRows();
                // Sanity check (active only with -ea): anything other than MATCH must not carry rows.
                if (joinResult != JoinUtil.JoinResult.MATCH) {
                    assert (rowContainer == null || !rowContainer.hasRows()) : "Expecting an empty result set for no match";
                }
                // When an unwrap container is configured for this position, route the rows through
                // it together with the current key taken from the first adaptor.
                if (rowContainer != null && unwrapContainer[pos] != null) {
                    Object[] currentKey = firstSetKey.getCurrentKey();
                    rowContainer = unwrapContainer[pos].setInternal(rowContainer, currentKey);
                }
                // there is no join-value or join-key has all null elements
                if (rowContainer == null || firstSetKey.hasAnyNulls(fieldCount, nullsafes)) {
                    if (!noOuterJoin) {
                        // we only keep the LEFT side if the row is not spilled
                        if (!conf.isHybridHashJoin() || hybridMapJoinLeftover || (joinResult != JoinUtil.JoinResult.SPILL && !bigTableRowSpilled)) {
                            joinNeeded = true;
                            storage[pos] = dummyObjVectors[pos];
                        } else {
                            // NOTE(review): this resets joinNeeded even if an earlier position in this
                            // loop set it to true - presumably intended because the row is spilled and
                            // handled later; confirm.
                            joinNeeded = false;
                        }
                    } else {
                        storage[pos] = emptyList;
                    }
                } else {
                    // Rows were found and the key passes the null check: store a copy of the rows and
                    // record the container's alias filter for this position.
                    joinNeeded = true;
                    storage[pos] = rowContainer.copy();
                    aliasFilterTags[pos] = rowContainer.getAliasFilter();
                }
                // postpone the join processing for this pair by also spilling this big table row.
                if (joinResult == JoinUtil.JoinResult.SPILL && !bigTableRowSpilled) {
                    // For n-way join, only spill big table rows once
                    spillBigTableRow(mapJoinTables[pos], row);
                    bigTableRowSpilled = true;
                }
            }
        }
        if (joinNeeded) {
            List<Object> value = getFilteredValue(alias, row);
            // Add the value to the ArrayList
            storage[alias].addRow(value);
            // generate the output records
            checkAndGenObject();
        }
        // done with the row
        // Clear the rows held for this input and drop the references stored for every other position.
        storage[tag].clearRows();
        for (byte pos = 0; pos < order.length; pos++) {
            if (pos != tag) {
                storage[pos] = null;
            }
        }
    } catch (Exception e) {
        // Log once here, then rethrow as HiveException with the original exception as the cause.
        String msg = "Unexpected exception from " + this.getClass().getSimpleName() + " : " + e.getMessage();
        LOG.error(msg, e);
        throw new HiveException(msg, e);
    }
}
Also used : MapJoinKey(org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) KryoException(com.esotericsoftware.kryo.KryoException) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) MapJoinRowContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinRowContainer) ReusableGetAdaptor(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor)

Example 2 with MapJoinKey

Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey in the Apache Hive project.

From the class VectorMapJoinOptimizedCreateHashTable, method createHashTable.

/**
 * Creates the optimized vector map-join hash table that corresponds to the key type and hash
 * table kind reported by the descriptor's vector description.
 *
 * @param desc map-join descriptor; supplies the outer-join flag and, through
 *             {@code getVectorDesc()}, the key type, hash table kind and min/max setting
 * @param mapJoinTableContainer container handed to the created hash table; also used to create
 *                              the getter that is passed alongside it
 * @return the hash table for the key type / kind combination, or {@code null} when the
 *         combination matches none of the handled cases
 */
public static VectorMapJoinOptimizedHashTable createHashTable(MapJoinDesc desc, MapJoinTableContainer mapJoinTableContainer) {
    // One getter, created from any key of the container, is shared by whichever table is built.
    MapJoinKey anyKey = mapJoinTableContainer.getAnyKey();
    ReusableGetAdaptor getter = mapJoinTableContainer.createGetter(anyKey);
    boolean outerJoin = !desc.isNoOuterJoin();
    VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc();
    HashTableKind kind = vectorDesc.hashTableKind();
    HashTableKeyType keyType = vectorDesc.hashTableKeyType();
    boolean minMax = vectorDesc.minMaxEnabled();
    switch (keyType) {
        // All of these key types share the "Long" hash table family.
        case BOOLEAN:
        case BYTE:
        case SHORT:
        case INT:
        case LONG:
            switch (kind) {
                case HASH_MAP:
                    return new VectorMapJoinOptimizedLongHashMap(minMax, outerJoin, keyType, mapJoinTableContainer, getter);
                case HASH_MULTISET:
                    return new VectorMapJoinOptimizedLongHashMultiSet(minMax, outerJoin, keyType, mapJoinTableContainer, getter);
                case HASH_SET:
                    return new VectorMapJoinOptimizedLongHashSet(minMax, outerJoin, keyType, mapJoinTableContainer, getter);
                default:
                    return null;
            }
        case STRING:
            switch (kind) {
                case HASH_MAP:
                    return new VectorMapJoinOptimizedStringHashMap(outerJoin, mapJoinTableContainer, getter);
                case HASH_MULTISET:
                    return new VectorMapJoinOptimizedStringHashMultiSet(outerJoin, mapJoinTableContainer, getter);
                case HASH_SET:
                    return new VectorMapJoinOptimizedStringHashSet(outerJoin, mapJoinTableContainer, getter);
                default:
                    return null;
            }
        case MULTI_KEY:
            switch (kind) {
                case HASH_MAP:
                    return new VectorMapJoinOptimizedMultiKeyHashMap(outerJoin, mapJoinTableContainer, getter);
                case HASH_MULTISET:
                    return new VectorMapJoinOptimizedMultiKeyHashMultiSet(outerJoin, mapJoinTableContainer, getter);
                case HASH_SET:
                    return new VectorMapJoinOptimizedMultiKeyHashSet(outerJoin, mapJoinTableContainer, getter);
                default:
                    return null;
            }
        default:
            // Key type not handled above: no table is created.
            return null;
    }
}
Also used : VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) MapJoinKey(org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey) HashTableKeyType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType) HashTableKind(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind) ReusableGetAdaptor(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor)

Aggregations

MapJoinKey (org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey): 2, ReusableGetAdaptor (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor): 2, KryoException (com.esotericsoftware.kryo.KryoException): 1, IOException (java.io.IOException): 1, MapJoinRowContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinRowContainer): 1, HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 1, VectorMapJoinDesc (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc): 1, HashTableKeyType (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType): 1, HashTableKind (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind): 1, SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 1