Example use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey in the Apache Hive project.
From the class MapJoinOperator, method process.
/**
 * Processes one incoming row for the input identified by {@code tag}.
 *
 * <p>The row's join key is looked up through the {@code ReusableGetAdaptor} of every position
 * other than {@code tag}; the lookup results are placed into {@code storage}, and when at least
 * one position requires it, the row's filtered value is added to {@code storage[alias]} and
 * {@code checkAndGenObject()} is invoked. Per-row state in {@code storage} is reset before returning.
 *
 * @param row the incoming row
 * @param tag index of the input the row arrived on; narrowed to {@code byte} and stored in {@code alias}
 * @throws HiveException wrapping any exception raised while processing the row (the cause is preserved
 *         and the failure is logged first)
 */
@Override
public void process(Object row, int tag) throws HiveException {
try {
alias = (byte) tag;
// First call only: lazily build one getter per position other than the current alias.
if (hashMapRowGetters == null) {
hashMapRowGetters = new ReusableGetAdaptor[mapJoinTables.length];
MapJoinKey refKey = getRefKey(alias);
for (byte pos = 0; pos < order.length; pos++) {
if (pos != alias) {
hashMapRowGetters[pos] = mapJoinTables[pos].createGetter(refKey);
}
}
}
// When hybridMapJoinLeftover is set, the row is coming from the spilled matchfile.
// We need to recreate hashMapRowGetters against the new (spilled) hashtables; positions
// without a spilled table keep their existing getter.
if (hybridMapJoinLeftover) {
MapJoinKey refKey = getRefKey(alias);
for (byte pos = 0; pos < order.length; pos++) {
if (pos != alias && spilledMapJoinTables[pos] != null) {
hashMapRowGetters[pos] = spilledMapJoinTables[pos].createGetter(refKey);
}
}
}
// compute keys and values as StandardObjects
// firstSetKey: getter of the first non-alias position; the only one that reads the key from the row.
ReusableGetAdaptor firstSetKey = null;
int fieldCount = joinKeys[alias].size();
// joinNeeded: whether output should be generated for this row after the loop.
boolean joinNeeded = false;
// bigTableRowSpilled: set once spillBigTableRow has been called for this row.
boolean bigTableRowSpilled = false;
for (byte pos = 0; pos < order.length; pos++) {
if (pos != alias) {
JoinUtil.JoinResult joinResult;
ReusableGetAdaptor adaptor;
if (firstSetKey == null) {
// First non-alias position: set the key on the getter directly from the row.
adaptor = firstSetKey = hashMapRowGetters[pos];
joinResult = setMapJoinKey(firstSetKey, row, alias);
} else {
// Keys for all tables are the same, so only the first has to deserialize them.
adaptor = hashMapRowGetters[pos];
joinResult = adaptor.setFromOther(firstSetKey);
}
MapJoinRowContainer rowContainer = adaptor.getCurrentRows();
// Sanity check (active only with assertions enabled): anything other than MATCH must yield no rows.
if (joinResult != JoinUtil.JoinResult.MATCH) {
assert (rowContainer == null || !rowContainer.hasRows()) : "Expecting an empty result set for no match";
}
// If an unwrap container is configured for this position, wrap the rows together with the current key.
if (rowContainer != null && unwrapContainer[pos] != null) {
Object[] currentKey = firstSetKey.getCurrentKey();
rowContainer = unwrapContainer[pos].setInternal(rowContainer, currentKey);
}
// there is no join-value or join-key has all null elements
// NOTE(review): the call is hasAnyNulls(fieldCount, nullsafes), so "all null" above may be
// imprecise — confirm against the ReusableGetAdaptor contract.
if (rowContainer == null || firstSetKey.hasAnyNulls(fieldCount, nullsafes)) {
if (!noOuterJoin) {
// we only keep the LEFT side if the row is not spilled
if (!conf.isHybridHashJoin() || hybridMapJoinLeftover || (joinResult != JoinUtil.JoinResult.SPILL && !bigTableRowSpilled)) {
joinNeeded = true;
storage[pos] = dummyObjVectors[pos];
} else {
// Overrides a joinNeeded=true set by an earlier position in this loop.
joinNeeded = false;
}
} else {
storage[pos] = emptyList;
}
} else {
// Rows found for a usable key: store a copy of them and record the container's alias filter.
joinNeeded = true;
storage[pos] = rowContainer.copy();
aliasFilterTags[pos] = rowContainer.getAliasFilter();
}
// postpone the join processing for this pair by also spilling this big table row.
if (joinResult == JoinUtil.JoinResult.SPILL && !bigTableRowSpilled) {
// For n-way join, only spill big table rows once
spillBigTableRow(mapJoinTables[pos], row);
bigTableRowSpilled = true;
}
}
}
if (joinNeeded) {
List<Object> value = getFilteredValue(alias, row);
// Add the value to the ArrayList
storage[alias].addRow(value);
// generate the output records
checkAndGenObject();
}
// done with the row
// Reset per-row state: clear the rows stored for this input and drop references held for the others.
storage[tag].clearRows();
for (byte pos = 0; pos < order.length; pos++) {
if (pos != tag) {
storage[pos] = null;
}
}
} catch (Exception e) {
// Log and rethrow as HiveException, keeping the original exception as the cause.
String msg = "Unexpected exception from " + this.getClass().getSimpleName() + " : " + e.getMessage();
LOG.error(msg, e);
throw new HiveException(msg, e);
}
}
Example use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey in the Apache Hive project.
From the class VectorMapJoinOptimizedCreateHashTable, method createHashTable.
/**
 * Creates the optimized vector map-join hash table wrapper matching the key type and
 * hash table kind declared by the vector descriptor of {@code desc}.
 *
 * @param desc the map-join descriptor; its vector descriptor supplies the hash table kind,
 *        the key type and the min/max flag
 * @param mapJoinTableContainer the container the returned hash table is built around
 * @return the matching hash table, or {@code null} when the key type / kind combination is
 *         not one of the handled cases
 */
public static VectorMapJoinOptimizedHashTable createHashTable(MapJoinDesc desc, MapJoinTableContainer mapJoinTableContainer) {
  // A getter over the container, keyed by a reference key taken from the container itself.
  final MapJoinKey anyKey = mapJoinTableContainer.getAnyKey();
  final ReusableGetAdaptor getter = mapJoinTableContainer.createGetter(anyKey);

  final boolean outerJoin = !desc.isNoOuterJoin();

  final VectorMapJoinDesc vectorMapJoinDesc = (VectorMapJoinDesc) desc.getVectorDesc();
  final HashTableKind kind = vectorMapJoinDesc.hashTableKind();
  final HashTableKeyType keyType = vectorMapJoinDesc.hashTableKeyType();
  final boolean useMinMax = vectorMapJoinDesc.minMaxEnabled();

  switch (keyType) {
    case BOOLEAN:
    case BYTE:
    case SHORT:
    case INT:
    case LONG:
      return createLongKeyHashTable(kind, useMinMax, outerJoin, keyType, mapJoinTableContainer, getter);
    case STRING:
      return createStringKeyHashTable(kind, outerJoin, mapJoinTableContainer, getter);
    case MULTI_KEY:
      return createMultiKeyHashTable(kind, outerJoin, mapJoinTableContainer, getter);
    default:
      // Unhandled key type: no hash table is created.
      return null;
  }
}

/** Picks the long-keyed hash table for {@code kind}; returns {@code null} for an unhandled kind. */
private static VectorMapJoinOptimizedHashTable createLongKeyHashTable(HashTableKind kind, boolean useMinMax,
    boolean outerJoin, HashTableKeyType keyType, MapJoinTableContainer container, ReusableGetAdaptor getter) {
  switch (kind) {
    case HASH_MAP:
      return new VectorMapJoinOptimizedLongHashMap(useMinMax, outerJoin, keyType, container, getter);
    case HASH_MULTISET:
      return new VectorMapJoinOptimizedLongHashMultiSet(useMinMax, outerJoin, keyType, container, getter);
    case HASH_SET:
      return new VectorMapJoinOptimizedLongHashSet(useMinMax, outerJoin, keyType, container, getter);
    default:
      return null;
  }
}

/** Picks the string-keyed hash table for {@code kind}; returns {@code null} for an unhandled kind. */
private static VectorMapJoinOptimizedHashTable createStringKeyHashTable(HashTableKind kind, boolean outerJoin,
    MapJoinTableContainer container, ReusableGetAdaptor getter) {
  switch (kind) {
    case HASH_MAP:
      return new VectorMapJoinOptimizedStringHashMap(outerJoin, container, getter);
    case HASH_MULTISET:
      return new VectorMapJoinOptimizedStringHashMultiSet(outerJoin, container, getter);
    case HASH_SET:
      return new VectorMapJoinOptimizedStringHashSet(outerJoin, container, getter);
    default:
      return null;
  }
}

/** Picks the multi-key hash table for {@code kind}; returns {@code null} for an unhandled kind. */
private static VectorMapJoinOptimizedHashTable createMultiKeyHashTable(HashTableKind kind, boolean outerJoin,
    MapJoinTableContainer container, ReusableGetAdaptor getter) {
  switch (kind) {
    case HASH_MAP:
      return new VectorMapJoinOptimizedMultiKeyHashMap(outerJoin, container, getter);
    case HASH_MULTISET:
      return new VectorMapJoinOptimizedMultiKeyHashMultiSet(outerJoin, container, getter);
    case HASH_SET:
      return new VectorMapJoinOptimizedMultiKeyHashSet(outerJoin, container, getter);
    default:
      return null;
  }
}
Aggregations