Use of org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc in project hive by apache.
From class VectorMapJoinFastTableContainer, method createHashTable:
private VectorMapJoinFastHashTable createHashTable(int newThreshold) {

  boolean isOuterJoin = !desc.isNoOuterJoin();
  VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc();
  HashTableImplementationType hashTableImplementationType = vectorDesc.hashTableImplementationType();
  HashTableKind hashTableKind = vectorDesc.hashTableKind();
  HashTableKeyType hashTableKeyType = vectorDesc.hashTableKeyType();
  boolean minMaxEnabled = vectorDesc.minMaxEnabled();

  int writeBufferSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEWBSIZE);

  VectorMapJoinFastHashTable hashTable = null;

  // The key type selects the hash table family (Long / String / MultiKey);
  // the hash table kind selects map, multiset, or set within that family.
  switch (hashTableKeyType) {
  case BOOLEAN:
  case BYTE:
  case SHORT:
  case INT:
  case LONG:
    // All integral key types share the Long hash table implementations.
    switch (hashTableKind) {
    case HASH_MAP:
      hashTable = new VectorMapJoinFastLongHashMap(minMaxEnabled, isOuterJoin, hashTableKeyType, newThreshold, loadFactor, writeBufferSize, estimatedKeyCount);
      break;
    case HASH_MULTISET:
      hashTable = new VectorMapJoinFastLongHashMultiSet(minMaxEnabled, isOuterJoin, hashTableKeyType, newThreshold, loadFactor, writeBufferSize, estimatedKeyCount);
      break;
    case HASH_SET:
      hashTable = new VectorMapJoinFastLongHashSet(minMaxEnabled, isOuterJoin, hashTableKeyType, newThreshold, loadFactor, writeBufferSize, estimatedKeyCount);
      break;
    }
    break;

  case STRING:
    switch (hashTableKind) {
    case HASH_MAP:
      hashTable = new VectorMapJoinFastStringHashMap(isOuterJoin, newThreshold, loadFactor, writeBufferSize, estimatedKeyCount);
      break;
    case HASH_MULTISET:
      hashTable = new VectorMapJoinFastStringHashMultiSet(isOuterJoin, newThreshold, loadFactor, writeBufferSize, estimatedKeyCount);
      break;
    case HASH_SET:
      hashTable = new VectorMapJoinFastStringHashSet(isOuterJoin, newThreshold, loadFactor, writeBufferSize, estimatedKeyCount);
      break;
    }
    break;

  case MULTI_KEY:
    switch (hashTableKind) {
    case HASH_MAP:
      hashTable = new VectorMapJoinFastMultiKeyHashMap(isOuterJoin, newThreshold, loadFactor, writeBufferSize, estimatedKeyCount);
      break;
    case HASH_MULTISET:
      hashTable = new VectorMapJoinFastMultiKeyHashMultiSet(isOuterJoin, newThreshold, loadFactor, writeBufferSize, estimatedKeyCount);
      break;
    case HASH_SET:
      hashTable = new VectorMapJoinFastMultiKeyHashSet(isOuterJoin, newThreshold, loadFactor, writeBufferSize, estimatedKeyCount);
      break;
    }
    break;
  }

  return hashTable;
}
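The method above is a pure two-level dispatch: the key type selects a hash table family (Long, String, or MultiKey), and the hash table kind selects map, multiset, or set within that family. Below is a minimal, self-contained sketch of the same pattern; the enums and classes here (KeyType, HashKind, FastTable and its subclasses) are hypothetical stand-ins for illustration, not Hive's actual types.

public class FastTableDispatchSketch {

  // Hypothetical stand-ins for HashTableKeyType and HashTableKind.
  enum KeyType { LONG, STRING, MULTI_KEY }
  enum HashKind { HASH_MAP, HASH_MULTISET, HASH_SET }

  // Hypothetical stand-ins for the VectorMapJoinFast* hash table classes.
  interface FastTable {}
  static class LongHashMap implements FastTable {}
  static class LongHashMultiSet implements FastTable {}
  static class LongHashSet implements FastTable {}
  static class StringHashMap implements FastTable {}

  // Same shape as createHashTable: outer switch on key type, inner switch on kind.
  static FastTable create(KeyType keyType, HashKind kind) {
    switch (keyType) {
    case LONG:
      switch (kind) {
      case HASH_MAP:      return new LongHashMap();
      case HASH_MULTISET: return new LongHashMultiSet();
      case HASH_SET:      return new LongHashSet();
      }
      break;
    case STRING:
      if (kind == HashKind.HASH_MAP) {
        return new StringHashMap();
      }
      break;
    default:
      break;
    }
    // The remaining combinations are omitted in this sketch.
    throw new IllegalArgumentException("Unhandled combination: " + keyType + "/" + kind);
  }

  public static void main(String[] args) {
    System.out.println(create(KeyType.LONG, HashKind.HASH_SET).getClass().getSimpleName());
  }
}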
Use of org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc in project hive by apache.
From class Vectorizer, method specializeMapJoinOperator:
Operator<? extends OperatorDesc> specializeMapJoinOperator(Operator<? extends OperatorDesc> op,
    VectorizationContext vContext, MapJoinDesc desc, VectorMapJoinInfo vectorMapJoinInfo)
    throws HiveException {

  Operator<? extends OperatorDesc> vectorOp = null;
  Class<? extends Operator<?>> opClass = null;

  VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc();

  HashTableImplementationType hashTableImplementationType = HashTableImplementationType.NONE;
  HashTableKind hashTableKind = HashTableKind.NONE;
  HashTableKeyType hashTableKeyType = HashTableKeyType.NONE;
  OperatorVariation operatorVariation = OperatorVariation.NONE;

  if (vectorDesc.getIsFastHashTableEnabled()) {
    hashTableImplementationType = HashTableImplementationType.FAST;
  } else {
    hashTableImplementationType = HashTableImplementationType.OPTIMIZED;
  }

  int joinType = desc.getConds()[0].getType();

  boolean isInnerBigOnly = false;
  if (joinType == JoinDesc.INNER_JOIN && isBigTableOnlyResults(desc)) {
    isInnerBigOnly = true;
  }

  // By default, we can always use the multi-key class.
  hashTableKeyType = HashTableKeyType.MULTI_KEY;

  if (!HiveConf.getBoolVar(hiveConf,
      HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MULTIKEY_ONLY_ENABLED)) {

    // Look for single column optimization.
    byte posBigTable = (byte) desc.getPosBigTable();
    Map<Byte, List<ExprNodeDesc>> keyExprs = desc.getKeys();
    List<ExprNodeDesc> bigTableKeyExprs = keyExprs.get(posBigTable);
    if (bigTableKeyExprs.size() == 1) {
      TypeInfo typeInfo = bigTableKeyExprs.get(0).getTypeInfo();
      LOG.info("Vectorizer vectorizeOperator map join typeName " + typeInfo.getTypeName());
      switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) {
      case BOOLEAN:
        hashTableKeyType = HashTableKeyType.BOOLEAN;
        break;
      case BYTE:
        hashTableKeyType = HashTableKeyType.BYTE;
        break;
      case SHORT:
        hashTableKeyType = HashTableKeyType.SHORT;
        break;
      case INT:
        hashTableKeyType = HashTableKeyType.INT;
        break;
      case LONG:
        hashTableKeyType = HashTableKeyType.LONG;
        break;
      case STRING:
      case CHAR:
      case VARCHAR:
      case BINARY:
        hashTableKeyType = HashTableKeyType.STRING;
        // No break: falls through to the no-op default, so the key type stays STRING.
      default:
        // Other primitive categories keep the MULTI_KEY default.
      }
    }
  }

  // The join type determines the operator variation and, with it,
  // how much the hash table must remember about the small table.
  switch (joinType) {
  case JoinDesc.INNER_JOIN:
    if (!isInnerBigOnly) {
      operatorVariation = OperatorVariation.INNER;
      hashTableKind = HashTableKind.HASH_MAP;
    } else {
      operatorVariation = OperatorVariation.INNER_BIG_ONLY;
      hashTableKind = HashTableKind.HASH_MULTISET;
    }
    break;
  case JoinDesc.LEFT_OUTER_JOIN:
  case JoinDesc.RIGHT_OUTER_JOIN:
    operatorVariation = OperatorVariation.OUTER;
    hashTableKind = HashTableKind.HASH_MAP;
    break;
  case JoinDesc.LEFT_SEMI_JOIN:
    operatorVariation = OperatorVariation.LEFT_SEMI;
    hashTableKind = HashTableKind.HASH_SET;
    break;
  default:
    throw new HiveException("Unknown join type " + joinType);
  }

  LOG.info("Vectorizer vectorizeOperator map join hashTableKind " + hashTableKind.name() +
      " hashTableKeyType " + hashTableKeyType.name());

  // Pick the specialized operator class for the (key type, operator variation) pair.
  switch (hashTableKeyType) {
  case BOOLEAN:
  case BYTE:
  case SHORT:
  case INT:
  case LONG:
    switch (operatorVariation) {
    case INNER:
      opClass = VectorMapJoinInnerLongOperator.class;
      break;
    case INNER_BIG_ONLY:
      opClass = VectorMapJoinInnerBigOnlyLongOperator.class;
      break;
    case LEFT_SEMI:
      opClass = VectorMapJoinLeftSemiLongOperator.class;
      break;
    case OUTER:
      opClass = VectorMapJoinOuterLongOperator.class;
      break;
    default:
      throw new HiveException("Unknown operator variation " + operatorVariation);
    }
    break;
  case STRING:
    switch (operatorVariation) {
    case INNER:
      opClass = VectorMapJoinInnerStringOperator.class;
      break;
    case INNER_BIG_ONLY:
      opClass = VectorMapJoinInnerBigOnlyStringOperator.class;
      break;
    case LEFT_SEMI:
      opClass = VectorMapJoinLeftSemiStringOperator.class;
      break;
    case OUTER:
      opClass = VectorMapJoinOuterStringOperator.class;
      break;
    default:
      throw new HiveException("Unknown operator variation " + operatorVariation);
    }
    break;
  case MULTI_KEY:
    switch (operatorVariation) {
    case INNER:
      opClass = VectorMapJoinInnerMultiKeyOperator.class;
      break;
    case INNER_BIG_ONLY:
      opClass = VectorMapJoinInnerBigOnlyMultiKeyOperator.class;
      break;
    case LEFT_SEMI:
      opClass = VectorMapJoinLeftSemiMultiKeyOperator.class;
      break;
    case OUTER:
      opClass = VectorMapJoinOuterMultiKeyOperator.class;
      break;
    default:
      throw new HiveException("Unknown operator variation " + operatorVariation);
    }
    break;
  default:
    throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name());
  }

  boolean minMaxEnabled = HiveConf.getBoolVar(hiveConf,
      HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED);

  // Record the chosen configuration on the vector descriptor.
  vectorDesc.setHashTableImplementationType(hashTableImplementationType);
  vectorDesc.setHashTableKind(hashTableKind);
  vectorDesc.setHashTableKeyType(hashTableKeyType);
  vectorDesc.setOperatorVariation(operatorVariation);
  vectorDesc.setMinMaxEnabled(minMaxEnabled);
  vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo);

  vectorOp = OperatorFactory.getVectorOperator(opClass, op.getCompilationOpContext(), op.getConf(), vContext);
  LOG.info("Vectorizer vectorizeOperator map join class " + vectorOp.getClass().getSimpleName());

  return vectorOp;
}
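The join-type decision above reduces to a small mapping: plain inner and outer joins keep a full hash map of small-table values, an inner join whose results only need big-table columns can get by with a multiset (per-key match counts), and a left-semi join only needs a set (key existence). Below is a hedged sketch of that mapping using hypothetical stand-in enums (JoinType, Variation, Kind), not Hive's JoinDesc constants, OperatorVariation, or HashTableKind.

public class JoinKindSketch {

  // Hypothetical stand-ins for the join-type constants and the two Hive enums.
  enum JoinType { INNER, LEFT_OUTER, RIGHT_OUTER, LEFT_SEMI }
  enum Variation { INNER, INNER_BIG_ONLY, LEFT_SEMI, OUTER }
  enum Kind { HASH_MAP, HASH_MULTISET, HASH_SET }

  static Variation variationFor(JoinType joinType, boolean isInnerBigOnly) {
    switch (joinType) {
    case INNER:       return isInnerBigOnly ? Variation.INNER_BIG_ONLY : Variation.INNER;
    case LEFT_OUTER:
    case RIGHT_OUTER: return Variation.OUTER;
    case LEFT_SEMI:   return Variation.LEFT_SEMI;
    default:          throw new IllegalArgumentException("Unknown join type " + joinType);
    }
  }

  static Kind kindFor(Variation variation) {
    switch (variation) {
    case INNER:
    case OUTER:          return Kind.HASH_MAP;       // need the small-table values
    case INNER_BIG_ONLY: return Kind.HASH_MULTISET;  // only need per-key match counts
    case LEFT_SEMI:      return Kind.HASH_SET;       // only need key existence
    default:             throw new IllegalArgumentException("Unknown variation " + variation);
    }
  }

  public static void main(String[] args) {
    Variation v = variationFor(JoinType.INNER, true);
    System.out.println(v + " -> " + kindFor(v));
  }
}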