Use of org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc in the Apache Hive project.
Class VectorMapJoinOptimizedCreateHashTable, method createHashTable.
/**
 * Creates the optimized vector map-join hash table wrapper selected by the hash table key
 * type and hash table kind stored in the join's {@link VectorMapJoinDesc}.
 *
 * <p>Long-family keys (BOOLEAN, BYTE, SHORT, INT, LONG) and STRING keys additionally receive
 * the key table descriptor from {@code desc}; MULTI_KEY tables do not.
 *
 * @param desc the map-join descriptor; its vector descriptor is cast to
 *     {@link VectorMapJoinDesc}
 * @param mapJoinTableContainer the container that supplies the reference key and the row getter
 * @return the matching hash table, or {@code null} when the key type / kind combination is
 *     not handled by any case below
 */
public static VectorMapJoinOptimizedHashTable createHashTable(MapJoinDesc desc, MapJoinTableContainer mapJoinTableContainer) {
    MapJoinKey anyKey = mapJoinTableContainer.getAnyKey();
    ReusableGetAdaptor rowGetter = mapJoinTableContainer.createGetter(anyKey);
    boolean outerJoin = !desc.isNoOuterJoin();

    // NOTE(review): the original carried an unexplained "UNDONE" marker at this point.
    VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc();
    HashTableKind kind = vectorDesc.getHashTableKind();
    HashTableKeyType keyType = vectorDesc.getHashTableKeyType();
    boolean minMaxEnabled = vectorDesc.getMinMaxEnabled();

    switch (keyType) {
    case BOOLEAN:
    case BYTE:
    case SHORT:
    case INT:
    case LONG:
        // All integer-like key types share the long-keyed implementations.
        switch (kind) {
        case HASH_MAP:
            return new VectorMapJoinOptimizedLongHashMap(minMaxEnabled, outerJoin, keyType, mapJoinTableContainer, rowGetter, desc.getKeyTblDesc());
        case HASH_MULTISET:
            return new VectorMapJoinOptimizedLongHashMultiSet(minMaxEnabled, outerJoin, keyType, mapJoinTableContainer, rowGetter, desc.getKeyTblDesc());
        case HASH_SET:
            return new VectorMapJoinOptimizedLongHashSet(minMaxEnabled, outerJoin, keyType, mapJoinTableContainer, rowGetter, desc.getKeyTblDesc());
        }
        // Unhandled kind for a long-family key: no table is created.
        return null;

    case STRING:
        switch (kind) {
        case HASH_MAP:
            return new VectorMapJoinOptimizedStringHashMap(outerJoin, mapJoinTableContainer, rowGetter, desc.getKeyTblDesc());
        case HASH_MULTISET:
            return new VectorMapJoinOptimizedStringHashMultiSet(outerJoin, mapJoinTableContainer, rowGetter, desc.getKeyTblDesc());
        case HASH_SET:
            return new VectorMapJoinOptimizedStringHashSet(outerJoin, mapJoinTableContainer, rowGetter, desc.getKeyTblDesc());
        }
        // Unhandled kind for a string key: no table is created.
        return null;

    case MULTI_KEY:
        switch (kind) {
        case HASH_MAP:
            return new VectorMapJoinOptimizedMultiKeyHashMap(outerJoin, mapJoinTableContainer, rowGetter);
        case HASH_MULTISET:
            return new VectorMapJoinOptimizedMultiKeyHashMultiSet(outerJoin, mapJoinTableContainer, rowGetter);
        case HASH_SET:
            return new VectorMapJoinOptimizedMultiKeyHashSet(outerJoin, mapJoinTableContainer, rowGetter);
        }
        // Unhandled kind for a multi-key table: no table is created.
        return null;
    }

    // Key type not covered by any case above.
    return null;
}
Use of org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc in the Apache Hive project.
Class TestVectorMapJoinFastHashTable, method runEstimationCheck.
/**
 * Loads a FAST hash-map table container with {@code keyCount} rows of long key / long value
 * and compares three size figures against each other with a 0.05 tolerance passed to
 * {@code checkRelativeError}: the measured object size, the container's own memory
 * estimate, and the estimate computed by {@link ConvertJoinMapJoin}.
 *
 * <p>The compiler-side estimate is only computed for {@code MULTI_KEY} and {@code LONG};
 * for any other key type it stays {@code null} and is passed on as such.
 *
 * @param l the hash table key type to configure the container with
 */
private void runEstimationCheck(HashTableKeyType l) throws SerDeException, IOException, HiveException {
    // Describe a fast, hash-map flavoured vectorized map join for the requested key type.
    MapJoinDesc joinDesc = new MapJoinDesc();
    VectorMapJoinDesc vectorJoinDesc = new VectorMapJoinDesc();
    vectorJoinDesc.setHashTableKeyType(l);
    vectorJoinDesc.setIsFastHashTableEnabled(true);
    vectorJoinDesc.setHashTableImplementationType(HashTableImplementationType.FAST);
    vectorJoinDesc.setHashTableKind(HashTableKind.HASH_MAP);
    joinDesc.setVectorDesc(vectorJoinDesc);

    TableDesc keyTableDesc = new TableDesc();
    keyTableDesc.setProperties(new Properties());
    joinDesc.setKeyTblDesc(keyTableDesc);

    Configuration conf = new HiveConf();
    VectorMapJoinFastTableContainer tableContainer = new VectorMapJoinFastTableContainer(joinDesc, conf, keyCount, 1);
    tableContainer.setSerde(null, null);

    long totalBytes = 0;
    BinarySortableSerializeWrite writer = new BinarySortableSerializeWrite(1);
    Output buffer = new Output();
    for (int row = 0; row < keyCount; row++) {
        // NOTE(review): both writables are built from buffer.getData(); if that exposes the
        // live buffer rather than a copy, the key ends up aliasing the value bytes - confirm.
        writer.set(buffer);
        writer.writeLong(row);
        BytesWritable rowKey = new BytesWritable(buffer.getData(), buffer.getLength());

        writer.set(buffer);
        writer.writeLong(row * 2);
        BytesWritable rowValue = new BytesWritable(buffer.getData(), buffer.getLength());

        tableContainer.putRow(rowKey, rowValue);
        // Accounts 8 bytes for the key and 8 bytes for the value of each row.
        totalBytes += 8 + 8;
    }

    Statistics stats = new Statistics(keyCount, totalBytes, 0, 0);
    Long measuredSize = getObjectSize(tableContainer);
    Long runtimeEstimate = tableContainer.getEstimatedMemorySize();

    ConvertJoinMapJoin converter = new ConvertJoinMapJoin();
    converter.hashTableLoadFactor = .75f;
    Long plannerEstimate = null;
    switch (l) {
    case LONG:
        plannerEstimate = converter.computeOnlineDataSizeFastLongKeyed(stats);
        break;
    case MULTI_KEY:
        plannerEstimate = converter.computeOnlineDataSizeFastCompositeKeyed(stats);
        break;
    }

    LOG.info("stats: {}", stats);
    LOG.info("realObjectSize: {}", measuredSize);
    LOG.info("executionEstimate : {}", runtimeEstimate);
    LOG.info("compilerEstimate: {}", plannerEstimate);

    checkRelativeError(measuredSize, runtimeEstimate, .05);
    checkRelativeError(measuredSize, plannerEstimate, .05);
    checkRelativeError(plannerEstimate, runtimeEstimate, .05);
}
Use of org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc in the Apache Hive project.
Class MapJoinTestConfig, method createNativeVectorMapJoin.
/**
 * Builds a native vectorized map-join operator for a test, together with a small-table
 * container of the requested implementation type that has been loaded with the test data.
 *
 * <p>For {@code OPTIMIZED} a {@link MapJoinBytesTableContainer} is created and given key/value
 * serde contexts; for {@code FAST} a {@link VectorMapJoinFastTableContainer} is created and
 * the returned serde reference stays {@code null}.
 *
 * @param testDesc test description supplying the Hive configuration and join variation
 * @param testData test data; its small-table key map size is used to size the container
 * @param mapJoinDesc join descriptor; the newly created vector descriptor is attached to it
 * @param hashTableImplementationType implementation type recorded on the vector descriptor
 * @param shareMapJoinTableContainer not referenced by this method body
 * @return the operator, the loaded table container, and the container serde (may be null)
 * @throws RuntimeException if the implementation type is neither OPTIMIZED nor FAST
 */
public static CreateMapJoinResult createNativeVectorMapJoin(MapJoinTestDescription testDesc, MapJoinTestData testData, MapJoinDesc mapJoinDesc, HashTableImplementationType hashTableImplementationType, MapJoinTableContainer shareMapJoinTableContainer) throws SerDeException, IOException, HiveException {
    VectorMapJoinDesc vectorDesc = MapJoinTestConfig.createVectorMapJoinDesc(testDesc);
    mapJoinDesc.setVectorDesc(vectorDesc);
    vectorDesc.setHashTableImplementationType(hashTableImplementationType);
    VectorMapJoinInfo joinInfo = vectorDesc.getVectorMapJoinInfo();

    MapJoinTableContainerSerDe containerSerDe = null;
    MapJoinTableContainer tableContainer;
    HashTableImplementationType implType = vectorDesc.getHashTableImplementationType();
    switch (implType) {
    case OPTIMIZED:
        tableContainer = new MapJoinBytesTableContainer(testDesc.hiveConf, null, testData.smallTableKeyHashMap.size(), 0);
        containerSerDe = MapJoinTestConfig.createMapJoinTableContainerSerDe(mapJoinDesc);
        tableContainer.setSerde(containerSerDe.getKeyContext(), containerSerDe.getValueContext());
        break;
    case FAST:
        tableContainer = new VectorMapJoinFastTableContainer(mapJoinDesc, testDesc.hiveConf, testData.smallTableKeyHashMap.size(), 1);
        break;
    default:
        throw new RuntimeException("Unexpected hash table implementation type " + implType);
    }

    // The container is always loaded from the test data; shareMapJoinTableContainer is
    // currently not consulted (a sharing path existed only as commented-out code).
    loadTableContainerData(testDesc, testData, tableContainer);

    VectorizationContext vectorizationContext = MapJoinTestConfig.createVectorizationContext(testDesc);
    // Kept as a byte so the map lookups below box to Byte, matching the Byte-keyed maps.
    byte posBigTable = (byte) mapJoinDesc.getPosBigTable();

    VectorExpression[] bigTableKeyExprs = vectorizationContext.getVectorExpressions(mapJoinDesc.getKeys().get(posBigTable));
    joinInfo.setSlimmedBigTableKeyExpressions(bigTableKeyExprs);

    VectorExpression[] bigTableValueExprs = vectorizationContext.getVectorExpressions(mapJoinDesc.getExprs().get(posBigTable));
    joinInfo.setSlimmedBigTableValueExpressions(bigTableValueExprs);

    VectorMapJoinCommonOperator joinOperator = MapJoinTestConfig.createNativeVectorMapJoinOperator(testDesc.vectorMapJoinVariation, mapJoinDesc, vectorDesc, vectorizationContext);

    // Set after the operator is created, as in the original ordering.
    HiveConf.setBoolVar(testDesc.hiveConf, HiveConf.ConfVars.HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD, true);

    return new CreateMapJoinResult(joinOperator, tableContainer, containerSerDe);
}
Aggregations