Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe in the Apache Hive project.
From the class MapJoinTestConfig, method createNativeVectorMapJoin.
/**
 * Builds a native vectorized map-join operator for tests and hooks it up to a collector.
 *
 * <p>A new {@code VectorMapJoinDesc} is attached to {@code mapJoinDesc}, a small-table
 * container of the requested implementation type is created and filled through
 * {@code loadTableContainerData}, and the big-table key and value vector expressions are
 * stored on the descriptor's {@code VectorMapJoinInfo} before the operator is created.
 *
 * @param testDesc test description; supplies the HiveConf and the map-join variation
 * @param collectorOperator operator handed to {@code connectOperators} together with the new operator
 * @param testData test data; the size of its {@code smallTableKeyHashMap} is passed to the
 *        container constructors
 * @param mapJoinDesc map-join descriptor that receives the vector descriptor
 * @param hashTableImplementationType implementation to build; only OPTIMIZED and FAST are handled
 * @return the new operator, with the container installed via {@code setTestMapJoinTableContainer}
 * @throws RuntimeException if the implementation type is neither OPTIMIZED nor FAST
 */
public static MapJoinOperator createNativeVectorMapJoin(MapJoinTestDescription testDesc, Operator<? extends OperatorDesc> collectorOperator, MapJoinTestData testData, MapJoinDesc mapJoinDesc, HashTableImplementationType hashTableImplementationType) throws SerDeException, IOException, HiveException {
    VectorMapJoinDesc vectorDesc = MapJoinTestConfig.createVectorMapJoinDesc(testDesc);
    mapJoinDesc.setVectorDesc(vectorDesc);
    vectorDesc.setHashTableImplementationType(hashTableImplementationType);
    VectorMapJoinInfo joinInfo = vectorDesc.getVectorMapJoinInfo();

    // Create the small-table container that matches the configured implementation.
    HashTableImplementationType implType = vectorDesc.getHashTableImplementationType();
    MapJoinTableContainer smallTable;
    switch (implType) {
        case OPTIMIZED: {
            smallTable = new MapJoinBytesTableContainer(testDesc.hiveConf, null, testData.smallTableKeyHashMap.size(), 0);
            // The bytes container is given its key/value SerDe contexts explicitly.
            MapJoinTableContainerSerDe containerSerDe = MapJoinTestConfig.createMapJoinTableContainerSerDe(mapJoinDesc);
            smallTable.setSerde(containerSerDe.getKeyContext(), containerSerDe.getValueContext());
            break;
        }
        case FAST:
            smallTable = new VectorMapJoinFastTableContainer(mapJoinDesc, testDesc.hiveConf, testData.smallTableKeyHashMap.size());
            break;
        default:
            throw new RuntimeException("Unexpected hash table implementation type " + implType);
    }
    loadTableContainerData(testDesc, testData, smallTable);

    // Vectorize the big-table key and value expressions and record them on the join info.
    VectorizationContext vContext = MapJoinTestConfig.createVectorizationContext(testDesc);
    byte bigTablePos = (byte) mapJoinDesc.getPosBigTable();
    joinInfo.setSlimmedBigTableKeyExpressions(vContext.getVectorExpressions(mapJoinDesc.getKeys().get(bigTablePos)));
    joinInfo.setSlimmedBigTableValueExpressions(vContext.getVectorExpressions(mapJoinDesc.getExprs().get(bigTablePos)));

    VectorMapJoinCommonOperator mapJoinOperator = MapJoinTestConfig.createNativeVectorMapJoinOperator(testDesc.vectorMapJoinVariation, mapJoinDesc, vectorDesc, vContext);
    MapJoinTestConfig.connectOperators(testDesc, mapJoinOperator, collectorOperator);
    mapJoinOperator.setTestMapJoinTableContainer(1, smallTable, null);
    return mapJoinOperator;
}
Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe in the Apache Hive project.
From the class MapJoinOperator, method generateMapMetaData.
/**
 * Populates {@code mapJoinTableSerdes} with a key/value SerDe pair for every position in
 * {@code order} other than the big table.
 *
 * <p>One key SerDe is instantiated from the key table descriptor and its context is shared
 * by all positions. Each position gets its own value SerDe, taken from the plain value
 * descriptors when {@code conf.getNoOuterJoin()} is true and from the filtered value
 * descriptors otherwise.
 *
 * @throws HiveException wrapping any {@code SerDeException} raised while setting up a SerDe
 */
public void generateMapMetaData() throws HiveException {
    try {
        TableDesc keyDesc = conf.getKeyTblDesc();
        AbstractSerDe keySerializer = (AbstractSerDe) ReflectionUtil.newInstance(keyDesc.getSerDeClass(), null);
        keySerializer.initialize(null, keyDesc.getProperties(), null);
        MapJoinObjectSerDeContext sharedKeyContext = new MapJoinObjectSerDeContext(keySerializer, false);

        for (int tag = 0; tag < order.length; tag++) {
            // The big table is streamed, so it gets no SerDe entry.
            if (tag != posBigTable) {
                TableDesc valueDesc = conf.getNoOuterJoin()
                    ? conf.getValueTblDescs().get(tag)
                    : conf.getValueFilteredTblDescs().get(tag);
                AbstractSerDe valueSerializer = (AbstractSerDe) ReflectionUtil.newInstance(valueDesc.getSerDeClass(), null);
                valueSerializer.initialize(null, valueDesc.getProperties(), null);
                MapJoinObjectSerDeContext perTagValueContext = new MapJoinObjectSerDeContext(valueSerializer, hasFilter(tag));
                mapJoinTableSerdes[tag] = new MapJoinTableContainerSerDe(sharedKeyContext, perTagValueContext);
            }
        }
    } catch (SerDeException cause) {
        throw new HiveException(cause);
    }
}
Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe in the Apache Hive project.
From the class MapJoinOperator, method initializeOp.
/**
 * Prepares this map-join operator for execution.
 *
 * <p>The method resets per-run state, resolves the object-cache key, allocates the
 * per-tag table and SerDe arrays, and builds the small-table SerDes via
 * {@code generateMapMetaData()}. Unless the testing flag that disables hash table loading
 * is set, it then either schedules an asynchronous, cache-backed load of the hash tables
 * or loads them directly.
 *
 * @param hconf configuration used for the query id, the LLAP bucket settings and the
 *        testing flag
 * @throws HiveException declared by this method and by the initialization steps it calls
 */
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
this.hconf = hconf;
// Allocated before the superclass initialization runs.
// NOTE(review): presumably super.initializeOp() relies on this array being present — confirm.
unwrapContainer = new UnwrapRowContainer[conf.getTagLength()];
super.initializeOp(hconf);
int tagLen = conf.getTagLength();
// On Tez only: The hash map might already be cached in the container we run
// the task in. On MR: The cache is a no-op.
String queryId = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVEQUERYID);
// The cacheKey may have already been defined in the MapJoin conf spec
// as part of the Shared Work Optimization if it can be reused among
// multiple mapjoin operators. In that case, we take that key from conf
// and append this.getClass().getName() to disambiguate between different
// classes that may be using the same source data, e.g.
// VectorMapJoinInnerGenerateResultOperator and VectorMapJoinLeftSemiLongOperator.
// If the cacheKey is not defined in the conf, then we generate it.
cacheKey = conf.getCacheKey() == null ? MapJoinDesc.generateCacheKey(this.getOperatorId()) : conf.getCacheKey() + "_" + this.getClass().getName();
cache = ObjectCacheFactory.getCache(hconf, queryId, false);
loader = getHashTableLoader(hconf);
// Both LLAP settings fall back to -1 when they are absent from the configuration.
bucketId = hconf.getInt(Constants.LLAP_BUCKET_ID, -1);
numBuckets = hconf.getInt(Constants.LLAP_NUM_BUCKETS, -1);
hashMapRowGetters = null;
// One slot per tag; generateMapMetaData() below fills mapJoinTableSerdes for the small tables.
mapJoinTables = new MapJoinTableContainer[tagLen];
mapJoinTableSerdes = new MapJoinTableContainerSerDe[tagLen];
hashTblInitedOnce = false;
// Reset grace hashjoin context so that there is no state maintained when operator/work is
// retrieved from object cache
hybridMapJoinLeftover = false;
firstSmallTable = null;
doFullOuterMapJoinInit();
generateMapMetaData();
// Test hook: when the flag is set, stop here and do not load any hash tables.
isTestingNoHashTableLoad = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD);
if (isTestingNoHashTableLoad) {
return;
}
final ExecMapperContext mapContext = getExecContext();
final MapredContext mrContext = MapredContext.get();
if (!conf.isBucketMapJoin() && !conf.isDynamicPartitionHashJoin()) {
/*
* The issue with caching in case of bucket map join is that different tasks
* process different buckets and if the container is reused to join a different bucket,
* join results can be incorrect. The cache is keyed on operator id and for bucket map join
* the operator does not change but data needed is different. For a proper fix, this
* requires changes in the Tez API with regard to finding bucket id and
* also ability to schedule tasks to re-use containers that have cached the specific bucket.
*/
LOG.debug("This is not bucket map join, so cache");
// The cache is asked for the tables under cacheKey; loadHashTable is supplied as the loader callback.
Future<Pair<MapJoinTableContainer[], MapJoinTableContainerSerDe[]>> future = cache.retrieveAsync(cacheKey, () -> loadHashTable(mapContext, mrContext));
// NOTE(review): presumably the operator framework waits on these futures before processing rows — confirm.
asyncInitOperations.add(future);
} else if (!isInputFileChangeSensitive(mapContext)) {
// Bucket map join or dynamic partition hash join: bypass the cache and load directly,
// but only when the operator is not sensitive to input file changes.
loadHashTable(mapContext, mrContext);
hashTblInitedOnce = true;
}
}
Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe in the Apache Hive project.
From the class HashTableSinkOperator, method initializeOp.
/**
 * Initializes this hash-table sink for the small tables of a map join.
 *
 * <p>Sets up the console logger and the memory-exhaustion checker, builds the join key,
 * value and filter evaluators together with their object inspectors, and then creates one
 * {@code HashMapWrapper} and one {@code MapJoinTableContainerSerDe} for every position in
 * the tag order except the big table.
 *
 * @param hconf configuration used for the session-silent flag, the hash table scale and
 *        evaluator setup
 * @throws HiveException if a key or value SerDe cannot be initialized (the
 *         {@code SerDeException} is wrapped), or as declared by the steps this method calls
 */
@Override
@SuppressWarnings("unchecked")
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    boolean isSilent = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVESESSIONSILENT);
    console = new LogHelper(LOG, isSilent);
    memoryExhaustionChecker = MemoryExhaustionCheckerFactory.getChecker(console, hconf, conf);
    emptyRowContainer.addRow(emptyObjectArray);

    // for small tables only; so get the big table position first
    posBigTableAlias = conf.getPosBigTable();
    order = conf.getTagOrder();

    // initialize some variables, which used to be initialized in CommonJoinOperator
    this.hconf = hconf;
    filterMaps = conf.getFilterMap();
    int tagLen = conf.getTagLength();

    // process join keys
    joinKeys = new List[tagLen];
    JoinUtil.populateJoinKeyValue(joinKeys, conf.getKeys(), posBigTableAlias, hconf);
    joinKeysObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinKeys, inputObjInspectors, posBigTableAlias, tagLen);

    // process join values
    joinValues = new List[tagLen];
    JoinUtil.populateJoinKeyValue(joinValues, conf.getExprs(), posBigTableAlias, hconf);
    joinValuesObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinValues, inputObjInspectors, posBigTableAlias, tagLen);

    // process join filters
    joinFilters = new List[tagLen];
    JoinUtil.populateJoinKeyValue(joinFilters, conf.getFilters(), posBigTableAlias, hconf);
    joinFilterObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinFilters, inputObjInspectors, posBigTableAlias, tagLen);

    mapJoinTables = new MapJoinPersistableTableContainer[tagLen];
    mapJoinTableSerdes = new MapJoinTableContainerSerDe[tagLen];

    // A non-positive configured scale falls back to 1.
    hashTableScale = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVEHASHTABLESCALE);
    if (hashTableScale <= 0) {
        hashTableScale = 1;
    }

    try {
        // One key SerDe context is shared by every small table.
        TableDesc keyTableDesc = conf.getKeyTblDesc();
        AbstractSerDe keySerDe = (AbstractSerDe) ReflectionUtils.newInstance(keyTableDesc.getSerDeClass(), null);
        keySerDe.initialize(null, keyTableDesc.getProperties(), null);
        MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerDe, false);
        for (Byte pos : order) {
            // The big table gets neither a hash table nor a SerDe entry here.
            if (pos == posBigTableAlias) {
                continue;
            }
            mapJoinTables[pos] = new HashMapWrapper(hconf, -1);
            TableDesc valueTableDesc = conf.getValueTblFilteredDescs().get(pos);
            AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtils.newInstance(valueTableDesc.getSerDeClass(), null);
            valueSerDe.initialize(null, valueTableDesc.getProperties(), null);
            mapJoinTableSerdes[pos] = new MapJoinTableContainerSerDe(keyContext, new MapJoinObjectSerDeContext(valueSerDe, hasFilter(pos)));
        }
    } catch (SerDeException e) {
        throw new HiveException(e);
    }
}
Use of org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe in the Apache Hive project.
From the class MapJoinTestConfig, method createNativeVectorMapJoin.
/**
 * Creates a native vectorized map-join operator for tests and returns it together with the
 * small-table container and, for the OPTIMIZED implementation, the container SerDe.
 *
 * <p>The container is always filled through {@code loadTableContainerData}. Before
 * returning, {@code HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD} is set to {@code true} on
 * {@code testDesc.hiveConf}.
 *
 * @param testDesc test description; supplies the HiveConf and the map-join variation
 * @param testData test data; the size of its {@code smallTableKeyHashMap} is passed to the
 *        container constructors
 * @param mapJoinDesc map-join descriptor that receives the vector descriptor
 * @param hashTableImplementationType implementation to build; only OPTIMIZED and FAST are handled
 * @param shareMapJoinTableContainer not read by this method
 * @return the operator, the loaded container and the SerDe ({@code null} for FAST)
 * @throws RuntimeException if the implementation type is neither OPTIMIZED nor FAST
 */
public static CreateMapJoinResult createNativeVectorMapJoin(MapJoinTestDescription testDesc, MapJoinTestData testData, MapJoinDesc mapJoinDesc, HashTableImplementationType hashTableImplementationType, MapJoinTableContainer shareMapJoinTableContainer) throws SerDeException, IOException, HiveException {
    VectorMapJoinDesc vectorDesc = MapJoinTestConfig.createVectorMapJoinDesc(testDesc);
    mapJoinDesc.setVectorDesc(vectorDesc);
    vectorDesc.setHashTableImplementationType(hashTableImplementationType);
    VectorMapJoinInfo joinInfo = vectorDesc.getVectorMapJoinInfo();

    // Only the OPTIMIZED path produces a SerDe; it stays null for FAST.
    HashTableImplementationType implType = vectorDesc.getHashTableImplementationType();
    MapJoinTableContainer smallTable;
    MapJoinTableContainerSerDe containerSerDe = null;
    switch (implType) {
        case OPTIMIZED:
            smallTable = new MapJoinBytesTableContainer(testDesc.hiveConf, null, testData.smallTableKeyHashMap.size(), 0);
            containerSerDe = MapJoinTestConfig.createMapJoinTableContainerSerDe(mapJoinDesc);
            smallTable.setSerde(containerSerDe.getKeyContext(), containerSerDe.getValueContext());
            break;
        case FAST:
            smallTable = new VectorMapJoinFastTableContainer(mapJoinDesc, testDesc.hiveConf, testData.smallTableKeyHashMap.size(), 1);
            break;
        default:
            throw new RuntimeException("Unexpected hash table implementation type " + implType);
    }

    // The data is loaded fresh on every call; shareMapJoinTableContainer is not consulted.
    loadTableContainerData(testDesc, testData, smallTable);

    // Vectorize the big-table key and value expressions and record them on the join info.
    VectorizationContext vContext = MapJoinTestConfig.createVectorizationContext(testDesc);
    byte bigTablePos = (byte) mapJoinDesc.getPosBigTable();
    joinInfo.setSlimmedBigTableKeyExpressions(vContext.getVectorExpressions(mapJoinDesc.getKeys().get(bigTablePos)));
    joinInfo.setSlimmedBigTableValueExpressions(vContext.getVectorExpressions(mapJoinDesc.getExprs().get(bigTablePos)));

    VectorMapJoinCommonOperator mapJoinOperator = MapJoinTestConfig.createNativeVectorMapJoinOperator(testDesc.vectorMapJoinVariation, mapJoinDesc, vectorDesc, vContext);

    // NOTE(review): presumably set so the operator skips its own hash table load and the
    // caller installs the returned container instead — confirm against the call sites.
    HiveConf.setBoolVar(testDesc.hiveConf, HiveConf.ConfVars.HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD, true);
    return new CreateMapJoinResult(mapJoinOperator, smallTable, containerSerDe);
}
Aggregations