use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.
In the class HashTableSinkOperator, the method initializeOp reads the key and per-alias value TableDescs to instantiate and initialize the SerDes that back the small-table hash containers.
@Override
@SuppressWarnings("unchecked")
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  boolean isSilent = HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVESESSIONSILENT);
  console = new LogHelper(LOG, isSilent);
  memoryExhaustionHandler = new MapJoinMemoryExhaustionHandler(console, conf.getHashtableMemoryUsage());
  emptyRowContainer.addRow(emptyObjectArray);
  // for small tables only; so get the big table position first
  posBigTableAlias = conf.getPosBigTable();
  order = conf.getTagOrder();
  // initialize some variables, which used to be initialized in CommonJoinOperator
  this.hconf = hconf;
  filterMaps = conf.getFilterMap();
  int tagLen = conf.getTagLength();
  // process join keys
  joinKeys = new List[tagLen];
  JoinUtil.populateJoinKeyValue(joinKeys, conf.getKeys(), posBigTableAlias, hconf);
  joinKeysObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinKeys, inputObjInspectors, posBigTableAlias, tagLen);
  // process join values
  joinValues = new List[tagLen];
  JoinUtil.populateJoinKeyValue(joinValues, conf.getExprs(), posBigTableAlias, hconf);
  joinValuesObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinValues, inputObjInspectors, posBigTableAlias, tagLen);
  // process join filters
  joinFilters = new List[tagLen];
  JoinUtil.populateJoinKeyValue(joinFilters, conf.getFilters(), posBigTableAlias, hconf);
  joinFilterObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinFilters, inputObjInspectors, posBigTableAlias, tagLen);
  if (!conf.isNoOuterJoin()) {
    for (Byte alias : order) {
      if (alias == posBigTableAlias || joinValues[alias] == null) {
        continue;
      }
      List<ObjectInspector> rcOIs = joinValuesObjectInspectors[alias];
      if (filterMaps != null && filterMaps[alias] != null) {
        // for each alias, add object inspector for filter tag as the last element
        rcOIs = new ArrayList<ObjectInspector>(rcOIs);
        rcOIs.add(PrimitiveObjectInspectorFactory.writableShortObjectInspector);
      }
    }
  }
  mapJoinTables = new MapJoinPersistableTableContainer[tagLen];
  mapJoinTableSerdes = new MapJoinTableContainerSerDe[tagLen];
  hashTableScale = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVEHASHTABLESCALE);
  if (hashTableScale <= 0) {
    hashTableScale = 1;
  }
  try {
    TableDesc keyTableDesc = conf.getKeyTblDesc();
    AbstractSerDe keySerde = (AbstractSerDe) ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
    SerDeUtils.initializeSerDe(keySerde, null, keyTableDesc.getProperties(), null);
    MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerde, false);
    for (Byte pos : order) {
      if (pos == posBigTableAlias) {
        continue;
      }
      mapJoinTables[pos] = new HashMapWrapper(hconf, -1);
      TableDesc valueTableDesc = conf.getValueTblFilteredDescs().get(pos);
      AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null);
      SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null);
      mapJoinTableSerdes[pos] = new MapJoinTableContainerSerDe(keyContext, new MapJoinObjectSerDeContext(valueSerDe, hasFilter(pos)));
    }
  } catch (SerDeException e) {
    throw new HiveException(e);
  }
}
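The recurring TableDesc pattern above — reflectively instantiate the recorded deserializer class, then initialize it from the descriptor's properties — can be isolated into a small helper. A minimal sketch of that pattern, mirroring exactly the calls made in initializeOp (the helper class and method names are hypothetical):

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.util.ReflectionUtils;

// Hypothetical helper; same calls as initializeOp above.
public final class TableDescSerDeHelper {
  public static AbstractSerDe newInitializedSerDe(TableDesc tableDesc) throws HiveException {
    try {
      // TableDesc records which deserializer class to use...
      AbstractSerDe serde =
          (AbstractSerDe) ReflectionUtils.newInstance(tableDesc.getDeserializerClass(), null);
      // ...and the table properties that SerDe needs to configure itself.
      SerDeUtils.initializeSerDe(serde, null, tableDesc.getProperties(), null);
      return serde;
    } catch (SerDeException e) {
      throw new HiveException(e);
    }
  }
}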
use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.
In the class FetchOperator, the method setupOutputObjectInspector obtains the table SerDe and its ObjectInspector from the TableDesc, then decides whether partition rows need conversion to the table schema.
private StructObjectInspector setupOutputObjectInspector() throws HiveException {
  TableDesc tableDesc = work.getTblDesc();
  try {
    tableSerDe = tableDesc.getDeserializer(job, true);
    tableOI = (StructObjectInspector) tableSerDe.getObjectInspector();
    if (!isPartitioned) {
      return getTableRowOI(tableOI);
    }
    partKeyOI = getPartitionKeyOI(tableDesc);
    PartitionDesc partDesc = new PartitionDesc(tableDesc, null);
    List<PartitionDesc> listParts = work.getPartDesc();
    // use T1's schema to get the ObjectInspector.
    if (listParts == null || listParts.isEmpty() || !needConversion(tableDesc, listParts)) {
      return getPartitionedRowOI(tableOI);
    }
    convertedOI = (StructObjectInspector) ObjectInspectorConverters.getConvertedOI(tableOI, tableOI, null, false);
    return getPartitionedRowOI(convertedOI);
  } catch (Exception e) {
    throw new HiveException("Failed with exception " + e.getMessage() + StringUtils.stringifyException(e));
  }
}
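Note that FetchOperator does not wire up the SerDe by hand: TableDesc.getDeserializer constructs and initializes it in one call. A short illustrative sketch using that same call to dump a table's top-level column names (hypothetical helper; assumes a populated TableDesc and JobConf):

import java.util.List;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.mapred.JobConf;

// Illustrative sketch, not Hive source: same getDeserializer call as above.
public final class TableSchemaDump {
  public static void printColumnNames(TableDesc tableDesc, JobConf job) throws Exception {
    Deserializer serde = tableDesc.getDeserializer(job, true);
    StructObjectInspector oi = (StructObjectInspector) serde.getObjectInspector();
    List<? extends StructField> fields = oi.getAllStructFieldRefs();
    for (StructField field : fields) {
      System.out.println(field.getFieldName()); // one line per top-level column
    }
  }
}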
use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.
In the class Vectorizer, the method canSpecializeReduceSink inspects the key and value serialization TableDescs (among other conditions) to decide whether a ReduceSink operator can be replaced by a native vectorized implementation.
private boolean canSpecializeReduceSink(ReduceSinkDesc desc, boolean isTezOrSpark,
    VectorizationContext vContext, VectorReduceSinkInfo vectorReduceSinkInfo) throws HiveException {
  // Allocate a VectorReduceSinkDesc initially with key type NONE so EXPLAIN can report this
  // operator was vectorized, but not native. And, the conditions.
  VectorReduceSinkDesc vectorDesc = new VectorReduceSinkDesc();
  desc.setVectorDesc(vectorDesc);
  boolean isVectorizationReduceSinkNativeEnabled =
      HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED);
  String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
  boolean hasBuckets = desc.getBucketCols() != null && !desc.getBucketCols().isEmpty();
  boolean hasTopN = desc.getTopN() >= 0;
  boolean useUniformHash = desc.getReducerTraits().contains(UNIFORM);
  boolean hasDistinctColumns = desc.getDistinctColumnIndices().size() > 0;
  TableDesc keyTableDesc = desc.getKeySerializeInfo();
  Class<? extends Deserializer> keySerializerClass = keyTableDesc.getDeserializerClass();
  boolean isKeyBinarySortable =
      (keySerializerClass == org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe.class);
  TableDesc valueTableDesc = desc.getValueSerializeInfo();
  Class<? extends Deserializer> valueDeserializerClass = valueTableDesc.getDeserializerClass();
  boolean isValueLazyBinary =
      (valueDeserializerClass == org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.class);
  // Remember the condition variables for EXPLAIN regardless.
  vectorDesc.setIsVectorizationReduceSinkNativeEnabled(isVectorizationReduceSinkNativeEnabled);
  vectorDesc.setEngine(engine);
  vectorDesc.setHasBuckets(hasBuckets);
  vectorDesc.setHasTopN(hasTopN);
  vectorDesc.setUseUniformHash(useUniformHash);
  vectorDesc.setHasDistinctColumns(hasDistinctColumns);
  vectorDesc.setIsKeyBinarySortable(isKeyBinarySortable);
  vectorDesc.setIsValueLazyBinary(isValueLazyBinary);
  // Many restrictions.
  if (!isVectorizationReduceSinkNativeEnabled ||
      !isTezOrSpark ||
      hasBuckets ||
      hasTopN ||
      !useUniformHash ||
      hasDistinctColumns ||
      !isKeyBinarySortable ||
      !isValueLazyBinary) {
    return false;
  }
  // We are doing work here we'd normally do in VectorGroupByCommonOperator's constructor.
  // So if we later decide not to specialize, we'll just waste any scratch columns allocated...
  List<ExprNodeDesc> keysDescs = desc.getKeyCols();
  VectorExpression[] allKeyExpressions = vContext.getVectorExpressions(keysDescs);
  // Since a key expression can be a calculation and the key will go into a scratch column,
  // we need the mapping and type information.
  int[] reduceSinkKeyColumnMap = new int[allKeyExpressions.length];
  TypeInfo[] reduceSinkKeyTypeInfos = new TypeInfo[allKeyExpressions.length];
  Type[] reduceSinkKeyColumnVectorTypes = new Type[allKeyExpressions.length];
  ArrayList<VectorExpression> groupByKeyExpressionsList = new ArrayList<VectorExpression>();
  VectorExpression[] reduceSinkKeyExpressions;
  for (int i = 0; i < reduceSinkKeyColumnMap.length; i++) {
    VectorExpression ve = allKeyExpressions[i];
    reduceSinkKeyColumnMap[i] = ve.getOutputColumn();
    reduceSinkKeyTypeInfos[i] = keysDescs.get(i).getTypeInfo();
    reduceSinkKeyColumnVectorTypes[i] =
        VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkKeyTypeInfos[i]);
    if (!IdentityExpression.isColumnOnly(ve)) {
      groupByKeyExpressionsList.add(ve);
    }
  }
  if (groupByKeyExpressionsList.size() == 0) {
    reduceSinkKeyExpressions = null;
  } else {
    reduceSinkKeyExpressions = groupByKeyExpressionsList.toArray(new VectorExpression[0]);
  }
  ArrayList<ExprNodeDesc> valueDescs = desc.getValueCols();
  VectorExpression[] allValueExpressions = vContext.getVectorExpressions(valueDescs);
  int[] reduceSinkValueColumnMap = new int[valueDescs.size()];
  TypeInfo[] reduceSinkValueTypeInfos = new TypeInfo[valueDescs.size()];
  Type[] reduceSinkValueColumnVectorTypes = new Type[valueDescs.size()];
  ArrayList<VectorExpression> reduceSinkValueExpressionsList = new ArrayList<VectorExpression>();
  VectorExpression[] reduceSinkValueExpressions;
  for (int i = 0; i < valueDescs.size(); ++i) {
    VectorExpression ve = allValueExpressions[i];
    reduceSinkValueColumnMap[i] = ve.getOutputColumn();
    reduceSinkValueTypeInfos[i] = valueDescs.get(i).getTypeInfo();
    reduceSinkValueColumnVectorTypes[i] =
        VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkValueTypeInfos[i]);
    if (!IdentityExpression.isColumnOnly(ve)) {
      reduceSinkValueExpressionsList.add(ve);
    }
  }
  if (reduceSinkValueExpressionsList.size() == 0) {
    reduceSinkValueExpressions = null;
  } else {
    reduceSinkValueExpressions = reduceSinkValueExpressionsList.toArray(new VectorExpression[0]);
  }
  vectorReduceSinkInfo.setReduceSinkKeyColumnMap(reduceSinkKeyColumnMap);
  vectorReduceSinkInfo.setReduceSinkKeyTypeInfos(reduceSinkKeyTypeInfos);
  vectorReduceSinkInfo.setReduceSinkKeyColumnVectorTypes(reduceSinkKeyColumnVectorTypes);
  vectorReduceSinkInfo.setReduceSinkKeyExpressions(reduceSinkKeyExpressions);
  vectorReduceSinkInfo.setReduceSinkValueColumnMap(reduceSinkValueColumnMap);
  vectorReduceSinkInfo.setReduceSinkValueTypeInfos(reduceSinkValueTypeInfos);
  vectorReduceSinkInfo.setReduceSinkValueColumnVectorTypes(reduceSinkValueColumnVectorTypes);
  vectorReduceSinkInfo.setReduceSinkValueExpressions(reduceSinkValueExpressions);
  return true;
}
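Stripped of the bookkeeping, the TableDesc usage in this method is pure metadata inspection: the planner compares the serde classes recorded in the key and value serialization descriptors without ever instantiating a SerDe. A condensed sketch of just those checks (the helper class and method names are hypothetical; every call shown appears in the method above):

import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;

// Condensed from canSpecializeReduceSink: only classes are compared, no SerDe is built.
public final class ReduceSinkSerDeCheck {
  public static boolean usesNativeFriendlySerDes(ReduceSinkDesc desc) {
    TableDesc keyTableDesc = desc.getKeySerializeInfo();
    TableDesc valueTableDesc = desc.getValueSerializeInfo();
    boolean isKeyBinarySortable =
        keyTableDesc.getDeserializerClass() == BinarySortableSerDe.class;
    boolean isValueLazyBinary =
        valueTableDesc.getDeserializerClass() == LazyBinarySerDe.class;
    return isKeyBinarySortable && isValueLazyBinary;
  }
}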
use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.
In the class TestAccumuloStorageHandler, the method testTableJobPropertiesCallsInputAndOutputMethods mocks a TableDesc to verify that configureTableJobProperties delegates to both the input and output configuration methods.
@Test
public void testTableJobPropertiesCallsInputAndOutputMethods() {
  AccumuloStorageHandler mockStorageHandler = Mockito.mock(AccumuloStorageHandler.class);
  TableDesc tableDesc = Mockito.mock(TableDesc.class);
  Map<String, String> jobProperties = new HashMap<String, String>();
  Mockito.doCallRealMethod().when(mockStorageHandler).configureTableJobProperties(tableDesc, jobProperties);
  // configureTableJobProperties shouldn't be getting called by Hive, but, if it somehow does,
  // we should just set all of the configurations for input and output.
  mockStorageHandler.configureTableJobProperties(tableDesc, jobProperties);
  Mockito.verify(mockStorageHandler).configureInputJobProperties(tableDesc, jobProperties);
  Mockito.verify(mockStorageHandler).configureOutputJobProperties(tableDesc, jobProperties);
}
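The Mockito.verify assertions pin down a simple call pattern: whatever reaches the table-level hook must configure both directions. A hedged sketch of the implementation shape the test implies (this is not the actual AccumuloStorageHandler source, only the minimum the test requires):

import java.util.Map;
import org.apache.hadoop.hive.ql.plan.TableDesc;

// Sketch only: the real AccumuloStorageHandler may do more, but the test
// requires at least this two-way delegation.
abstract class DelegatingStorageHandlerSketch {
  abstract void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties);
  abstract void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties);

  void configureTableJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
    configureInputJobProperties(tableDesc, jobProperties);
    configureOutputJobProperties(tableDesc, jobProperties);
  }
}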
use of org.apache.hadoop.hive.ql.plan.TableDesc in project hive by apache.
In the class TestAccumuloStorageHandler, the method testEmptyIteratorPushdownValue stubs a TableDesc whose properties carry an empty iterator-pushdown value and expects an IllegalArgumentException.
@Test(expected = IllegalArgumentException.class)
public void testEmptyIteratorPushdownValue() {
  TableDesc tableDesc = Mockito.mock(TableDesc.class);
  Properties props = new Properties();
  Map<String, String> jobProperties = new HashMap<String, String>();
  props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, "cf:cq1,cf:cq2,cf:cq3");
  props.setProperty(AccumuloSerDeParameters.TABLE_NAME, "table");
  props.setProperty(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY, "");
  Mockito.when(tableDesc.getProperties()).thenReturn(props);
  storageHandler.configureInputJobProperties(tableDesc, jobProperties);
}
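The same stubbing setup can also exercise the happy path. A sketch of that variant, assumed to live in the same test class so the storageHandler field is available; treating "false" as a well-formed pushdown value is an assumption, since the test above only shows that the empty string is rejected:

// Hypothetical companion test; assumes "false" is accepted as a valid
// iterator-pushdown value (only "" is shown above to raise the exception).
@Test
public void testValidIteratorPushdownValue() {
  TableDesc tableDesc = Mockito.mock(TableDesc.class);
  Properties props = new Properties();
  Map<String, String> jobProperties = new HashMap<String, String>();
  props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, "cf:cq1,cf:cq2,cf:cq3");
  props.setProperty(AccumuloSerDeParameters.TABLE_NAME, "table");
  props.setProperty(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY, "false");
  Mockito.when(tableDesc.getProperties()).thenReturn(props);
  // No exception expected: configuration should complete normally.
  storageHandler.configureInputJobProperties(tableDesc, jobProperties);
}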