Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.
In class VectorRandomRowSource, method getObjectInspector:
private ObjectInspector getObjectInspector(TypeInfo typeInfo) {
  final ObjectInspector objectInspector;
  switch (typeInfo.getCategory()) {
  case PRIMITIVE:
    {
      final PrimitiveTypeInfo primitiveType = (PrimitiveTypeInfo) typeInfo;
      objectInspector =
          PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveType);
    }
    break;
  case MAP:
    {
      final MapTypeInfo mapType = (MapTypeInfo) typeInfo;
      final MapObjectInspector mapInspector =
          ObjectInspectorFactory.getStandardMapObjectInspector(
              getObjectInspector(mapType.getMapKeyTypeInfo()),
              getObjectInspector(mapType.getMapValueTypeInfo()));
      objectInspector = mapInspector;
    }
    break;
  case LIST:
    {
      final ListTypeInfo listType = (ListTypeInfo) typeInfo;
      final ListObjectInspector listInspector =
          ObjectInspectorFactory.getStandardListObjectInspector(
              getObjectInspector(listType.getListElementTypeInfo()));
      objectInspector = listInspector;
    }
    break;
  case STRUCT:
    {
      final StructTypeInfo structType = (StructTypeInfo) typeInfo;
      final List<TypeInfo> fieldTypes = structType.getAllStructFieldTypeInfos();
      final List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
      for (TypeInfo fieldType : fieldTypes) {
        fieldInspectors.add(getObjectInspector(fieldType));
      }
      final StructObjectInspector structInspector =
          ObjectInspectorFactory.getStandardStructObjectInspector(
              structType.getAllStructFieldNames(), fieldInspectors);
      objectInspector = structInspector;
    }
    break;
  case UNION:
    {
      final UnionTypeInfo unionType = (UnionTypeInfo) typeInfo;
      final List<TypeInfo> fieldTypes = unionType.getAllUnionObjectTypeInfos();
      final List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
      for (TypeInfo fieldType : fieldTypes) {
        fieldInspectors.add(getObjectInspector(fieldType));
      }
      final UnionObjectInspector unionInspector =
          ObjectInspectorFactory.getStandardUnionObjectInspector(fieldInspectors);
      objectInspector = unionInspector;
    }
    break;
  default:
    throw new RuntimeException("Unexpected category " + typeInfo.getCategory());
  }
  Preconditions.checkState(objectInspector != null);
  return objectInspector;
}
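This recursion — primitive writable inspectors at the leaves, standard map/list/struct/union inspectors composed above them — matches what TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo already provides in the serde2 library. A minimal, self-contained sketch of the same round trip, assuming the Hive serde2 classes are on the classpath:

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class StructOIDemo {
  public static void main(String[] args) {
    // Parse a nested type string into a TypeInfo tree.
    TypeInfo typeInfo =
        TypeInfoUtils.getTypeInfoFromTypeString("struct<a:int,b:array<string>>");
    // Walks the TypeInfo tree the same way getObjectInspector above does.
    ObjectInspector oi =
        TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo);
    System.out.println(oi.getTypeName()); // struct<a:int,b:array<string>>
  }
}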
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.
In class VectorRandomRowSource, method chooseSchema:
private void chooseSchema(SupportedTypes supportedTypes, int maxComplexDepth) {
  HashSet<Integer> hashSet = null;
  final boolean allTypes;
  final boolean onlyOne = (r.nextInt(100) == 7);
  if (onlyOne) {
    columnCount = 1;
    allTypes = false;
  } else {
    allTypes = r.nextBoolean();
    if (allTypes) {
      switch (supportedTypes) {
      case ALL:
        columnCount = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length;
        break;
      case ALL_EXCEPT_MAP:
        columnCount =
            possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length - 1;
        break;
      case PRIMITIVES:
        columnCount = possibleHivePrimitiveTypeNames.length;
        break;
      }
      hashSet = new HashSet<Integer>();
    } else {
      columnCount = 1 + r.nextInt(20);
    }
  }
  typeNames = new ArrayList<String>(columnCount);
  categories = new Category[columnCount];
  typeInfos = new TypeInfo[columnCount];
  objectInspectorList = new ArrayList<ObjectInspector>(columnCount);
  primitiveCategories = new PrimitiveCategory[columnCount];
  primitiveTypeInfos = new PrimitiveTypeInfo[columnCount];
  primitiveObjectInspectorList = new ArrayList<ObjectInspector>(columnCount);
  List<String> columnNames = new ArrayList<String>(columnCount);
  for (int c = 0; c < columnCount; c++) {
    columnNames.add(String.format("col%d", c));
    final String typeName;
    if (onlyOne) {
      typeName = getRandomTypeName(supportedTypes);
    } else {
      int typeNum;
      if (allTypes) {
        int maxTypeNum = 0;
        switch (supportedTypes) {
        case PRIMITIVES:
          maxTypeNum = possibleHivePrimitiveTypeNames.length;
          break;
        case ALL_EXCEPT_MAP:
          maxTypeNum =
              possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length - 1;
          break;
        case ALL:
          maxTypeNum =
              possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length;
          break;
        }
        // Draw type numbers without replacement so every type appears exactly once.
        while (true) {
          typeNum = r.nextInt(maxTypeNum);
          if (hashSet.add(typeNum)) {
            break;
          }
        }
      } else {
        if (supportedTypes == SupportedTypes.PRIMITIVES || r.nextInt(10) != 0) {
          typeNum = r.nextInt(possibleHivePrimitiveTypeNames.length);
        } else {
          typeNum = possibleHivePrimitiveTypeNames.length
              + r.nextInt(possibleHiveComplexTypeNames.length);
          if (supportedTypes == SupportedTypes.ALL_EXCEPT_MAP) {
            typeNum--;
          }
        }
      }
      if (typeNum < possibleHivePrimitiveTypeNames.length) {
        typeName = possibleHivePrimitiveTypeNames[typeNum];
      } else {
        typeName = possibleHiveComplexTypeNames[typeNum - possibleHivePrimitiveTypeNames.length];
      }
    }
    String decoratedTypeName = getDecoratedTypeName(typeName, supportedTypes, 0, maxComplexDepth);
    final TypeInfo typeInfo;
    try {
      typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(decoratedTypeName);
    } catch (Exception e) {
      throw new RuntimeException("Cannot convert type name " + decoratedTypeName + " to a type", e);
    }
    typeInfos[c] = typeInfo;
    final Category category = typeInfo.getCategory();
    categories[c] = category;
    ObjectInspector objectInspector = getObjectInspector(typeInfo);
    switch (category) {
    case PRIMITIVE:
      {
        final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
        objectInspector =
            PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo);
        primitiveTypeInfos[c] = primitiveTypeInfo;
        PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
        primitiveCategories[c] = primitiveCategory;
        primitiveObjectInspectorList.add(objectInspector);
      }
      break;
    case LIST:
    case MAP:
    case STRUCT:
    case UNION:
      primitiveObjectInspectorList.add(null);
      break;
    default:
      throw new RuntimeException("Unexpected category " + category);
    }
    objectInspectorList.add(objectInspector);
    typeNames.add(decoratedTypeName);
  }
  rowStructObjectInspector =
      ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, objectInspectorList);
  alphabets = new String[columnCount];
}
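The rowStructObjectInspector assigned on the last line is a plain StandardStructObjectInspector over the generated columns. A minimal sketch of building and inspecting one outside the test harness; the column names and types here are illustrative, not taken from the source:

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class RowOIDemo {
  public static void main(String[] args) {
    StructObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("col0", "col1"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.writableIntObjectInspector,
            PrimitiveObjectInspectorFactory.writableStringObjectInspector));
    // The struct OI exposes the schema it was built from.
    for (StructField field : rowOI.getAllStructFieldRefs()) {
      System.out.println(field.getFieldName() + " : "
          + field.getFieldObjectInspector().getTypeName());
    }
  }
}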
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.
In class DemuxOperator, method initializeOp:
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  // A DemuxOperator should have at least one child.
  if (childOperatorsArray.length == 0) {
    throw new HiveException(
        "Expected number of children is at least 1. Found : " + childOperatorsArray.length);
  }
  newTagToOldTag = toArray(conf.getNewTagToOldTag());
  newTagToChildIndex = toArray(conf.getNewTagToChildIndex());
  childInputObjInspectors = new ObjectInspector[childOperators.size()][];
  cntrs = new long[newTagToOldTag.length];
  nextCntrs = new long[newTagToOldTag.length];
  try {
    // Build a key/value input object inspector for every new tag and store it
    // in childInputObjInspectors, indexed by child index and old tag.
    for (int i = 0; i < newTagToOldTag.length; i++) {
      int newTag = i;
      int oldTag = newTagToOldTag[i];
      int childIndex = newTagToChildIndex[newTag];
      cntrs[newTag] = 0;
      nextCntrs[newTag] = 0;
      TableDesc keyTableDesc = conf.getKeysSerializeInfos().get(newTag);
      Deserializer inputKeyDeserializer =
          ReflectionUtil.newInstance(keyTableDesc.getDeserializerClass(), null);
      SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
      TableDesc valueTableDesc = conf.getValuesSerializeInfos().get(newTag);
      Deserializer inputValueDeserializer =
          ReflectionUtil.newInstance(valueTableDesc.getDeserializerClass(), null);
      SerDeUtils.initializeSerDe(inputValueDeserializer, null, valueTableDesc.getProperties(), null);
      List<ObjectInspector> oi = new ArrayList<ObjectInspector>();
      oi.add(inputKeyDeserializer.getObjectInspector());
      oi.add(inputValueDeserializer.getObjectInspector());
      int childParentsCount = conf.getChildIndexToOriginalNumParents().get(childIndex);
      // The inspector array for a child is created lazily, the first time its
      // childIndex is seen.
      if (childInputObjInspectors[childIndex] == null) {
        childInputObjInspectors[childIndex] = new ObjectInspector[childParentsCount];
      }
      ObjectInspector[] ois = childInputObjInspectors[childIndex];
      ois[oldTag] = ObjectInspectorFactory.getStandardStructObjectInspector(
          Utilities.reduceFieldNameList, oi);
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  childrenDone = 0;
  newChildOperatorsTag = new int[childOperators.size()][];
  for (int i = 0; i < childOperators.size(); i++) {
    Operator<? extends OperatorDesc> child = childOperators.get(i);
    List<Integer> childOperatorTags = new ArrayList<Integer>();
    if (child instanceof MuxOperator) {
      // This DemuxOperator can appear multiple times in a MuxOperator's
      // parentOperators list.
      int index = 0;
      for (Operator<? extends OperatorDesc> parent : child.getParentOperators()) {
        if (this == parent) {
          childOperatorTags.add(index);
        }
        index++;
      }
    } else {
      childOperatorTags.add(child.getParentOperators().indexOf(this));
    }
    newChildOperatorsTag[i] = toArray(childOperatorTags);
  }
  if (LOG.isInfoEnabled()) {
    LOG.info("newChildOperatorsTag " + Arrays.deepToString(newChildOperatorsTag));
  }
}
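Each slot of childInputObjInspectors pairs a key inspector and a value inspector under the field names in Utilities.reduceFieldNameList. A minimal sketch of that two-field row shape; the literal "KEY"/"VALUE" names below are stand-ins for whatever reduceFieldNameList actually holds, and the helper is hypothetical:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public final class ReduceRowOISketch {
  // keyOI/valueOI would come from the key and value deserializers, as above.
  static StructObjectInspector reduceRowOI(ObjectInspector keyOI, ObjectInspector valueOI) {
    List<ObjectInspector> ois = new ArrayList<ObjectInspector>();
    ois.add(keyOI);
    ois.add(valueOI);
    // Stand-in field names; the real code passes Utilities.reduceFieldNameList.
    return ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("KEY", "VALUE"), ois);
  }
}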
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.
In class VectorSelectOperator, method initializeOp:
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  // Just forward the row as is.
  if (conf.isSelStarNoCompute()) {
    return;
  }
  VectorExpression.doTransientInit(vExpressions);
  List<ObjectInspector> objectInspectors = new ArrayList<ObjectInspector>();
  List<ExprNodeDesc> colList = conf.getColList();
  valueWriters = VectorExpressionWriterFactory.getExpressionWriters(colList);
  for (VectorExpressionWriter vew : valueWriters) {
    objectInspectors.add(vew.getObjectInspector());
  }
  List<String> outputFieldNames = conf.getOutputColumnNames();
  outputObjInspector =
      ObjectInspectorFactory.getStandardStructObjectInspector(outputFieldNames, objectInspectors);
}
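getStandardStructObjectInspector pairs names and inspectors positionally, so outputFieldNames and objectInspectors must stay in lockstep. A small sketch with a hypothetical guard (not in the original) that makes the contract explicit before delegating to the factory:

import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public final class OutputRowOI {
  static StructObjectInspector build(List<String> names, List<ObjectInspector> ois) {
    // Hypothetical guard: fail fast on mismatched lists instead of
    // mispairing column names with inspectors downstream.
    if (names.size() != ois.size()) {
      throw new IllegalArgumentException(
          "names size " + names.size() + " != inspectors size " + ois.size());
    }
    return ObjectInspectorFactory.getStandardStructObjectInspector(names, ois);
  }
}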
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.
In class ReduceRecordSource, method init:
void init(JobConf jconf, Operator<?> reducer, boolean vectorized, TableDesc keyTableDesc,
    TableDesc valueTableDesc, Reader reader, boolean handleGroupKey, byte tag,
    VectorizedRowBatchCtx batchContext, long vectorizedVertexNum,
    int vectorizedTestingReducerBatchSize) throws Exception {
  this.vectorizedVertexNum = vectorizedVertexNum;
  if (vectorizedTestingReducerBatchSize > VectorizedRowBatch.DEFAULT_SIZE) {
    // For now, we don't go higher than the default batch size unless we do more work
    // to verify every vectorized operator downstream can handle a larger batch size.
    vectorizedTestingReducerBatchSize = VectorizedRowBatch.DEFAULT_SIZE;
  }
  this.vectorizedTestingReducerBatchSize = vectorizedTestingReducerBatchSize;
  ObjectInspector keyObjectInspector;
  this.reducer = reducer;
  this.vectorized = vectorized;
  this.keyTableDesc = keyTableDesc;
  if (reader instanceof KeyValueReader) {
    this.reader = new KeyValuesFromKeyValue((KeyValueReader) reader);
  } else {
    this.reader = new KeyValuesFromKeyValues((KeyValuesReader) reader);
  }
  this.handleGroupKey = handleGroupKey;
  this.tag = tag;
  try {
    inputKeyDeserializer =
        ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
    SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
    keyObjectInspector = inputKeyDeserializer.getObjectInspector();
    if (vectorized) {
      keyStructInspector = (StructObjectInspector) keyObjectInspector;
      firstValueColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
    }
    // We should initialize the SerDe with the TypeInfo when available.
    this.valueTableDesc = valueTableDesc;
    inputValueDeserializer =
        (AbstractSerDe) ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null);
    SerDeUtils.initializeSerDe(inputValueDeserializer, null, valueTableDesc.getProperties(), null);
    valueObjectInspector = inputValueDeserializer.getObjectInspector();
    ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
    if (vectorized) {
      // Vectorization only works with struct object inspectors.
      valueStructInspectors = (StructObjectInspector) valueObjectInspector;
      final int totalColumns =
          firstValueColumnOffset + valueStructInspectors.getAllStructFieldRefs().size();
      rowObjectInspector =
          Utilities.constructVectorizedReduceRowOI(keyStructInspector, valueStructInspectors);
      batch = batchContext.createVectorizedRowBatch();
      // Set up vectorized deserialization for the key and value.
      BinarySortableSerDe binarySortableSerDe = (BinarySortableSerDe) inputKeyDeserializer;
      keyBinarySortableDeserializeToRow =
          new VectorDeserializeRow<BinarySortableDeserializeRead>(
              new BinarySortableDeserializeRead(
                  VectorizedBatchUtil.typeInfosFromStructObjectInspector(keyStructInspector),
                  /* useExternalBuffer */ true,
                  binarySortableSerDe.getSortOrders(),
                  binarySortableSerDe.getNullMarkers(),
                  binarySortableSerDe.getNotNullMarkers()));
      keyBinarySortableDeserializeToRow.init(0);
      final int valuesSize = valueStructInspectors.getAllStructFieldRefs().size();
      if (valuesSize > 0) {
        valueLazyBinaryDeserializeToRow =
            new VectorDeserializeRow<LazyBinaryDeserializeRead>(
                new LazyBinaryDeserializeRead(
                    VectorizedBatchUtil.typeInfosFromStructObjectInspector(valueStructInspectors),
                    /* useExternalBuffer */ true));
        valueLazyBinaryDeserializeToRow.init(firstValueColumnOffset);
        // Create data buffers for value bytes column vectors.
        for (int i = firstValueColumnOffset; i < batch.numCols; i++) {
          ColumnVector colVector = batch.cols[i];
          if (colVector instanceof BytesColumnVector) {
            BytesColumnVector bytesColumnVector = (BytesColumnVector) colVector;
            bytesColumnVector.initBuffer();
          }
        }
      }
    } else {
      ois.add(keyObjectInspector);
      ois.add(valueObjectInspector);
      rowObjectInspector =
          ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, ois);
    }
  } catch (Throwable e) {
    abort = true;
    if (e instanceof OutOfMemoryError) {
      // Don't create a new object if we are already out of memory.
      throw (OutOfMemoryError) e;
    } else {
      throw new RuntimeException("Reduce operator initialization failed", e);
    }
  }
  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS);
}
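In the non-vectorized branch, rowObjectInspector is again a standard struct inspector over the key and value inspectors. Because the factory returns a settable StandardStructObjectInspector, it can also build and read rows directly; a minimal sketch assuming the Hive serde2 and Hadoop io classes, with illustrative field names:

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

public class StandardStructRowDemo {
  public static void main(String[] args) {
    StandardStructObjectInspector rowOI =
        ObjectInspectorFactory.getStandardStructObjectInspector(
            Arrays.asList("key", "value"),
            Arrays.<ObjectInspector>asList(
                PrimitiveObjectInspectorFactory.writableIntObjectInspector,
                PrimitiveObjectInspectorFactory.writableStringObjectInspector));
    // StandardStructObjectInspector is settable: create a row and fill it in.
    Object row = rowOI.create();
    StructField key = rowOI.getStructFieldRef("key");
    StructField value = rowOI.getStructFieldRef("value");
    rowOI.setStructFieldData(row, key, new IntWritable(7));
    rowOI.setStructFieldData(row, value, new Text("seven"));
    // Read a field back through the same inspector.
    System.out.println(rowOI.getStructFieldData(row, value)); // seven
  }
}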