use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.
the class VectorizedStructColumnReader method readBatch.
@Override
public void readBatch(int total, ColumnVector column, TypeInfo columnType) throws IOException {
  StructColumnVector structColumnVector = (StructColumnVector) column;
  StructTypeInfo structTypeInfo = (StructTypeInfo) columnType;
  ColumnVector[] vectors = structColumnVector.fields;
  for (int i = 0; i < vectors.length; i++) {
    // Each child reader fills its field vector, paired positionally with the field's TypeInfo.
    fieldReaders.get(i).readBatch(total, vectors[i], structTypeInfo.getAllStructFieldTypeInfos().get(i));
    structColumnVector.isRepeating = structColumnVector.isRepeating && vectors[i].isRepeating;
    // Seed the struct's null flags from the first field, then AND in each later field's flags.
    for (int j = 0; j < vectors[i].isNull.length; j++) {
      structColumnVector.isNull[j] = (i == 0) ? vectors[i].isNull[j] : structColumnVector.isNull[j] && vectors[i].isNull[j];
    }
    structColumnVector.noNulls = (i == 0) ? vectors[i].noNulls : structColumnVector.noNulls && vectors[i].noNulls;
  }
}
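For orientation, a minimal sketch (not part of the Hive source) of the pairing the reader relies on: the fields array of a StructColumnVector lines up positionally with StructTypeInfo.getAllStructFieldTypeInfos(). The batch size of 1024 and the struct<id:int,name:string> type are illustrative only.

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class StructBatchSketch {
  public static void main(String[] args) {
    StructTypeInfo structTypeInfo =
        (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("struct<id:int,name:string>");
    // One child vector per struct field, in declaration order.
    ColumnVector[] fields = { new LongColumnVector(1024), new BytesColumnVector(1024) };
    StructColumnVector structColumnVector = new StructColumnVector(1024, fields);
    // readBatch(total, vectors[i], ...) pairs fields[i] with the i-th field TypeInfo.
    System.out.println(structTypeInfo.getAllStructFieldTypeInfos()); // [int, string]
  }
}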
use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.
the class VectorRandomRowSource method getObjectInspector.
public static ObjectInspector getObjectInspector(TypeInfo typeInfo, DataTypePhysicalVariation dataTypePhysicalVariation) {
  final ObjectInspector objectInspector;
  switch (typeInfo.getCategory()) {
    case PRIMITIVE:
      {
        final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
        if (primitiveTypeInfo instanceof DecimalTypeInfo &&
            dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) {
          // DECIMAL_64 stores decimal values in a long vector, so expose a long inspector.
          objectInspector =
              PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(TypeInfoFactory.longTypeInfo);
        } else {
          objectInspector =
              PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo);
        }
      }
      break;
    case MAP:
      {
        final MapTypeInfo mapType = (MapTypeInfo) typeInfo;
        final MapObjectInspector mapInspector = ObjectInspectorFactory.getStandardMapObjectInspector(
            getObjectInspector(mapType.getMapKeyTypeInfo()),
            getObjectInspector(mapType.getMapValueTypeInfo()));
        objectInspector = mapInspector;
      }
      break;
    case LIST:
      {
        final ListTypeInfo listType = (ListTypeInfo) typeInfo;
        final ListObjectInspector listInspector = ObjectInspectorFactory.getStandardListObjectInspector(
            getObjectInspector(listType.getListElementTypeInfo()));
        objectInspector = listInspector;
      }
      break;
    case STRUCT:
      {
        final StructTypeInfo structType = (StructTypeInfo) typeInfo;
        final List<TypeInfo> fieldTypes = structType.getAllStructFieldTypeInfos();
        final List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
        for (TypeInfo fieldType : fieldTypes) {
          fieldInspectors.add(getObjectInspector(fieldType));
        }
        final StructObjectInspector structInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
            structType.getAllStructFieldNames(), fieldInspectors);
        objectInspector = structInspector;
      }
      break;
    case UNION:
      {
        final UnionTypeInfo unionType = (UnionTypeInfo) typeInfo;
        final List<TypeInfo> fieldTypes = unionType.getAllUnionObjectTypeInfos();
        final List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
        for (TypeInfo fieldType : fieldTypes) {
          fieldInspectors.add(getObjectInspector(fieldType));
        }
        final UnionObjectInspector unionInspector =
            ObjectInspectorFactory.getStandardUnionObjectInspector(fieldInspectors);
        objectInspector = unionInspector;
      }
      break;
    default:
      throw new RuntimeException("Unexpected category " + typeInfo.getCategory());
  }
  Preconditions.checkState(objectInspector != null);
  return objectInspector;
}
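A hedged usage sketch of the method above, assuming the VectorRandomRowSource test class and a Hive runtime are on the classpath: resolve a writable inspector for a nested struct type. The type string is illustrative.

import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class GetObjectInspectorSketch {
  public static void main(String[] args) {
    StructTypeInfo typeInfo = (StructTypeInfo)
        TypeInfoUtils.getTypeInfoFromTypeString("struct<a:int,b:map<string,double>>");
    ObjectInspector oi =
        VectorRandomRowSource.getObjectInspector(typeInfo, DataTypePhysicalVariation.NONE);
    System.out.println(oi.getTypeName()); // struct<a:int,b:map<string,double>>
  }
}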
use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.
the class TestVectorBetweenIn method doBetweenStructInVariation.
private boolean doBetweenStructInVariation(Random random, String structTypeName, BetweenInVariation betweenInVariation) throws Exception {
  StructTypeInfo structTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(structTypeName);
  ObjectInspector structObjectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(structTypeInfo);
  final int valueCount = 10 + random.nextInt(10);
  List<Object> valueList = new ArrayList<Object>(valueCount);
  for (int i = 0; i < valueCount; i++) {
    valueList.add(VectorRandomRowSource.randomWritable(random, structTypeInfo, structObjectInspector, DataTypePhysicalVariation.NONE, /* allowNull */ false));
  }
  final boolean isInvert = false;
  // No convenient WritableComparator / WritableComparable available for STRUCT.
  List<Object> compareList = new ArrayList<Object>();
  Set<Integer> includedSet = new HashSet<Integer>();
  final int chooseLimit = 4 + random.nextInt(valueCount / 2);
  int chooseCount = 0;
  // Pick a random subset of the generated struct values to serve as the IN list.
  while (chooseCount < chooseLimit) {
    final int index = random.nextInt(valueCount);
    if (includedSet.contains(index)) {
      continue;
    }
    includedSet.add(index);
    compareList.add(valueList.get(index));
    chooseCount++;
  }
  // ----------------------------------------------------------------------------------------------
  GenerationSpec structGenerationSpec = GenerationSpec.createValueList(structTypeInfo, valueList);
  List<GenerationSpec> structGenerationSpecList = new ArrayList<GenerationSpec>();
  List<DataTypePhysicalVariation> structExplicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
  structGenerationSpecList.add(structGenerationSpec);
  structExplicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
  VectorRandomRowSource structRowSource = new VectorRandomRowSource();
  structRowSource.initGenerationSpecSchema(random, structGenerationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, /* isUnicodeOk */ true, structExplicitDataTypePhysicalVariationList);
  Object[][] structRandomRows = structRowSource.randomRows(100000);
  // ----------------------------------------------------------------------------------------------
  List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
  List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
  List<TypeInfo> fieldTypeInfoList = structTypeInfo.getAllStructFieldTypeInfos();
  final int fieldCount = fieldTypeInfoList.size();
  for (int i = 0; i < fieldCount; i++) {
    GenerationSpec generationSpec = GenerationSpec.createOmitGeneration(fieldTypeInfoList.get(i));
    generationSpecList.add(generationSpec);
    explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
  }
  VectorRandomRowSource rowSource = new VectorRandomRowSource();
  rowSource.initGenerationSpecSchema(random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList);
  Object[][] randomRows = rowSource.randomRows(100000);
  final int rowCount = randomRows.length;
  // Flatten each generated struct value into its per-field row columns.
  for (int r = 0; r < rowCount; r++) {
    List<Object> fieldValueList = (ArrayList) structRandomRows[r][0];
    for (int f = 0; f < fieldCount; f++) {
      randomRows[r][f] = fieldValueList.get(f);
    }
  }
  // ----------------------------------------------------------------------------------------------
  // Currently, STRUCT IN vectorization assumes a GenericUDFStruct.
  List<ObjectInspector> structUdfObjectInspectorList = new ArrayList<ObjectInspector>();
  List<ExprNodeDesc> structUdfChildren = new ArrayList<ExprNodeDesc>(fieldCount);
  List<String> rowColumnNameList = rowSource.columnNames();
  for (int i = 0; i < fieldCount; i++) {
    TypeInfo fieldTypeInfo = fieldTypeInfoList.get(i);
    ExprNodeColumnDesc fieldExpr = new ExprNodeColumnDesc(fieldTypeInfo, rowColumnNameList.get(i), "table", false);
    structUdfChildren.add(fieldExpr);
    ObjectInspector fieldObjectInspector = VectorRandomRowSource.getObjectInspector(fieldTypeInfo, DataTypePhysicalVariation.NONE);
    structUdfObjectInspectorList.add(fieldObjectInspector);
  }
  StandardStructObjectInspector structUdfObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(rowColumnNameList, structUdfObjectInspectorList);
  String structUdfTypeName = structUdfObjectInspector.getTypeName();
  TypeInfo structUdfTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(structUdfTypeName);
  String structFuncText = "struct";
  FunctionInfo fi = FunctionRegistry.getFunctionInfo(structFuncText);
  GenericUDF genericUDF = fi.getGenericUDF();
  ExprNodeDesc col1Expr = new ExprNodeGenericFuncDesc(structUdfObjectInspector, genericUDF, structFuncText, structUdfChildren);
  // ----------------------------------------------------------------------------------------------
  List<String> columns = new ArrayList<String>();
  List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
  children.add(col1Expr);
  for (int i = 0; i < compareList.size(); i++) {
    Object compareObject = compareList.get(i);
    ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(structUdfTypeInfo, VectorRandomRowSource.getNonWritableObject(compareObject, structUdfTypeInfo, structUdfObjectInspector));
    children.add(constDesc);
  }
  for (int i = 0; i < fieldCount; i++) {
    columns.add(rowColumnNameList.get(i));
  }
  String[] columnNames = columns.toArray(new String[0]);
  VectorRandomBatchSource batchSource = VectorRandomBatchSource.createInterestingBatches(random, rowSource, randomRows, null);
  // ----------------------------------------------------------------------------------------------
  final GenericUDF udf = new GenericUDFIn();
  final int compareCount = compareList.size();
  ObjectInspector[] argumentOIs = new ObjectInspector[compareCount];
  for (int i = 0; i < compareCount; i++) {
    argumentOIs[i] = structUdfObjectInspector;
  }
  final ObjectInspector outputObjectInspector = udf.initialize(argumentOIs);
  TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector);
  ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, children);
  return executeTestModesAndVerify(structUdfTypeInfo, betweenInVariation, compareList, columns, columnNames, children, udf, exprDesc, randomRows, rowSource, batchSource, outputTypeInfo, /* skipAdaptor */ true);
}
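A minimal sketch of the registry lookup the test performs (assuming a Hive runtime on the classpath): GenericUDFStruct is resolved under the function name "struct", and the constant IN-list entries share the struct type that UDF produces. The struct<c0:int,c1:string> type string is illustrative.

import org.apache.hadoop.hive.ql.exec.FunctionInfo;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class StructFunctionLookupSketch {
  public static void main(String[] args) throws Exception {
    // "struct" is the registered name of GenericUDFStruct.
    FunctionInfo fi = FunctionRegistry.getFunctionInfo("struct");
    System.out.println(fi.getGenericUDF().getClass().getSimpleName()); // GenericUDFStruct
    StructTypeInfo structTypeInfo = (StructTypeInfo)
        TypeInfoUtils.getTypeInfoFromTypeString("struct<c0:int,c1:string>");
    System.out.println(structTypeInfo.getAllStructFieldNames()); // [c0, c1]
  }
}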
use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.
the class ArrowColumnarBatchSerDe method toField.
private static Field toField(String name, TypeInfo typeInfo) {
  switch (typeInfo.getCategory()) {
    case PRIMITIVE:
      final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
      switch (primitiveTypeInfo.getPrimitiveCategory()) {
        case BOOLEAN:
          return Field.nullable(name, MinorType.BIT.getType());
        case BYTE:
          return Field.nullable(name, MinorType.TINYINT.getType());
        case SHORT:
          return Field.nullable(name, MinorType.SMALLINT.getType());
        case INT:
          return Field.nullable(name, MinorType.INT.getType());
        case LONG:
          return Field.nullable(name, MinorType.BIGINT.getType());
        case FLOAT:
          return Field.nullable(name, MinorType.FLOAT4.getType());
        case DOUBLE:
          return Field.nullable(name, MinorType.FLOAT8.getType());
        case STRING:
        case VARCHAR:
        case CHAR:
          return Field.nullable(name, MinorType.VARCHAR.getType());
        case DATE:
          return Field.nullable(name, MinorType.DATEDAY.getType());
        case TIMESTAMP:
          return Field.nullable(name, MinorType.TIMESTAMPMILLI.getType());
        case TIMESTAMPLOCALTZ:
          final TimestampLocalTZTypeInfo timestampLocalTZTypeInfo = (TimestampLocalTZTypeInfo) typeInfo;
          final String timeZone = timestampLocalTZTypeInfo.getTimeZone().toString();
          return Field.nullable(name, new ArrowType.Timestamp(TimeUnit.MILLISECOND, timeZone));
        case BINARY:
          return Field.nullable(name, MinorType.VARBINARY.getType());
        case DECIMAL:
          final DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
          final int precision = decimalTypeInfo.precision();
          final int scale = decimalTypeInfo.scale();
          return Field.nullable(name, new ArrowType.Decimal(precision, scale));
        case INTERVAL_YEAR_MONTH:
          return Field.nullable(name, MinorType.INTERVALYEAR.getType());
        case INTERVAL_DAY_TIME:
          return Field.nullable(name, MinorType.INTERVALDAY.getType());
        default:
          throw new IllegalArgumentException();
      }
    case LIST:
      final ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
      final TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo();
      return new Field(name, FieldType.nullable(MinorType.LIST.getType()),
          Lists.newArrayList(toField(DEFAULT_ARROW_FIELD_NAME, elementTypeInfo)));
    case STRUCT:
      final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
      final List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
      final List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
      final List<Field> structFields = Lists.newArrayList();
      final int structSize = fieldNames.size();
      for (int i = 0; i < structSize; i++) {
        structFields.add(toField(fieldNames.get(i), fieldTypeInfos.get(i)));
      }
      return new Field(name, FieldType.nullable(MinorType.STRUCT.getType()), structFields);
    case UNION:
      final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
      final List<TypeInfo> objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
      final List<Field> unionFields = Lists.newArrayList();
      // Size from the union's member types, not from the still-empty unionFields list,
      // which would leave the loop dead and the UNION field childless.
      final int unionSize = objectTypeInfos.size();
      for (int i = 0; i < unionSize; i++) {
        unionFields.add(toField(DEFAULT_ARROW_FIELD_NAME, objectTypeInfos.get(i)));
      }
      return new Field(name, FieldType.nullable(MinorType.UNION.getType()), unionFields);
    case MAP:
      // Hive maps are encoded as an Arrow list of non-nullable key/value structs.
      final MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
      final TypeInfo keyTypeInfo = mapTypeInfo.getMapKeyTypeInfo();
      final TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo();
      final List<Field> mapFields = Lists.newArrayList();
      mapFields.add(toField(name + "_keys", keyTypeInfo));
      mapFields.add(toField(name + "_values", valueTypeInfo));
      FieldType struct = new FieldType(false, new ArrowType.Struct(), null);
      List<Field> childrenOfList = Lists.newArrayList(new Field(name, struct, mapFields));
      return new Field(name, FieldType.nullable(MinorType.LIST.getType()), childrenOfList);
    default:
      throw new IllegalArgumentException();
  }
}
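A hedged sketch (not Hive code) of the Arrow Field the STRUCT branch above produces for a struct<x:int,y:string> column: a nullable STRUCT field whose children are the nullable per-field Arrow types. The field names are illustrative.

import java.util.Arrays;
import org.apache.arrow.vector.types.Types.MinorType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;

public class ArrowStructFieldSketch {
  public static void main(String[] args) {
    Field x = Field.nullable("x", MinorType.INT.getType());
    Field y = Field.nullable("y", MinorType.VARCHAR.getType());
    Field struct = new Field("s", FieldType.nullable(MinorType.STRUCT.getType()), Arrays.asList(x, y));
    System.out.println(struct); // prints something like: s: Struct<x: Int(32, true), y: Utf8>
  }
}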
use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.
the class Serializer method writeStruct.
private void writeStruct(NonNullableStructVector arrowVector, StructColumnVector hiveVector, StructTypeInfo typeInfo, int size, VectorizedRowBatch vectorizedRowBatch, boolean isNative, boolean isMapDataType) {
  final List<String> fieldNames = typeInfo.getAllStructFieldNames();
  final List<TypeInfo> fieldTypeInfos = typeInfo.getAllStructFieldTypeInfos();
  final ColumnVector[] hiveFieldVectors = hiveVector == null ? null : hiveVector.fields;
  final int fieldSize = fieldTypeInfos.size();
  // Propagate parent struct nulls down to the field vectors;
  // see https://issues.apache.org/jira/browse/HIVE-25243
  if (hiveVector != null && hiveFieldVectors != null) {
    for (int i = 0; i < size; i++) {
      if (hiveVector.isNull[i]) {
        for (ColumnVector fieldVector : hiveFieldVectors) {
          fieldVector.isNull[i] = true;
          fieldVector.noNulls = false;
        }
      }
    }
  }
  for (int fieldIndex = 0; fieldIndex < fieldSize; fieldIndex++) {
    final TypeInfo fieldTypeInfo = fieldTypeInfos.get(fieldIndex);
    final ColumnVector hiveFieldVector = hiveVector == null ? null : hiveFieldVectors[fieldIndex];
    final String fieldName = fieldNames.get(fieldIndex);
    // If the call is coming from writeMap(), the structs within the list type should be non-nullable.
    FieldType elementFieldType = isMapDataType
        ? new FieldType(false, toArrowType(fieldTypeInfo), null)
        : toFieldType(fieldTypeInfos.get(fieldIndex));
    final FieldVector arrowFieldVector = arrowVector.addOrGet(fieldName, elementFieldType, FieldVector.class);
    arrowFieldVector.setInitialCapacity(size);
    arrowFieldVector.allocateNew();
    write(arrowFieldVector, hiveFieldVector, fieldTypeInfo, size, vectorizedRowBatch, isNative);
  }
  // Set the struct-level validity bits: 0 for null rows, 1 otherwise.
  for (int rowIndex = 0; rowIndex < size; rowIndex++) {
    if (hiveVector == null || hiveVector.isNull[rowIndex]) {
      BitVectorHelper.setValidityBit(arrowVector.getValidityBuffer(), rowIndex, 0);
    } else {
      BitVectorHelper.setValidityBitToOne(arrowVector.getValidityBuffer(), rowIndex);
    }
  }
}
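A minimal sketch (illustrative, not the Serializer itself) of the HIVE-25243 null propagation above: when a struct row is null, each child vector is marked null too, so stale child values are never serialized for that row. The four-row batch and single long field are assumptions for the example.

import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;

public class StructNullPropagationSketch {
  public static void main(String[] args) {
    StructColumnVector struct = new StructColumnVector(4, new LongColumnVector(4));
    struct.noNulls = false;
    struct.isNull[2] = true; // row 2 of the struct is null
    // Push the parent's null flags down to every field vector.
    for (int i = 0; i < 4; i++) {
      if (struct.isNull[i]) {
        for (ColumnVector field : struct.fields) {
          field.isNull[i] = true;
          field.noNulls = false;
        }
      }
    }
    System.out.println(struct.fields[0].isNull[2]); // true
  }
}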