Search in sources:

Example 86 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.

From the class TestVectorStructField, method doStructFieldTests:

/**
 * Generates a random struct type (fields drawn from all supported types, nested at most
 * two levels deep) and runs a field-extraction test against every field of that struct.
 *
 * @param random source of randomness for type generation
 * @throws Exception if any per-field test fails
 */
private void doStructFieldTests(Random random) throws Exception {
    final String structTypeName =
            VectorRandomRowSource.getDecoratedTypeName(
                    random,
                    "struct",
                    SupportedTypes.ALL,
                    /* allowedTypeNameSet= */ null,
                    /* depth= */ 0,
                    /* maxDepth= */ 2);
    final StructTypeInfo structTypeInfo =
            (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(structTypeName);
    final List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
    for (int i = 0; i < fieldNames.size(); i++) {
        doOneStructFieldTest(random, structTypeInfo, structTypeName, i);
    }
}
Also used : StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)

Example 87 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.

From the class ArrowColumnarBatchSerDe, method toStructListTypeInfo:

/**
 * Converts a Hive map type into the equivalent Arrow-friendly representation:
 * a list of {@code struct<key, value>} entries, since the map is serialized as
 * a list of key/value structs.
 *
 * @param mapTypeInfo the Hive map type to convert
 * @return a list type whose element is a two-field struct ("key", "value")
 */
static ListTypeInfo toStructListTypeInfo(MapTypeInfo mapTypeInfo) {
    // Build the struct<key, value> element type from the map's key/value types.
    final StructTypeInfo keyValueStruct = new StructTypeInfo();
    keyValueStruct.setAllStructFieldNames(Lists.newArrayList("key", "value"));
    keyValueStruct.setAllStructFieldTypeInfos(
            Lists.newArrayList(mapTypeInfo.getMapKeyTypeInfo(), mapTypeInfo.getMapValueTypeInfo()));
    // Wrap the struct in a list type: list<struct<key, value>>.
    final ListTypeInfo listOfStructs = new ListTypeInfo();
    listOfStructs.setListElementTypeInfo(keyValueStruct);
    return listOfStructs;
}
Also used : ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)

Example 88 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.

From the class Serializer, method writeList:

/**
 * Serializes a Hive list column into an Arrow {@link ListVector}: first writes the child
 * (element) vector, then fills the Arrow offset buffer row by row.
 *
 * @param arrowVector        destination Arrow list vector
 * @param hiveVector         source Hive list column; may be null (all rows treated as null)
 * @param typeInfo           Hive type info for the list (supplies the element type)
 * @param size               number of rows to write
 * @param vectorizedRowBatch batch being serialized (consulted for selected-row handling)
 * @param isNative           passed through to the element writer
 * @param isMapDataType      true when this list is the list-of-structs encoding of a map
 */
private void writeList(ListVector arrowVector, ListColumnVector hiveVector, ListTypeInfo typeInfo, int size, VectorizedRowBatch vectorizedRowBatch, boolean isNative, boolean isMapDataType) {
    // Arrow list offsets are 32-bit ints, 4 bytes per entry.
    final int OFFSET_WIDTH = 4;
    final TypeInfo elementTypeInfo = typeInfo.getListElementTypeInfo();
    final ColumnVector hiveElementVector = hiveVector == null ? null : hiveVector.child;
    // If the call is coming from writeMap(), then the List type should be non-nullable.
    FieldType elementFieldType = (isMapDataType) ? (new FieldType(false, toArrowType(elementTypeInfo), null)) : (toFieldType(elementTypeInfo));
    final FieldVector arrowElementVector = (FieldVector) arrowVector.addOrGetVector(elementFieldType).getVector();
    // When the batch uses a selection vector, recompute the effective batch/child size
    // over only the selected rows before sizing the Arrow element vector.
    VectorizedRowBatch correctedVrb = vectorizedRowBatch;
    int correctedSize = hiveVector == null ? 0 : hiveVector.childCount;
    if (vectorizedRowBatch.selectedInUse) {
        correctedVrb = correctSelectedAndSize(vectorizedRowBatch, hiveVector);
        correctedSize = correctedVrb.size;
    }
    arrowElementVector.setInitialCapacity(correctedSize);
    arrowElementVector.allocateNew();
    // writeStruct() with the same flag value, as the map is converted as a list of structs.
    if (isMapDataType) {
        writeStruct((NonNullableStructVector) arrowElementVector, (StructColumnVector) hiveElementVector, (StructTypeInfo) elementTypeInfo, correctedSize, correctedVrb, isNative, isMapDataType);
    } else {
        write(arrowElementVector, hiveElementVector, elementTypeInfo, correctedSize, correctedVrb, isNative);
    }
    // Fill the offset buffer. Each row's entry is the running element offset; a null row
    // records the current offset without advancing it (zero-length) and without setNotNull.
    int nextOffset = 0;
    for (int rowIndex = 0; rowIndex < size; rowIndex++) {
        int selectedIndex = rowIndex;
        if (vectorizedRowBatch.selectedInUse) {
            // Map the logical row to its physical position via the selection vector.
            selectedIndex = vectorizedRowBatch.selected[rowIndex];
        }
        if (hiveVector == null || hiveVector.isNull[selectedIndex]) {
            arrowVector.getOffsetBuffer().setInt(rowIndex * OFFSET_WIDTH, nextOffset);
        } else {
            arrowVector.getOffsetBuffer().setInt(rowIndex * OFFSET_WIDTH, nextOffset);
            nextOffset += (int) hiveVector.lengths[selectedIndex];
            arrowVector.setNotNull(rowIndex);
        }
    }
    // Arrow offset buffers carry size+1 entries; the final entry is the total element count.
    arrowVector.getOffsetBuffer().setInt(size * OFFSET_WIDTH, nextOffset);
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) FieldVector(org.apache.arrow.vector.FieldVector) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ArrowColumnarBatchSerDe.toStructListTypeInfo(org.apache.hadoop.hive.ql.io.arrow.ArrowColumnarBatchSerDe.toStructListTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) Decimal64ColumnVector(org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector) DateColumnVector(org.apache.hadoop.hive.ql.exec.vector.DateColumnVector) ListColumnVector(org.apache.hadoop.hive.ql.exec.vector.ListColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) StructColumnVector(org.apache.hadoop.hive.ql.exec.vector.StructColumnVector) MapColumnVector(org.apache.hadoop.hive.ql.exec.vector.MapColumnVector) VectorizedBatchUtil.createColumnVector(org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil.createColumnVector) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) MultiValuedColumnVector(org.apache.hadoop.hive.ql.exec.vector.MultiValuedColumnVector) UnionColumnVector(org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector) IntervalDayTimeColumnVector(org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector) 
DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) FieldType(org.apache.arrow.vector.types.pojo.FieldType)

Example 89 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.

From the class Vectorizer, method validateStructInExpression:

/**
 * Checks whether a struct-typed IN() expression can be vectorized: every child must be a
 * struct whose fields are all primitives of a supported IN-constant family
 * (int, float, or string).
 *
 * @param desc            the expression whose children are validated
 * @param expressionTitle label used when recording a vectorization issue
 * @param mode            vector expression mode (currently unused by the checks below)
 * @return true if all children are vectorizable structs; false otherwise
 */
private boolean validateStructInExpression(ExprNodeDesc desc, String expressionTitle, VectorExpressionDescriptor.Mode mode) {
    for (ExprNodeDesc child : desc.getChildren()) {
        TypeInfo childTypeInfo = child.getTypeInfo();
        if (childTypeInfo.getCategory() != Category.STRUCT) {
            // Non-struct child: reject without recording an issue (matches original behavior).
            return false;
        }
        StructTypeInfo structTypeInfo = (StructTypeInfo) childTypeInfo;
        List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
        List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
        for (int i = 0; i < fieldTypeInfos.size(); i++) {
            TypeInfo fieldTypeInfo = fieldTypeInfos.get(i);
            if (fieldTypeInfo.getCategory() != Category.PRIMITIVE) {
                setExpressionIssue(expressionTitle, "Cannot vectorize struct field " + fieldNames.get(i) + " of type " + fieldTypeInfo.getTypeName());
                return false;
            }
            PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) fieldTypeInfo;
            InConstantType inConstantType = VectorizationContext.getInConstantTypeFromPrimitiveCategory(primitiveTypeInfo.getPrimitiveCategory());
            // For now, limit the data types we support for Vectorized Struct IN().
            boolean supported = inConstantType == InConstantType.INT_FAMILY
                    || inConstantType == InConstantType.FLOAT_FAMILY
                    || inConstantType == InConstantType.STRING_FAMILY;
            if (!supported) {
                setExpressionIssue(expressionTitle, "Cannot vectorize struct field " + fieldNames.get(i) + " of type " + fieldTypeInfo.getTypeName());
                return false;
            }
        }
    }
    return true;
}
Also used : PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) InConstantType(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)

Example 90 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.

From the class VerifyLazy, method lazyCompareStruct:

/**
 * Verifies that a lazily-deserialized struct matches its expected field values,
 * comparing field by field via {@code lazyCompare}.
 *
 * @param structTypeInfo struct type describing the expected fields
 * @param fields         the deserialized field values
 * @param expectedFields the expected field values
 * @return true if every field matches
 * @throws RuntimeException if the field counts disagree with the type, or any field
 *                          value does not match (the message names the failing field)
 */
public static boolean lazyCompareStruct(StructTypeInfo structTypeInfo, List<Object> fields, List<Object> expectedFields) {
    List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
    final int size = fieldTypeInfos.size();
    // Fail with a diagnostic rather than an IndexOutOfBoundsException when the
    // deserialized/expected lists do not line up with the declared struct type.
    if (fields.size() != size || expectedFields.size() != size) {
        throw new RuntimeException("SerDe deserialized value does not match: struct type declares "
                + size + " fields but got " + fields.size() + " deserialized and "
                + expectedFields.size() + " expected");
    }
    for (int i = 0; i < size; i++) {
        Object lazyEleObj = fields.get(i);
        Object expectedEleObj = expectedFields.get(i);
        if (!lazyCompare(fieldTypeInfos.get(i), lazyEleObj, expectedEleObj)) {
            // Name the offending field so the failure is actionable.
            throw new RuntimeException("SerDe deserialized value does not match for struct field "
                    + i + " of type " + fieldTypeInfos.get(i).getTypeName());
        }
    }
    return true;
}
Also used : UnionObject(org.apache.hadoop.hive.serde2.objectinspector.UnionObject) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)

Aggregations

StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)100 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)78 ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo)59 MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo)54 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)54 ArrayList (java.util.ArrayList)42 UnionTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)32 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)30 CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)24 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)23 List (java.util.List)21 VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo)21 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)17 IntWritable (org.apache.hadoop.io.IntWritable)12 Text (org.apache.hadoop.io.Text)12 BytesWritable (org.apache.hadoop.io.BytesWritable)11 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)10 Category (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category)10 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)10 BooleanWritable (org.apache.hadoop.io.BooleanWritable)10