Example 31 with StructTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.

From the class VectorizedStructColumnReader, method readBatch:

@Override
public void readBatch(int total, ColumnVector column, TypeInfo columnType) throws IOException {
    StructColumnVector structColumnVector = (StructColumnVector) column;
    StructTypeInfo structTypeInfo = (StructTypeInfo) columnType;
    ColumnVector[] vectors = structColumnVector.fields;
    for (int i = 0; i < vectors.length; i++) {
        // Read each struct field with its dedicated child reader.
        fieldReaders.get(i).readBatch(total, vectors[i], structTypeInfo.getAllStructFieldTypeInfos().get(i));
        // The struct column repeats only if every field column repeats.
        structColumnVector.isRepeating = structColumnVector.isRepeating && vectors[i].isRepeating;
        // A struct row is null only when that row is null in every field.
        for (int j = 0; j < vectors[i].isNull.length; j++) {
            structColumnVector.isNull[j] = (i == 0) ? vectors[i].isNull[j] : structColumnVector.isNull[j] && vectors[i].isNull[j];
        }
        structColumnVector.noNulls = (i == 0) ? vectors[i].noNulls : structColumnVector.noNulls && vectors[i].noNulls;
    }
}
Also used : StructColumnVector(org.apache.hadoop.hive.ql.exec.vector.StructColumnVector) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector)
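
The null-merging rule above is easy to miss: a struct row reads back as null only when every field is null at that row. Below is a minimal, self-contained sketch of the same merge using plain boolean arrays instead of Hive's ColumnVector (the names here are illustrative, not from Hive):

public class StructNullMerge {
    // Merge per-field null flags into struct-level null flags, as readBatch does:
    // row j of the struct is null only if row j is null in every field.
    static boolean[] mergeNulls(boolean[][] fieldIsNull) {
        boolean[] structIsNull = fieldIsNull[0].clone();
        for (int i = 1; i < fieldIsNull.length; i++) {
            for (int j = 0; j < structIsNull.length; j++) {
                structIsNull[j] = structIsNull[j] && fieldIsNull[i][j];
            }
        }
        return structIsNull;
    }

    public static void main(String[] args) {
        boolean[][] fields = {
            { true, false, true },   // field 0 null flags
            { true, true,  false }   // field 1 null flags
        };
        // Prints [true, false, false]: only row 0 is null in both fields.
        System.out.println(java.util.Arrays.toString(mergeNulls(fields)));
    }
}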

Example 32 with StructTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.

From the class VectorRandomRowSource, method getObjectInspector:

public static ObjectInspector getObjectInspector(TypeInfo typeInfo, DataTypePhysicalVariation dataTypePhysicalVariation) {
    final ObjectInspector objectInspector;
    switch(typeInfo.getCategory()) {
        case PRIMITIVE:
            {
                final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
                if (primitiveTypeInfo instanceof DecimalTypeInfo && dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) {
                    // DECIMAL_64 decimals are physically stored in a long, so hand back the long inspector.
                    objectInspector = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(TypeInfoFactory.longTypeInfo);
                } else {
                    objectInspector = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo);
                }
            }
            break;
        case MAP:
            {
                final MapTypeInfo mapType = (MapTypeInfo) typeInfo;
                final MapObjectInspector mapInspector = ObjectInspectorFactory.getStandardMapObjectInspector(getObjectInspector(mapType.getMapKeyTypeInfo()), getObjectInspector(mapType.getMapValueTypeInfo()));
                objectInspector = mapInspector;
            }
            break;
        case LIST:
            {
                final ListTypeInfo listType = (ListTypeInfo) typeInfo;
                final ListObjectInspector listInspector = ObjectInspectorFactory.getStandardListObjectInspector(getObjectInspector(listType.getListElementTypeInfo()));
                objectInspector = listInspector;
            }
            break;
        case STRUCT:
            {
                final StructTypeInfo structType = (StructTypeInfo) typeInfo;
                final List<TypeInfo> fieldTypes = structType.getAllStructFieldTypeInfos();
                final List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
                for (TypeInfo fieldType : fieldTypes) {
                    fieldInspectors.add(getObjectInspector(fieldType));
                }
                final StructObjectInspector structInspector = ObjectInspectorFactory.getStandardStructObjectInspector(structType.getAllStructFieldNames(), fieldInspectors);
                objectInspector = structInspector;
            }
            break;
        case UNION:
            {
                final UnionTypeInfo unionType = (UnionTypeInfo) typeInfo;
                final List<TypeInfo> fieldTypes = unionType.getAllUnionObjectTypeInfos();
                final List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
                for (TypeInfo fieldType : fieldTypes) {
                    fieldInspectors.add(getObjectInspector(fieldType));
                }
                final UnionObjectInspector unionInspector = ObjectInspectorFactory.getStandardUnionObjectInspector(fieldInspectors);
                objectInspector = unionInspector;
            }
            break;
        default:
            throw new RuntimeException("Unexpected category " + typeInfo.getCategory());
    }
    Preconditions.checkState(objectInspector != null);
    return objectInspector;
}
Also used : WritableIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector) WritableByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) WritableDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) WritableStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) WritableHiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector) WritableHiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector) WritableBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector) WritableTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector) WritableHiveIntervalDayTimeObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector) WritableShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector) StandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector) WritableHiveIntervalYearMonthObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector) WritableFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector) WritableLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector) StandardUnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector) WritableDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector) WritableHiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) List(java.util.List) ArrayList(java.util.ArrayList)
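
For a concrete sense of the STRUCT branch, here is a hedged sketch that assembles the inspector for a two-field struct by hand with the same factory calls the method uses (the field names "a" and "b" are invented for illustration; assumes java.util.Arrays plus the imports listed above):

List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
// Field inspectors for an int field and a string field.
fieldInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(TypeInfoFactory.intTypeInfo));
fieldInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(TypeInfoFactory.stringTypeInfo));
// Assemble the struct inspector, mirroring the STRUCT case above.
StructObjectInspector structInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
    Arrays.asList("a", "b"), fieldInspectors);
// structInspector.getTypeName() yields "struct<a:int,b:string>".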

Example 33 with StructTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.

From the class TestVectorBetweenIn, method doBetweenStructInVariation:

private boolean doBetweenStructInVariation(Random random, String structTypeName, BetweenInVariation betweenInVariation) throws Exception {
    StructTypeInfo structTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(structTypeName);
    ObjectInspector structObjectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(structTypeInfo);
    final int valueCount = 10 + random.nextInt(10);
    List<Object> valueList = new ArrayList<Object>(valueCount);
    for (int i = 0; i < valueCount; i++) {
        valueList.add(VectorRandomRowSource.randomWritable(random, structTypeInfo, structObjectInspector,
            DataTypePhysicalVariation.NONE, /* allowNull */ false));
    }
    final boolean isInvert = false;
    // No convenient WritableComparator / WritableComparable available for STRUCT.
    List<Object> compareList = new ArrayList<Object>();
    Set<Integer> includedSet = new HashSet<Integer>();
    final int chooseLimit = 4 + random.nextInt(valueCount / 2);
    int chooseCount = 0;
    while (chooseCount < chooseLimit) {
        final int index = random.nextInt(valueCount);
        if (includedSet.contains(index)) {
            continue;
        }
        includedSet.add(index);
        compareList.add(valueList.get(index));
        chooseCount++;
    }
    // ----------------------------------------------------------------------------------------------
    GenerationSpec structGenerationSpec = GenerationSpec.createValueList(structTypeInfo, valueList);
    List<GenerationSpec> structGenerationSpecList = new ArrayList<GenerationSpec>();
    List<DataTypePhysicalVariation> structExplicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
    structGenerationSpecList.add(structGenerationSpec);
    structExplicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
    VectorRandomRowSource structRowSource = new VectorRandomRowSource();
    structRowSource.initGenerationSpecSchema(random, structGenerationSpecList,
        /* maxComplexDepth */ 0, /* allowNull */ true, /* isUnicodeOk */ true,
        structExplicitDataTypePhysicalVariationList);
    Object[][] structRandomRows = structRowSource.randomRows(100000);
    // ---------------------------------------------------------------------------------------------
    List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
    List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
    List<TypeInfo> fieldTypeInfoList = structTypeInfo.getAllStructFieldTypeInfos();
    final int fieldCount = fieldTypeInfoList.size();
    for (int i = 0; i < fieldCount; i++) {
        GenerationSpec generationSpec = GenerationSpec.createOmitGeneration(fieldTypeInfoList.get(i));
        generationSpecList.add(generationSpec);
        explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
    }
    VectorRandomRowSource rowSource = new VectorRandomRowSource();
    rowSource.initGenerationSpecSchema(random, generationSpecList,
        /* maxComplexDepth */ 0, /* allowNull */ true, /* isUnicodeOk */ true,
        explicitDataTypePhysicalVariationList);
    Object[][] randomRows = rowSource.randomRows(100000);
    final int rowCount = randomRows.length;
    for (int r = 0; r < rowCount; r++) {
        List<Object> fieldValueList = (ArrayList) structRandomRows[r][0];
        for (int f = 0; f < fieldCount; f++) {
            randomRows[r][f] = fieldValueList.get(f);
        }
    }
    // ---------------------------------------------------------------------------------------------
    // Currently, STRUCT IN vectorization assumes a GenericUDFStruct.
    List<ObjectInspector> structUdfObjectInspectorList = new ArrayList<ObjectInspector>();
    List<ExprNodeDesc> structUdfChildren = new ArrayList<ExprNodeDesc>(fieldCount);
    List<String> rowColumnNameList = rowSource.columnNames();
    for (int i = 0; i < fieldCount; i++) {
        TypeInfo fieldTypeInfo = fieldTypeInfoList.get(i);
        ExprNodeColumnDesc fieldExpr = new ExprNodeColumnDesc(fieldTypeInfo, rowColumnNameList.get(i), "table", false);
        structUdfChildren.add(fieldExpr);
        ObjectInspector fieldObjectInspector = VectorRandomRowSource.getObjectInspector(fieldTypeInfo, DataTypePhysicalVariation.NONE);
        structUdfObjectInspectorList.add(fieldObjectInspector);
    }
    StandardStructObjectInspector structUdfObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(rowColumnNameList, structUdfObjectInspectorList);
    String structUdfTypeName = structUdfObjectInspector.getTypeName();
    TypeInfo structUdfTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(structUdfTypeName);
    String structFuncText = "struct";
    FunctionInfo fi = FunctionRegistry.getFunctionInfo(structFuncText);
    GenericUDF genericUDF = fi.getGenericUDF();
    ExprNodeDesc col1Expr = new ExprNodeGenericFuncDesc(structUdfObjectInspector, genericUDF, structFuncText, structUdfChildren);
    // ---------------------------------------------------------------------------------------------
    List<String> columns = new ArrayList<String>();
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    children.add(col1Expr);
    for (int i = 0; i < compareList.size(); i++) {
        Object compareObject = compareList.get(i);
        ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(structUdfTypeInfo, VectorRandomRowSource.getNonWritableObject(compareObject, structUdfTypeInfo, structUdfObjectInspector));
        children.add(constDesc);
    }
    for (int i = 0; i < fieldCount; i++) {
        columns.add(rowColumnNameList.get(i));
    }
    String[] columnNames = columns.toArray(new String[0]);
    VectorRandomBatchSource batchSource = VectorRandomBatchSource.createInterestingBatches(random, rowSource, randomRows, null);
    // ---------------------------------------------------------------------------------------------
    final GenericUDF udf = new GenericUDFIn();
    final int compareCount = compareList.size();
    ObjectInspector[] argumentOIs = new ObjectInspector[compareCount];
    for (int i = 0; i < compareCount; i++) {
        argumentOIs[i] = structUdfObjectInspector;
    }
    final ObjectInspector outputObjectInspector = udf.initialize(argumentOIs);
    TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector);
    ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, children);
    return executeTestModesAndVerify(structUdfTypeInfo, betweenInVariation, compareList, columns, columnNames,
        children, udf, exprDesc, randomRows, rowSource, batchSource, outputTypeInfo, /* skipAdaptor */ true);
}
Also used : ArrayList(java.util.ArrayList) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) HashSet(java.util.HashSet) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) VectorRandomBatchSource(org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource) FunctionInfo(org.apache.hadoop.hive.ql.exec.FunctionInfo) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) GenerationSpec(org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) GenericUDFIn(org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn) VectorRandomRowSource(org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource)
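
The test wires up the IN expression the same way query compilation does: the column expression comes first, followed by one constant child per IN-list member. A stripped-down, hedged sketch of that wiring for a plain int column (the column name "col0" and the constants are invented for illustration; assumes the imports listed above):

List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
// First child is the column being tested; the rest are the IN-list constants.
children.add(new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "col0", "table", false));
children.add(new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 1));
children.add(new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 2));
// col0 IN (1, 2) evaluates to a boolean.
ExprNodeGenericFuncDesc inExpr =
    new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFIn(), children);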

Example 34 with StructTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.

From the class ArrowColumnarBatchSerDe, method toField:

private static Field toField(String name, TypeInfo typeInfo) {
    switch(typeInfo.getCategory()) {
        case PRIMITIVE:
            final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
            switch(primitiveTypeInfo.getPrimitiveCategory()) {
                case BOOLEAN:
                    return Field.nullable(name, MinorType.BIT.getType());
                case BYTE:
                    return Field.nullable(name, MinorType.TINYINT.getType());
                case SHORT:
                    return Field.nullable(name, MinorType.SMALLINT.getType());
                case INT:
                    return Field.nullable(name, MinorType.INT.getType());
                case LONG:
                    return Field.nullable(name, MinorType.BIGINT.getType());
                case FLOAT:
                    return Field.nullable(name, MinorType.FLOAT4.getType());
                case DOUBLE:
                    return Field.nullable(name, MinorType.FLOAT8.getType());
                case STRING:
                case VARCHAR:
                case CHAR:
                    return Field.nullable(name, MinorType.VARCHAR.getType());
                case DATE:
                    return Field.nullable(name, MinorType.DATEDAY.getType());
                case TIMESTAMP:
                    return Field.nullable(name, MinorType.TIMESTAMPMILLI.getType());
                case TIMESTAMPLOCALTZ:
                    final TimestampLocalTZTypeInfo timestampLocalTZTypeInfo = (TimestampLocalTZTypeInfo) typeInfo;
                    final String timeZone = timestampLocalTZTypeInfo.getTimeZone().toString();
                    return Field.nullable(name, new ArrowType.Timestamp(TimeUnit.MILLISECOND, timeZone));
                case BINARY:
                    return Field.nullable(name, MinorType.VARBINARY.getType());
                case DECIMAL:
                    final DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
                    final int precision = decimalTypeInfo.precision();
                    final int scale = decimalTypeInfo.scale();
                    return Field.nullable(name, new ArrowType.Decimal(precision, scale));
                case INTERVAL_YEAR_MONTH:
                    return Field.nullable(name, MinorType.INTERVALYEAR.getType());
                case INTERVAL_DAY_TIME:
                    return Field.nullable(name, MinorType.INTERVALDAY.getType());
                default:
                    throw new IllegalArgumentException();
            }
        case LIST:
            final ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
            final TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo();
            return new Field(name, FieldType.nullable(MinorType.LIST.getType()), Lists.newArrayList(toField(DEFAULT_ARROW_FIELD_NAME, elementTypeInfo)));
        case STRUCT:
            final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
            final List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
            final List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
            final List<Field> structFields = Lists.newArrayList();
            final int structSize = fieldNames.size();
            for (int i = 0; i < structSize; i++) {
                structFields.add(toField(fieldNames.get(i), fieldTypeInfos.get(i)));
            }
            return new Field(name, FieldType.nullable(MinorType.STRUCT.getType()), structFields);
        case UNION:
            final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
            final List<TypeInfo> objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
            final List<Field> unionFields = Lists.newArrayList();
            // One Arrow child field per union member type.
            final int unionSize = objectTypeInfos.size();
            for (int i = 0; i < unionSize; i++) {
                unionFields.add(toField(DEFAULT_ARROW_FIELD_NAME, objectTypeInfos.get(i)));
            }
            return new Field(name, FieldType.nullable(MinorType.UNION.getType()), unionFields);
        case MAP:
            final MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
            final TypeInfo keyTypeInfo = mapTypeInfo.getMapKeyTypeInfo();
            final TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo();
            final List<Field> mapFields = Lists.newArrayList();
            mapFields.add(toField(name + "_keys", keyTypeInfo));
            mapFields.add(toField(name + "_values", valueTypeInfo));
            FieldType struct = new FieldType(false, new ArrowType.Struct(), null);
            List<Field> childrenOfList = Lists.newArrayList(new Field(name, struct, mapFields));
            return new Field(name, FieldType.nullable(MinorType.LIST.getType()), childrenOfList);
        default:
            throw new IllegalArgumentException();
    }
}
Also used : ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) TimestampLocalTZTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) FieldType(org.apache.arrow.vector.types.pojo.FieldType) Field(org.apache.arrow.vector.types.pojo.Field)
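
Applied to a hypothetical struct<a:int,b:string>, the STRUCT branch above would build a Field tree like the following hedged sketch (field names "a", "b", and "s" are invented for illustration; uses the same Arrow calls as the method):

// Leaf fields for the int and string members.
Field aField = Field.nullable("a", MinorType.INT.getType());
Field bField = Field.nullable("b", MinorType.VARCHAR.getType());
// The struct field carries its members as Arrow child fields.
Field structField = new Field("s",
    FieldType.nullable(MinorType.STRUCT.getType()),
    Lists.newArrayList(aField, bField));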

Example 35 with StructTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project hive by apache.

From the class Serializer, method writeStruct:

private void writeStruct(NonNullableStructVector arrowVector, StructColumnVector hiveVector, StructTypeInfo typeInfo, int size, VectorizedRowBatch vectorizedRowBatch, boolean isNative, boolean isMapDataType) {
    final List<String> fieldNames = typeInfo.getAllStructFieldNames();
    final List<TypeInfo> fieldTypeInfos = typeInfo.getAllStructFieldTypeInfos();
    final ColumnVector[] hiveFieldVectors = hiveVector == null ? null : hiveVector.fields;
    final int fieldSize = fieldTypeInfos.size();
    // A null struct row must mark every field vector null before the field writers run;
    // see https://issues.apache.org/jira/browse/HIVE-25243.
    if (hiveVector != null && hiveFieldVectors != null) {
        for (int i = 0; i < size; i++) {
            if (hiveVector.isNull[i]) {
                for (ColumnVector fieldVector : hiveFieldVectors) {
                    fieldVector.isNull[i] = true;
                    fieldVector.noNulls = false;
                }
            }
        }
    }
    for (int fieldIndex = 0; fieldIndex < fieldSize; fieldIndex++) {
        final TypeInfo fieldTypeInfo = fieldTypeInfos.get(fieldIndex);
        final ColumnVector hiveFieldVector = hiveVector == null ? null : hiveFieldVectors[fieldIndex];
        final String fieldName = fieldNames.get(fieldIndex);
        // If the call is coming from writeMap(), then the structs within the list type should be non-nullable.
        FieldType elementFieldType = (isMapDataType) ? (new FieldType(false, toArrowType(fieldTypeInfo), null)) : (toFieldType(fieldTypeInfos.get(fieldIndex)));
        final FieldVector arrowFieldVector = arrowVector.addOrGet(fieldName, elementFieldType, FieldVector.class);
        arrowFieldVector.setInitialCapacity(size);
        arrowFieldVector.allocateNew();
        write(arrowFieldVector, hiveFieldVector, fieldTypeInfo, size, vectorizedRowBatch, isNative);
    }
    for (int rowIndex = 0; rowIndex < size; rowIndex++) {
        if (hiveVector == null || hiveVector.isNull[rowIndex]) {
            BitVectorHelper.setValidityBit(arrowVector.getValidityBuffer(), rowIndex, 0);
        } else {
            BitVectorHelper.setValidityBitToOne(arrowVector.getValidityBuffer(), rowIndex);
        }
    }
}
Also used : FieldVector(org.apache.arrow.vector.FieldVector) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ArrowColumnarBatchSerDe.toStructListTypeInfo(org.apache.hadoop.hive.ql.io.arrow.ArrowColumnarBatchSerDe.toStructListTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) Decimal64ColumnVector(org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector) DateColumnVector(org.apache.hadoop.hive.ql.exec.vector.DateColumnVector) ListColumnVector(org.apache.hadoop.hive.ql.exec.vector.ListColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) StructColumnVector(org.apache.hadoop.hive.ql.exec.vector.StructColumnVector) MapColumnVector(org.apache.hadoop.hive.ql.exec.vector.MapColumnVector) VectorizedBatchUtil.createColumnVector(org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil.createColumnVector) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) MultiValuedColumnVector(org.apache.hadoop.hive.ql.exec.vector.MultiValuedColumnVector) UnionColumnVector(org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector) IntervalDayTimeColumnVector(org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) FieldType(org.apache.arrow.vector.types.pojo.FieldType)
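
The HIVE-25243 pre-pass is the mirror image of the read-side merge in Example 31: on write, a null struct row pushes its null flag down into every field vector. A minimal, self-contained sketch with plain arrays (illustrative names, not Hive code):

// Push struct-level nulls down into each field's null flags, as writeStruct does.
static void propagateStructNulls(boolean[] structIsNull, boolean[][] fieldIsNull) {
    for (int row = 0; row < structIsNull.length; row++) {
        if (structIsNull[row]) {
            for (boolean[] field : fieldIsNull) {
                field[row] = true; // field writers will now emit null for this row
            }
        }
    }
}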

Aggregations

StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo): 97 uses
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 76 uses
ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo): 57 uses
MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo): 52 uses
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 52 uses
ArrayList (java.util.ArrayList): 41 uses
UnionTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo): 30 uses
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo): 29 uses
CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo): 24 uses
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 23 uses
VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo): 21 uses
List (java.util.List): 20 uses
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 17 uses
IntWritable (org.apache.hadoop.io.IntWritable): 12 uses
Text (org.apache.hadoop.io.Text): 12 uses
BytesWritable (org.apache.hadoop.io.BytesWritable): 11 uses
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 10 uses
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 10 uses
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 10 uses
LongWritable (org.apache.hadoop.io.LongWritable): 10 uses