Example 36 with ListTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo in project incubator-gobblin by apache.

From the class HiveAvroORCQueryGenerator, the method escapeHiveType:

/**
 * Escape nested field names in a Hive type string by wrapping them in backticks.
 * @param type Primitive or nested Hive type string.
 * @return The Hive type string with all nested field names escaped.
 */
public static String escapeHiveType(String type) {
    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type);
    // Primitive
    if (ObjectInspector.Category.PRIMITIVE.equals(typeInfo.getCategory())) {
        return type;
    } else if (ObjectInspector.Category.LIST.equals(typeInfo.getCategory())) {
        // List
        ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
        return org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME + "<" + escapeHiveType(listTypeInfo.getListElementTypeInfo().getTypeName()) + ">";
    } else if (ObjectInspector.Category.MAP.equals(typeInfo.getCategory())) {
        // Map
        MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
        return org.apache.hadoop.hive.serde.serdeConstants.MAP_TYPE_NAME + "<" + escapeHiveType(mapTypeInfo.getMapKeyTypeInfo().getTypeName()) + "," + escapeHiveType(mapTypeInfo.getMapValueTypeInfo().getTypeName()) + ">";
    } else if (ObjectInspector.Category.STRUCT.equals(typeInfo.getCategory())) {
        // Struct
        StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
        List<String> allStructFieldNames = structTypeInfo.getAllStructFieldNames();
        List<TypeInfo> allStructFieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
        StringBuilder sb = new StringBuilder();
        sb.append(serdeConstants.STRUCT_TYPE_NAME + "<");
        for (int i = 0; i < allStructFieldNames.size(); i++) {
            if (i > 0) {
                sb.append(",");
            }
            sb.append("`");
            sb.append(allStructFieldNames.get(i));
            sb.append("`");
            sb.append(":");
            sb.append(escapeHiveType(allStructFieldTypeInfos.get(i).getTypeName()));
        }
        sb.append(">");
        return sb.toString();
    } else if (ObjectInspector.Category.UNION.equals(typeInfo.getCategory())) {
        // Union
        UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
        List<TypeInfo> allUnionObjectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
        StringBuilder sb = new StringBuilder();
        sb.append(serdeConstants.UNION_TYPE_NAME + "<");
        for (int i = 0; i < allUnionObjectTypeInfos.size(); i++) {
            if (i > 0) {
                sb.append(",");
            }
            sb.append(escapeHiveType(allUnionObjectTypeInfos.get(i).getTypeName()));
        }
        sb.append(">");
        return sb.toString();
    } else {
        throw new RuntimeException("Unknown type encountered: " + type);
    }
}
Also used : ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) List(java.util.List) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)
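
A minimal sketch of calling escapeHiveType above. The import path is assumed from the incubator-gobblin source layout, and the sample type string is illustrative, not taken from the project:

// Assumed import path for the generator class shown above.
import org.apache.gobblin.data.management.conversion.hive.query.HiveAvroORCQueryGenerator;

public class EscapeHiveTypeDemo {
    public static void main(String[] args) {
        // "end" is a Hive reserved word, so it must be backtick-escaped in DDL.
        String escaped = HiveAvroORCQueryGenerator.escapeHiveType("struct<end:string,values:array<int>>");
        System.out.println(escaped);
        // Prints: struct<`end`:string,`values`:array<int>>
    }
}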

Example 37 with ListTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo in project incubator-gobblin by apache.

From the class TypeInfoToSchemaParser, the method parseSchemaFromTypeInfo:

Schema parseSchemaFromTypeInfo(TypeInfo typeInfo, String recordNamespace, String recordName) {
    Category c = typeInfo.getCategory();
    Schema schema;
    switch(c) {
        case STRUCT:
            schema = this.parseSchemaFromStruct((StructTypeInfo) typeInfo, recordNamespace, recordName);
            break;
        case LIST:
            schema = this.parseSchemaFromList((ListTypeInfo) typeInfo, recordNamespace, recordName);
            break;
        case MAP:
            schema = this.parseSchemaFromMap((MapTypeInfo) typeInfo, recordNamespace, recordName);
            break;
        case PRIMITIVE:
            schema = this.parseSchemaFromPrimitive((PrimitiveTypeInfo) typeInfo);
            break;
        case UNION:
            schema = this.parseSchemaFromUnion((UnionTypeInfo) typeInfo, recordNamespace, recordName);
            break;
        default:
            throw new UnsupportedOperationException("Conversion from " + c + " not supported");
    }
    return this._mkFieldsOptional ? wrapInNullableUnion(schema) : schema;
}
Also used : PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) Schema(org.apache.avro.Schema) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)
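
The switch simply dispatches on the TypeInfo category and recurses for nested types. A minimal sketch of producing an input that exercises the LIST branch; since the parser method above is package-private and its constructor is not shown, only the TypeInfo side is demonstrated:

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeInfoCategoryDemo {
    public static void main(String[] args) {
        // array<struct<...>> takes the LIST branch, which then recurses into STRUCT
        // for the element type, yielding an Avro array of records.
        TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString("array<struct<id:int,name:string>>");
        System.out.println(typeInfo.getCategory()); // LIST
    }
}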

Example 38 with ListTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo in project phoenix by apache.

From the class PhoenixObjectInspectorFactory, the method createObjectInspector:

public static ObjectInspector createObjectInspector(TypeInfo type, LazySerDeParameters serdeParams) {
    ObjectInspector oi = null;
    if (LOG.isDebugEnabled()) {
        LOG.debug("Type : " + type);
    }
    switch(type.getCategory()) {
        case PRIMITIVE:
            switch(((PrimitiveTypeInfo) type).getPrimitiveCategory()) {
                case BOOLEAN:
                    oi = new PhoenixBooleanObjectInspector();
                    break;
                case BYTE:
                    oi = new PhoenixByteObjectInspector();
                    break;
                case SHORT:
                    oi = new PhoenixShortObjectInspector();
                    break;
                case INT:
                    oi = new PhoenixIntObjectInspector();
                    break;
                case LONG:
                    oi = new PhoenixLongObjectInspector();
                    break;
                case FLOAT:
                    oi = new PhoenixFloatObjectInspector();
                    break;
                case DOUBLE:
                    oi = new PhoenixDoubleObjectInspector();
                    break;
                case VARCHAR:
                // VARCHAR falls through: handled the same as STRING
                case STRING:
                    oi = new PhoenixStringObjectInspector(serdeParams.isEscaped(), serdeParams.getEscapeChar());
                    break;
                case CHAR:
                    oi = new PhoenixCharObjectInspector((PrimitiveTypeInfo) type);
                    break;
                case DATE:
                    oi = new PhoenixDateObjectInspector();
                    break;
                case TIMESTAMP:
                    oi = new PhoenixTimestampObjectInspector();
                    break;
                case DECIMAL:
                    oi = new PhoenixDecimalObjectInspector((PrimitiveTypeInfo) type);
                    break;
                case BINARY:
                    oi = new PhoenixBinaryObjectInspector();
                    break;
                default:
                    throw new RuntimeException("Hive internal error: not supported data type: " + type);
            }
            break;
        case LIST:
            if (LOG.isDebugEnabled()) {
                LOG.debug("List type started");
            }
            ObjectInspector listElementObjectInspector = createObjectInspector(((ListTypeInfo) type).getListElementTypeInfo(), serdeParams);
            if (LOG.isDebugEnabled()) {
                LOG.debug("List type ended");
            }
            oi = new PhoenixListObjectInspector(listElementObjectInspector, serdeParams.getSeparators()[0], serdeParams);
            break;
        default:
            throw new RuntimeException("Hive internal error: not supported data type: " + type);
    }
    return oi;
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
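
A hedged sketch of driving createObjectInspector with a list type. The PhoenixObjectInspectorFactory import path and the table properties are assumptions for illustration, not taken from the Phoenix sources:

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
// Assumed import path for the factory class shown above.
import org.apache.phoenix.hive.objectinspector.PhoenixObjectInspectorFactory;

public class PhoenixListInspectorDemo {
    public static void main(String[] args) throws Exception {
        // Minimal serde parameters; a real Phoenix-backed table supplies its own properties.
        Properties tbl = new Properties();
        tbl.setProperty("columns", "scores");
        tbl.setProperty("columns.types", "array<int>");
        LazySerDeParameters serdeParams = new LazySerDeParameters(new Configuration(), tbl, "demo");
        // array<int> takes the LIST branch: the element inspector is built recursively,
        // then wrapped in a PhoenixListObjectInspector.
        TypeInfo listType = TypeInfoUtils.getTypeInfoFromTypeString("array<int>");
        ObjectInspector oi = PhoenixObjectInspectorFactory.createObjectInspector(listType, serdeParams);
        System.out.println(oi.getTypeName()); // expected: array<int>
    }
}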

Example 39 with ListTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo in project hive by apache.

From the class VectorizedParquetRecordReader, the method buildVectorizedParquetReader:

// Build a VectorizedColumnReader from the Hive typeInfo and the Parquet schema
private VectorizedColumnReader buildVectorizedParquetReader(TypeInfo typeInfo, Type type, PageReadStore pages, List<ColumnDescriptor> columnDescriptors, boolean skipTimestampConversion, int depth) throws IOException {
    List<ColumnDescriptor> descriptors = getAllColumnDescriptorByType(depth, type, columnDescriptors);
    switch(typeInfo.getCategory()) {
        case PRIMITIVE:
            if (columnDescriptors == null || columnDescriptors.isEmpty()) {
                throw new RuntimeException("Failed to find related Parquet column descriptor with type " + type);
            }
            if (fileSchema.getColumns().contains(descriptors.get(0))) {
                return new VectorizedPrimitiveColumnReader(descriptors.get(0), pages.getPageReader(descriptors.get(0)), skipTimestampConversion, type, typeInfo);
            } else {
                // Support for schema evolution
                return new VectorizedDummyColumnReader();
            }
        case STRUCT:
            StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
            List<VectorizedColumnReader> fieldReaders = new ArrayList<>();
            List<TypeInfo> fieldTypes = structTypeInfo.getAllStructFieldTypeInfos();
            List<Type> types = type.asGroupType().getFields();
            for (int i = 0; i < fieldTypes.size(); i++) {
                VectorizedColumnReader r = buildVectorizedParquetReader(fieldTypes.get(i), types.get(i), pages, descriptors, skipTimestampConversion, depth + 1);
                if (r != null) {
                    fieldReaders.add(r);
                } else {
                    throw new RuntimeException("Failed to build Parquet vectorized reader based on Hive type " + fieldTypes.get(i).getTypeName() + " and Parquet type " + types.get(i).toString());
                }
            }
            return new VectorizedStructColumnReader(fieldReaders);
        case LIST:
            checkListColumnSupport(((ListTypeInfo) typeInfo).getListElementTypeInfo());
            if (columnDescriptors == null || columnDescriptors.isEmpty()) {
                throw new RuntimeException("Failed to find related Parquet column descriptor with type " + type);
            }
            return new VectorizedListColumnReader(descriptors.get(0), pages.getPageReader(descriptors.get(0)), skipTimestampConversion, getElementType(type), typeInfo);
        case MAP:
            if (columnDescriptors == null || columnDescriptors.isEmpty()) {
                throw new RuntimeException("Failed to find related Parquet column descriptor with type " + type);
            }
            // Handle the two different Map definitions found in Parquet, e.g.:
            // a definition with 1 group:
            //   repeated group map (MAP_KEY_VALUE)
            //     {required binary key (UTF8); optional binary value (UTF8);}
            // a definition with 2 groups:
            //   optional group m1 (MAP) {
            //     repeated group map (MAP_KEY_VALUE)
            //       {required binary key (UTF8); optional binary value (UTF8);}
            //   }
            int nestGroup = 0;
            GroupType groupType = type.asGroupType();
            // Descend through single-field wrapper groups until the key/value group is reached,
            // giving up after MAP_DEFINITION_LEVEL_MAX levels.
            while (groupType.getFieldCount() < 2) {
                if (nestGroup > MAP_DEFINITION_LEVEL_MAX) {
                    throw new RuntimeException("More than " + MAP_DEFINITION_LEVEL_MAX + " level is found in Map definition, " + "Failed to get the field types for Map with type " + type);
                }
                groupType = groupType.getFields().get(0).asGroupType();
                nestGroup++;
            }
            List<Type> kvTypes = groupType.getFields();
            VectorizedListColumnReader keyListColumnReader = new VectorizedListColumnReader(descriptors.get(0), pages.getPageReader(descriptors.get(0)), skipTimestampConversion, kvTypes.get(0), typeInfo);
            VectorizedListColumnReader valueListColumnReader = new VectorizedListColumnReader(descriptors.get(1), pages.getPageReader(descriptors.get(1)), skipTimestampConversion, kvTypes.get(1), typeInfo);
            return new VectorizedMapColumnReader(keyListColumnReader, valueListColumnReader);
        case UNION:
        default:
            throw new RuntimeException("Unsupported category " + typeInfo.getCategory().name());
    }
}
Also used : ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) ArrayList(java.util.ArrayList) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveType(org.apache.parquet.schema.PrimitiveType) GroupType(org.apache.parquet.schema.GroupType) MessageType(org.apache.parquet.schema.MessageType) Type(org.apache.parquet.schema.Type) ParquetRuntimeException(org.apache.parquet.ParquetRuntimeException)
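
The MAP branch's group-unwrapping loop is the subtle part. Below is a minimal, self-contained sketch of that step using Parquet's schema parser; the message string is an illustrative two-group map definition mirroring the comment in the code above, not a schema from the Hive sources:

import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class MapGroupUnwrapDemo {
    public static void main(String[] args) {
        // Two-group map definition: an outer MAP group wrapping the repeated key/value group.
        MessageType schema = MessageTypeParser.parseMessageType(
            "message m {\n"
          + "  optional group m1 (MAP) {\n"
          + "    repeated group map (MAP_KEY_VALUE) {\n"
          + "      required binary key (UTF8);\n"
          + "      optional binary value (UTF8);\n"
          + "    }\n"
          + "  }\n"
          + "}");
        GroupType groupType = schema.getFields().get(0).asGroupType();
        // Descend through single-field wrapper groups until key and value are exposed,
        // as the while-loop in buildVectorizedParquetReader does (minus the depth guard).
        while (groupType.getFieldCount() < 2) {
            groupType = groupType.getFields().get(0).asGroupType();
        }
        System.out.println(groupType.getFields());
        // Prints the key and value fields: [required binary key (UTF8), optional binary value (UTF8)]
    }
}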

Example 40 with ListTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo in project hive by apache.

From the class VectorSerializeRow, the method serializeListWrite:

private void serializeListWrite(ListColumnVector colVector, Field field, int adjustedBatchIndex) throws IOException {
    final ListTypeInfo typeInfo = (ListTypeInfo) field.typeInfo;
    final ListObjectInspector objectInspector = (ListObjectInspector) field.objectInspector;
    final ColumnVector childColumnVector = colVector.child;
    final Field elementField = field.children[0];
    final int offset = (int) colVector.offsets[adjustedBatchIndex];
    final int size = (int) colVector.lengths[adjustedBatchIndex];
    final ObjectInspector elementObjectInspector = objectInspector.getListElementObjectInspector();
    final List list = (List) vectorExtractRow.extractRowColumn(colVector, typeInfo, objectInspector, adjustedBatchIndex);
    serializeWrite.beginList(list);
    for (int i = 0; i < size; i++) {
        if (i > 0) {
            serializeWrite.separateList();
        }
        serializeWrite(childColumnVector, elementField, offset + i);
    }
    serializeWrite.finishList();
}
Also used : StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) List(java.util.List)
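
The offsets/lengths arithmetic is easiest to see outside the serializer. A minimal sketch, assuming a hand-built vector rather than one produced by Hive's query pipeline, showing how a ListColumnVector lays out its rows with the same offset and size reads as serializeListWrite:

import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

public class ListColumnVectorDemo {
    public static void main(String[] args) {
        // All elements live flattened in the child vector; offsets[row] and
        // lengths[row] mark each row's slice of it.
        LongColumnVector child = new LongColumnVector(6);
        child.vector = new long[] { 10, 20, 30, 40, 50, 60 };
        ListColumnVector list = new ListColumnVector(2, child);
        list.offsets[0] = 0; list.lengths[0] = 2;  // row 0 -> [10, 20]
        list.offsets[1] = 2; list.lengths[1] = 4;  // row 1 -> [30, 40, 50, 60]
        // The same reads serializeListWrite performs for a given batch index.
        final int offset = (int) list.offsets[1];
        final int size = (int) list.lengths[1];
        for (int i = 0; i < size; i++) {
            System.out.print(child.vector[offset + i] + " "); // 30 40 50 60
        }
    }
}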

Aggregations

ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo)68 MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo)55 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)55 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)53 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)51 ArrayList (java.util.ArrayList)31 UnionTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)28 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)22 CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)19 List (java.util.List)18 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)18 VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo)17 BytesWritable (org.apache.hadoop.io.BytesWritable)11 Map (java.util.Map)10 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)9 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)8 HiveIntervalDayTimeWritable (org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable)8 HiveChar (org.apache.hadoop.hive.common.type.HiveChar)7 Timestamp (org.apache.hadoop.hive.common.type.Timestamp)7 DateWritableV2 (org.apache.hadoop.hive.serde2.io.DateWritableV2)7