Search in sources :

Example 11 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project parquet-mr by apache.

the class ParquetHiveSerDe method initialize.

@Override
public final void initialize(final Configuration conf, final Properties tbl) throws SerDeException {
    final TypeInfo rowTypeInfo;
    final List<String> columnNames;
    final List<TypeInfo> columnTypes;
    // Get column names and sort order
    final String columnNameProperty = tbl.getProperty(IOConstants.COLUMNS);
    final String columnTypeProperty = tbl.getProperty(IOConstants.COLUMNS_TYPES);
    if (columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    if (columnNames.size() != columnTypes.size()) {
        throw new IllegalArgumentException("ParquetHiveSerde initialization failed. Number of column " + "name and column type differs. columnNames = " + columnNames + ", columnTypes = " + columnTypes);
    }
    // Create row related objects
    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    this.objInspector = new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);
    // Stats part
    stats = new SerDeStats();
    serializedSize = 0;
    deserializedSize = 0;
    status = LAST_OPERATION.UNKNOWN;
}
Also used : SerDeStats(org.apache.hadoop.hive.serde2.SerDeStats) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)

Example 12 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project mongo-hadoop by mongodb.

the class BSONSerDe method initialize.

/**
 * Finds out the information of the table, including the column names and types.
 */
@SuppressWarnings("unchecked")
@Override
public void initialize(final Configuration conf, final Properties tblProps) throws SerDeException {
    // regex used to split column names between commas
    String splitCols = "\\s*,\\s*";
    // Get the table column names
    String colNamesStr = tblProps.getProperty(serdeConstants.LIST_COLUMNS);
    columnNames = Arrays.asList(colNamesStr.split(splitCols));
    // Get mappings specified by the user
    if (tblProps.containsKey(MONGO_COLS)) {
        String mongoFieldsStr = tblProps.getProperty(MONGO_COLS);
        Map<String, String> rules = ((BasicBSONObject) JSON.parse(mongoFieldsStr)).toMap();
        // register the hive field mappings to mongo field mappings
        hiveToMongo = new HashMap<String, String>();
        registerMappings(rules);
    }
    // Get the table column types
    String colTypesStr = tblProps.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr);
    if (columnNames.size() != columnTypes.size()) {
        throw new SerDeException("Column Names and Types don't match in size");
    }
    // Get the structure and object inspector
    docTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    docOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(docTypeInfo);
    // Create the BSONWritable instance for future use.
    bsonWritable = new BSONWritable();
}
Also used : BSONWritable(com.mongodb.hadoop.io.BSONWritable) BasicBSONObject(org.bson.BasicBSONObject) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Example 13 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project presto by prestodb.

the class HiveType method getTypeSignature.

private static TypeSignature getTypeSignature(TypeInfo typeInfo) {
    switch(typeInfo.getCategory()) {
        case PRIMITIVE:
            Type primitiveType = getPrimitiveType((PrimitiveTypeInfo) typeInfo);
            if (primitiveType == null) {
                break;
            }
            return primitiveType.getTypeSignature();
        case MAP:
            MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
            TypeSignature keyType = getTypeSignature(mapTypeInfo.getMapKeyTypeInfo());
            TypeSignature valueType = getTypeSignature(mapTypeInfo.getMapValueTypeInfo());
            return new TypeSignature(StandardTypes.MAP, ImmutableList.of(TypeSignatureParameter.of(keyType), TypeSignatureParameter.of(valueType)));
        case LIST:
            ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
            TypeSignature elementType = getTypeSignature(listTypeInfo.getListElementTypeInfo());
            return new TypeSignature(StandardTypes.ARRAY, ImmutableList.of(TypeSignatureParameter.of(elementType)));
        case STRUCT:
            StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
            List<TypeInfo> structFieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
            List<String> structFieldNames = structTypeInfo.getAllStructFieldNames();
            if (structFieldTypeInfos.size() != structFieldNames.size()) {
                throw new PrestoException(HiveErrorCode.HIVE_INVALID_METADATA, format("Invalid Hive struct type: %s", typeInfo));
            }
            ImmutableList.Builder<TypeSignatureParameter> typeSignatureBuilder = ImmutableList.builder();
            for (int i = 0; i < structFieldTypeInfos.size(); i++) {
                TypeSignature typeSignature = getTypeSignature(structFieldTypeInfos.get(i));
                // Lower case the struct field names.
                // Otherwise, Presto will refuse to write to columns whose struct type has field names containing upper case characters.
                // Users can't work around this by casting in their queries because Presto parser always lower case types.
                // TODO: This is a hack. Presto engine should be able to handle identifiers in a case insensitive way where necessary.
                String rowFieldName = structFieldNames.get(i).toLowerCase(Locale.US);
                typeSignatureBuilder.add(TypeSignatureParameter.of(new NamedTypeSignature(Optional.of(new RowFieldName(rowFieldName, false)), typeSignature)));
            }
            return new TypeSignature(StandardTypes.ROW, typeSignatureBuilder.build());
    }
    throw new PrestoException(NOT_SUPPORTED, format("Unsupported Hive type: %s", typeInfo));
}
Also used : ImmutableList(com.google.common.collect.ImmutableList) RowFieldName(com.facebook.presto.common.type.RowFieldName) NamedTypeSignature(com.facebook.presto.common.type.NamedTypeSignature) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrestoException(com.facebook.presto.spi.PrestoException) TypeInfoUtils.getTypeInfosFromTypeString(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString) TypeInfoUtils.getTypeInfoFromTypeString(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString) TypeInfoFactory.doubleTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.doubleTypeInfo) TypeInfoFactory.dateTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.dateTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfoFactory.longTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.longTypeInfo) TypeInfoFactory.shortTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.shortTypeInfo) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) TypeInfoFactory.timestampTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.timestampTypeInfo) TypeInfoFactory.floatTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.floatTypeInfo) TypeInfoFactory.intTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.intTypeInfo) TypeInfoFactory.binaryTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.binaryTypeInfo) TypeInfoFactory.byteTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.byteTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfoFactory.booleanTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.booleanTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) TypeInfoFactory.stringTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) CharType.createCharType(com.facebook.presto.common.type.CharType.createCharType) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) VarcharType.createVarcharType(com.facebook.presto.common.type.VarcharType.createVarcharType) Type(com.facebook.presto.common.type.Type) DecimalType.createDecimalType(com.facebook.presto.common.type.DecimalType.createDecimalType) TypeSignature(com.facebook.presto.common.type.TypeSignature) NamedTypeSignature(com.facebook.presto.common.type.NamedTypeSignature) TypeSignatureParameter(com.facebook.presto.common.type.TypeSignatureParameter) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo)

Example 14 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project flink by apache.

the class HiveParserUtils method toRelDataType.

// converts a hive TypeInfo to RelDataType
public static RelDataType toRelDataType(TypeInfo typeInfo, RelDataTypeFactory relTypeFactory) throws SemanticException {
    RelDataType res;
    switch(typeInfo.getCategory()) {
        case PRIMITIVE:
            // hive sets NULLABLE for all primitive types, revert that
            res = HiveParserTypeConverter.convert(typeInfo, relTypeFactory);
            return relTypeFactory.createTypeWithNullability(res, false);
        case LIST:
            RelDataType elementType = toRelDataType(((ListTypeInfo) typeInfo).getListElementTypeInfo(), relTypeFactory);
            return relTypeFactory.createArrayType(elementType, -1);
        case MAP:
            RelDataType keyType = toRelDataType(((MapTypeInfo) typeInfo).getMapKeyTypeInfo(), relTypeFactory);
            RelDataType valType = toRelDataType(((MapTypeInfo) typeInfo).getMapValueTypeInfo(), relTypeFactory);
            return relTypeFactory.createMapType(keyType, valType);
        case STRUCT:
            List<TypeInfo> types = ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos();
            List<RelDataType> convertedTypes = new ArrayList<>(types.size());
            for (TypeInfo type : types) {
                convertedTypes.add(toRelDataType(type, relTypeFactory));
            }
            return relTypeFactory.createStructType(convertedTypes, ((StructTypeInfo) typeInfo).getAllStructFieldNames());
        case UNION:
        default:
            throw new SemanticException(String.format("%s type is not supported yet", typeInfo.getCategory().name()));
    }
}
Also used : ArrayList(java.util.ArrayList) RelDataType(org.apache.calcite.rel.type.RelDataType) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)

Example 15 with StructTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo in project haivvreo by jghoman.

the class AvroObjectInspectorGenerator method createObjectInspectorWorker.

private ObjectInspector createObjectInspectorWorker(TypeInfo ti) throws SerDeException {
    // at deserialization and the object inspector will never see the actual union.
    if (!supportedCategories(ti))
        throw new HaivvreoException("Don't yet support this type: " + ti);
    ObjectInspector result;
    switch(ti.getCategory()) {
        case PRIMITIVE:
            PrimitiveTypeInfo pti = (PrimitiveTypeInfo) ti;
            result = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(pti.getPrimitiveCategory());
            break;
        case STRUCT:
            StructTypeInfo sti = (StructTypeInfo) ti;
            ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>(sti.getAllStructFieldTypeInfos().size());
            for (TypeInfo typeInfo : sti.getAllStructFieldTypeInfos()) {
                ois.add(createObjectInspectorWorker(typeInfo));
            }
            result = ObjectInspectorFactory.getStandardStructObjectInspector(sti.getAllStructFieldNames(), ois);
            break;
        case MAP:
            MapTypeInfo mti = (MapTypeInfo) ti;
            result = ObjectInspectorFactory.getStandardMapObjectInspector(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING), createObjectInspectorWorker(mti.getMapValueTypeInfo()));
            break;
        case LIST:
            ListTypeInfo ati = (ListTypeInfo) ti;
            result = ObjectInspectorFactory.getStandardListObjectInspector(createObjectInspectorWorker(ati.getListElementTypeInfo()));
            break;
        case UNION:
            UnionTypeInfo uti = (UnionTypeInfo) ti;
            List<TypeInfo> allUnionObjectTypeInfos = uti.getAllUnionObjectTypeInfos();
            List<ObjectInspector> unionObjectInspectors = new ArrayList<ObjectInspector>(allUnionObjectTypeInfos.size());
            for (TypeInfo typeInfo : allUnionObjectTypeInfos) {
                unionObjectInspectors.add(createObjectInspectorWorker(typeInfo));
            }
            result = ObjectInspectorFactory.getStandardUnionObjectInspector(unionObjectInspectors);
            break;
        default:
            throw new HaivvreoException("No Hive categories matched: " + ti);
    }
    return result;
}
Also used : PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList)

Aggregations

StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)97 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)76 ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo)57 MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo)52 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)52 ArrayList (java.util.ArrayList)41 UnionTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)30 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)29 CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)24 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)23 VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo)21 List (java.util.List)20 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)17 IntWritable (org.apache.hadoop.io.IntWritable)12 Text (org.apache.hadoop.io.Text)12 BytesWritable (org.apache.hadoop.io.BytesWritable)11 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)10 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)10 BooleanWritable (org.apache.hadoop.io.BooleanWritable)10 LongWritable (org.apache.hadoop.io.LongWritable)10