Search in sources :

Example 21 with MapTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo in project hive by apache.

the class FunctionRegistry method matchCost.

/**
   * Returns -1 if passed does not match accepted. Otherwise return the cost
   * (usually 0 for no conversion and 1 for conversion).
   */
public static int matchCost(TypeInfo argumentPassed, TypeInfo argumentAccepted, boolean exact) {
    if (argumentAccepted.equals(argumentPassed) || TypeInfoUtils.doPrimitiveCategoriesMatch(argumentPassed, argumentAccepted)) {
        // matches
        return 0;
    }
    if (argumentPassed.equals(TypeInfoFactory.voidTypeInfo)) {
        // passing null matches everything
        return 0;
    }
    if (argumentPassed.getCategory().equals(Category.LIST) && argumentAccepted.getCategory().equals(Category.LIST)) {
        // lists are compatible if and only-if the elements are compatible
        TypeInfo argumentPassedElement = ((ListTypeInfo) argumentPassed).getListElementTypeInfo();
        TypeInfo argumentAcceptedElement = ((ListTypeInfo) argumentAccepted).getListElementTypeInfo();
        return matchCost(argumentPassedElement, argumentAcceptedElement, exact);
    }
    if (argumentPassed.getCategory().equals(Category.MAP) && argumentAccepted.getCategory().equals(Category.MAP)) {
        // lists are compatible if and only-if the elements are compatible
        TypeInfo argumentPassedKey = ((MapTypeInfo) argumentPassed).getMapKeyTypeInfo();
        TypeInfo argumentAcceptedKey = ((MapTypeInfo) argumentAccepted).getMapKeyTypeInfo();
        TypeInfo argumentPassedValue = ((MapTypeInfo) argumentPassed).getMapValueTypeInfo();
        TypeInfo argumentAcceptedValue = ((MapTypeInfo) argumentAccepted).getMapValueTypeInfo();
        int cost1 = matchCost(argumentPassedKey, argumentAcceptedKey, exact);
        int cost2 = matchCost(argumentPassedValue, argumentAcceptedValue, exact);
        if (cost1 < 0 || cost2 < 0) {
            return -1;
        }
        return Math.max(cost1, cost2);
    }
    if (argumentAccepted.equals(TypeInfoFactory.unknownTypeInfo)) {
        // but there is a conversion cost.
        return 1;
    }
    if (!exact && TypeInfoUtils.implicitConvertible(argumentPassed, argumentAccepted)) {
        return 1;
    }
    return -1;
}
Also used : ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)

Example 22 with MapTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo in project hive by apache.

the class HCatSchemaUtils method getHCatFieldSchema.

private static HCatFieldSchema getHCatFieldSchema(String fieldName, TypeInfo fieldTypeInfo, String comment) throws HCatException {
    Category typeCategory = fieldTypeInfo.getCategory();
    HCatFieldSchema hCatFieldSchema;
    if (Category.PRIMITIVE == typeCategory) {
        hCatFieldSchema = new HCatFieldSchema(fieldName, (PrimitiveTypeInfo) fieldTypeInfo, comment);
    } else if (Category.STRUCT == typeCategory) {
        HCatSchema subSchema = constructHCatSchema((StructTypeInfo) fieldTypeInfo);
        hCatFieldSchema = new HCatFieldSchema(fieldName, HCatFieldSchema.Type.STRUCT, subSchema, comment);
    } else if (Category.LIST == typeCategory) {
        HCatSchema subSchema = getHCatSchema(((ListTypeInfo) fieldTypeInfo).getListElementTypeInfo());
        hCatFieldSchema = new HCatFieldSchema(fieldName, HCatFieldSchema.Type.ARRAY, subSchema, comment);
    } else if (Category.MAP == typeCategory) {
        HCatSchema subSchema = getHCatSchema(((MapTypeInfo) fieldTypeInfo).getMapValueTypeInfo());
        hCatFieldSchema = HCatFieldSchema.createMapTypeFieldSchema(fieldName, (PrimitiveTypeInfo) ((MapTypeInfo) fieldTypeInfo).getMapKeyTypeInfo(), subSchema, comment);
    } else {
        throw new TypeNotPresentException(fieldTypeInfo.getTypeName(), null);
    }
    return hCatFieldSchema;
}
Also used : Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)

Example 23 with MapTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo in project cdap by caskdata.

the class ObjectSerializer method serializeMap.

private Object serializeMap(Map<Object, Object> map, MapTypeInfo typeInfo) {
    // need to recurse since it may contain structs
    Map<Object, Object> serialized = Maps.newHashMapWithExpectedSize(map.size());
    TypeInfo keyType = typeInfo.getMapKeyTypeInfo();
    TypeInfo valType = typeInfo.getMapValueTypeInfo();
    for (Map.Entry<Object, Object> mapEntry : map.entrySet()) {
        serialized.put(serialize(mapEntry.getKey(), keyType), serialize(mapEntry.getValue(), valType));
    }
    return serialized;
}
Also used : MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) HashMap(java.util.HashMap) Map(java.util.Map) LazyMap(org.apache.hadoop.hive.serde2.lazy.LazyMap)

Example 24 with MapTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo in project cdap by caskdata.

the class ObjectSerializer method fromLazyObject.

private Object fromLazyObject(TypeInfo type, Object data) {
    if (data == null) {
        return null;
    }
    switch(type.getCategory()) {
        case PRIMITIVE:
            Writable writable = ((LazyPrimitive) data).getWritableObject();
            return fromWritable(writable);
        case LIST:
            ListTypeInfo listType = (ListTypeInfo) type;
            TypeInfo listElementType = listType.getListElementTypeInfo();
            List<Object> list = ((LazyArray) data).getList();
            if (list.isEmpty()) {
                return ImmutableList.of();
            }
            Object[] arrayContent = new Object[list.size()];
            for (int i = 0; i < arrayContent.length; i++) {
                arrayContent[i] = fromLazyObject(listElementType, list.get(i));
            }
            return arrayContent;
        case MAP:
            MapTypeInfo mapType = (MapTypeInfo) type;
            Map<Object, Object> mapContent = Maps.newConcurrentMap();
            Map<Object, Object> map = ((LazyMap) data).getMap();
            for (Map.Entry<Object, Object> entry : map.entrySet()) {
                mapContent.put(fromLazyObject(mapType.getMapKeyTypeInfo(), entry.getKey()), fromLazyObject(mapType.getMapValueTypeInfo(), entry.getValue()));
            }
            return mapContent;
        case STRUCT:
            StructTypeInfo structType = (StructTypeInfo) type;
            List<TypeInfo> info = structType.getAllStructFieldTypeInfos();
            List<String> names = structType.getAllStructFieldNames();
            Map<String, Object> structMap = Maps.newConcurrentMap();
            List<Object> struct = ((LazyStruct) data).getFieldsAsList();
            for (int structIndex = 0; structIndex < info.size(); structIndex++) {
                structMap.put(names.get(structIndex), fromLazyObject(info.get(structIndex), struct.get(structIndex)));
            }
            return structMap;
        case UNION:
            throw new UnsupportedOperationException("union not yet supported");
        default:
            return data.toString();
    }
}
Also used : LazyMap(org.apache.hadoop.hive.serde2.lazy.LazyMap) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) NullWritable(org.apache.hadoop.io.NullWritable) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) Writable(org.apache.hadoop.io.Writable) DoubleWritable(org.apache.hadoop.io.DoubleWritable) LongWritable(org.apache.hadoop.io.LongWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) ShortWritable(org.apache.hadoop.io.ShortWritable) IntWritable(org.apache.hadoop.io.IntWritable) HiveBaseCharWritable(org.apache.hadoop.hive.serde2.io.HiveBaseCharWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) FloatWritable(org.apache.hadoop.io.FloatWritable) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) LazyPrimitive(org.apache.hadoop.hive.serde2.lazy.LazyPrimitive) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) LazyArray(org.apache.hadoop.hive.serde2.lazy.LazyArray) HashMap(java.util.HashMap) Map(java.util.Map) LazyMap(org.apache.hadoop.hive.serde2.lazy.LazyMap) LazyStruct(org.apache.hadoop.hive.serde2.lazy.LazyStruct)

Example 25 with MapTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo in project cdap by caskdata.

the class ObjectDeserializer method deserializeField.

/**
   * Translate a field that fits a {@link Schema} field into a type that Hive understands.
   * For example, a ByteBuffer is allowed by schema but Hive only understands byte arrays, so all ByteBuffers must
   * be changed into byte arrays. Reflection is used to examine java objects if the expected hive type is a struct.
   *
   * @param field value of the field to deserialize.
   * @param typeInfo type of the field as expected by Hive.
   * @param schema schema of the field.
   * @return translated field.
   * @throws NoSuchFieldException if a struct field was expected but not found in the object.
   * @throws IllegalAccessException if a struct field was not accessible.
   */
private Object deserializeField(Object field, TypeInfo typeInfo, Schema schema) throws NoSuchFieldException, IllegalAccessException {
    boolean isNullable = schema.isNullable();
    if (field == null) {
        if (isNullable) {
            return null;
        } else {
            throw new UnexpectedFormatException("Non-nullable field was null.");
        }
    }
    if (isNullable) {
        schema = schema.getNonNullable();
    }
    switch(typeInfo.getCategory()) {
        case PRIMITIVE:
            return deserializePrimitive(field, (PrimitiveTypeInfo) typeInfo);
        case LIST:
            // HIVE!! some versions will turn bytes into array<tinyint> instead of binary... so special case it.
            // TODO: remove once CDAP-1556 is done
            ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
            if (isByteArray(listTypeInfo) && !(field instanceof Collection)) {
                return deserializeByteArray(field);
            }
            return deserializeList(field, (ListTypeInfo) typeInfo, schema.getComponentSchema());
        case MAP:
            return deserializeMap(field, (MapTypeInfo) typeInfo, schema.getMapSchema());
        case STRUCT:
            StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
            ArrayList<String> innerFieldNames = structTypeInfo.getAllStructFieldNames();
            ArrayList<TypeInfo> innerFieldTypes = structTypeInfo.getAllStructFieldTypeInfos();
            return flattenRecord(field, innerFieldNames, innerFieldTypes, schema);
        case UNION:
            // TODO: decide what to do here
            return field;
    }
    return null;
}
Also used : ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) UnexpectedFormatException(co.cask.cdap.api.data.format.UnexpectedFormatException) Collection(java.util.Collection) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)

Aggregations

MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo)24 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)22 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)21 ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo)20 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)20 ArrayList (java.util.ArrayList)8 HashMap (java.util.HashMap)8 Map (java.util.Map)7 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)7 UnionTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)7 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)5 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)4 List (java.util.List)3 CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)3 PrestoException (com.facebook.presto.spi.PrestoException)2 ImmutableList (com.google.common.collect.ImmutableList)2 Schema (org.apache.avro.Schema)2 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)2 ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)2 DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable)2