Search in sources :

Example 11 with AvroSerdeException

use of org.apache.hadoop.hive.serde2.avro.AvroSerdeException in project hive by apache.

the class AvroSerializer method serializeStruct.

private Object serializeStruct(StructTypeInfo typeInfo, StructObjectInspector ssoi, Object o, Schema schema) throws AvroSerdeException {
    int size = schema.getFields().size();
    List<? extends StructField> allStructFieldRefs = ssoi.getAllStructFieldRefs();
    List<Object> structFieldsDataAsList = ssoi.getStructFieldsDataAsList(o);
    GenericData.Record record = new GenericData.Record(schema);
    ArrayList<TypeInfo> allStructFieldTypeInfos = typeInfo.getAllStructFieldTypeInfos();
    for (int i = 0; i < size; i++) {
        Field field = schema.getFields().get(i);
        TypeInfo colTypeInfo = allStructFieldTypeInfos.get(i);
        StructField structFieldRef = allStructFieldRefs.get(i);
        Object structFieldData = structFieldsDataAsList.get(i);
        ObjectInspector fieldOI = structFieldRef.getFieldObjectInspector();
        Object val = serialize(colTypeInfo, fieldOI, structFieldData, field.schema());
        record.put(field.name(), val);
    }
    return record;
}
Also used : Field(org.apache.avro.Schema.Field) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) DateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) GenericData(org.apache.avro.generic.GenericData) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)

Example 12 with AvroSerdeException

use of org.apache.hadoop.hive.serde2.avro.AvroSerdeException in project hive by apache.

the class AvroSerializer method serialize.

// Hive is pretty simple (read: stupid) in writing out values via the serializer.
// We're just going to go through, matching indices.  Hive formats normally
// handle mismatches with null.  We don't have that option, so instead we'll
// end up throwing an exception for invalid records.
public Writable serialize(Object o, ObjectInspector objectInspector, List<String> columnNames, List<TypeInfo> columnTypes, Schema schema) throws AvroSerdeException {
    StructObjectInspector soi = (StructObjectInspector) objectInspector;
    GenericData.Record record = new GenericData.Record(schema);
    List<? extends StructField> outputFieldRefs = soi.getAllStructFieldRefs();
    if (outputFieldRefs.size() != columnNames.size()) {
        throw new AvroSerdeException("Number of input columns was different than output columns (in = " + columnNames.size() + " vs out = " + outputFieldRefs.size());
    }
    int size = schema.getFields().size();
    if (outputFieldRefs.size() != size) {
        throw new AvroSerdeException("Hive passed in a different number of fields than the schema expected: (Hive wanted " + outputFieldRefs.size() + ", Avro expected " + schema.getFields().size());
    }
    List<? extends StructField> allStructFieldRefs = soi.getAllStructFieldRefs();
    List<Object> structFieldsDataAsList = soi.getStructFieldsDataAsList(o);
    for (int i = 0; i < size; i++) {
        Field field = schema.getFields().get(i);
        TypeInfo typeInfo = columnTypes.get(i);
        StructField structFieldRef = allStructFieldRefs.get(i);
        Object structFieldData = structFieldsDataAsList.get(i);
        ObjectInspector fieldOI = structFieldRef.getFieldObjectInspector();
        Object val = serialize(typeInfo, fieldOI, structFieldData, field.schema());
        record.put(field.name(), val);
    }
    if (!GenericData.get().validate(schema, record)) {
        throw new SerializeToAvroException(schema, record);
    }
    cache.setRecord(record);
    return cache;
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) DateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector) GenericData(org.apache.avro.generic.GenericData) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) Field(org.apache.avro.Schema.Field) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 13 with AvroSerdeException

use of org.apache.hadoop.hive.serde2.avro.AvroSerdeException in project hive by apache.

the class AvroSerializer method serializeMap.

private Object serializeMap(MapTypeInfo typeInfo, MapObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
    // Avro only allows maps with string keys
    if (!mapHasStringKey(fieldOI.getMapKeyObjectInspector())) {
        throw new AvroSerdeException("Avro only supports maps with keys as Strings.  Current Map is: " + typeInfo.toString());
    }
    ObjectInspector mapKeyObjectInspector = fieldOI.getMapKeyObjectInspector();
    ObjectInspector mapValueObjectInspector = fieldOI.getMapValueObjectInspector();
    TypeInfo mapKeyTypeInfo = typeInfo.getMapKeyTypeInfo();
    TypeInfo mapValueTypeInfo = typeInfo.getMapValueTypeInfo();
    Map<?, ?> map = fieldOI.getMap(structFieldData);
    Schema valueType = schema.getValueType();
    Map<Object, Object> deserialized = new HashMap<Object, Object>(fieldOI.getMapSize(structFieldData));
    for (Map.Entry<?, ?> entry : map.entrySet()) {
        deserialized.put(serialize(mapKeyTypeInfo, mapKeyObjectInspector, entry.getKey(), STRING_SCHEMA), serialize(mapValueTypeInfo, mapValueObjectInspector, entry.getValue(), valueType));
    }
    return deserialized;
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) DateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector) HashMap(java.util.HashMap) Schema(org.apache.avro.Schema) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) HashMap(java.util.HashMap) Map(java.util.Map)

Example 14 with AvroSerdeException

use of org.apache.hadoop.hive.serde2.avro.AvroSerdeException in project hive by apache.

the class AvroDeserializer method deserializePrimitive.

private Object deserializePrimitive(Object datum, Schema fileSchema, Schema recordSchema, PrimitiveTypeInfo columnType) throws AvroSerdeException {
    switch(columnType.getPrimitiveCategory()) {
        case STRING:
            // To workaround AvroUTF8
            return datum.toString();
        // and convert it to a string. Yay!
        case BINARY:
            if (recordSchema.getType() == Type.FIXED) {
                Fixed fixed = (Fixed) datum;
                return fixed.bytes();
            } else if (recordSchema.getType() == Type.BYTES) {
                return AvroSerdeUtils.getBytesFromByteBuffer((ByteBuffer) datum);
            } else {
                throw new AvroSerdeException("Unexpected Avro schema for Binary TypeInfo: " + recordSchema.getType());
            }
        case DECIMAL:
            if (fileSchema == null) {
                throw new AvroSerdeException("File schema is missing for decimal field. Reader schema is " + columnType);
            }
            int scale = 0;
            try {
                scale = fileSchema.getJsonProp(AvroSerDe.AVRO_PROP_SCALE).asInt();
            } catch (Exception ex) {
                throw new AvroSerdeException("Failed to obtain scale value from file schema: " + fileSchema, ex);
            }
            HiveDecimal dec = AvroSerdeUtils.getHiveDecimalFromByteBuffer((ByteBuffer) datum, scale);
            JavaHiveDecimalObjectInspector oi = (JavaHiveDecimalObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector((DecimalTypeInfo) columnType);
            return oi.set(null, dec);
        case CHAR:
            if (fileSchema == null) {
                throw new AvroSerdeException("File schema is missing for char field. Reader schema is " + columnType);
            }
            int maxLength = 0;
            try {
                maxLength = fileSchema.getJsonProp(AvroSerDe.AVRO_PROP_MAX_LENGTH).getValueAsInt();
            } catch (Exception ex) {
                throw new AvroSerdeException("Failed to obtain maxLength value for char field from file schema: " + fileSchema, ex);
            }
            String str = datum.toString();
            HiveChar hc = new HiveChar(str, maxLength);
            return hc;
        case VARCHAR:
            if (fileSchema == null) {
                throw new AvroSerdeException("File schema is missing for varchar field. Reader schema is " + columnType);
            }
            maxLength = 0;
            try {
                maxLength = fileSchema.getJsonProp(AvroSerDe.AVRO_PROP_MAX_LENGTH).getValueAsInt();
            } catch (Exception ex) {
                throw new AvroSerdeException("Failed to obtain maxLength value for varchar field from file schema: " + fileSchema, ex);
            }
            str = datum.toString();
            HiveVarchar hvc = new HiveVarchar(str, maxLength);
            return hvc;
        case DATE:
            if (recordSchema.getType() != Type.INT) {
                throw new AvroSerdeException("Unexpected Avro schema for Date TypeInfo: " + recordSchema.getType());
            }
            return new Date(DateWritable.daysToMillis((Integer) datum));
        case TIMESTAMP:
            if (recordSchema.getType() != Type.LONG) {
                throw new AvroSerdeException("Unexpected Avro schema for Date TypeInfo: " + recordSchema.getType());
            }
            return new Timestamp((Long) datum);
        default:
            return datum;
    }
}
Also used : HiveChar(org.apache.hadoop.hive.common.type.HiveChar) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) ByteBuffer(java.nio.ByteBuffer) Timestamp(java.sql.Timestamp) UnresolvedUnionException(org.apache.avro.UnresolvedUnionException) IOException(java.io.IOException) Date(java.sql.Date) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) JavaHiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveDecimalObjectInspector) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) Fixed(org.apache.avro.generic.GenericData.Fixed)

Example 15 with AvroSerdeException

use of org.apache.hadoop.hive.serde2.avro.AvroSerdeException in project hive by apache.

the class AvroDeserializer method deserializeMap.

private Object deserializeMap(Object datum, Schema fileSchema, Schema mapSchema, MapTypeInfo columnType) throws AvroSerdeException {
    // Avro only allows maps with Strings for keys, so we only have to worry
    // about deserializing the values
    Map<String, Object> map = new HashMap<String, Object>();
    Map<CharSequence, Object> mapDatum = (Map) datum;
    Schema valueSchema = mapSchema.getValueType();
    TypeInfo valueTypeInfo = columnType.getMapValueTypeInfo();
    for (CharSequence key : mapDatum.keySet()) {
        Object value = mapDatum.get(key);
        map.put(key.toString(), worker(value, fileSchema == null ? null : fileSchema.getValueType(), valueSchema, valueTypeInfo));
    }
    return map;
}
Also used : HashMap(java.util.HashMap) Schema(org.apache.avro.Schema) HashMap(java.util.HashMap) Map(java.util.Map) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)

Aggregations

TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)11 Schema (org.apache.avro.Schema)10 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)7 ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo)7 MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo)7 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)7 UnionTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)7 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)5 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)5 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)4 MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector)4 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)4 UnionObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector)4 DateObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector)4 TimestampObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector)4 IOException (java.io.IOException)3 Properties (java.util.Properties)3 Configuration (org.apache.hadoop.conf.Configuration)3 AvroTableProperties (org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.AvroTableProperties)3 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)3