Search in sources:

Example 1 with Fixed

Use of org.apache.avro.generic.GenericData.Fixed in project hive by apache.

In the class AvroDeserializer, method deserializePrimitive.

/**
 * Converts a single Avro datum into the Java object used for a Hive primitive column.
 *
 * <p>The conversion is selected by {@code columnType.getPrimitiveCategory()}:
 * <ul>
 *   <li>STRING: {@code datum.toString()}.</li>
 *   <li>BINARY: the bytes of a FIXED datum, or the bytes extracted from a BYTES
 *       {@link ByteBuffer}; any other record schema type is rejected.</li>
 *   <li>DECIMAL: a decimal built from the {@link ByteBuffer} datum and the scale
 *       property stored on {@code fileSchema}.</li>
 *   <li>CHAR / VARCHAR: {@code datum.toString()} wrapped with the max-length property
 *       stored on {@code fileSchema}.</li>
 *   <li>DATE: an INT datum converted through {@code DateWritable.daysToMillis}.</li>
 *   <li>TIMESTAMP: a LONG datum passed to the {@link Timestamp} constructor.</li>
 *   <li>anything else: the datum is returned unchanged.</li>
 * </ul>
 *
 * @param datum        the Avro value to convert
 * @param fileSchema   schema the data was written with; required (non-null) for DECIMAL,
 *                     CHAR and VARCHAR because their scale / max length is read from it
 * @param recordSchema schema used to decide how BINARY, DATE and TIMESTAMP data is encoded
 * @param columnType   the Hive primitive type of the target column
 * @return the converted value
 * @throws AvroSerdeException if a required file schema or schema property is missing or
 *                            unreadable, or if the record schema type does not match the
 *                            requested Hive type
 */
private Object deserializePrimitive(Object datum, Schema fileSchema, Schema recordSchema, PrimitiveTypeInfo columnType) throws AvroSerdeException {
    switch(columnType.getPrimitiveCategory()) {
        case STRING:
            // To work around Avro's Utf8 wrapper: just take the value and convert it to a string.
            return datum.toString();
        case BINARY:
            if (recordSchema.getType() == Type.FIXED) {
                Fixed fixed = (Fixed) datum;
                return fixed.bytes();
            } else if (recordSchema.getType() == Type.BYTES) {
                return AvroSerdeUtils.getBytesFromByteBuffer((ByteBuffer) datum);
            } else {
                throw new AvroSerdeException("Unexpected Avro schema for Binary TypeInfo: " + recordSchema.getType());
            }
        case DECIMAL:
            if (fileSchema == null) {
                throw new AvroSerdeException("File schema is missing for decimal field. Reader schema is " + columnType);
            }
            int scale = 0;
            try {
                scale = fileSchema.getJsonProp(AvroSerDe.AVRO_PROP_SCALE).asInt();
            } catch (Exception ex) {
                // Covers a missing property (null node) as well as an unreadable one.
                throw new AvroSerdeException("Failed to obtain scale value from file schema: " + fileSchema, ex);
            }
            HiveDecimal dec = AvroSerdeUtils.getHiveDecimalFromByteBuffer((ByteBuffer) datum, scale);
            JavaHiveDecimalObjectInspector oi = (JavaHiveDecimalObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector((DecimalTypeInfo) columnType);
            return oi.set(null, dec);
        case CHAR:
            if (fileSchema == null) {
                throw new AvroSerdeException("File schema is missing for char field. Reader schema is " + columnType);
            }
            int charMaxLength = 0;
            try {
                charMaxLength = fileSchema.getJsonProp(AvroSerDe.AVRO_PROP_MAX_LENGTH).getValueAsInt();
            } catch (Exception ex) {
                throw new AvroSerdeException("Failed to obtain maxLength value for char field from file schema: " + fileSchema, ex);
            }
            return new HiveChar(datum.toString(), charMaxLength);
        case VARCHAR:
            if (fileSchema == null) {
                throw new AvroSerdeException("File schema is missing for varchar field. Reader schema is " + columnType);
            }
            int varcharMaxLength = 0;
            try {
                varcharMaxLength = fileSchema.getJsonProp(AvroSerDe.AVRO_PROP_MAX_LENGTH).getValueAsInt();
            } catch (Exception ex) {
                throw new AvroSerdeException("Failed to obtain maxLength value for varchar field from file schema: " + fileSchema, ex);
            }
            return new HiveVarchar(datum.toString(), varcharMaxLength);
        case DATE:
            if (recordSchema.getType() != Type.INT) {
                throw new AvroSerdeException("Unexpected Avro schema for Date TypeInfo: " + recordSchema.getType());
            }
            return new Date(DateWritable.daysToMillis((Integer) datum));
        case TIMESTAMP:
            if (recordSchema.getType() != Type.LONG) {
                // Name the type that actually failed; this message previously said "Date".
                throw new AvroSerdeException("Unexpected Avro schema for Timestamp TypeInfo: " + recordSchema.getType());
            }
            return new Timestamp((Long) datum);
        default:
            // Remaining primitive categories need no conversion.
            return datum;
    }
}
Also used : HiveChar(org.apache.hadoop.hive.common.type.HiveChar) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) ByteBuffer(java.nio.ByteBuffer) Timestamp(java.sql.Timestamp) UnresolvedUnionException(org.apache.avro.UnresolvedUnionException) IOException(java.io.IOException) Date(java.sql.Date) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) JavaHiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveDecimalObjectInspector) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) Fixed(org.apache.avro.generic.GenericData.Fixed)

Example 2 with Fixed

Use of org.apache.avro.generic.GenericData.Fixed in project hive by apache.

In the class AvroDeserializer, method deserializeList.

/**
 * Deserializes an Avro datum that Hive sees as a list column.
 *
 * <p>Three encodings are handled, chosen by {@code recordSchema.getType()}:
 * <ul>
 *   <li>FIXED: the fixed's bytes are boxed into a {@code List<Byte>}.</li>
 *   <li>BYTES: the buffer's content from index 0 up to its limit is boxed into a
 *       {@code List<Byte>}.</li>
 *   <li>anything else: the datum is treated as a {@link List} and each element is
 *       deserialized through {@code worker} using the element schemas and the list's
 *       element type info.</li>
 * </ul>
 *
 * @param datum        the Avro value: a {@code GenericData.Fixed}, a {@link ByteBuffer} or a
 *                     {@link List}, matching {@code recordSchema}
 * @param fileSchema   schema the data was written with; may be {@code null}, in which case
 *                     {@code null} is passed on as the element file schema
 * @param recordSchema schema that decides which of the three encodings applies
 * @param columnType   the Hive list type of the target column
 * @return a new list holding the converted contents
 * @throws AvroSerdeException if deserializing an element fails
 */
private Object deserializeList(Object datum, Schema fileSchema, Schema recordSchema, ListTypeInfo columnType) throws AvroSerdeException {
    // Need to check the original schema to see if this is actually a Fixed.
    if (recordSchema.getType().equals(Schema.Type.FIXED)) {
        // We're faking out Hive to work through a type system impedance mismatch.
        // Pull out the backing array once and convert it to a list.
        byte[] fixedBytes = ((GenericData.Fixed) datum).bytes();
        List<Byte> asList = new ArrayList<Byte>(fixedBytes.length);
        for (byte b : fixedBytes) {
            asList.add(b);
        }
        return asList;
    } else if (recordSchema.getType().equals(Schema.Type.BYTES)) {
        // Read through a rewound duplicate instead of bb.array(): array() exposes the whole
        // backing array (ignoring limit and array offset, so a reused or sliced buffer would
        // leak stale bytes) and throws for read-only or direct buffers. The duplicate keeps
        // the caller's position and limit untouched.
        ByteBuffer view = ((ByteBuffer) datum).duplicate();
        view.rewind();
        List<Byte> asList = new ArrayList<Byte>(view.remaining());
        while (view.hasRemaining()) {
            asList.add(view.get());
        }
        return asList;
    } else {
        // An actual list, deser its values
        List<?> listData = (List<?>) datum;
        Schema listSchema = recordSchema.getElementType();
        List<Object> listContents = new ArrayList<Object>(listData.size());
        for (Object obj : listData) {
            listContents.add(worker(obj, fileSchema == null ? null : fileSchema.getElementType(), listSchema, columnType.getListElementTypeInfo()));
        }
        return listContents;
    }
}
Also used : Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) GenericData(org.apache.avro.generic.GenericData) ByteBuffer(java.nio.ByteBuffer) Fixed(org.apache.avro.generic.GenericData.Fixed) ArrayList(java.util.ArrayList) List(java.util.List) Fixed(org.apache.avro.generic.GenericData.Fixed)

Aggregations

ByteBuffer (java.nio.ByteBuffer): 2; Fixed (org.apache.avro.generic.GenericData.Fixed): 2; IOException (java.io.IOException): 1; Date (java.sql.Date): 1; Timestamp (java.sql.Timestamp): 1; ArrayList (java.util.ArrayList): 1; List (java.util.List): 1; Schema (org.apache.avro.Schema): 1; UnresolvedUnionException (org.apache.avro.UnresolvedUnionException): 1; GenericData (org.apache.avro.generic.GenericData): 1; HiveChar (org.apache.hadoop.hive.common.type.HiveChar): 1; HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal): 1; HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar): 1; JavaHiveDecimalObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveDecimalObjectInspector): 1; DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo): 1