Use of org.apache.hadoop.hive.serde2.avro.AvroSerdeException in project hive by apache.
The class AvroSerializer, method serializeStruct.
private Object serializeStruct(StructTypeInfo typeInfo, StructObjectInspector ssoi, Object o, Schema schema) throws AvroSerdeException {
  int size = schema.getFields().size();
  List<? extends StructField> allStructFieldRefs = ssoi.getAllStructFieldRefs();
  List<Object> structFieldsDataAsList = ssoi.getStructFieldsDataAsList(o);
  GenericData.Record record = new GenericData.Record(schema);
  ArrayList<TypeInfo> allStructFieldTypeInfos = typeInfo.getAllStructFieldTypeInfos();
  for (int i = 0; i < size; i++) {
    Field field = schema.getFields().get(i);
    TypeInfo colTypeInfo = allStructFieldTypeInfos.get(i);
    StructField structFieldRef = allStructFieldRefs.get(i);
    Object structFieldData = structFieldsDataAsList.get(i);
    ObjectInspector fieldOI = structFieldRef.getFieldObjectInspector();
    Object val = serialize(colTypeInfo, fieldOI, structFieldData, field.schema());
    record.put(field.name(), val);
  }
  return record;
}
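The field-by-index matching above can be shown without any Hive machinery. Below is a minimal standalone sketch using only the Avro API; the Person schema and the columnValues list are hypothetical stand-ins for what a StructObjectInspector would hand back, in schema-field order.

import java.util.List;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;

public class StructMappingSketch {
  public static void main(String[] args) {
    // Hypothetical schema standing in for a Hive struct<name:string,age:int>.
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Person\",\"fields\":["
        + "{\"name\":\"name\",\"type\":\"string\"},"
        + "{\"name\":\"age\",\"type\":\"int\"}]}");
    // Stand-in for the column values the object inspector would supply.
    List<Object> columnValues = List.of("Ada", 36);
    // Mirror serializeStruct: match schema fields and values by index.
    GenericData.Record record = new GenericData.Record(schema);
    for (int i = 0; i < schema.getFields().size(); i++) {
      record.put(schema.getFields().get(i).name(), columnValues.get(i));
    }
    System.out.println(record); // {"name": "Ada", "age": 36}
  }
}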
Use of org.apache.hadoop.hive.serde2.avro.AvroSerdeException in project hive by apache.
The class AvroSerializer, method serialize.
// Hive is pretty simple (read: stupid) in writing out values via the serializer.
// We're just going to go through, matching indices. Hive formats normally
// handle mismatches with null. We don't have that option, so instead we'll
// end up throwing an exception for invalid records.
public Writable serialize(Object o, ObjectInspector objectInspector, List<String> columnNames, List<TypeInfo> columnTypes, Schema schema) throws AvroSerdeException {
  StructObjectInspector soi = (StructObjectInspector) objectInspector;
  GenericData.Record record = new GenericData.Record(schema);
  List<? extends StructField> outputFieldRefs = soi.getAllStructFieldRefs();
  if (outputFieldRefs.size() != columnNames.size()) {
    throw new AvroSerdeException("Number of input columns was different than output columns (in = " + columnNames.size() + " vs out = " + outputFieldRefs.size() + ")");
  }
  int size = schema.getFields().size();
  if (outputFieldRefs.size() != size) {
    throw new AvroSerdeException("Hive passed in a different number of fields than the schema expected: (Hive wanted " + outputFieldRefs.size() + ", Avro expected " + schema.getFields().size() + ")");
  }
  List<? extends StructField> allStructFieldRefs = soi.getAllStructFieldRefs();
  List<Object> structFieldsDataAsList = soi.getStructFieldsDataAsList(o);
  for (int i = 0; i < size; i++) {
    Field field = schema.getFields().get(i);
    TypeInfo typeInfo = columnTypes.get(i);
    StructField structFieldRef = allStructFieldRefs.get(i);
    Object structFieldData = structFieldsDataAsList.get(i);
    ObjectInspector fieldOI = structFieldRef.getFieldObjectInspector();
    Object val = serialize(typeInfo, fieldOI, structFieldData, field.schema());
    record.put(field.name(), val);
  }
  if (!GenericData.get().validate(schema, record)) {
    throw new SerializeToAvroException(schema, record);
  }
  cache.setRecord(record);
  return cache;
}
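The validate call at the end is what turns a type mismatch into a SerializeToAvroException rather than a silently corrupt record. A small sketch of GenericData.get().validate in isolation, using a hypothetical one-field schema:

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;

public class ValidateSketch {
  public static void main(String[] args) {
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"R\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"}]}");
    GenericData.Record good = new GenericData.Record(schema);
    good.put("id", 42L);
    GenericData.Record bad = new GenericData.Record(schema);
    bad.put("id", "not-a-long"); // wrong Java type for a long field
    System.out.println(GenericData.get().validate(schema, good)); // true
    System.out.println(GenericData.get().validate(schema, bad));  // false
  }
}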
Use of org.apache.hadoop.hive.serde2.avro.AvroSerdeException in project hive by apache.
The class AvroSerializer, method serializeMap.
private Object serializeMap(MapTypeInfo typeInfo, MapObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
  // Avro only allows maps with string keys
  if (!mapHasStringKey(fieldOI.getMapKeyObjectInspector())) {
    throw new AvroSerdeException("Avro only supports maps with keys as Strings. Current Map is: " + typeInfo.toString());
  }
  ObjectInspector mapKeyObjectInspector = fieldOI.getMapKeyObjectInspector();
  ObjectInspector mapValueObjectInspector = fieldOI.getMapValueObjectInspector();
  TypeInfo mapKeyTypeInfo = typeInfo.getMapKeyTypeInfo();
  TypeInfo mapValueTypeInfo = typeInfo.getMapValueTypeInfo();
  Map<?, ?> map = fieldOI.getMap(structFieldData);
  Schema valueType = schema.getValueType();
  Map<Object, Object> serialized = new HashMap<Object, Object>(fieldOI.getMapSize(structFieldData));
  for (Map.Entry<?, ?> entry : map.entrySet()) {
    serialized.put(serialize(mapKeyTypeInfo, mapKeyObjectInspector, entry.getKey(), STRING_SCHEMA),
        serialize(mapValueTypeInfo, mapValueObjectInspector, entry.getValue(), valueType));
  }
  return serialized;
}
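The string-key restriction is inherent to Avro's map type: a map schema declares only a value type, and keys are always strings. A minimal sketch, assuming nothing beyond the core Avro classes:

import java.util.Map;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;

public class MapKeySketch {
  public static void main(String[] args) {
    // An Avro map schema carries no key type at all, only a value type.
    Schema mapSchema = Schema.createMap(Schema.create(Schema.Type.INT));
    System.out.println(mapSchema); // {"type":"map","values":"int"}
    // Generic validation only checks the values against the value schema.
    Map<String, Integer> counts = Map.of("a", 1, "b", 2);
    System.out.println(GenericData.get().validate(mapSchema, counts)); // true
  }
}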
Use of org.apache.hadoop.hive.serde2.avro.AvroSerdeException in project hive by apache.
The class AvroDeserializer, method deserializePrimitive.
private Object deserializePrimitive(Object datum, Schema fileSchema, Schema recordSchema, PrimitiveTypeInfo columnType) throws AvroSerdeException {
  switch (columnType.getPrimitiveCategory()) {
    case STRING:
      // To workaround AvroUTF8
      // and convert it to a string. Yay!
      return datum.toString();
    case BINARY:
      if (recordSchema.getType() == Type.FIXED) {
        Fixed fixed = (Fixed) datum;
        return fixed.bytes();
      } else if (recordSchema.getType() == Type.BYTES) {
        return AvroSerdeUtils.getBytesFromByteBuffer((ByteBuffer) datum);
      } else {
        throw new AvroSerdeException("Unexpected Avro schema for Binary TypeInfo: " + recordSchema.getType());
      }
    case DECIMAL:
      if (fileSchema == null) {
        throw new AvroSerdeException("File schema is missing for decimal field. Reader schema is " + columnType);
      }
      int scale = 0;
      try {
        scale = fileSchema.getJsonProp(AvroSerDe.AVRO_PROP_SCALE).asInt();
      } catch (Exception ex) {
        throw new AvroSerdeException("Failed to obtain scale value from file schema: " + fileSchema, ex);
      }
      HiveDecimal dec = AvroSerdeUtils.getHiveDecimalFromByteBuffer((ByteBuffer) datum, scale);
      JavaHiveDecimalObjectInspector oi = (JavaHiveDecimalObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector((DecimalTypeInfo) columnType);
      return oi.set(null, dec);
    case CHAR:
      if (fileSchema == null) {
        throw new AvroSerdeException("File schema is missing for char field. Reader schema is " + columnType);
      }
      int maxLength = 0;
      try {
        maxLength = fileSchema.getJsonProp(AvroSerDe.AVRO_PROP_MAX_LENGTH).getValueAsInt();
      } catch (Exception ex) {
        throw new AvroSerdeException("Failed to obtain maxLength value for char field from file schema: " + fileSchema, ex);
      }
      String str = datum.toString();
      return new HiveChar(str, maxLength);
    case VARCHAR:
      if (fileSchema == null) {
        throw new AvroSerdeException("File schema is missing for varchar field. Reader schema is " + columnType);
      }
      maxLength = 0;
      try {
        maxLength = fileSchema.getJsonProp(AvroSerDe.AVRO_PROP_MAX_LENGTH).getValueAsInt();
      } catch (Exception ex) {
        throw new AvroSerdeException("Failed to obtain maxLength value for varchar field from file schema: " + fileSchema, ex);
      }
      str = datum.toString();
      return new HiveVarchar(str, maxLength);
    case DATE:
      if (recordSchema.getType() != Type.INT) {
        throw new AvroSerdeException("Unexpected Avro schema for Date TypeInfo: " + recordSchema.getType());
      }
      return new Date(DateWritable.daysToMillis((Integer) datum));
    case TIMESTAMP:
      if (recordSchema.getType() != Type.LONG) {
        throw new AvroSerdeException("Unexpected Avro schema for Timestamp TypeInfo: " + recordSchema.getType());
      }
      return new Timestamp((Long) datum);
    default:
      return datum;
  }
}
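In the DATE branch, DateWritable.daysToMillis turns Avro's days-since-epoch int into milliseconds adjusted for the local time zone, so that java.sql.Date renders the intended calendar day. A rough plain-JDK equivalent with a hypothetical day count; Date.valueOf performs a similar local-time-zone adjustment:

import java.sql.Date;
import java.time.LocalDate;

public class DateFromDaysSketch {
  public static void main(String[] args) {
    // Avro encodes a date as an int counting days since the Unix epoch.
    int daysSinceEpoch = 19000;
    // Approximates what the DATE case above does via DateWritable.
    Date date = Date.valueOf(LocalDate.ofEpochDay(daysSinceEpoch));
    System.out.println(date); // 2022-01-08
  }
}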
Use of org.apache.hadoop.hive.serde2.avro.AvroSerdeException in project hive by apache.
The class AvroDeserializer, method deserializeMap.
private Object deserializeMap(Object datum, Schema fileSchema, Schema mapSchema, MapTypeInfo columnType) throws AvroSerdeException {
  // Avro only allows maps with Strings for keys, so we only have to worry
  // about deserializing the values
  Map<String, Object> map = new HashMap<String, Object>();
  Map<CharSequence, Object> mapDatum = (Map) datum;
  Schema valueSchema = mapSchema.getValueType();
  TypeInfo valueTypeInfo = columnType.getMapValueTypeInfo();
  for (Map.Entry<CharSequence, Object> entry : mapDatum.entrySet()) {
    map.put(entry.getKey().toString(), worker(entry.getValue(), fileSchema == null ? null : fileSchema.getValueType(), valueSchema, valueTypeInfo));
  }
  return map;
}
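Because decoded Avro strings typically arrive as org.apache.avro.util.Utf8 rather than String, converting each key with toString() above is what normalizes the map keys for Hive. A minimal sketch, with the per-value conversion that worker() would perform elided:

import java.util.HashMap;
import java.util.Map;
import org.apache.avro.util.Utf8;

public class MapDeserializeSketch {
  public static void main(String[] args) {
    // What an Avro decoder might hand back: Utf8 keys, boxed values.
    Map<CharSequence, Object> avroMap = new HashMap<>();
    avroMap.put(new Utf8("count"), 7);
    // Mirror deserializeMap: normalize each key to java.lang.String.
    Map<String, Object> hiveMap = new HashMap<>();
    for (Map.Entry<CharSequence, Object> e : avroMap.entrySet()) {
      hiveMap.put(e.getKey().toString(), e.getValue());
    }
    System.out.println(hiveMap); // {count=7}
  }
}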