Use of co.cask.cdap.api.data.format.UnexpectedFormatException in project cdap by caskdata.
In class ObjectDeserializer, method flattenRecord:
/**
 * Flattens a record object into a list of values Hive understands, one entry per Hive field.
 * Each field is looked up in the schema by its Hive (lowercase) name and deserialized recursively.
 *
 * @param obj the record object to flatten; may be null only if {@code schema} is nullable.
 * @param fieldNames names of the fields as known to Hive (all lowercase).
 * @param fieldTypes Hive types of the fields, parallel to {@code fieldNames}.
 * @param schema schema of the record.
 * @return deserialized field values in Hive field order, or null if the record is null and nullable.
 * @throws NoSuchFieldException if a struct field was expected but not found in the object.
 * @throws IllegalAccessException if a struct field was not accessible.
 * @throws UnexpectedFormatException if a non-nullable record is null, or a Hive field has no
 *         corresponding schema field.
 */
private List<Object> flattenRecord(Object obj, List<String> fieldNames, List<TypeInfo> fieldTypes, Schema schema) throws NoSuchFieldException, IllegalAccessException {
  boolean isNullable = schema.isNullable();
  if (obj == null) {
    if (isNullable) {
      return null;
    } else {
      throw new UnexpectedFormatException("Non-nullable field is null.");
    }
  }
  if (isNullable) {
    // unwrap the union-with-null so field lookup sees the actual record schema
    schema = schema.getNonNullable();
  }
  Map<String, Schema.Field> fieldMap = getFieldMap(schema);
  List<Object> objectFields = Lists.newArrayListWithCapacity(fieldNames.size());
  for (int i = 0; i < fieldNames.size(); i++) {
    String hiveName = fieldNames.get(i);
    TypeInfo fieldType = fieldTypes.get(i);
    Schema.Field schemaField = fieldMap.get(hiveName);
    // fail with a clear message instead of an NPE when the Hive field has no schema counterpart
    if (schemaField == null) {
      throw new UnexpectedFormatException("Hive field '" + hiveName + "' has no corresponding field in the schema.");
    }
    // use the name from the schema field in case it is not all lowercase
    Object recordField = getRecordField(obj, schemaField.getName());
    objectFields.add(deserializeField(recordField, fieldType, schemaField.getSchema()));
  }
  return objectFields;
}
Use of co.cask.cdap.api.data.format.UnexpectedFormatException in project cdap by caskdata.
In class AvroRecordFormat, method read:
/**
 * Reads the body of the given stream event as an Avro record.
 * If the event carries a schema header whose hash differs from the last one seen, the datum
 * reader is re-pointed at the event's schema; an event with no schema header is assumed to
 * use the format's own read schema.
 */
@Override
public StructuredRecord read(StreamEvent event) {
  try {
    String headerSchema = event.getHeaders().get(SCHEMA);
    if (headerSchema == null) {
      // No schema on the event: assume it matches the format's read schema.
      datumReader.setSchema(avroFormatSchema);
      eventSchemaHash = formatSchemaHash;
    } else {
      String headerSchemaHash = event.getHeaders().get(SCHEMA_HASH);
      if (!this.eventSchemaHash.equals(headerSchemaHash)) {
        // Event was written with a different schema than the one currently configured;
        // parse it and update the datum reader.
        org.apache.avro.Schema writtenSchema = new org.apache.avro.Schema.Parser().parse(headerSchema);
        datumReader.setSchema(writtenSchema);
        this.eventSchemaHash = headerSchemaHash;
      }
    }
    binaryDecoder = decoderFactory.binaryDecoder(byteBufferInput.reset(event.getBody()), binaryDecoder);
    return datumReader.read(null, binaryDecoder);
  } catch (IOException e) {
    throw new UnexpectedFormatException("Unable to decode the stream body as avro.", e);
  }
}
Use of co.cask.cdap.api.data.format.UnexpectedFormatException in project cdap by caskdata.
In class ObjectDeserializer, method deserializeField:
/**
 * Translate a field that fits a {@link Schema} field into a type that Hive understands.
 * For example, a ByteBuffer is allowed by schema but Hive only understands byte arrays, so all ByteBuffers must
 * be changed into byte arrays. Reflection is used to examine java objects if the expected hive type is a struct.
 *
 * @param field value of the field to deserialize.
 * @param typeInfo type of the field as expected by Hive.
 * @param schema schema of the field.
 * @return translated field.
 * @throws NoSuchFieldException if a struct field was expected but not found in the object.
 * @throws IllegalAccessException if a struct field was not accessible.
 * @throws UnexpectedFormatException if a non-nullable field is null, or the Hive type category is unsupported.
 */
private Object deserializeField(Object field, TypeInfo typeInfo, Schema schema) throws NoSuchFieldException, IllegalAccessException {
  boolean isNullable = schema.isNullable();
  if (field == null) {
    if (isNullable) {
      return null;
    } else {
      throw new UnexpectedFormatException("Non-nullable field was null.");
    }
  }
  if (isNullable) {
    // unwrap the union-with-null so the type checks below see the actual schema
    schema = schema.getNonNullable();
  }
  switch (typeInfo.getCategory()) {
    case PRIMITIVE:
      return deserializePrimitive(field, (PrimitiveTypeInfo) typeInfo);
    case LIST:
      // HIVE!! some versions will turn bytes into array<tinyint> instead of binary... so special case it.
      // TODO: remove once CDAP-1556 is done
      ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
      if (isByteArray(listTypeInfo) && !(field instanceof Collection)) {
        return deserializeByteArray(field);
      }
      // reuse listTypeInfo instead of casting typeInfo a second time
      return deserializeList(field, listTypeInfo, schema.getComponentSchema());
    case MAP:
      return deserializeMap(field, (MapTypeInfo) typeInfo, schema.getMapSchema());
    case STRUCT:
      StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
      ArrayList<String> innerFieldNames = structTypeInfo.getAllStructFieldNames();
      ArrayList<TypeInfo> innerFieldTypes = structTypeInfo.getAllStructFieldTypeInfos();
      return flattenRecord(field, innerFieldNames, innerFieldTypes, schema);
    case UNION:
      // TODO: decide what to do here
      return field;
    default:
      // Every Hive category is handled above; fail loudly rather than silently returning null
      // if a new category ever appears.
      throw new UnexpectedFormatException("Unsupported Hive type category: " + typeInfo.getCategory());
  }
}
Aggregations