use of org.apache.avro.Schema.Type in project gora by apache.
the class SchemaUtils method checkEqualSchema.
/**
 * Checks that a Pig field schema is compatible with an Avro schema.
 *
 * <p>For every supported Pig type the Avro schema must either be of the corresponding Avro type
 * (BAG/ARRAY, BOOLEAN/BOOLEAN, BYTEARRAY/BYTES, CHARARRAY/STRING, DOUBLE/DOUBLE, FLOAT/FLOAT,
 * INTEGER/INT or ENUM, LONG/LONG, MAP/MAP, NULL/NULL, TUPLE/RECORD) or be accepted by
 * {@code checkUnionSchema}. For a Pig BAG matched directly against an Avro ARRAY, the bag's
 * inner tuple field is additionally checked against the array's element type.
 *
 * @param pigFieldSchema A Pig field schema
 * @param avroSchema Avro schema related with pig field schema.
 * @throws IOException if the Pig type can not be converted to the Avro type, or if the Pig type
 *     is not one of the types handled here
 */
private static void checkEqualSchema(ResourceFieldSchema pigFieldSchema, Schema avroSchema) throws IOException {
  byte pigType = pigFieldSchema.getType();
  String fieldName = pigFieldSchema.getName();
  Type avroType = avroSchema.getType();
  // Switch that checks if avro type matches pig type, or if avro is union and some nested type matches pig type.
  switch (pigType) {
    case DataType.BAG: // Avro Array
      LOG.trace(" Bag");
      if (!avroType.equals(Type.ARRAY) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
        throw conversionError(fieldName, "BAG", pigFieldSchema, avroType);
      }
      // Only an ARRAY schema has an element type; Schema.getElementType() throws on any other
      // schema type, so do not recurse when the match came through the union check.
      // NOTE(review): presumably checkUnionSchema already validates the bag contents against
      // the matching union branch - confirm against its implementation.
      if (avroType.equals(Type.ARRAY)) {
        // bag -> tuple -> first field of the tuple is compared with the array element type
        checkEqualSchema(pigFieldSchema.getSchema().getFields()[0].getSchema().getFields()[0], avroSchema.getElementType());
      }
      break;
    case DataType.BOOLEAN:
      LOG.trace(" Boolean");
      if (!avroType.equals(Type.BOOLEAN) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
        throw conversionError(fieldName, "BOOLEAN", pigFieldSchema, avroType);
      }
      break;
    case DataType.BYTEARRAY:
      LOG.trace(" Bytearray");
      if (!avroType.equals(Type.BYTES) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
        throw conversionError(fieldName, "BYTEARRAY", pigFieldSchema, avroType);
      }
      break;
    case DataType.CHARARRAY: // String
      LOG.trace(" Chararray");
      if (!avroType.equals(Type.STRING) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
        throw conversionError(fieldName, "CHARARRAY", pigFieldSchema, avroType);
      }
      break;
    case DataType.DOUBLE:
      LOG.trace(" Double");
      if (!avroType.equals(Type.DOUBLE) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
        throw conversionError(fieldName, "DOUBLE", pigFieldSchema, avroType);
      }
      break;
    case DataType.FLOAT:
      LOG.trace(" Float");
      if (!avroType.equals(Type.FLOAT) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
        throw conversionError(fieldName, "FLOAT", pigFieldSchema, avroType);
      }
      break;
    case DataType.INTEGER: // Int or Enum
      LOG.trace(" Integer");
      if (!avroType.equals(Type.INT) && !avroType.equals(Type.ENUM) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
        throw conversionError(fieldName, "INTEGER", pigFieldSchema, avroType);
      }
      break;
    case DataType.LONG:
      LOG.trace(" Long");
      if (!avroType.equals(Type.LONG) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
        throw conversionError(fieldName, "LONG", pigFieldSchema, avroType);
      }
      break;
    case DataType.MAP: // Avro Map
      LOG.trace(" Map");
      if (!avroType.equals(Type.MAP) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
        throw conversionError(fieldName, "MAP", pigFieldSchema, avroType);
      }
      break;
    case DataType.NULL: // Avro nullable??
      LOG.trace(" Type Null");
      if (!avroType.equals(Type.NULL) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
        throw conversionError(fieldName, "NULL", pigFieldSchema, avroType);
      }
      break;
    case DataType.TUPLE: // Avro Record
      LOG.trace(" Tuple");
      if (!avroType.equals(Type.RECORD) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
        throw conversionError(fieldName, "TUPLE(record)", pigFieldSchema, avroType);
      }
      break;
    default:
      throw new IOException("Unexpected Pig schema type " + DataType.genTypeToNameMap().get(pigType) + " for avro schema field " + avroSchema.getName() + ": " + avroType.name());
  }
}

/**
 * Builds the exception reported when a Pig field can not be converted to the given Avro type.
 *
 * @param fieldName name of the Pig field being checked
 * @param pigTypeLabel label of the Pig type as it appears in the message (e.g. {@code "BAG"})
 * @param pigFieldSchema the Pig field schema, whose nested schema is included in the message
 * @param avroType the Avro type the field was compared against
 * @return the exception to throw
 */
private static IOException conversionError(String fieldName, String pigTypeLabel, ResourceFieldSchema pigFieldSchema, Type avroType) {
  return new IOException("Can not convert field [" + fieldName + "] from Pig " + pigTypeLabel + " with schema " + pigFieldSchema.getSchema() + " to avro " + avroType.name());
}
use of org.apache.avro.Schema.Type in project gora by apache.
the class SchemaUtils method avro2ResouceFieldSchema.
/**
 * Converts an Avro schema into the equivalent Pig {@code ResourceFieldSchema}.
 *
 * <p>Primitive types map one to one (ENUM and INT both become Pig INTEGER). A UNION is mapped
 * to the schema of its first non-null branch. A RECORD becomes a TUPLE with one field per
 * record field, an ARRAY becomes a BAG holding a tuple named {@code "t"} with a single field
 * for the element type, and a MAP becomes a Pig MAP whose schema holds the value type.
 *
 * <p>NOTE(review): on the non-recursive RECORD path the counter bumped by
 * {@code recursiveRecordSchema.incSchema} is not decremented in this method - confirm that
 * this is intended.
 *
 * @param schema the Avro schema to convert
 * @return the Pig field schema (without a name set at the top level)
 * @throws IOException declared for callers; not thrown directly by the code in this method
 * @throws RuntimeException if the union only contains null, the type is FIXED (not
 *     implemented) or the type is unexpected
 */
private static ResourceFieldSchema avro2ResouceFieldSchema(Schema schema) throws IOException {
  Type schemaType = schema.getType();
  switch (schemaType) {
    case NULL:
      return new ResourceFieldSchema().setType(DataType.NULL);
    case BOOLEAN:
      return new ResourceFieldSchema().setType(DataType.BOOLEAN);
    case ENUM:
      return new ResourceFieldSchema().setType(DataType.INTEGER);
    case BYTES:
      return new ResourceFieldSchema().setType(DataType.BYTEARRAY);
    case STRING:
      return new ResourceFieldSchema().setType(DataType.CHARARRAY);
    case FLOAT:
      return new ResourceFieldSchema().setType(DataType.FLOAT);
    case DOUBLE:
      return new ResourceFieldSchema().setType(DataType.DOUBLE);
    case INT:
      return new ResourceFieldSchema().setType(DataType.INTEGER);
    case LONG:
      return new ResourceFieldSchema().setType(DataType.LONG);
    case UNION:
      // Returns the first not-null type
      if (schema.getTypes().size() != 2) {
        // The placeholder needs an argument, otherwise a literal "{}" is logged.
        LOG.warn("Field UNION {} must be ['null','othertype']. Maybe wrong definition?", schema);
      }
      for (Schema s : schema.getTypes()) {
        if (s.getType() != Type.NULL) {
          return avro2ResouceFieldSchema(s);
        }
      }
      LOG.error("Union with only ['null']?");
      throw new RuntimeException("Union with only ['null']?");
    case RECORD:
      // A record in Gora is a Tuple in Pig
      if (recursiveRecordSchema.incSchema(schema.getName()) > 1) {
        // Recursion detected (and we are 2 levels below the desired one).
        // Decrement so that the schema of other leaves can still be emitted.
        recursiveRecordSchema.decSchema(schema.getName());
        // Return a tuple schema with no fields
        return new ResourceFieldSchema().setType(DataType.TUPLE);
      }
      ResourceFieldSchema returnRecordResourceFieldSchema = new ResourceFieldSchema().setType(DataType.TUPLE);
      ResourceFieldSchema[] recordFieldSchemas = new ResourceFieldSchema[schema.getFields().size()];
      int fieldIndex = 0;
      for (Field schemaField : schema.getFields()) {
        // Each record field keeps its Avro name in the Pig tuple
        recordFieldSchemas[fieldIndex++] = avro2ResouceFieldSchema(schemaField.schema()).setName(schemaField.name());
      }
      returnRecordResourceFieldSchema.setSchema(new ResourceSchema().setFields(recordFieldSchemas));
      return returnRecordResourceFieldSchema;
    case ARRAY:
      // An array in Gora is a Bag in Pig
      // Maybe should be a Map with string(numeric) index to ensure order, but Avro and Pig data model are different :\
      ResourceFieldSchema returnArrayResourceFieldSchema = new ResourceFieldSchema().setType(DataType.BAG);
      Schema arrayElementType = schema.getElementType();
      // bag -> tuple "t" -> single field with the element schema
      ResourceFieldSchema arrayTupleSchema = new ResourceFieldSchema()
          .setType(DataType.TUPLE)
          .setName("t")
          .setSchema(new ResourceSchema().setFields(new ResourceFieldSchema[] { avro2ResouceFieldSchema(arrayElementType) }));
      returnArrayResourceFieldSchema.setSchema(new ResourceSchema().setFields(new ResourceFieldSchema[] { arrayTupleSchema }));
      return returnArrayResourceFieldSchema;
    case MAP:
      // A map in Gora is a Map in Pig, but in pig is only chararray=>something
      ResourceFieldSchema returnMapResourceFieldSchema = new ResourceFieldSchema().setType(DataType.MAP);
      Schema mapValueType = schema.getValueType();
      returnMapResourceFieldSchema.setSchema(new ResourceSchema().setFields(new ResourceFieldSchema[] { avro2ResouceFieldSchema(mapValueType) }));
      return returnMapResourceFieldSchema;
    case FIXED:
      // TODO Implement FIXED data type
      throw new RuntimeException("Fixed type not implemented");
    default:
      throw new RuntimeException("Unexpected schema type " + schemaType);
  }
}
use of org.apache.avro.Schema.Type in project gora by apache.
the class MongoStore method unionToMongo.
/**
 * Serialises the value of a two-branch nullable union field for storage.
 *
 * <p>The union must be made of two different types, one of which is {@code null}, i.e.
 * {@code ["null","type"]} or {@code ["type","null"]}. The value is converted with
 * {@code toDocument} as if the field schema were the non-null branch alone.
 *
 * @param docf first argument forwarded unchanged to {@code toDocument}
 * @param fieldSchema the Avro UNION schema of the field; its first two branches are inspected
 * @param storeType the store type forwarded to {@code toDocument}
 * @param value the value to convert
 * @return whatever {@code toDocument} returns for the non-null branch
 * @throws IllegalStateException if the first two branches are of the same type or neither of
 *     them is {@code null}
 */
private Object unionToMongo(final String docf, final Schema fieldSchema, final DocumentFieldType storeType, final Object value) {
  // schema [type0, type1]
  final Schema schema0 = fieldSchema.getTypes().get(0);
  final Schema schema1 = fieldSchema.getTypes().get(1);
  final Type type0 = schema0.getType();
  final Type type1 = schema1.getType();

  // Check if types are different and there's a "null", like ["null","type"]
  // or ["type","null"]
  if (type0.equals(type1) || !(type0.equals(Type.NULL) || type1.equals(Type.NULL))) {
    throw new IllegalStateException("MongoStore doesn't support 3 types union field yet. Please update your mapping");
  }

  // Pick the branch that is NOT null, whichever position it is declared in. Always taking
  // index 1 would serialise ["type","null"] using the null branch.
  final Schema innerSchema = type0.equals(Type.NULL) ? schema1 : schema0;
  final Type innerType = innerSchema.getType();
  LOG.debug("Transform value to DBObject (UNION), schemaType:{}, type1:{}, storeType:{}", new Object[] { innerType, type1, storeType });
  // Deserialize as if schema was ["type"]
  return toDocument(docf, innerSchema, innerType, storeType, value);
}
use of org.apache.avro.Schema.Type in project gora by apache.
the class MongoStore method recordToMongo.
/**
 * Converts a record value into a {@code Document}, one entry per field of the record schema.
 *
 * <p>Each entry is keyed by the Avro field name and holds the result of {@code toDocument}
 * for that field's value. The store type of each field is looked up in the mapping through
 * the document field associated with the Avro field name.
 *
 * @param docf first argument forwarded unchanged to {@code toDocument} for every field
 * @param fieldSchema the Avro RECORD schema whose fields are iterated
 * @param value the record instance; it is cast to {@code PersistentBase} to read field values
 * @return the populated document
 */
private Document recordToMongo(final String docf, final Schema fieldSchema, final Object value) {
  final Document result = new Document();
  for (final Field field : fieldSchema.getFields()) {
    // Read the field value by position from the persistent record
    final Object fieldValue = ((PersistentBase) value).get(field.pos());
    final String fieldName = field.name();
    final String documentField = mapping.getDocumentField(fieldName);
    final Schema memberSchema = field.schema();
    final Type memberType = memberSchema.getType();
    final DocumentFieldType memberStoreType = mapping.getDocumentFieldType(documentField);
    LOG.debug("Transform value to DBObject (RECORD), docField:{}, schemaType:{}, storeType:{}", new Object[] { fieldName, memberType, memberStoreType });
    final Object converted = toDocument(docf, memberSchema, memberType, memberStoreType, fieldValue);
    result.put(fieldName, converted);
  }
  return result;
}
use of org.apache.avro.Schema.Type in project incubator-gobblin by apache.
the class AvroToJdbcEntryConverter method produceFlattenedHelper.
/**
 * Adds the column(s) contributed by one Avro field to the flattened name-to-type map.
 *
 * <p>The field's schema is first passed through {@code determineType}. A non-record field is
 * stored under its own name. A record field is flattened with {@code flatten} and each nested
 * entry is stored under {@code <field name> + AVRO_NESTED_COLUMN_DELIMITER + <nested key>}.
 *
 * @param field the Avro field to add
 * @param flattened the accumulator map, modified in place
 * @throws SchemaConversionException if a non-record field name is already present in the map
 * @throws IllegalArgumentException if a nested (record) key is already present in the map
 */
private static void produceFlattenedHelper(Field field, Map<String, Type> flattened) throws SchemaConversionException {
  final Schema resolvedSchema = determineType(field.schema());
  final Type resolvedType = resolvedSchema.getType();

  if (!Type.RECORD.equals(resolvedType)) {
    // Leaf field: register it under its own name.
    final Type previous = flattened.put(field.name(), resolvedType);
    if (previous != null) {
      // No duplicate name allowed when flattening (not considering name space we don't have any assumption between namespace and actual database field name)
      throw new SchemaConversionException("Duplicate name detected in Avro schema. " + field.name());
    }
    return;
  }

  // Record field: prefix every nested column with this field's name.
  final String keyPattern = "%s" + AVRO_NESTED_COLUMN_DELIMITER + "%s";
  final Map<String, Type> nestedColumns = flatten(resolvedSchema);
  for (Entry<String, Type> nested : nestedColumns.entrySet()) {
    final String columnName = String.format(keyPattern, field.name(), nested.getKey());
    final Type previous = flattened.put(columnName, nested.getValue());
    Preconditions.checkArgument(previous == null, "Duplicate name detected in Avro schema. Field: " + columnName);
  }
}
Aggregations