Search in sources :

Example 66 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class SchemaUtils method checkEqualSchema.

/**
 * Checks that a Pig field schema is compatible with an Avro schema.
 *
 * <p>For every supported Pig type the Avro schema must either have the matching Avro type directly,
 * or be accepted by {@code checkUnionSchema(avroSchema, pigFieldSchema)} (presumably: the Avro schema
 * is a union with a branch matching the Pig type -- that helper is not visible here).
 *
 * <p>For a Pig BAG, the first field of the bag's inner tuple is additionally checked against the
 * element type of the Avro array. When the Avro schema is a union, the union's ARRAY branch is used
 * for this, because {@link Schema#getElementType()} is only valid on ARRAY schemas.
 *
 * @param pigFieldSchema A Pig field schema
 * @param avroSchema Avro schema related with pig field schema.
 * @throws IOException if the Pig type can not be converted to the Avro type, or the Pig type is not handled
 */
private static void checkEqualSchema(ResourceFieldSchema pigFieldSchema, Schema avroSchema) throws IOException {
    byte pigType = pigFieldSchema.getType();
    String fieldName = pigFieldSchema.getName();
    Type avroType = avroSchema.getType();
    // Switch that checks if avro type matches pig type, or if avro is union and some nested type matches pig type.
    switch (pigType) {
        case DataType.BAG: { // Avro Array
            LOG.trace("    Bag");
            if (!avroType.equals(Type.ARRAY) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
                throw new IOException("Can not convert field [" + fieldName + "] from Pig BAG with schema " + pigFieldSchema.getSchema() + " to avro " + avroType.name());
            }
            // getElementType() throws AvroRuntimeException on anything but an ARRAY schema, so a union
            // that was accepted above must first be narrowed down to its ARRAY branch.
            Schema arraySchema = arrayBranchOf(avroSchema);
            if (arraySchema == null) {
                throw new IOException("Can not convert field [" + fieldName + "] from Pig BAG with schema " + pigFieldSchema.getSchema() + " to avro " + avroType.name());
            }
            // bag -> tuple -> first tuple field is compared with the array element schema
            checkEqualSchema(pigFieldSchema.getSchema().getFields()[0].getSchema().getFields()[0], arraySchema.getElementType());
            break;
        }
        case DataType.BOOLEAN:
            LOG.trace("    Boolean");
            if (!avroType.equals(Type.BOOLEAN) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
                throw new IOException("Can not convert field [" + fieldName + "] from Pig BOOLEAN with schema " + pigFieldSchema.getSchema() + " to avro " + avroType.name());
            }
            break;
        case DataType.BYTEARRAY:
            LOG.trace("    Bytearray");
            if (!avroType.equals(Type.BYTES) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
                throw new IOException("Can not convert field [" + fieldName + "] from Pig BYTEARRAY with schema " + pigFieldSchema.getSchema() + " to avro " + avroType.name());
            }
            break;
        case DataType.CHARARRAY: // String
            LOG.trace("    Chararray");
            if (!avroType.equals(Type.STRING) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
                throw new IOException("Can not convert field [" + fieldName + "] from Pig CHARARRAY with schema " + pigFieldSchema.getSchema() + " to avro " + avroType.name());
            }
            break;
        case DataType.DOUBLE:
            LOG.trace("    Double");
            if (!avroType.equals(Type.DOUBLE) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
                throw new IOException("Can not convert field [" + fieldName + "] from Pig DOUBLE with schema " + pigFieldSchema.getSchema() + " to avro " + avroType.name());
            }
            break;
        case DataType.FLOAT:
            LOG.trace("    Float");
            if (!avroType.equals(Type.FLOAT) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
                throw new IOException("Can not convert field [" + fieldName + "] from Pig FLOAT with schema " + pigFieldSchema.getSchema() + " to avro " + avroType.name());
            }
            break;
        case DataType.INTEGER: // Int or Enum
            LOG.trace("    Integer");
            if (!avroType.equals(Type.INT) && !avroType.equals(Type.ENUM) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
                throw new IOException("Can not convert field [" + fieldName + "] from Pig INTEGER with schema " + pigFieldSchema.getSchema() + " to avro " + avroType.name());
            }
            break;
        case DataType.LONG:
            LOG.trace("    Long");
            if (!avroType.equals(Type.LONG) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
                throw new IOException("Can not convert field [" + fieldName + "] from Pig LONG with schema " + pigFieldSchema.getSchema() + " to avro " + avroType.name());
            }
            break;
        case DataType.MAP: // Avro Map
            LOG.trace("    Map");
            if (!avroType.equals(Type.MAP) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
                throw new IOException("Can not convert field [" + fieldName + "] from Pig MAP with schema " + pigFieldSchema.getSchema() + " to avro " + avroType.name());
            }
            break;
        case DataType.NULL: // Avro nullable??
            LOG.trace("    Type Null");
            if (!avroType.equals(Type.NULL) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
                throw new IOException("Can not convert field [" + fieldName + "] from Pig NULL with schema " + pigFieldSchema.getSchema() + " to avro " + avroType.name());
            }
            break;
        case DataType.TUPLE: // Avro Record
            LOG.trace("    Tuple");
            if (!avroType.equals(Type.RECORD) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
                throw new IOException("Can not convert field [" + fieldName + "] from Pig TUPLE(record) with schema " + pigFieldSchema.getSchema() + " to avro " + avroType.name());
            }
            break;
        default:
            throw new IOException("Unexpected Pig schema type " + DataType.genTypeToNameMap().get(pigType) + " for avro schema field " + avroSchema.getName() + ": " + avroType.name());
    }
}

/**
 * Returns the ARRAY schema described by the given Avro schema: the schema itself when it is an ARRAY,
 * or the first ARRAY branch when it is a UNION.
 *
 * @param avroSchema the Avro schema to inspect
 * @return the ARRAY schema, or {@code null} when the schema is neither an ARRAY nor a UNION with an ARRAY branch
 */
private static Schema arrayBranchOf(Schema avroSchema) {
    Type type = avroSchema.getType();
    if (type == Type.ARRAY) {
        return avroSchema;
    }
    if (type == Type.UNION) {
        for (Schema branch : avroSchema.getTypes()) {
            if (branch.getType() == Type.ARRAY) {
                return branch;
            }
        }
    }
    return null;
}
Also used : DataType(org.apache.pig.data.DataType) Type(org.apache.avro.Schema.Type) IOException(java.io.IOException)

Example 67 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class SchemaUtils method avro2ResouceFieldSchema.

/**
 * Converts an Avro schema into the equivalent Pig {@code ResourceFieldSchema}.
 *
 * <p>Mapping applied by this method:
 * <ul>
 *   <li>NULL, BOOLEAN, BYTES, STRING, FLOAT, DOUBLE, INT, LONG map to the Pig type of the same kind;</li>
 *   <li>ENUM maps to Pig INTEGER;</li>
 *   <li>UNION maps to the conversion of its first non-null branch;</li>
 *   <li>RECORD maps to a Pig TUPLE with one named field per record field;</li>
 *   <li>ARRAY maps to a Pig BAG holding a tuple named "t" with a single field for the element type;</li>
 *   <li>MAP maps to a Pig MAP whose schema has a single field for the value type.</li>
 * </ul>
 *
 * @param schema the Avro schema to convert
 * @return the Pig field schema (without a name; callers set it where needed)
 * @throws IOException declared for callers; not thrown directly by the code visible in this method
 * @throws RuntimeException for a union without a non-null branch, for FIXED (not implemented) and for unknown types
 */
private static ResourceFieldSchema avro2ResouceFieldSchema(Schema schema) throws IOException {
    Type schemaType = schema.getType();
    switch (schemaType) {
        case NULL:
            return new ResourceFieldSchema().setType(DataType.NULL);
        case BOOLEAN:
            return new ResourceFieldSchema().setType(DataType.BOOLEAN);
        case ENUM:
            return new ResourceFieldSchema().setType(DataType.INTEGER);
        case BYTES:
            return new ResourceFieldSchema().setType(DataType.BYTEARRAY);
        case STRING:
            return new ResourceFieldSchema().setType(DataType.CHARARRAY);
        case FLOAT:
            return new ResourceFieldSchema().setType(DataType.FLOAT);
        case DOUBLE:
            return new ResourceFieldSchema().setType(DataType.DOUBLE);
        case INT:
            return new ResourceFieldSchema().setType(DataType.INTEGER);
        case LONG:
            return new ResourceFieldSchema().setType(DataType.LONG);
        case UNION:
            // Returns the first not-null type
            if (schema.getTypes().size() != 2) {
                // The placeholder needs an argument, otherwise the offending union is never shown in the log.
                LOG.warn("Field UNION {} must be ['null','othertype']. Maybe wrong definition?", schema);
            }
            for (Schema s : schema.getTypes()) {
                if (s.getType() != Type.NULL) {
                    return avro2ResouceFieldSchema(s);
                }
            }
            LOG.error("Union with only ['null']?");
            throw new RuntimeException("Union with only ['null']?");
        case RECORD:
            // A record in Gora is a Tuple in Pig
            if (recursiveRecordSchema.incSchema(schema.getName()) > 1) {
                // Recursion detected (and we are 2 levels below the desired one).
                // Decrement again so that the schema of other leaves can still be generated.
                recursiveRecordSchema.decSchema(schema.getName());
                // Return a tuple schema with no fields
                return new ResourceFieldSchema().setType(DataType.TUPLE);
            }
            ResourceFieldSchema returnRecordResourceFieldSchema = new ResourceFieldSchema().setType(DataType.TUPLE);
            ResourceFieldSchema[] recordFieldSchemas = new ResourceFieldSchema[schema.getFields().size()];
            int fieldIndex = 0;
            for (Field schemaField : schema.getFields()) {
                recordFieldSchemas[fieldIndex++] = avro2ResouceFieldSchema(schemaField.schema()).setName(schemaField.name());
            }
            returnRecordResourceFieldSchema.setSchema(new ResourceSchema().setFields(recordFieldSchemas));
            return returnRecordResourceFieldSchema;
        case ARRAY:
            // An array in Gora is a Bag in Pig
            // Maybe should be a Map with string(numeric) index to ensure order, but Avro and Pig data model are different :\
            ResourceFieldSchema returnArrayResourceFieldSchema = new ResourceFieldSchema().setType(DataType.BAG);
            Schema arrayElementType = schema.getElementType();
            returnArrayResourceFieldSchema.setSchema(new ResourceSchema().setFields(new ResourceFieldSchema[] { new ResourceFieldSchema().setType(DataType.TUPLE).setName("t").setSchema(new ResourceSchema().setFields(new ResourceFieldSchema[] { avro2ResouceFieldSchema(arrayElementType) })) }));
            return returnArrayResourceFieldSchema;
        case MAP:
            // A map in Gora is a Map in Pig, but in pig is only chararray=>something
            ResourceFieldSchema returnMapResourceFieldSchema = new ResourceFieldSchema().setType(DataType.MAP);
            Schema mapValueType = schema.getValueType();
            returnMapResourceFieldSchema.setSchema(new ResourceSchema().setFields(new ResourceFieldSchema[] { avro2ResouceFieldSchema(mapValueType) }));
            return returnMapResourceFieldSchema;
        case FIXED:
            // TODO Implement FIXED data type
            throw new RuntimeException("Fixed type not implemented");
        default:
            throw new RuntimeException("Unexpected schema type " + schemaType);
    }
}
Also used : Field(org.apache.avro.Schema.Field) DataType(org.apache.pig.data.DataType) Type(org.apache.avro.Schema.Type) ResourceSchema(org.apache.pig.ResourceSchema) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) Schema(org.apache.avro.Schema) ResourceSchema(org.apache.pig.ResourceSchema) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema)

Example 68 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class MongoStore method unionToMongo.

/**
 * Serializes the value of a two-branch nullable union field, i.e. a union declared as
 * {@code ["null","type"]} or {@code ["type","null"]}, by delegating to {@code toDocument}
 * with the non-null branch of the union.
 *
 * @param docf name of the document field, passed through to {@code toDocument}
 * @param fieldSchema Avro schema of the union field
 * @param storeType store type of the field, passed through to {@code toDocument}
 * @param value the value to serialize
 * @return whatever {@code toDocument} produces for the non-null branch
 * @throws IllegalStateException if the first two branches are of the same type or none of them is "null"
 */
private Object unionToMongo(final String docf, final Schema fieldSchema, final DocumentFieldType storeType, final Object value) {
    // schema [type0, type1]
    final Schema schema0 = fieldSchema.getTypes().get(0);
    final Schema schema1 = fieldSchema.getTypes().get(1);
    final Type type0 = schema0.getType();
    final Type type1 = schema1.getType();
    // Only ["null","type"] or ["type","null"] are supported
    if (type0.equals(type1) || !(type0.equals(Type.NULL) || type1.equals(Type.NULL))) {
        throw new IllegalStateException("MongoStore doesn't support 3 types union field yet. Please update your mapping");
    }
    // Pick the branch that is NOT "null": always taking index 1 would serialize the value
    // against the "null" branch for unions declared as ["type","null"].
    final Schema innerSchema = type0.equals(Type.NULL) ? schema1 : schema0;
    final Type innerType = innerSchema.getType();
    LOG.debug("Transform value to DBObject (UNION), schemaType:{}, type1:{}, storeType:{}", new Object[] { innerType, type1, storeType });
    // Serialize as if schema was ["type"]
    return toDocument(docf, innerSchema, innerType, storeType, value);
}
Also used : DocumentFieldType(org.apache.gora.mongodb.store.MongoMapping.DocumentFieldType) Type(org.apache.avro.Schema.Type) Schema(org.apache.avro.Schema)

Example 69 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class MongoStore method recordToMongo.

/**
 * Builds a {@code Document} from a record value: every field of the record schema is converted
 * with {@code toDocument} and stored under the Avro field name.
 *
 * @param docf name of the document field, passed through to {@code toDocument}
 * @param fieldSchema Avro schema of the record; its fields drive the iteration
 * @param value the record instance; it is cast to {@code PersistentBase} to read each field by position
 * @return the document holding one entry per record field
 */
private Document recordToMongo(final String docf, final Schema fieldSchema, final Object value) {
    final Document result = new Document();
    for (final Field recordField : fieldSchema.getFields()) {
        final String avroName = recordField.name();
        final Schema memberSchema = recordField.schema();

        // Read the member value by its position inside the record.
        final Object memberValue = ((PersistentBase) value).get(recordField.pos());
        // The store type is looked up through the mapped document field name.
        final String mappedDocField = mapping.getDocumentField(avroName);
        final Type memberType = memberSchema.getType();
        final DocumentFieldType memberStoreType = mapping.getDocumentFieldType(mappedDocField);

        final Object[] logArguments = new Object[] { avroName, memberType, memberStoreType };
        LOG.debug("Transform value to DBObject (RECORD), docField:{}, schemaType:{}, storeType:{}", logArguments);

        final Object converted = toDocument(docf, memberSchema, memberType, memberStoreType, memberValue);
        result.put(avroName, converted);
    }
    return result;
}
Also used : Field(org.apache.avro.Schema.Field) PersistentBase(org.apache.gora.persistency.impl.PersistentBase) DocumentFieldType(org.apache.gora.mongodb.store.MongoMapping.DocumentFieldType) Type(org.apache.avro.Schema.Type) DocumentFieldType(org.apache.gora.mongodb.store.MongoMapping.DocumentFieldType) Document(org.bson.Document)

Example 70 with Type

use of org.apache.avro.Schema.Type in project incubator-gobblin by apache.

the class AvroToJdbcEntryConverter method produceFlattenedHelper.

/**
 * Adds the given Avro field to the flattened name-to-type map.
 *
 * <p>The field schema is first resolved with {@code determineType}. A non-record field is registered
 * under its own name. A record field is flattened with {@code flatten} and each nested entry is
 * registered under {@code <field name><AVRO_NESTED_COLUMN_DELIMITER><nested name>}.
 *
 * @param field the Avro field to add
 * @param flattened the map collecting flattened column names and their Avro types; modified in place
 * @throws SchemaConversionException if a non-record field name is already present in the map
 * @throws IllegalArgumentException if a flattened nested name is already present in the map
 */
private static void produceFlattenedHelper(Field field, Map<String, Type> flattened) throws SchemaConversionException {
    final Schema resolved = determineType(field.schema());
    final Type resolvedType = resolved.getType();

    if (!Type.RECORD.equals(resolvedType)) {
        // No duplicate name allowed when flattening (not considering name space we don't have any assumption between namespace and actual database field name)
        if (flattened.put(field.name(), resolvedType) != null) {
            throw new SchemaConversionException("Duplicate name detected in Avro schema. " + field.name());
        }
        return;
    }

    // Record: register every nested column under a name prefixed with this field's name.
    for (Entry<String, Type> nested : flatten(resolved).entrySet()) {
        final String qualifiedName = String.format("%s" + AVRO_NESTED_COLUMN_DELIMITER + "%s", field.name(), nested.getKey());
        final Type previous = flattened.put(qualifiedName, nested.getValue());
        Preconditions.checkArgument(previous == null, "Duplicate name detected in Avro schema. Field: " + qualifiedName);
    }
}
Also used : SchemaConversionException(org.apache.gobblin.converter.SchemaConversionException) Type(org.apache.avro.Schema.Type) Schema(org.apache.avro.Schema)

Aggregations

Type (org.apache.avro.Schema.Type)80 Schema (org.apache.avro.Schema)58 Field (org.apache.avro.Schema.Field)32 Map (java.util.Map)20 List (java.util.List)16 HashMap (java.util.HashMap)15 ArrayList (java.util.ArrayList)13 ByteBuffer (java.nio.ByteBuffer)11 Collectors (java.util.stream.Collectors)11 IOException (java.io.IOException)10 LogicalType (org.apache.avro.LogicalType)8 LinkedHashMap (java.util.LinkedHashMap)7 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)7 ImmutableMap (com.google.common.collect.ImmutableMap)6 Arrays (java.util.Arrays)5 PersistentBase (org.apache.gora.persistency.impl.PersistentBase)5 Test (org.junit.Test)5 BaseRuntimeChildDefinition (ca.uhn.fhir.context.BaseRuntimeChildDefinition)4 BaseRuntimeElementDefinition (ca.uhn.fhir.context.BaseRuntimeElementDefinition)4 DataType (com.linkedin.pinot.common.data.FieldSpec.DataType)4