Search in sources :

Example 56 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class SchemaUtils method checkEqualSchema.

/**
 * Checks that a Pig field schema can be converted to the given Avro schema.
 *
 * <p>The Pig type must either match the Avro type directly, or the Avro schema must be
 * accepted by {@code checkUnionSchema}. For a Pig BAG the element schema is additionally
 * checked, recursively, against the element type of the Avro array.
 *
 * @param pigFieldSchema A Pig field schema
 * @param avroSchema Avro schema related with pig field schema.
 * @throws IOException if the Pig type has no compatible Avro type in {@code avroSchema},
 *         or if the Pig type is not one of the types handled here
 */
private static void checkEqualSchema(ResourceFieldSchema pigFieldSchema, Schema avroSchema) throws IOException {
    byte pigType = pigFieldSchema.getType();
    Type avroType = avroSchema.getType();
    // Each case accepts a direct type match, or delegates to checkUnionSchema for the rest.
    switch (pigType) {
        case DataType.BAG: // Avro Array
            LOG.trace("    Bag");
            if (!avroType.equals(Type.ARRAY) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
                throw conversionError("BAG", pigFieldSchema, avroType);
            }
            // getElementType() is only valid on an ARRAY schema; when the bag was accepted
            // through a union, the array branch of that union must be used instead.
            Schema arraySchema = findArraySchema(avroSchema);
            if (arraySchema == null) {
                throw conversionError("BAG", pigFieldSchema, avroType);
            }
            checkEqualSchema(pigFieldSchema.getSchema().getFields()[0].getSchema().getFields()[0], arraySchema.getElementType());
            break;
        case DataType.BOOLEAN:
            LOG.trace("    Boolean");
            if (!avroType.equals(Type.BOOLEAN) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
                throw conversionError("BOOLEAN", pigFieldSchema, avroType);
            }
            break;
        case DataType.BYTEARRAY:
            LOG.trace("    Bytearray");
            if (!avroType.equals(Type.BYTES) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
                throw conversionError("BYTEARRAY", pigFieldSchema, avroType);
            }
            break;
        case DataType.CHARARRAY: // String
            LOG.trace("    Chararray");
            if (!avroType.equals(Type.STRING) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
                throw conversionError("CHARARRAY", pigFieldSchema, avroType);
            }
            break;
        case DataType.DOUBLE:
            LOG.trace("    Double");
            if (!avroType.equals(Type.DOUBLE) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
                throw conversionError("DOUBLE", pigFieldSchema, avroType);
            }
            break;
        case DataType.FLOAT:
            LOG.trace("    Float");
            if (!avroType.equals(Type.FLOAT) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
                throw conversionError("FLOAT", pigFieldSchema, avroType);
            }
            break;
        case DataType.INTEGER: // Int or Enum
            LOG.trace("    Integer");
            if (!avroType.equals(Type.INT) && !avroType.equals(Type.ENUM) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
                throw conversionError("INTEGER", pigFieldSchema, avroType);
            }
            break;
        case DataType.LONG:
            LOG.trace("    Long");
            if (!avroType.equals(Type.LONG) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
                throw conversionError("LONG", pigFieldSchema, avroType);
            }
            break;
        case DataType.MAP: // Avro Map
            LOG.trace("    Map");
            if (!avroType.equals(Type.MAP) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
                throw conversionError("MAP", pigFieldSchema, avroType);
            }
            break;
        case DataType.NULL: // Avro nullable??
            LOG.trace("    Type Null");
            if (!avroType.equals(Type.NULL) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
                throw conversionError("NULL", pigFieldSchema, avroType);
            }
            break;
        case DataType.TUPLE: // Avro Record
            LOG.trace("    Tuple");
            if (!avroType.equals(Type.RECORD) && !checkUnionSchema(avroSchema, pigFieldSchema)) {
                throw conversionError("TUPLE(record)", pigFieldSchema, avroType);
            }
            break;
        default:
            throw new IOException("Unexpected Pig schema type " + DataType.genTypeToNameMap().get(pigType) + " for avro schema field " + avroSchema.getName() + ": " + avroType.name());
    }
}

/**
 * Returns the ARRAY schema denoted by {@code avroSchema}: the schema itself when it is an
 * array, the first ARRAY branch when it is a union, or {@code null} when there is none.
 */
private static Schema findArraySchema(Schema avroSchema) {
    Type avroType = avroSchema.getType();
    if (avroType.equals(Type.ARRAY)) {
        return avroSchema;
    }
    if (avroType.equals(Type.UNION)) {
        for (Schema branch : avroSchema.getTypes()) {
            if (branch.getType().equals(Type.ARRAY)) {
                return branch;
            }
        }
    }
    return null;
}

/** Builds the exception reported when a Pig field can not be converted to the Avro type. */
private static IOException conversionError(String pigTypeLabel, ResourceFieldSchema pigFieldSchema, Type avroType) {
    return new IOException("Can not convert field [" + pigFieldSchema.getName() + "] from Pig " + pigTypeLabel + " with schema " + pigFieldSchema.getSchema() + " to avro " + avroType.name());
}
Also used : DataType(org.apache.pig.data.DataType) Type(org.apache.avro.Schema.Type) IOException(java.io.IOException)

Example 57 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class MongoStore method recordToMongo.

/**
 * Converts a record value into a {@link Document}, one entry per field of the record schema.
 *
 * <p>Each field value is read from {@code value} (cast to {@code PersistentBase}) by field
 * position, converted through {@code toDocument}, and stored under the Avro field name.
 *
 * @param docf document field name, passed through unchanged to {@code toDocument}
 * @param fieldSchema schema whose fields are iterated
 * @param value the record instance holding the field values
 * @return a new document with one entry per schema field
 */
private Document recordToMongo(final String docf, final Schema fieldSchema, final Object value) {
    final Document result = new Document();
    for (final Field avroField : fieldSchema.getFields()) {
        final Object rawValue = ((PersistentBase) value).get(avroField.pos());
        // The store type is looked up by the mapped document field, not by the Avro name.
        final String mappedField = mapping.getDocumentField(avroField.name());
        final Schema avroFieldSchema = avroField.schema();
        final Type avroFieldType = avroFieldSchema.getType();
        final DocumentFieldType fieldStoreType = mapping.getDocumentFieldType(mappedField);
        LOG.debug("Transform value to DBObject (RECORD), docField:{}, schemaType:{}, storeType:{}", new Object[] { avroField.name(), avroFieldSchema.getType(), fieldStoreType });
        final Object converted = toDocument(docf, avroFieldSchema, avroFieldType, fieldStoreType, rawValue);
        result.put(avroField.name(), converted);
    }
    return result;
}
Also used : Field(org.apache.avro.Schema.Field) PersistentBase(org.apache.gora.persistency.impl.PersistentBase) DocumentFieldType(org.apache.gora.mongodb.store.MongoMapping.DocumentFieldType) Type(org.apache.avro.Schema.Type) DocumentFieldType(org.apache.gora.mongodb.store.MongoMapping.DocumentFieldType) Document(org.bson.Document)

Example 58 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class MongoStore method unionToMongo.

/**
 * Converts the value of a nullable union field, i.e. a two-branch union of the form
 * {@code ["null","type"]} or {@code ["type","null"]}, as if its schema were just
 * {@code "type"}.
 *
 * @param docf document field name, passed through unchanged to {@code toDocument}
 * @param fieldSchema the union schema of the field
 * @param storeType store type of the field, passed through to {@code toDocument}
 * @param value the value to convert
 * @return the result of {@code toDocument} for the non-null branch of the union
 * @throws IllegalStateException if the union is not a two-branch union with exactly one
 *         {@code null} branch
 */
private Object unionToMongo(final String docf, final Schema fieldSchema, final DocumentFieldType storeType, final Object value) {
    // Only two-branch unions are supported; checking the size first also avoids an
    // IndexOutOfBoundsException on single-branch unions.
    if (fieldSchema.getTypes().size() != 2) {
        throw new IllegalStateException("MongoStore doesn't support 3 types union field yet. Please update your mapping");
    }
    // schema [type0, type1]
    final Schema schema0 = fieldSchema.getTypes().get(0);
    final Schema schema1 = fieldSchema.getTypes().get(1);
    final Type type0 = schema0.getType();
    final Type type1 = schema1.getType();
    // Check if types are different and there's a "null", like ["null","type"] or ["type","null"]
    final boolean nullableUnion = !type0.equals(type1) && (type0.equals(Type.NULL) || type1.equals(Type.NULL));
    if (!nullableUnion) {
        throw new IllegalStateException("MongoStore doesn't support 3 types union field yet. Please update your mapping");
    }
    // Pick whichever branch is not "null", so ["type","null"] is handled like ["null","type"].
    final Schema innerSchema = type0.equals(Type.NULL) ? schema1 : schema0;
    final Type innerType = innerSchema.getType();
    LOG.debug("Transform value to DBObject (UNION), schemaType:{}, type1:{}, storeType:{}", new Object[] { innerType, type1, storeType });
    // Serialize as if schema was ["type"]
    return toDocument(docf, innerSchema, innerType, storeType, value);
}
Also used : DocumentFieldType(org.apache.gora.mongodb.store.MongoMapping.DocumentFieldType) Type(org.apache.avro.Schema.Type) Schema(org.apache.avro.Schema)

Example 59 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class HBaseByteInterface method fromBytes.

/**
 * Deserializes an array of bytes matching the given schema to the proper basic
 * (enum, Utf8,...) or complex type (Persistent/Record).
 *
 * <p>A two-branch union with exactly one <code>null</code> branch is decoded as if the schema
 * were the non-null branch. Any other union, and <code>map</code>/<code>record</code>
 * schemas, are decoded with a cached <code>SpecificDatumReader</code>. Schema types without
 * a case here (for example <code>array</code>) are rejected.
 *
 * @param schema Avro schema describing the expected data
 * @param val array of bytes with the data serialized
 * @return Enum|Utf8|ByteBuffer|Integer|Long|Float|Double|Boolean|Persistent|Null
 * @throws IOException if the datum reader fails to decode the bytes
 * @throws RuntimeException if the schema type is not handled
 */
@SuppressWarnings({ "rawtypes" })
public static Object fromBytes(Schema schema, byte[] val) throws IOException {
    Type type = schema.getType();
    switch(type) {
        case ENUM:
            // The enum is identified by the first byte only.
            return AvroUtils.getEnumValue(schema, val[0]);
        case STRING:
            return new Utf8(Bytes.toString(val));
        case BYTES:
            return ByteBuffer.wrap(val);
        case INT:
            return Bytes.toInt(val);
        case LONG:
            return Bytes.toLong(val);
        case FLOAT:
            return Bytes.toFloat(val);
        case DOUBLE:
            return Bytes.toDouble(val);
        case BOOLEAN:
            return val[0] != 0;
        case UNION:
            if (schema.getTypes().size() == 2) {
                // schema [type0, type1]
                Type type0 = schema.getTypes().get(0).getType();
                Type type1 = schema.getTypes().get(1).getType();
                // Check if types are different and there's a "null", like ["null","type"] or ["type","null"]
                if (!type0.equals(type1) && (type0.equals(Schema.Type.NULL) || type1.equals(Schema.Type.NULL))) {
                    if (type0.equals(Schema.Type.NULL))
                        schema = schema.getTypes().get(1);
                    else
                        schema = schema.getTypes().get(0);
                    // Deserialize as if schema was ["type"]
                    return fromBytes(schema, val);
                }
            }
            // Intentional fall through: every other union is decoded with a datum reader.
        case MAP:
        case RECORD:
            // Readers are cached per schema; unions have no name of their own, so the
            // schema object itself is the map key.
            SpecificDatumReader<?> reader = readerMap.get(schema);
            if (reader == null) {
                // ignore dirty bits
                reader = new SpecificDatumReader(schema);
                // If another caller registered a reader first, use that one.
                SpecificDatumReader localReader = readerMap.putIfAbsent(schema, reader);
                if (localReader != null) {
                    reader = localReader;
                }
            }
            // Initialize a decoder over 'val', handing the cached decoder to the factory for
            // reuse (a null cache entry simply yields a new instance).
            BinaryDecoder decoderFromCache = decoders.get();
            BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(val, decoderFromCache);
            // Remember the decoder whenever the factory returned a different instance.
            if (decoder != decoderFromCache) {
                decoders.set(decoder);
            }
            return reader.read(null, decoder);
        default:
            throw new RuntimeException("Unknown type: " + type);
    }
}
Also used : Type(org.apache.avro.Schema.Type) Utf8(org.apache.avro.util.Utf8) SpecificDatumReader(org.apache.avro.specific.SpecificDatumReader) BinaryDecoder(org.apache.avro.io.BinaryDecoder)

Example 60 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class SolrStore method deserializeFieldValue.

/**
 * Converts a value read from Solr into the Java value expected for the given Avro schema.
 *
 * <p>MAP, ARRAY and RECORD values (and unions that are not simple nullable unions) are
 * deserialized from a byte array with a datum reader. A two-branch nullable union is handled
 * as its non-null branch. Types without a dedicated case are returned as read.
 *
 * @param field the Avro field being read; its position selects the value reused from
 *        {@code persistent}
 * @param fieldSchema schema describing how to interpret {@code solrValue}
 * @param solrValue the raw value read from Solr
 * @param persistent the object whose current field value is passed to the deserializer
 * @return the deserialized field value
 * @throws IOException if the schema type is FIXED, which is not supported here
 */
@SuppressWarnings("unchecked")
private Object deserializeFieldValue(Field field, Schema fieldSchema, Object solrValue, T persistent) throws IOException {
    Object fieldValue = null;
    switch(fieldSchema.getType()) {
        case MAP:
        case ARRAY:
        case RECORD:
            @SuppressWarnings("rawtypes") SpecificDatumReader reader = getDatumReader(fieldSchema);
            fieldValue = IOUtils.deserialize((byte[]) solrValue, reader, persistent.get(field.pos()));
            break;
        case ENUM:
            fieldValue = AvroUtils.getEnumValue(fieldSchema, (String) solrValue);
            break;
        case FIXED:
            throw new IOException("Unsupported Avro type FIXED for schema " + fieldSchema.getFullName());
        case BYTES:
            fieldValue = ByteBuffer.wrap((byte[]) solrValue);
            break;
        case STRING:
            fieldValue = new Utf8(solrValue.toString());
            break;
        case UNION:
            if (fieldSchema.getTypes().size() == 2 && isNullable(fieldSchema)) {
                // schema [type0, type1]
                Type type0 = fieldSchema.getTypes().get(0).getType();
                Type type1 = fieldSchema.getTypes().get(1).getType();
                // ["null","type"] selects the second branch; every other shape selects the first.
                boolean nullFirst = type0.equals(Schema.Type.NULL) && !type0.equals(type1);
                Schema innerSchema = fieldSchema.getTypes().get(nullFirst ? 1 : 0);
                fieldValue = deserializeFieldValue(field, innerSchema, solrValue, persistent);
            } else {
                @SuppressWarnings("rawtypes") SpecificDatumReader unionReader = getDatumReader(fieldSchema);
                fieldValue = IOUtils.deserialize((byte[]) solrValue, unionReader, persistent.get(field.pos()));
            }
            break;
        default:
            // Remaining types need no conversion.
            fieldValue = solrValue;
    }
    return fieldValue;
}
Also used : Type(org.apache.avro.Schema.Type) Utf8(org.apache.avro.util.Utf8) IOException(java.io.IOException) SpecificDatumReader(org.apache.avro.specific.SpecificDatumReader)

Aggregations

Type (org.apache.avro.Schema.Type)80 Schema (org.apache.avro.Schema)58 Field (org.apache.avro.Schema.Field)32 Map (java.util.Map)20 List (java.util.List)16 HashMap (java.util.HashMap)15 ArrayList (java.util.ArrayList)13 ByteBuffer (java.nio.ByteBuffer)11 Collectors (java.util.stream.Collectors)11 IOException (java.io.IOException)10 LogicalType (org.apache.avro.LogicalType)8 LinkedHashMap (java.util.LinkedHashMap)7 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)7 ImmutableMap (com.google.common.collect.ImmutableMap)6 Arrays (java.util.Arrays)5 PersistentBase (org.apache.gora.persistency.impl.PersistentBase)5 Test (org.junit.Test)5 BaseRuntimeChildDefinition (ca.uhn.fhir.context.BaseRuntimeChildDefinition)4 BaseRuntimeElementDefinition (ca.uhn.fhir.context.BaseRuntimeElementDefinition)4 DataType (com.linkedin.pinot.common.data.FieldSpec.DataType)4