Search in sources :

Example 26 with Type

use of org.apache.avro.Schema.Type in project beam by apache.

the class BigQueryAvroUtils method convertNullableField.

/**
 * Converts the value of a BigQuery NULLABLE field.
 *
 * <p>The Avro schema must be a UNION with exactly two branches. A {@code null} value is
 * returned as-is; any other value is handed to {@code convertRequiredField} together with the
 * type of the union branch that is not {@code NULL}.
 *
 * @param avroSchema Avro schema of the field, verified to be a two-branch UNION
 * @param fieldSchema BigQuery schema of the field
 * @param v the Avro value, may be {@code null}
 * @return the converted value, or {@code null} when {@code v} is {@code null}
 */
@Nullable
private static Object convertNullableField(Schema avroSchema, TableFieldSchema fieldSchema, Object v) {
    // A NULLABLE field arrives as an Avro UNION pairing "null" with the actual type.
    Type schemaType = avroSchema.getType();
    verify(schemaType == Type.UNION, "Expected Avro schema type UNION, not %s, for BigQuery NULLABLE field %s", schemaType, fieldSchema.getName());
    List<Schema> branches = avroSchema.getTypes();
    verify(branches.size() == 2, "BigQuery NULLABLE field %s should be an Avro UNION of NULL and another type, not %s", fieldSchema.getName(), branches);
    if (v != null) {
        // Either branch order is accepted: use the first branch unless it is the NULL one.
        Type leading = branches.get(0).getType();
        Type valueType = leading.equals(Type.NULL) ? branches.get(1).getType() : leading;
        return convertRequiredField(valueType, fieldSchema, v);
    }
    return null;
}
Also used : Type(org.apache.avro.Schema.Type) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Schema(org.apache.avro.Schema) TableSchema(com.google.api.services.bigquery.model.TableSchema) Nullable(javax.annotation.Nullable)

Example 27 with Type

use of org.apache.avro.Schema.Type in project beam by apache.

the class BigQueryAvroUtils method convertField.

/**
 * Builds the Avro {@link Field} that corresponds to a BigQuery field definition.
 *
 * <p>The element schema is looked up in {@code BIG_QUERY_TO_AVRO_TYPES}; RECORD fields are
 * expanded through {@code toGenericAvroSchema}. The BigQuery mode then decides the wrapping:
 * absent or {@code NULLABLE} yields a union of NULL and the element schema, {@code REQUIRED}
 * yields the element schema itself, and {@code REPEATED} yields an array of it.
 *
 * @param bigQueryField the BigQuery field definition to convert
 * @return an Avro field with the same name and description and no default value
 * @throws IllegalArgumentException if the BigQuery type has no Avro mapping or the mode is
 *     not one of the three handled values
 */
private static Field convertField(TableFieldSchema bigQueryField) {
    Type avroType = BIG_QUERY_TO_AVRO_TYPES.get(bigQueryField.getType());
    if (avroType == null) {
        // Fail here with the offending field and type rather than passing null on to
        // Schema.create, where the failure would not identify the field.
        throw new IllegalArgumentException(String.format("Unable to map BigQuery field type %s of field %s to an Avro type", bigQueryField.getType(), bigQueryField.getName()));
    }
    Schema elementSchema;
    if (avroType == Type.RECORD) {
        elementSchema = toGenericAvroSchema(bigQueryField.getName(), bigQueryField.getFields());
    } else {
        elementSchema = Schema.create(avroType);
    }
    // Read the mode once; a missing mode is treated the same as NULLABLE.
    String mode = bigQueryField.getMode();
    Schema fieldSchema;
    if (mode == null || "NULLABLE".equals(mode)) {
        fieldSchema = Schema.createUnion(Schema.create(Type.NULL), elementSchema);
    } else if ("REQUIRED".equals(mode)) {
        fieldSchema = elementSchema;
    } else if ("REPEATED".equals(mode)) {
        fieldSchema = Schema.createArray(elementSchema);
    } else {
        throw new IllegalArgumentException(String.format("Unknown BigQuery Field Mode: %s", mode));
    }
    return new Field(bigQueryField.getName(), fieldSchema, bigQueryField.getDescription(), (Object) null);
}
Also used : Field(org.apache.avro.Schema.Field) Type(org.apache.avro.Schema.Type) TableFieldSchema(com.google.api.services.bigquery.model.TableFieldSchema) Schema(org.apache.avro.Schema) TableSchema(com.google.api.services.bigquery.model.TableSchema)

Example 28 with Type

use of org.apache.avro.Schema.Type in project drill by apache.

the class AvroRecordReader method process.

/**
 * Recursively writes one Avro value into the given writer according to its schema.
 *
 * <p>{@code null} values are skipped. Records, arrays, nullable unions and maps are walked
 * recursively; FIXED is rejected; every other type is delegated to {@code processPrimitive}.
 *
 * @param value the Avro value to write, may be {@code null}
 * @param schema the Avro schema describing {@code value}
 * @param fieldName name under which the value is written; asserted non-null for arrays and
 *     primitives
 * @param writer the map-or-list writer receiving the value
 * @param fieldSelection selection state for this field; children are looked up by field name
 * @throws UnsupportedOperationException for FIXED values and for unions whose first branch
 *     is not NULL
 */
private void process(final Object value, final Schema schema, final String fieldName, MapOrListWriterImpl writer, FieldSelection fieldSelection) {
    if (value == null) {
        return;
    }
    final Schema.Type type = schema.getType();
    switch(type) {
        case RECORD:
            for (final Schema.Field field : schema.getFields()) {
                // Pick the writer per field. Record-typed fields (plain or nullable) get their own
                // child map writer; every other field is written to the enclosing writer. The choice
                // is scoped to the loop body so that a record field does not redirect the sibling
                // fields that follow it into its child map.
                MapOrListWriterImpl _writer = writer;
                if (field.schema().getType() == Schema.Type.RECORD || (field.schema().getType() == Schema.Type.UNION && field.schema().getTypes().get(0).getType() == Schema.Type.NULL && field.schema().getTypes().get(1).getType() == Schema.Type.RECORD)) {
                    _writer = (MapOrListWriterImpl) writer.map(field.name());
                }
                process(((GenericRecord) value).get(field.name()), field.schema(), field.name(), _writer, fieldSelection.getChild(field.name()));
            }
            break;
        case ARRAY:
            assert fieldName != null;
            final GenericArray<?> array = (GenericArray<?>) value;
            Schema elementSchema = array.getSchema().getElementType();
            Type elementType = elementSchema.getType();
            // Arrays of records or maps are written through a list-of-maps writer.
            if (elementType == Schema.Type.RECORD || elementType == Schema.Type.MAP) {
                writer = (MapOrListWriterImpl) writer.list(fieldName).listoftmap(fieldName);
            } else {
                writer = (MapOrListWriterImpl) writer.list(fieldName);
            }
            for (final Object o : array) {
                writer.start();
                process(o, elementSchema, fieldName, writer, fieldSelection.getChild(fieldName));
                writer.end();
            }
            break;
        case UNION:
            // currently supporting only nullable union (optional fields) like ["null", "some-type"].
            if (schema.getTypes().get(0).getType() != Schema.Type.NULL) {
                throw new UnsupportedOperationException("Avro union type must be of the format : [\"null\", \"some-type\"]");
            }
            // value is non-null here, so it is processed with the second (non-null) branch.
            process(value, schema.getTypes().get(1), fieldName, writer, fieldSelection);
            break;
        case MAP:
            @SuppressWarnings("unchecked") final HashMap<Object, Object> map = (HashMap<Object, Object>) value;
            Schema valueSchema = schema.getValueType();
            writer = (MapOrListWriterImpl) writer.map(fieldName);
            writer.start();
            for (Entry<Object, Object> entry : map.entrySet()) {
                process(entry.getValue(), valueSchema, entry.getKey().toString(), writer, fieldSelection.getChild(entry.getKey().toString()));
            }
            writer.end();
            break;
        case FIXED:
            throw new UnsupportedOperationException("Unimplemented type: " + type.toString());
        // Enum symbols are strings
        case ENUM:
        // Treat null type as a primitive
        case NULL:
        default:
            assert fieldName != null;
            // Inside a map writer, skip fields whose selection reports them as never valid.
            if (writer.isMapWriter()) {
                if (fieldSelection.isNeverValid()) {
                    break;
                }
            }
            processPrimitive(value, schema.getType(), fieldName, writer);
            break;
    }
}
Also used : MapOrListWriterImpl(org.apache.drill.exec.vector.complex.impl.MapOrListWriterImpl) HashMap(java.util.HashMap) Schema(org.apache.avro.Schema) Type(org.apache.avro.Schema.Type) Type(org.apache.avro.Schema.Type) GenericArray(org.apache.avro.generic.GenericArray)

Example 29 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class SolrStore method deserializeFieldValue.

/**
 * Converts a value read from Solr into the Java object expected for an Avro field.
 *
 * <p>MAP, ARRAY and RECORD values, and unions that are not a two-branch nullable union, are
 * decoded from a byte array with a datum reader. ENUM, BYTES and STRING values are wrapped
 * in their Avro representations. A two-branch nullable union is deserialized with the schema
 * of its non-null branch. All remaining types are returned unchanged.
 *
 * @param field the Avro field being populated; its position selects the reuse instance
 * @param fieldSchema schema of the value to deserialize
 * @param solrValue raw value obtained from Solr
 * @param persistent the persistent object whose current field value is offered for reuse
 * @return the deserialized field value
 * @throws IOException if the schema type is FIXED (not supported); the byte-array
 *     deserialization is presumably the other source of this exception
 */
@SuppressWarnings("unchecked")
private Object deserializeFieldValue(Field field, Schema fieldSchema, Object solrValue, T persistent) throws IOException {
    Object fieldValue = null;
    switch(fieldSchema.getType()) {
        case MAP:
        case ARRAY:
        case RECORD:
            @SuppressWarnings("rawtypes") SpecificDatumReader reader = getDatumReader(fieldSchema);
            fieldValue = IOUtils.deserialize((byte[]) solrValue, reader, persistent.get(field.pos()));
            break;
        case ENUM:
            fieldValue = AvroUtils.getEnumValue(fieldSchema, (String) solrValue);
            break;
        case FIXED:
            // Report what failed and where instead of an opaque placeholder message.
            throw new IOException("Deserialization of Avro FIXED type is not supported (field: " + field.name() + ")");
        case BYTES:
            fieldValue = ByteBuffer.wrap((byte[]) solrValue);
            break;
        case STRING:
            fieldValue = new Utf8(solrValue.toString());
            break;
        case UNION:
            if (fieldSchema.getTypes().size() == 2 && isNullable(fieldSchema)) {
                // schema [type0, type1]
                Type type0 = fieldSchema.getTypes().get(0).getType();
                Type type1 = fieldSchema.getTypes().get(1).getType();
                // ["null","type"] or ["type","null"]: use the second branch only when the first
                // one is the NULL branch and the two differ; otherwise use the first branch.
                boolean nullFirst = !type0.equals(type1) && type0.equals(Schema.Type.NULL);
                Schema valueSchema = fieldSchema.getTypes().get(nullFirst ? 1 : 0);
                fieldValue = deserializeFieldValue(field, valueSchema, solrValue, persistent);
            } else {
                @SuppressWarnings("rawtypes") SpecificDatumReader unionReader = getDatumReader(fieldSchema);
                fieldValue = IOUtils.deserialize((byte[]) solrValue, unionReader, persistent.get(field.pos()));
            }
            break;
        default:
            fieldValue = solrValue;
    }
    return fieldValue;
}
Also used : Type(org.apache.avro.Schema.Type) Utf8(org.apache.avro.util.Utf8) IOException(java.io.IOException) SpecificDatumReader(org.apache.avro.specific.SpecificDatumReader)

Example 30 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class HBaseByteInterface method fromBytes.

/**
   * Deserializes an array of bytes matching the given schema to the proper basic 
   * (enum, Utf8,...) or complex type (Persistent/Record).
   * 
   * Does not handle <code>arrays/maps</code> if not inside a <code>record</code> type.
   * 
   * A two-branch union with exactly one <code>null</code> branch is deserialized as if the
   * schema were its non-null branch. Any other union is read like a record, through a
   * datum reader cached in <code>readerMap</code>.
   * 
   * @param schema Avro schema describing the expected data
   * @param val array of bytes with the data serialized
   * @return Enum|Utf8|ByteBuffer|Integer|Long|Float|Double|Boolean|Persistent|Null
   * @throws IOException if the Avro datum reader fails to read the value
   * @throws RuntimeException if the schema type is not handled by this method
   */
@SuppressWarnings({ "rawtypes" })
public static Object fromBytes(Schema schema, byte[] val) throws IOException {
    Type type = schema.getType();
    switch(type) {
        case ENUM:
            return AvroUtils.getEnumValue(schema, val[0]);
        case STRING:
            return new Utf8(Bytes.toString(val));
        case BYTES:
            return ByteBuffer.wrap(val);
        case INT:
            return Bytes.toInt(val);
        case LONG:
            return Bytes.toLong(val);
        case FLOAT:
            return Bytes.toFloat(val);
        case DOUBLE:
            return Bytes.toDouble(val);
        case BOOLEAN:
            return val[0] != 0;
        case UNION:
            // if 'val' is empty we ignore the special case (will match Null in "case RECORD")  
            if (schema.getTypes().size() == 2) {
                // schema [type0, type1]
                Type type0 = schema.getTypes().get(0).getType();
                Type type1 = schema.getTypes().get(1).getType();
                // Check if types are different and there's a "null", like ["null","type"] or ["type","null"]
                if (!type0.equals(type1) && (type0.equals(Schema.Type.NULL) || type1.equals(Schema.Type.NULL))) {
                    if (type0.equals(Schema.Type.NULL))
                        schema = schema.getTypes().get(1);
                    else
                        schema = schema.getTypes().get(0);
                    // Deserialize as if schema was ["type"] 
                    return fromBytes(schema, val);
                }
            }
            // Intentional fall-through: any other union is read with a datum reader, like a record.
        case RECORD:
            // For UNION schemas, must use a specific SpecificDatumReader
            // from the readerMap since unions don't have own name
            // (key name in map will be "UNION-type-type-...")
            String schemaId = schema.getType().equals(Schema.Type.UNION) ? String.valueOf(schema.hashCode()) : schema.getFullName();
            SpecificDatumReader<?> reader = readerMap.get(schemaId);
            if (reader == null) {
                // ignore dirty bits
                reader = new SpecificDatumReader(schema);
                SpecificDatumReader localReader = null;
                // Another thread may have registered a reader in the meantime; prefer that one.
                if ((localReader = readerMap.putIfAbsent(schemaId, reader)) != null) {
                    reader = localReader;
                }
            }
            // Initialize a decoder, offering the cached one for reuse. Avro returns either the
            // reconfigured cached instance or a new decoder when reuse is not possible.
            BinaryDecoder decoderFromCache = decoders.get();
            BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(val, decoderFromCache);
            // Keep the cache pointing at whichever decoder is actually in use.
            if (decoder != decoderFromCache) {
                decoders.set(decoder);
            }
            return reader.read(null, decoder);
        default:
            throw new RuntimeException("Unknown type: " + type);
    }
}
Also used : Type(org.apache.avro.Schema.Type) Utf8(org.apache.avro.util.Utf8) SpecificDatumReader(org.apache.avro.specific.SpecificDatumReader) BinaryDecoder(org.apache.avro.io.BinaryDecoder)

Aggregations

Type (org.apache.avro.Schema.Type)40 Schema (org.apache.avro.Schema)28 Field (org.apache.avro.Schema.Field)13 DataType (com.linkedin.pinot.common.data.FieldSpec.DataType)6 ByteBuffer (java.nio.ByteBuffer)6 HashMap (java.util.HashMap)6 ArrayList (java.util.ArrayList)5 Map (java.util.Map)5 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)5 SQLException (java.sql.SQLException)4 PersistentBase (org.apache.gora.persistency.impl.PersistentBase)4 EventCreationException (com.linkedin.databus2.producers.EventCreationException)3 SourceType (com.linkedin.databus2.relay.config.ReplicationBitSetterStaticConfig.SourceType)3 IOException (java.io.IOException)3 LinkedHashMap (java.util.LinkedHashMap)3 List (java.util.List)3 GenericArray (org.apache.avro.generic.GenericArray)3 Utf8 (org.apache.avro.util.Utf8)3 DocumentFieldType (org.apache.gora.mongodb.store.MongoMapping.DocumentFieldType)3 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)2