Examples with Type - org.apache.avro.Schema.Type

Example 31 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class CassandraSubColumn method getValue.

/**
 * Converts the raw bytes held by this column into a typed object, using the
 * schema of the associated field to select the conversion.
 *
 * @return the typed value, or {@code null} when the column holds no bytes
 * @see org.apache.gora.cassandra.query.CassandraColumn#getValue()
 */
public Object getValue() {
    Schema schema = getField().schema();
    Type schemaType = schema.getType();
    ByteBuffer rawBytes = hColumn.getValue();
    // Nothing stored for this column: there is nothing to deserialize.
    return rawBytes == null ? null : getFieldValue(schemaType, schema, rawBytes);
}

Also used : Field(org.apache.avro.Schema.Field) Type(org.apache.avro.Schema.Type) Schema(org.apache.avro.Schema) ByteBuffer(java.nio.ByteBuffer)

Example 32 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class CassandraStore method addOrUpdateField.

/**
 * Adds, updates or deletes a single field in Cassandra according to its Avro type.
 * <ul>
 * <li>Primitive-like types (STRING, BOOLEAN, INT, LONG, BYTES, FLOAT, DOUBLE, FIXED)
 * are written as a plain column.</li>
 * <li>RECORD values are Avro-serialized and written as a column.</li>
 * <li>MAP values are Avro-serialized for non-super column families, otherwise
 * written through {@code addStatefulHashMap}; when the map's value type is a
 * UNION, an extra entry carrying the union schema index is added per key.</li>
 * <li>ARRAY values are copied into a {@link GenericArray} and written through
 * {@code addGenericArray}.</li>
 * <li>UNION values first store the index of the matching union branch in a
 * companion column (field name + {@code UNION_COL_SUFIX}) and then recurse
 * with the schema of that branch.</li>
 * </ul>
 * For RECORD, MAP, ARRAY and UNION a {@code null} value deletes the stored content.
 * Serialization failures are logged (with their cause) and otherwise ignored.
 *
 * @param key     the key of the row where the field should be added
 * @param field   the Avro field representing a datum
 * @param schema  the schema belonging to the particular Avro field
 * @param value   the field value
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private void addOrUpdateField(K key, Field field, Schema schema, Object value) {
    Type type = schema.getType();
    // Fields whose name contains the union suffix are the companion columns
    // written by the UNION branch below; they are not processed on their own.
    if (field.name().contains(CassandraStore.UNION_COL_SUFIX)) {
        return;
    }
    switch (type) {
        case STRING:
        case BOOLEAN:
        case INT:
        case LONG:
        case BYTES:
        case FLOAT:
        case DOUBLE:
        case FIXED:
            this.cassandraClient.addColumn(key, field.name(), value);
            break;
        case RECORD:
            if (value == null) {
                LOG.warn("Setting content of: " + field.name() + " to null.");
                String familyName = this.cassandraClient.getCassandraMapping().getFamily(field.name());
                this.cassandraClient.deleteColumn(key, familyName, this.cassandraClient.toByteBuffer(field.name()));
            } else if (value instanceof PersistentBase) {
                PersistentBase persistentBase = (PersistentBase) value;
                try {
                    byte[] byteValue = AvroSerializerUtil.serializer(persistentBase, schema);
                    this.cassandraClient.addColumn(key, field.name(), byteValue);
                } catch (IOException e) {
                    // Best effort: keep going, but do not lose the cause.
                    LOG.warn(field.name() + " named record could not be serialized.", e);
                }
            } else {
                LOG.warn("Record with value: " + value.toString() + " not supported for field: " + field.name());
            }
            break;
        case MAP:
            if (value == null) {
                // delete map
                LOG.warn("Setting content of: " + field.name() + " to null.");
                this.cassandraClient.deleteStatefulHashMap(key, field.name());
            } else if (value instanceof Map<?, ?>) {
                Map<CharSequence, Object> map = (Map<CharSequence, Object>) value;
                Schema valueSchema = schema.getValueType();
                if (Type.UNION.equals(valueSchema.getType())) {
                    // For union-valued maps, store the union branch index next to each entry.
                    Map<CharSequence, Object> valueMap = new HashMap<>();
                    for (Map.Entry<CharSequence, Object> entry : map.entrySet()) {
                        CharSequence mapKey = entry.getKey();
                        Object mapValue = entry.getValue();
                        int valueUnionIndex = getUnionSchema(mapValue, valueSchema);
                        valueMap.put((mapKey + UNION_COL_SUFIX), valueUnionIndex);
                        valueMap.put(mapKey, mapValue);
                    }
                    map = valueMap;
                }
                String familyName = this.cassandraClient.getCassandraMapping().getFamily(field.name());
                // If the map is not stored in a super column family, use the Avro serializer.
                if (!this.cassandraClient.isSuper(familyName)) {
                    try {
                        byte[] byteValue = AvroSerializerUtil.serializer(map, schema);
                        this.cassandraClient.addColumn(key, field.name(), byteValue);
                    } catch (IOException e) {
                        // Best effort: keep going, but do not lose the cause.
                        LOG.warn(field.name() + " named map could not be serialized.", e);
                    }
                } else {
                    this.cassandraClient.addStatefulHashMap(key, field.name(), map);
                }
            } else {
                LOG.warn("Map with value: " + value.toString() + " not supported for field: " + field.name());
            }
            break;
        case ARRAY:
            if (value == null) {
                LOG.warn("Setting content of: " + field.name() + " to null.");
                this.cassandraClient.deleteGenericArray(key, field.name());
            } else if (value instanceof DirtyListWrapper<?>) {
                DirtyListWrapper fieldValue = (DirtyListWrapper<?>) value;
                // Copy the wrapper's elements into an Avro array before handing it to the client.
                GenericArray valueArray = new Array(fieldValue.size(), schema);
                for (int i = 0; i < fieldValue.size(); i++) {
                    valueArray.add(i, fieldValue.get(i));
                }
                this.cassandraClient.addGenericArray(key, field.name(), (GenericArray<?>) valueArray);
            } else {
                LOG.warn("Array with value: " + value.toString() + " not supported for field: " + field.name());
            }
            break;
        case UNION: {
            // adding union schema index
            String columnName = field.name() + UNION_COL_SUFIX;
            String familyName = this.cassandraClient.getCassandraMapping().getFamily(field.name());
            if (value != null) {
                int schemaPos = getUnionSchema(value, schema);
                LOG.debug("Union with value: " + value.toString() + " at index: " + schemaPos + " supported for field: " + field.name());
                this.cassandraClient.getCassandraMapping().addColumn(familyName, columnName, columnName);
                if (this.cassandraClient.isSuper(familyName)) {
                    this.cassandraClient.addSubColumn(key, columnName, columnName, schemaPos);
                } else {
                    this.cassandraClient.addColumn(key, columnName, schemaPos);
                }
                // adding union value, using the schema of the selected branch
                Schema unionSchema = schema.getTypes().get(schemaPos);
                addOrUpdateField(key, field, unionSchema, value);
            } else {
                LOG.warn("Setting content of: " + field.name() + " to null.");
                if (this.cassandraClient.isSuper(familyName)) {
                    this.cassandraClient.deleteSubColumn(key, field.name());
                } else {
                    this.cassandraClient.deleteColumn(key, familyName, this.cassandraClient.toByteBuffer(field.name()));
                }
            }
            break;
        }
        default:
            LOG.warn("Type: " + type.name() + " not considered for field: " + field.name() + ". Please report this to dev@gora.apache.org");
    }
}

Also used : PersistentBase(org.apache.gora.persistency.impl.PersistentBase) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Schema(org.apache.avro.Schema) IOException(java.io.IOException) GenericArray(org.apache.avro.generic.GenericArray) Array(org.apache.avro.generic.GenericData.Array) Type(org.apache.avro.Schema.Type) DirtyListWrapper(org.apache.gora.persistency.impl.DirtyListWrapper) GenericArray(org.apache.avro.generic.GenericArray) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 33 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class CassandraStore method put.

/**
 * Buffers a copy of the given record under the given key.
 * <p>
 * The steps performed are:
 * <ol>
 * <li>Obtain the Avro {@link org.apache.avro.Schema} of the record.</li>
 * <li>Create a new record instance from the original via
 * {@code SpecificData.get().newRecord(value, schema)}.</li>
 * <li>Obtain the {@link java.util.List} of
 * {@link org.apache.avro.Schema.Field}s of the schema.</li>
 * <li>Walk the fields starting at index 1 (the field at position 0 is never
 * processed) and skip every field that is NOT dirty.</li>
 * <li>For each dirty field, read its current value and pass it, together with
 * its schema and {@link org.apache.avro.Schema.Type}, through
 * {@link org.apache.gora.cassandra.store.CassandraStore#getFieldValue(Schema, Type, Object)},
 * which handles the nested types (ARRAY, MAP, RECORD, UNION).</li>
 * <li>Store the processed value in the new record at the field's position.</li>
 * <li>Insert the key and the new record into the store's buffer, where it is
 * kept until flushing. This performs a structural modification of the buffer.</li>
 * </ol>
 *
 * @param key   for the Avro Record (object).
 * @param value Record object to be persisted in Cassandra
 * @see org.apache.gora.store.DataStore#put(java.lang.Object,org.apache.gora.persistency.Persistent)
 */
@Override
public void put(K key, T value) {
    Schema recordSchema = value.getSchema();
    @SuppressWarnings("unchecked")
    T copy = (T) SpecificData.get().newRecord(value, recordSchema);
    List<Field> recordFields = recordSchema.getFields();
    // Index 0 is intentionally left out of the iteration.
    for (int index = 1; index < recordFields.size(); index++) {
        if (value.isDirty(index)) {
            Field current = recordFields.get(index);
            Schema currentSchema = current.schema();
            Type currentType = currentSchema.getType();
            Object rawValue = value.get(current.pos());
            // Nested structures (array, map, record or union) are prepared by getFieldValue.
            copy.put(current.pos(), getFieldValue(currentSchema, currentType, rawValue));
        }
    }
    // this performs a structural modification of the map
    this.buffer.put(key, copy);
}

Also used : Field(org.apache.avro.Schema.Field) Type(org.apache.avro.Schema.Type) Schema(org.apache.avro.Schema)

Example 34 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class CassandraStore method getFieldValue.

/**
 * Prepares a single field value for persistence, based on its schema and type.
 * <ul>
 * <li>RECORD: a new record is created from the original and every member
 * other than the one at position 0 is processed recursively and copied
 * over, provided the original record reports itself as dirty.</li>
 * <li>MAP and ARRAY: the value is returned as is, after a cast to
 * {@link java.util.Map} or {@link java.util.List} respectively.</li>
 * <li>UNION: a non-null value is processed again with the schema of the
 * matching union branch; {@code null} is returned unchanged.</li>
 * <li>Any other type: the value is returned unchanged.</li>
 * </ul>
 *
 * @param fieldSchema the associated field schema
 * @param type the field type
 * @param fieldValue the field value.
 * @return the value to persist for the field
 */
private Object getFieldValue(Schema fieldSchema, Type type, Object fieldValue) {
    switch (type) {
        case RECORD: {
            PersistentBase source = (PersistentBase) fieldValue;
            PersistentBase copy = (PersistentBase) SpecificData.get().newRecord(source, source.getSchema());
            for (Field member : fieldSchema.getFields()) {
                int pos = member.pos();
                // NOTE(review): isDirty() is queried for the whole record, not for the
                // individual member - confirm this is intended.
                if (pos != 0 && source.isDirty()) {
                    Schema memberSchema = member.schema();
                    copy.put(pos, getFieldValue(memberSchema, memberSchema.getType(), source.get(pos)));
                }
            }
            return copy;
        }
        case MAP:
            return (Map<?, ?>) fieldValue;
        case ARRAY:
            return (List<?>) fieldValue;
        case UNION: {
            if (fieldValue == null) {
                return fieldValue;
            }
            // Resolve the union branch matching the value and process the value with it.
            int branchIndex = getUnionSchema(fieldValue, fieldSchema);
            Schema branchSchema = fieldSchema.getTypes().get(branchIndex);
            return getFieldValue(branchSchema, branchSchema.getType(), fieldValue);
        }
        default:
            return fieldValue;
    }
}

Also used : Field(org.apache.avro.Schema.Field) PersistentBase(org.apache.gora.persistency.impl.PersistentBase) Type(org.apache.avro.Schema.Type) Schema(org.apache.avro.Schema) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 35 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class HBaseByteInterface method fromBytes.

/**
 * Deserializes an array of bytes matching the given schema to the proper basic
 * (enum, Utf8,...) or complex type (Persistent/Record).
 * <p>
 * A two-branch union in which exactly one branch is {@code null}
 * (<code>["null","type"]</code> or <code>["type","null"]</code>) is deserialized
 * as if the schema were the non-null branch. Any other union is handled like a
 * record, through a cached {@link SpecificDatumReader}.
 * <p>
 * Does not handle <code>arrays/maps</code> if not inside a <code>record</code> type.
 *
 * @param schema Avro schema describing the expected data
 * @param val array of bytes with the data serialized
 * @return Enum|Utf8|ByteBuffer|Integer|Long|Float|Double|Boolean|Persistent|Null
 * @throws IOException if the datum reader fails to read the data
 * @throws RuntimeException if the schema type is not one of the handled types
 */
@SuppressWarnings({ "rawtypes" })
public static Object fromBytes(Schema schema, byte[] val) throws IOException {
    Type type = schema.getType();
    switch (type) {
        case ENUM:
            return AvroUtils.getEnumValue(schema, val[0]);
        case STRING:
            return new Utf8(Bytes.toString(val));
        case BYTES:
            return ByteBuffer.wrap(val);
        case INT:
            return Bytes.toInt(val);
        case LONG:
            return Bytes.toLong(val);
        case FLOAT:
            return Bytes.toFloat(val);
        case DOUBLE:
            return Bytes.toDouble(val);
        case BOOLEAN:
            return val[0] != 0;
        case UNION:
            if (schema.getTypes().size() == 2) {
                // schema [type0, type1]
                Type type0 = schema.getTypes().get(0).getType();
                Type type1 = schema.getTypes().get(1).getType();
                // Check if types are different and there's a "null", like ["null","type"] or ["type","null"]
                if (type0 != type1 && (type0 == Type.NULL || type1 == Type.NULL)) {
                    Schema nonNullBranch = (type0 == Type.NULL) ? schema.getTypes().get(1) : schema.getTypes().get(0);
                    // Deserialize as if schema was ["type"]
                    return fromBytes(nonNullBranch, val);
                }
            }
            // Intentional fall through: every other union is read like a record.
        case RECORD:
            // For UNION schemas, must use a specific SpecificDatumReader
            // from the readerMap since unions don't have own name,
            // so the schema's hash code is used as the map key instead.
            String schemaId = (type == Type.UNION) ? String.valueOf(schema.hashCode()) : schema.getFullName();
            SpecificDatumReader<?> reader = readerMap.get(schemaId);
            if (reader == null) {
                SpecificDatumReader<?> created = new SpecificDatumReader(schema);
                // If a reader was registered for this id in the meantime, use that one.
                SpecificDatumReader<?> existing = readerMap.putIfAbsent(schemaId, created);
                reader = (existing != null) ? existing : created;
            }
            // Initialize a decoder, handing the cached one (if any) to the factory for reuse.
            BinaryDecoder decoderFromCache = decoders.get();
            BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(val, decoderFromCache);
            // Cache the decoder when the cache was empty or the factory returned a new instance.
            if (decoder != decoderFromCache) {
                decoders.set(decoder);
            }
            return reader.read(null, decoder);
        default:
            throw new RuntimeException("Unknown type: " + type);
    }
}

Also used : Type(org.apache.avro.Schema.Type) Utf8(org.apache.avro.util.Utf8) SpecificDatumReader(org.apache.avro.specific.SpecificDatumReader) BinaryDecoder(org.apache.avro.io.BinaryDecoder)

Aggregations

Type (org.apache.avro.Schema.Type)41 Schema (org.apache.avro.Schema)28 Field (org.apache.avro.Schema.Field)13 DataType (com.linkedin.pinot.common.data.FieldSpec.DataType)6 ByteBuffer (java.nio.ByteBuffer)6 HashMap (java.util.HashMap)6 ArrayList (java.util.ArrayList)5 Map (java.util.Map)5 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)5 SQLException (java.sql.SQLException)4 PersistentBase (org.apache.gora.persistency.impl.PersistentBase)4 EventCreationException (com.linkedin.databus2.producers.EventCreationException)3 SourceType (com.linkedin.databus2.relay.config.ReplicationBitSetterStaticConfig.SourceType)3 IOException (java.io.IOException)3 LinkedHashMap (java.util.LinkedHashMap)3 List (java.util.List)3 GenericArray (org.apache.avro.generic.GenericArray)3 Utf8 (org.apache.avro.util.Utf8)3 DocumentFieldType (org.apache.gora.mongodb.store.MongoMapping.DocumentFieldType)3 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)2