Search in sources :

Example 36 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class CassandraResult method updatePersistent.

/**
   * Copies the Cassandra row at the current {@code rowNumber} into the Avro
   * record held by this result: the row key is stored in {@code key} and each
   * column that maps to a field is written to that field's position, after
   * which the field's dirty flag is cleared.
   * Columns whose mapped field name contains the union suffix are not written
   * to the record; they are only looked up to obtain the stored union index
   * of the corresponding UNION field.
   * @throws IOException declared by the signature; the throwing call is not
   *         visible in this method
   */
private void updatePersistent() throws IOException {
    CassandraRow<K> row = this.cassandraResultSet.get(this.rowNumber);
    // load key
    this.key = row.getKey();
    // load value
    Schema recordSchema = this.persistent.getSchema();
    List<Field> recordFields = recordSchema.getFields();
    for (CassandraColumn column : row) {
        // the reverse map is keyed by "<family>:<column name>"
        String family = column.getFamily();
        String columnName = StringSerializer.get().fromByteBuffer(column.getName().duplicate());
        String fieldName = this.reverseMap.get(family + ":" + columnName);
        if (fieldName == null) {
            LOG.debug("FieldName was null while iterating CassandraRow and using Avro Union type");
            continue;
        }
        if (fieldName.contains(CassandraStore.UNION_COL_SUFIX)) {
            // union index columns are consumed below, never stored as fields
            continue;
        }
        int position = this.persistent.getSchema().getField(fieldName).pos();
        Field field = recordFields.get(position);
        if (Type.UNION.equals(field.schema().getType())) {
            // locate the companion column that holds the stored union index
            String unionColumnName = fieldName + CassandraStore.UNION_COL_SUFIX;
            CassandraColumn unionColumn = getUnionTypeColumn(unionColumnName, row.toArray());
            // a temporary INT field lets the companion column decode its value
            unionColumn.setField(new Field(unionColumnName, Schema.create(Type.INT), null, null));
            Object storedIndex = unionColumn.getValue();
            column.setUnionType(Integer.parseInt(storedIndex.toString()));
        }
        // decode the column and store it in the record
        column.setField(field);
        this.persistent.put(position, column.getValue());
        // this field does not need to be written back to the store
        this.persistent.clearDirty(position);
    }
}
Also used : Field(org.apache.avro.Schema.Field) Type(org.apache.avro.Schema.Type) Schema(org.apache.avro.Schema)

Example 37 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class CassandraSubColumn method getValue.

/**
   * Deserializes the raw bytes of the wrapped column into a typed object,
   * according to the schema of the field assigned to this column.
   * @return the result of {@code getFieldValue} for the column bytes, or
   *         {@code null} when the column holds no bytes
   * @see org.apache.gora.cassandra.query.CassandraColumn#getValue()
   */
public Object getValue() {
    Field currentField = getField();
    Schema schema = currentField.schema();
    Type schemaType = schema.getType();
    ByteBuffer rawBytes = hColumn.getValue();
    return rawBytes == null ? null : getFieldValue(schemaType, schema, rawBytes);
}
Also used : Field(org.apache.avro.Schema.Field) Type(org.apache.avro.Schema.Type) Schema(org.apache.avro.Schema) ByteBuffer(java.nio.ByteBuffer)

Example 38 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class CassandraStore method addOrUpdateField.

/**
   * Add a field to Cassandra according to its type.
   * <p>
   * Primitive-like types are written as a single column. RECORD values (and
   * MAP values whose family is not a super column family) are Avro-serialized
   * to bytes first. For UNION fields the index of the matching union branch is
   * written to a companion column named {@code field.name() + UNION_COL_SUFIX},
   * then the value itself is written by recursing with the branch schema.
   * A {@code null} value for RECORD, MAP, ARRAY or UNION deletes the stored
   * content instead. Fields whose own name contains the union suffix are
   * ignored by this method.
   * @param key     the key of the row where the field should be added
   * @param field   the Avro field representing a datum
   * @param schema  the schema belonging to the particular Avro field
   * @param value   the field value
   */
@SuppressWarnings({ "unchecked", "rawtypes" })
private void addOrUpdateField(K key, Field field, Schema schema, Object value) {
    Type type = schema.getType();
    // checking if the value to be updated is used for saving union schema
    if (!field.name().contains(CassandraStore.UNION_COL_SUFIX)) {
        switch(type) {
            case STRING:
            case BOOLEAN:
            case INT:
            case LONG:
            case BYTES:
            case FLOAT:
            case DOUBLE:
            case FIXED:
                this.cassandraClient.addColumn(key, field.name(), value);
                break;
            case RECORD:
                if (value != null) {
                    if (value instanceof PersistentBase) {
                        PersistentBase persistentBase = (PersistentBase) value;
                        try {
                            byte[] byteValue = AvroSerializerUtil.serializer(persistentBase, schema);
                            this.cassandraClient.addColumn(key, field.name(), byteValue);
                        } catch (IOException e) {
                            // best effort: keep going, but keep the cause in the log
                            LOG.warn(field.name() + " named record could not be serialized.", e);
                        }
                    } else {
                        LOG.warn("Record with value: " + value.toString() + " not supported for field: " + field.name());
                    }
                } else {
                    LOG.warn("Setting content of: " + field.name() + " to null.");
                    String familyName = this.cassandraClient.getCassandraMapping().getFamily(field.name());
                    this.cassandraClient.deleteColumn(key, familyName, this.cassandraClient.toByteBuffer(field.name()));
                }
                break;
            case MAP:
                if (value != null) {
                    if (value instanceof Map<?, ?>) {
                        Map<CharSequence, Object> map = (Map<CharSequence, Object>) value;
                        Schema valueSchema = schema.getValueType();
                        Type valueType = valueSchema.getType();
                        if (Type.UNION.equals(valueType)) {
                            // store each entry together with a companion entry holding its union index
                            Map<CharSequence, Object> valueMap = new HashMap<>();
                            for (Map.Entry<CharSequence, Object> entry : map.entrySet()) {
                                CharSequence mapKey = entry.getKey();
                                Object mapValue = entry.getValue();
                                int valueUnionIndex = getUnionSchema(mapValue, valueSchema);
                                valueMap.put((mapKey + UNION_COL_SUFIX), valueUnionIndex);
                                valueMap.put(mapKey, mapValue);
                            }
                            map = valueMap;
                        }
                        String familyName = this.cassandraClient.getCassandraMapping().getFamily(field.name());
                        // If the map's family is not a super column family, the Avro serializer is used.
                        if (!this.cassandraClient.isSuper(familyName)) {
                            try {
                                byte[] byteValue = AvroSerializerUtil.serializer(map, schema);
                                this.cassandraClient.addColumn(key, field.name(), byteValue);
                            } catch (IOException e) {
                                // best effort: keep going, but keep the cause in the log
                                LOG.warn(field.name() + " named map could not be serialized.", e);
                            }
                        } else {
                            this.cassandraClient.addStatefulHashMap(key, field.name(), map);
                        }
                    } else {
                        LOG.warn("Map with value: " + value.toString() + " not supported for field: " + field.name());
                    }
                } else {
                    // delete map
                    LOG.warn("Setting content of: " + field.name() + " to null.");
                    this.cassandraClient.deleteStatefulHashMap(key, field.name());
                }
                break;
            case ARRAY:
                if (value != null) {
                    if (value instanceof DirtyListWrapper<?>) {
                        DirtyListWrapper fieldValue = (DirtyListWrapper<?>) value;
                        // copy the wrapper's elements into an Avro array before handing it to the client
                        GenericArray valueArray = new Array(fieldValue.size(), schema);
                        for (int i = 0; i < fieldValue.size(); i++) {
                            valueArray.add(i, fieldValue.get(i));
                        }
                        this.cassandraClient.addGenericArray(key, field.name(), (GenericArray<?>) valueArray);
                    } else {
                        LOG.warn("Array with value: " + value.toString() + " not supported for field: " + field.name());
                    }
                } else {
                    LOG.warn("Setting content of: " + field.name() + " to null.");
                    this.cassandraClient.deleteGenericArray(key, field.name());
                }
                break;
            case UNION:
                // adding union schema index
                String columnName = field.name() + UNION_COL_SUFIX;
                String familyName = this.cassandraClient.getCassandraMapping().getFamily(field.name());
                if (value != null) {
                    int schemaPos = getUnionSchema(value, schema);
                    LOG.debug("Union with value: " + value.toString() + " at index: " + schemaPos + " supported for field: " + field.name());
                    // register the companion column in the mapping before writing to it
                    this.cassandraClient.getCassandraMapping().addColumn(familyName, columnName, columnName);
                    if (this.cassandraClient.isSuper(familyName)) {
                        this.cassandraClient.addSubColumn(key, columnName, columnName, schemaPos);
                    } else {
                        this.cassandraClient.addColumn(key, columnName, schemaPos);
                    }
                    // adding union value, using the schema of the selected branch
                    Schema unionSchema = schema.getTypes().get(schemaPos);
                    addOrUpdateField(key, field, unionSchema, value);
                } else {
                    LOG.warn("Setting content of: " + field.name() + " to null.");
                    if (this.cassandraClient.isSuper(familyName)) {
                        this.cassandraClient.deleteSubColumn(key, field.name());
                    } else {
                        this.cassandraClient.deleteColumn(key, familyName, this.cassandraClient.toByteBuffer(field.name()));
                    }
                }
                break;
            default:
                LOG.warn("Type: " + type.name() + " not considered for field: " + field.name() + ". Please report this to dev@gora.apache.org");
        }
    }
}
Also used : PersistentBase(org.apache.gora.persistency.impl.PersistentBase) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Schema(org.apache.avro.Schema) IOException(java.io.IOException) GenericArray(org.apache.avro.generic.GenericArray) Array(org.apache.avro.generic.GenericData.Array) Type(org.apache.avro.Schema.Type) DirtyListWrapper(org.apache.gora.persistency.impl.DirtyListWrapper) GenericArray(org.apache.avro.generic.GenericArray) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 39 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class CassandraStore method put.

/**
   * Buffers a record for a later write to Cassandra.
   * <ol>
   * <li>Obtain the Avro {@link org.apache.avro.Schema} of {@code value}.</li>
   * <li>Obtain a record for that schema via
   * {@code SpecificData.get().newRecord(value, schema)}.</li>
   * <li>Walk the schema's {@link org.apache.avro.Schema.Field}s starting at
   * index 1 (index 0 is never processed here — presumably it is not a user
   * field; confirm against the generated record classes).</li>
   * <li>Skip every field that is NOT dirty.</li>
   * <li>For each dirty field, pass its schema, {@link org.apache.avro.Schema.Type}
   * and current value through
   * {@link org.apache.gora.cassandra.store.CassandraStore#getFieldValue(Schema, Type, Object)},
   * which handles nested RECORD, MAP, ARRAY and UNION values, and store the
   * result at the field's position in the new record.</li>
   * <li>Insert the key and the resulting record into the store's buffer, where
   * it stays until flushed. This performs a structural modification of the
   * buffer map.</li>
   * </ol>
   *
   * @param key   for the Avro Record (object).
   * @param value Record object to be persisted in Cassandra
   * @see org.apache.gora.store.DataStore#put(java.lang.Object,org.apache.gora.persistency.Persistent)
   */
@Override
public void put(K key, T value) {
    Schema recordSchema = value.getSchema();
    @SuppressWarnings("unchecked") T buffered = (T) SpecificData.get().newRecord(value, recordSchema);
    List<Field> recordFields = recordSchema.getFields();
    for (int index = 1; index < recordFields.size(); index++) {
        if (value.isDirty(index)) {
            Field dirtyField = recordFields.get(index);
            Schema dirtySchema = dirtyField.schema();
            Object rawValue = value.get(dirtyField.pos());
            // nested structures (array, map, record or union) are prepared by getFieldValue
            Object preparedValue = getFieldValue(dirtySchema, dirtySchema.getType(), rawValue);
            buffered.put(dirtyField.pos(), preparedValue);
        }
    }
    // this performs a structural modification of the map
    this.buffer.put(key, buffered);
}
Also used : Field(org.apache.avro.Schema.Field) Type(org.apache.avro.Schema.Type) Schema(org.apache.avro.Schema)

Example 40 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class CassandraStore method getFieldValue.

/**
   * For every field within an object, we pass in a field schema, Type and value.
   * This enables us to process fields (based on their characteristics)
   * preparing them for persistence.
   * <ul>
   * <li>RECORD: the members of the record are processed recursively into a
   * record obtained from {@code SpecificData.get().newRecord(...)}; the member
   * at position 0 is skipped, and all members are skipped when the record as
   * a whole reports not dirty. A {@code null} record is returned as
   * {@code null}.</li>
   * <li>MAP / ARRAY: the value is returned as is after a cast check, so a
   * value of the wrong runtime type raises {@link ClassCastException}.</li>
   * <li>UNION: a non-null value is processed again using the schema of the
   * union branch selected by {@code getUnionSchema}.</li>
   * <li>Any other type: the value is returned unchanged.</li>
   * </ul>
   * @param fieldSchema the associated field schema
   * @param type the field type
   * @param fieldValue the field value.
   * @return the value prepared for persistence; may be {@code null}
   */
private Object getFieldValue(Schema fieldSchema, Type type, Object fieldValue) {
    switch(type) {
        case RECORD:
            if (fieldValue == null) {
                // nothing to copy; hand the null back instead of failing on persistent.getSchema()
                break;
            }
            PersistentBase persistent = (PersistentBase) fieldValue;
            PersistentBase newRecord = (PersistentBase) SpecificData.get().newRecord(persistent, persistent.getSchema());
            for (Field member : fieldSchema.getFields()) {
                // note: dirtiness is checked on the whole record, not on the individual member
                if (member.pos() == 0 || !persistent.isDirty()) {
                    continue;
                }
                Schema memberSchema = member.schema();
                Type memberType = memberSchema.getType();
                Object memberValue = persistent.get(member.pos());
                newRecord.put(member.pos(), getFieldValue(memberSchema, memberType, memberValue));
            }
            fieldValue = newRecord;
            break;
        case MAP:
            // cast only validates the runtime type; the map itself is passed through
            fieldValue = (Map<?, ?>) fieldValue;
            break;
        case ARRAY:
            // cast only validates the runtime type; the list itself is passed through
            fieldValue = (List<?>) fieldValue;
            break;
        case UNION:
            // resolve the concrete branch of the union and process the value with that branch's schema
            if (fieldValue != null) {
                int schemaPos = getUnionSchema(fieldValue, fieldSchema);
                Schema unionSchema = fieldSchema.getTypes().get(schemaPos);
                Type unionType = unionSchema.getType();
                fieldValue = getFieldValue(unionSchema, unionType, fieldValue);
            }
            break;
        default:
            break;
    }
    return fieldValue;
}
Also used : Field(org.apache.avro.Schema.Field) PersistentBase(org.apache.gora.persistency.impl.PersistentBase) Type(org.apache.avro.Schema.Type) Schema(org.apache.avro.Schema) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Aggregations

Type (org.apache.avro.Schema.Type)40 Schema (org.apache.avro.Schema)28 Field (org.apache.avro.Schema.Field)13 DataType (com.linkedin.pinot.common.data.FieldSpec.DataType)6 ByteBuffer (java.nio.ByteBuffer)6 HashMap (java.util.HashMap)6 ArrayList (java.util.ArrayList)5 Map (java.util.Map)5 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)5 SQLException (java.sql.SQLException)4 PersistentBase (org.apache.gora.persistency.impl.PersistentBase)4 EventCreationException (com.linkedin.databus2.producers.EventCreationException)3 SourceType (com.linkedin.databus2.relay.config.ReplicationBitSetterStaticConfig.SourceType)3 IOException (java.io.IOException)3 LinkedHashMap (java.util.LinkedHashMap)3 List (java.util.List)3 GenericArray (org.apache.avro.generic.GenericArray)3 Utf8 (org.apache.avro.util.Utf8)3 DocumentFieldType (org.apache.gora.mongodb.store.MongoMapping.DocumentFieldType)3 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)2