Search in sources :

Example 46 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

The class CassandraSubColumn, method getValue.

/**
   * Deserializes the raw column bytes into a typed Object, driven by the
   * Avro schema of the corresponding field. Returns null when the column
   * carries no value.
   * @see org.apache.gora.cassandra.query.CassandraColumn#getValue()
   */
public Object getValue() {
    Field field = getField();
    Schema fieldSchema = field.schema();
    ByteBuffer rawBytes = hColumn.getValue();
    if (rawBytes == null) {
        return null;
    }
    return getFieldValue(fieldSchema.getType(), fieldSchema, rawBytes);
}
Also used : Field(org.apache.avro.Schema.Field) Type(org.apache.avro.Schema.Type) Schema(org.apache.avro.Schema) ByteBuffer(java.nio.ByteBuffer)

Example 47 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

The class CassandraStore, method addOrUpdateField.

/**
   * Adds or updates a single Avro field of a row in Cassandra, dispatching on
   * the field's schema {@link Type}. Scalar types are written directly; nested
   * types (RECORD, MAP, ARRAY, UNION) are serialized or expanded first. A null
   * value causes the stored content to be deleted.
   * @param key     the key of the row where the field should be added
   * @param field   the Avro field representing a datum
   * @param schema  the schema belonging to the particular Avro field
   * @param value   the field value
   */
@SuppressWarnings({ "unchecked", "rawtypes" })
private void addOrUpdateField(K key, Field field, Schema schema, Object value) {
    Type type = schema.getType();
    // Columns whose name carries the union-index suffix are bookkeeping
    // written by the UNION branch below; they are never updated directly.
    if (!field.name().contains(CassandraStore.UNION_COL_SUFIX)) {
        switch(type) {
            case STRING:
            case BOOLEAN:
            case INT:
            case LONG:
            case BYTES:
            case FLOAT:
            case DOUBLE:
            case FIXED:
                // Scalar-like types are stored as a single column value as-is.
                this.cassandraClient.addColumn(key, field.name(), value);
                break;
            case RECORD:
                if (value != null) {
                    if (value instanceof PersistentBase) {
                        // Nested records are Avro-serialized to bytes and stored
                        // in one column; serialization failures are logged, not thrown.
                        PersistentBase persistentBase = (PersistentBase) value;
                        try {
                            byte[] byteValue = AvroSerializerUtil.serializer(persistentBase, schema);
                            this.cassandraClient.addColumn(key, field.name(), byteValue);
                        } catch (IOException e) {
                            LOG.warn(field.name() + " named record could not be serialized.");
                        }
                    } else {
                        LOG.warn("Record with value: " + value.toString() + " not supported for field: " + field.name());
                    }
                } else {
                    // A null record clears the stored column.
                    LOG.warn("Setting content of: " + field.name() + " to null.");
                    String familyName = this.cassandraClient.getCassandraMapping().getFamily(field.name());
                    this.cassandraClient.deleteColumn(key, familyName, this.cassandraClient.toByteBuffer(field.name()));
                }
                break;
            case MAP:
                if (value != null) {
                    if (value instanceof Map<?, ?>) {
                        Map<CharSequence, Object> map = (Map<CharSequence, Object>) value;
                        Schema valueSchema = schema.getValueType();
                        Type valueType = valueSchema.getType();
                        if (Type.UNION.equals(valueType)) {
                            // For union-typed map values, store each value's union
                            // branch index under <mapKey + UNION_COL_SUFIX> next to
                            // the value itself, so deserialization can pick the branch.
                            Map<CharSequence, Object> valueMap = new HashMap<>();
                            for (CharSequence mapKey : map.keySet()) {
                                Object mapValue = map.get(mapKey);
                                int valueUnionIndex = getUnionSchema(mapValue, valueSchema);
                                valueMap.put((mapKey + UNION_COL_SUFIX), valueUnionIndex);
                                valueMap.put(mapKey, mapValue);
                            }
                            map = valueMap;
                        }
                        String familyName = this.cassandraClient.getCassandraMapping().getFamily(field.name());
                        // If the map is not mapped to a super column, serialize the
                        // whole map with Avro into a single column.
                        if (!this.cassandraClient.isSuper(familyName)) {
                            try {
                                byte[] byteValue = AvroSerializerUtil.serializer(map, schema);
                                this.cassandraClient.addColumn(key, field.name(), byteValue);
                            } catch (IOException e) {
                                LOG.warn(field.name() + " named map could not be serialized.");
                            }
                        } else {
                            // Super column family: store entries as stateful sub-columns.
                            this.cassandraClient.addStatefulHashMap(key, field.name(), map);
                        }
                    } else {
                        LOG.warn("Map with value: " + value.toString() + " not supported for field: " + field.name());
                    }
                } else {
                    // delete map
                    LOG.warn("Setting content of: " + field.name() + " to null.");
                    this.cassandraClient.deleteStatefulHashMap(key, field.name());
                }
                break;
            case ARRAY:
                if (value != null) {
                    if (value instanceof DirtyListWrapper<?>) {
                        // Copy the wrapper's elements into a plain Avro GenericArray
                        // before handing it to the client.
                        DirtyListWrapper fieldValue = (DirtyListWrapper<?>) value;
                        GenericArray valueArray = new Array(fieldValue.size(), schema);
                        for (int i = 0; i < fieldValue.size(); i++) {
                            valueArray.add(i, fieldValue.get(i));
                        }
                        this.cassandraClient.addGenericArray(key, field.name(), (GenericArray<?>) valueArray);
                    } else {
                        LOG.warn("Array with value: " + value.toString() + " not supported for field: " + field.name());
                    }
                } else {
                    LOG.warn("Setting content of: " + field.name() + " to null.");
                    this.cassandraClient.deleteGenericArray(key, field.name());
                }
                break;
            case UNION:
                // adding union schema index
                String columnName = field.name() + UNION_COL_SUFIX;
                String familyName = this.cassandraClient.getCassandraMapping().getFamily(field.name());
                if (value != null) {
                    // Persist which union branch the value belongs to, then recurse
                    // with the resolved member schema to store the value itself.
                    int schemaPos = getUnionSchema(value, schema);
                    LOG.debug("Union with value: " + value.toString() + " at index: " + schemaPos + " supported for field: " + field.name());
                    this.cassandraClient.getCassandraMapping().addColumn(familyName, columnName, columnName);
                    if (this.cassandraClient.isSuper(familyName)) {
                        this.cassandraClient.addSubColumn(key, columnName, columnName, schemaPos);
                    } else {
                        this.cassandraClient.addColumn(key, columnName, schemaPos);
                    }
                    //this.cassandraClient.getCassandraMapping().addColumn(familyName, columnName, columnName);
                    // adding union value
                    Schema unionSchema = schema.getTypes().get(schemaPos);
                    addOrUpdateField(key, field, unionSchema, value);
                //this.cassandraClient.addColumn(key, field.name(), value);
                } else {
                    LOG.warn("Setting content of: " + field.name() + " to null.");
                    // NOTE(review): the super-column branch deletes by field.name()
                    // while the non-super branch deletes via familyName/byte-buffer;
                    // confirm both clear the same stored data as intended.
                    if (this.cassandraClient.isSuper(familyName)) {
                        this.cassandraClient.deleteSubColumn(key, field.name());
                    } else {
                        this.cassandraClient.deleteColumn(key, familyName, this.cassandraClient.toByteBuffer(field.name()));
                    }
                }
                break;
            default:
                LOG.warn("Type: " + type.name() + " not considered for field: " + field.name() + ". Please report this to dev@gora.apache.org");
        }
    }
}
Also used : PersistentBase(org.apache.gora.persistency.impl.PersistentBase) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Schema(org.apache.avro.Schema) IOException(java.io.IOException) GenericArray(org.apache.avro.generic.GenericArray) Array(org.apache.avro.generic.GenericData.Array) Type(org.apache.avro.Schema.Type) DirtyListWrapper(org.apache.gora.persistency.impl.DirtyListWrapper) GenericArray(org.apache.avro.generic.GenericArray) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 48 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

The class CassandraStore, method put.

/**
   * Buffers {@code value} for persistence under {@code key}.
   * <p>
   * A duplicate of the object is created with
   * {@code SpecificData.get().newRecord(value, schema)} so the buffered copy is
   * decoupled from the caller's instance. Only dirty fields are carried over;
   * field 0 is intentionally skipped. Nested values (ARRAY, MAP, RECORD, UNION)
   * are pre-processed by {@code getFieldValue} before being set on the
   * duplicate, which is then placed into the internal buffer and kept in
   * memory until flushing (a structural modification of the buffer map).
   *
   * @param key   key for the Avro record (object)
   * @param value record object to be persisted in Cassandra
   * @see org.apache.gora.store.DataStore#put(java.lang.Object,org.apache.gora.persistency.Persistent)
   */
@Override
public void put(K key, T value) {
    Schema schema = value.getSchema();
    @SuppressWarnings("unchecked")
    T duplicate = (T) SpecificData.get().newRecord(value, schema);
    for (Field field : schema.getFields()) {
        int pos = field.pos();
        // Skip field 0 and any field that has not been modified.
        if (pos == 0 || !value.isDirty(pos)) {
            continue;
        }
        Schema fieldSchema = field.schema();
        // Resolve nested structures (array, map, record or union) before copying.
        Object processed = getFieldValue(fieldSchema, fieldSchema.getType(), value.get(pos));
        duplicate.put(pos, processed);
    }
    // Structural modification of the buffer map: queued until flush.
    this.buffer.put(key, duplicate);
}
Also used : Field(org.apache.avro.Schema.Field) Type(org.apache.avro.Schema.Type) Schema(org.apache.avro.Schema)

Example 49 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

The class CassandraStore, method getFieldValue.

/**
   * Recursively prepares a single field value for persistence, based on its
   * schema {@link Type}. RECORD values are duplicated with only their dirty
   * members carried over, UNION values are unwrapped to their concrete member
   * schema, and MAP/ARRAY values pass through (the casts assert the expected
   * runtime types). All other types are returned unchanged.
   * @param fieldSchema the associated field schema
   * @param type the field type
   * @param fieldValue the field value
   * @return the value to persist, possibly a processed copy of the input
   */
private Object getFieldValue(Schema fieldSchema, Type type, Object fieldValue) {
    switch(type) {
        case RECORD:
            PersistentBase source = (PersistentBase) fieldValue;
            PersistentBase duplicate = (PersistentBase) SpecificData.get().newRecord(source, source.getSchema());
            for (Field member : fieldSchema.getFields()) {
                // Skip member 0, and everything when the record is not dirty.
                if (member.pos() == 0 || !source.isDirty()) {
                    continue;
                }
                Schema memberSchema = member.schema();
                Object processed = getFieldValue(memberSchema, memberSchema.getType(), source.get(member.pos()));
                duplicate.put(member.pos(), processed);
            }
            fieldValue = duplicate;
            break;
        case MAP:
            // Cast only; verifies the runtime type without transforming the value.
            fieldValue = (Map<?, ?>) fieldValue;
            break;
        case ARRAY:
            fieldValue = (List<?>) fieldValue;
            break;
        case UNION:
            // Resolve which union member the value belongs to, then process it
            // under that concrete schema. Nulls are passed through untouched.
            if (fieldValue != null) {
                int branch = getUnionSchema(fieldValue, fieldSchema);
                Schema branchSchema = fieldSchema.getTypes().get(branch);
                fieldValue = getFieldValue(branchSchema, branchSchema.getType(), fieldValue);
            }
            break;
        default:
            break;
    }
    return fieldValue;
}
Also used : Field(org.apache.avro.Schema.Field) PersistentBase(org.apache.gora.persistency.impl.PersistentBase) Type(org.apache.avro.Schema.Type) Schema(org.apache.avro.Schema) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 50 with Type

use of org.apache.avro.Schema.Type in project nifi by apache.

The class AvroTypeUtil, method determineDataType.

/**
 * Translates an Avro {@link Schema} into the corresponding NiFi record
 * {@code DataType}. Logical types (date, time, timestamp, decimal) take
 * precedence over the underlying Avro type. RECORD schemas are registered in
 * {@code knownRecordTypes} (keyed by fully-qualified name) before their fields
 * are resolved, so self-referential record schemas resolve from the map instead
 * of recursing endlessly.
 *
 * @param avroSchema the Avro schema to translate
 * @param knownRecordTypes record types already resolved, keyed by
 *        "namespace.name"; must not be null
 * @return the matching DataType, or null when the Avro type is unrecognized
 * @throws IllegalArgumentException if {@code knownRecordTypes} is null
 */
public static DataType determineDataType(final Schema avroSchema, Map<String, DataType> knownRecordTypes) {
    if (knownRecordTypes == null) {
        throw new IllegalArgumentException("'knownRecordTypes' cannot be null.");
    }
    final LogicalType logicalType = avroSchema.getLogicalType();
    if (logicalType != null) {
        final String name = logicalType.getName();
        if (name.equals(LOGICAL_TYPE_DATE)) {
            return RecordFieldType.DATE.getDataType();
        } else if (name.equals(LOGICAL_TYPE_TIME_MILLIS) || name.equals(LOGICAL_TYPE_TIME_MICROS)) {
            return RecordFieldType.TIME.getDataType();
        } else if (name.equals(LOGICAL_TYPE_TIMESTAMP_MILLIS) || name.equals(LOGICAL_TYPE_TIMESTAMP_MICROS)) {
            return RecordFieldType.TIMESTAMP.getDataType();
        } else if (name.equals(LOGICAL_TYPE_DECIMAL)) {
            // A numeric type is generally more useful to users than a String here.
            return RecordFieldType.DOUBLE.getDataType();
        }
        // Unrecognized logical types fall back to the plain Avro type mapping below.
    }
    switch (avroSchema.getType()) {
        case ARRAY:
            return RecordFieldType.ARRAY.getArrayDataType(determineDataType(avroSchema.getElementType(), knownRecordTypes));
        case BYTES:
        case FIXED:
            // Raw bytes are modeled as an array of BYTE.
            return RecordFieldType.ARRAY.getArrayDataType(RecordFieldType.BYTE.getDataType());
        case BOOLEAN:
            return RecordFieldType.BOOLEAN.getDataType();
        case DOUBLE:
            return RecordFieldType.DOUBLE.getDataType();
        case ENUM:
        case STRING:
            return RecordFieldType.STRING.getDataType();
        case FLOAT:
            return RecordFieldType.FLOAT.getDataType();
        case INT:
            return RecordFieldType.INT.getDataType();
        case LONG:
            return RecordFieldType.LONG.getDataType();
        case RECORD: {
            final String fullName = avroSchema.getNamespace() + "." + avroSchema.getName();
            if (knownRecordTypes.containsKey(fullName)) {
                return knownRecordTypes.get(fullName);
            }
            // Register the record type BEFORE descending into its fields so a
            // recursive reference to this record resolves from the map.
            final SimpleRecordSchema childSchema = new SimpleRecordSchema(avroSchema.toString(), AVRO_SCHEMA_FORMAT, SchemaIdentifier.EMPTY);
            final DataType recordType = RecordFieldType.RECORD.getRecordDataType(childSchema);
            knownRecordTypes.put(fullName, recordType);
            final List<Field> childFields = avroSchema.getFields();
            final List<RecordField> resolvedFields = new ArrayList<>(childFields.size());
            for (final Field childField : childFields) {
                final Schema childFieldSchema = childField.schema();
                final DataType childFieldType = determineDataType(childFieldSchema, knownRecordTypes);
                addFieldToList(resolvedFields, childField, childField.name(), childFieldSchema, childFieldType, isNullable(childFieldSchema));
            }
            childSchema.setFields(resolvedFields);
            return recordType;
        }
        case NULL:
            return RecordFieldType.STRING.getDataType();
        case MAP:
            return RecordFieldType.MAP.getMapDataType(determineDataType(avroSchema.getValueType(), knownRecordTypes));
        case UNION: {
            final List<Schema> candidates = getNonNullSubSchemas(avroSchema);
            if (candidates.size() == 1) {
                // A [null, X] union collapses directly to X's type.
                return determineDataType(candidates.get(0), knownRecordTypes);
            }
            final List<DataType> choices = new ArrayList<>(candidates.size());
            for (final Schema candidate : candidates) {
                choices.add(determineDataType(candidate, knownRecordTypes));
            }
            return RecordFieldType.CHOICE.getChoiceDataType(choices);
        }
        default:
            return null;
    }
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) Field(org.apache.avro.Schema.Field) RecordField(org.apache.nifi.serialization.record.RecordField) DataType(org.apache.nifi.serialization.record.DataType) ChoiceDataType(org.apache.nifi.serialization.record.type.ChoiceDataType) MapDataType(org.apache.nifi.serialization.record.type.MapDataType) RecordFieldType(org.apache.nifi.serialization.record.RecordFieldType) ArrayDataType(org.apache.nifi.serialization.record.type.ArrayDataType) Type(org.apache.avro.Schema.Type) LogicalType(org.apache.avro.LogicalType) RecordDataType(org.apache.nifi.serialization.record.type.RecordDataType) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) Schema(org.apache.avro.Schema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) LogicalType(org.apache.avro.LogicalType) DataType(org.apache.nifi.serialization.record.DataType) ChoiceDataType(org.apache.nifi.serialization.record.type.ChoiceDataType) MapDataType(org.apache.nifi.serialization.record.type.MapDataType) ArrayDataType(org.apache.nifi.serialization.record.type.ArrayDataType) RecordDataType(org.apache.nifi.serialization.record.type.RecordDataType) List(java.util.List) ArrayList(java.util.ArrayList)

Aggregations

Type (org.apache.avro.Schema.Type)80 Schema (org.apache.avro.Schema)58 Field (org.apache.avro.Schema.Field)32 Map (java.util.Map)20 List (java.util.List)16 HashMap (java.util.HashMap)15 ArrayList (java.util.ArrayList)13 ByteBuffer (java.nio.ByteBuffer)11 Collectors (java.util.stream.Collectors)11 IOException (java.io.IOException)10 LogicalType (org.apache.avro.LogicalType)8 LinkedHashMap (java.util.LinkedHashMap)7 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)7 ImmutableMap (com.google.common.collect.ImmutableMap)6 Arrays (java.util.Arrays)5 PersistentBase (org.apache.gora.persistency.impl.PersistentBase)5 Test (org.junit.Test)5 BaseRuntimeChildDefinition (ca.uhn.fhir.context.BaseRuntimeChildDefinition)4 BaseRuntimeElementDefinition (ca.uhn.fhir.context.BaseRuntimeElementDefinition)4 DataType (com.linkedin.pinot.common.data.FieldSpec.DataType)4