Search in sources:

Example 16 with GenericArray

use of org.apache.avro.generic.GenericArray in project drill by apache.

In the class AvroRecordReader, the method process:

/**
 * Recursively maps an Avro datum onto Drill's complex-vector writers.
 *
 * @param value          the Avro datum to write; a null datum is skipped entirely
 * @param schema         the Avro schema describing {@code value}
 * @param fieldName      name of the field being written (asserted non-null for
 *                       non-record types)
 * @param writer         destination map-or-list writer
 * @param fieldSelection projection info used to skip unselected primitive fields
 */
private void process(final Object value, final Schema schema, final String fieldName, MapOrListWriterImpl writer, FieldSelection fieldSelection) {
    if (value == null) {
        return;
    }
    final Schema.Type type = schema.getType();
    switch(type) {
        case RECORD:
            // list field of MapOrListWriter will be non null when we want to store array of maps/records.
            MapOrListWriterImpl _writer = writer;
            for (final Schema.Field field : schema.getFields()) {
                // Descend into a sub-map for record-typed fields, including nullable
                // ["null", record] unions whose second branch is a record.
                // NOTE(review): _writer is never reset back to `writer`, so a primitive
                // sibling field that follows a record field would be written into the
                // record's sub-map — confirm this is intended or never occurs.
                if (field.schema().getType() == Schema.Type.RECORD || (field.schema().getType() == Schema.Type.UNION && field.schema().getTypes().get(0).getType() == Schema.Type.NULL && field.schema().getTypes().get(1).getType() == Schema.Type.RECORD)) {
                    _writer = (MapOrListWriterImpl) writer.map(field.name());
                }
                process(((GenericRecord) value).get(field.name()), field.schema(), field.name(), _writer, fieldSelection.getChild(field.name()));
            }
            break;
        case ARRAY:
            assert fieldName != null;
            final GenericArray<?> array = (GenericArray<?>) value;
            Schema elementSchema = array.getSchema().getElementType();
            Type elementType = elementSchema.getType();
            // Arrays of records/maps need a list-of-map writer; any other element
            // type goes into a plain list writer.
            if (elementType == Schema.Type.RECORD || elementType == Schema.Type.MAP) {
                writer = (MapOrListWriterImpl) writer.list(fieldName).listoftmap(fieldName);
            } else {
                writer = (MapOrListWriterImpl) writer.list(fieldName);
            }
            // Each element is bracketed by start()/end() so the writer tracks
            // per-element positions.
            for (final Object o : array) {
                writer.start();
                process(o, elementSchema, fieldName, writer, fieldSelection.getChild(fieldName));
                writer.end();
            }
            break;
        case UNION:
            // currently supporting only nullable union (optional fields) like ["null", "some-type"].
            if (schema.getTypes().get(0).getType() != Schema.Type.NULL) {
                throw new UnsupportedOperationException("Avro union type must be of the format : [\"null\", \"some-type\"]");
            }
            // value is known non-null here, so recurse with the non-null branch's schema.
            process(value, schema.getTypes().get(1), fieldName, writer, fieldSelection);
            break;
        case MAP:
            @SuppressWarnings("unchecked") final HashMap<Object, Object> map = (HashMap<Object, Object>) value;
            Schema valueSchema = schema.getValueType();
            writer = (MapOrListWriterImpl) writer.map(fieldName);
            writer.start();
            // Each map entry becomes a field named after the stringified key.
            for (Entry<Object, Object> entry : map.entrySet()) {
                process(entry.getValue(), valueSchema, entry.getKey().toString(), writer, fieldSelection.getChild(entry.getKey().toString()));
            }
            writer.end();
            break;
        case FIXED:
            throw new UnsupportedOperationException("Unimplemented type: " + type.toString());
        // Enum symbols are strings
        case ENUM:
        // Treat null type as a primitive
        case NULL:
        default:
            assert fieldName != null;
            // Skip primitives the query did not project; this check only applies in
            // a map context.
            if (writer.isMapWriter()) {
                if (fieldSelection.isNeverValid()) {
                    break;
                }
            }
            processPrimitive(value, schema.getType(), fieldName, writer);
            break;
    }
}
Also used : MapOrListWriterImpl(org.apache.drill.exec.vector.complex.impl.MapOrListWriterImpl) HashMap(java.util.HashMap) Schema(org.apache.avro.Schema) Type(org.apache.avro.Schema.Type) Type(org.apache.avro.Schema.Type) GenericArray(org.apache.avro.generic.GenericArray)

Example 17 with GenericArray

Use of org.apache.avro.generic.GenericArray in project gora by apache.

In the class CassandraStore, the method addOrUpdateField:

/**
   * Adds or updates a field in Cassandra according to its Avro type.
   * Columns whose name carries {@code UNION_COL_SUFIX} are union-index
   * bookkeeping columns and are skipped here; the UNION branch below writes
   * them itself.
   * @param key     the key of the row where the field should be added
   * @param field   the Avro field representing a datum
   * @param schema  the schema belonging to the particular Avro field
   * @param value   the field value; a null value generally deletes the column
   */
@SuppressWarnings({ "unchecked", "rawtypes" })
private void addOrUpdateField(K key, Field field, Schema schema, Object value) {
    Type type = schema.getType();
    // checking if the value to be updated is used for saving union schema
    if (!field.name().contains(CassandraStore.UNION_COL_SUFIX)) {
        switch(type) {
            // Scalar types are stored directly as a single column value.
            case STRING:
            case BOOLEAN:
            case INT:
            case LONG:
            case BYTES:
            case FLOAT:
            case DOUBLE:
            case FIXED:
                this.cassandraClient.addColumn(key, field.name(), value);
                break;
            case RECORD:
                // Records are Avro-serialized to bytes; serialization failures are
                // logged and swallowed, leaving the column unwritten.
                if (value != null) {
                    if (value instanceof PersistentBase) {
                        PersistentBase persistentBase = (PersistentBase) value;
                        try {
                            byte[] byteValue = AvroSerializerUtil.serializer(persistentBase, schema);
                            this.cassandraClient.addColumn(key, field.name(), byteValue);
                        } catch (IOException e) {
                            LOG.warn(field.name() + " named record could not be serialized.");
                        }
                    } else {
                        LOG.warn("Record with value: " + value.toString() + " not supported for field: " + field.name());
                    }
                } else {
                    // A null record clears the column in its mapped family.
                    LOG.warn("Setting content of: " + field.name() + " to null.");
                    String familyName = this.cassandraClient.getCassandraMapping().getFamily(field.name());
                    this.cassandraClient.deleteColumn(key, familyName, this.cassandraClient.toByteBuffer(field.name()));
                }
                break;
            case MAP:
                if (value != null) {
                    if (value instanceof Map<?, ?>) {
                        Map<CharSequence, Object> map = (Map<CharSequence, Object>) value;
                        Schema valueSchema = schema.getValueType();
                        Type valueType = valueSchema.getType();
                        // For union-valued maps, store a companion "<key>UNION_COL_SUFIX"
                        // entry recording which union branch each value uses.
                        if (Type.UNION.equals(valueType)) {
                            Map<CharSequence, Object> valueMap = new HashMap<>();
                            for (CharSequence mapKey : map.keySet()) {
                                Object mapValue = map.get(mapKey);
                                int valueUnionIndex = getUnionSchema(mapValue, valueSchema);
                                valueMap.put((mapKey + UNION_COL_SUFIX), valueUnionIndex);
                                valueMap.put(mapKey, mapValue);
                            }
                            map = valueMap;
                        }
                        String familyName = this.cassandraClient.getCassandraMapping().getFamily(field.name());
                        // If map is not super column. We using Avro serializer. 
                        if (!this.cassandraClient.isSuper(familyName)) {
                            try {
                                byte[] byteValue = AvroSerializerUtil.serializer(map, schema);
                                this.cassandraClient.addColumn(key, field.name(), byteValue);
                            } catch (IOException e) {
                                LOG.warn(field.name() + " named map could not be serialized.");
                            }
                        } else {
                            // Super-column family: store each entry as a sub-column.
                            this.cassandraClient.addStatefulHashMap(key, field.name(), map);
                        }
                    } else {
                        LOG.warn("Map with value: " + value.toString() + " not supported for field: " + field.name());
                    }
                } else {
                    // delete map
                    LOG.warn("Setting content of: " + field.name() + " to null.");
                    this.cassandraClient.deleteStatefulHashMap(key, field.name());
                }
                break;
            case ARRAY:
                if (value != null) {
                    if (value instanceof DirtyListWrapper<?>) {
                        // Copy the wrapper's elements into a GenericArray before storing.
                        DirtyListWrapper fieldValue = (DirtyListWrapper<?>) value;
                        GenericArray valueArray = new Array(fieldValue.size(), schema);
                        for (int i = 0; i < fieldValue.size(); i++) {
                            valueArray.add(i, fieldValue.get(i));
                        }
                        this.cassandraClient.addGenericArray(key, field.name(), (GenericArray<?>) valueArray);
                    } else {
                        LOG.warn("Array with value: " + value.toString() + " not supported for field: " + field.name());
                    }
                } else {
                    LOG.warn("Setting content of: " + field.name() + " to null.");
                    this.cassandraClient.deleteGenericArray(key, field.name());
                }
                break;
            case UNION:
                // adding union schema index
                String columnName = field.name() + UNION_COL_SUFIX;
                String familyName = this.cassandraClient.getCassandraMapping().getFamily(field.name());
                if (value != null) {
                    // Write the index of the matching union branch first, then recurse
                    // with that branch's schema to store the actual value.
                    int schemaPos = getUnionSchema(value, schema);
                    LOG.debug("Union with value: " + value.toString() + " at index: " + schemaPos + " supported for field: " + field.name());
                    this.cassandraClient.getCassandraMapping().addColumn(familyName, columnName, columnName);
                    if (this.cassandraClient.isSuper(familyName)) {
                        this.cassandraClient.addSubColumn(key, columnName, columnName, schemaPos);
                    } else {
                        this.cassandraClient.addColumn(key, columnName, schemaPos);
                    }
                    //this.cassandraClient.getCassandraMapping().addColumn(familyName, columnName, columnName);
                    // adding union value
                    Schema unionSchema = schema.getTypes().get(schemaPos);
                    addOrUpdateField(key, field, unionSchema, value);
                //this.cassandraClient.addColumn(key, field.name(), value);
                } else {
                    LOG.warn("Setting content of: " + field.name() + " to null.");
                    if (this.cassandraClient.isSuper(familyName)) {
                        this.cassandraClient.deleteSubColumn(key, field.name());
                    } else {
                        this.cassandraClient.deleteColumn(key, familyName, this.cassandraClient.toByteBuffer(field.name()));
                    }
                }
                break;
            default:
                LOG.warn("Type: " + type.name() + " not considered for field: " + field.name() + ". Please report this to dev@gora.apache.org");
        }
    }
}
Also used : PersistentBase(org.apache.gora.persistency.impl.PersistentBase) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Schema(org.apache.avro.Schema) IOException(java.io.IOException) GenericArray(org.apache.avro.generic.GenericArray) Array(org.apache.avro.generic.GenericData.Array) Type(org.apache.avro.Schema.Type) DirtyListWrapper(org.apache.gora.persistency.impl.DirtyListWrapper) GenericArray(org.apache.avro.generic.GenericArray) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 18 with GenericArray

Use of org.apache.avro.generic.GenericArray in project drill by apache.

In the class AvroTestUtil, the method generateSimpleArraySchema_NoNullValues:

/**
 * Generates a temporary Avro file holding {@code RECORD_COUNT} records, each with
 * two scalar fields and three fully populated (never-null) primitive arrays.
 *
 * @return the record writer used to produce the file (already closed)
 * @throws Exception if the temp file cannot be created or writing fails
 */
public static AvroTestRecordWriter generateSimpleArraySchema_NoNullValues() throws Exception {
    // Scratch file for the generated data; cleaned up on JVM exit.
    final File file = File.createTempFile("avro-array-test", ".avro");
    file.deleteOnExit();
    final Schema schema = SchemaBuilder.record("AvroRecordReaderTest")
            .namespace("org.apache.drill.exec.store.avro")
            .fields()
            .name("a_string").type().stringType().noDefault()
            .name("b_int").type().intType().noDefault()
            .name("c_string_array").type().array().items().stringType().noDefault()
            .name("d_int_array").type().array().items().intType().noDefault()
            .name("e_float_array").type().array().items().floatType().noDefault()
            .endRecord();
    final AvroTestRecordWriter record = new AvroTestRecordWriter(schema, file);
    try {
        for (int row = 0; row < RECORD_COUNT; row++) {
            record.startRecord();
            record.put("a_string", "a_" + row);
            record.put("b_int", row);

            // String array: values encode both the row and the element index.
            final GenericArray<String> strings =
                    new GenericData.Array<>(ARRAY_SIZE, schema.getField("c_string_array").schema());
            for (int idx = 0; idx < ARRAY_SIZE; idx++) {
                strings.add(idx, "c_string_array_" + row + "_" + idx);
            }
            record.put("c_string_array", strings);

            // Int array: element value is the product of row and index.
            final GenericArray<Integer> ints =
                    new GenericData.Array<>(ARRAY_SIZE, schema.getField("d_int_array").schema());
            for (int idx = 0; idx < ARRAY_SIZE; idx++) {
                ints.add(idx, row * idx);
            }
            record.put("d_int_array", ints);

            // Float array: same product, widened to float.
            final GenericArray<Float> floats =
                    new GenericData.Array<>(ARRAY_SIZE, schema.getField("e_float_array").schema());
            for (int idx = 0; idx < ARRAY_SIZE; idx++) {
                floats.add(idx, (float) (row * idx));
            }
            record.put("e_float_array", floats);

            record.endRecord();
        }
    } finally {
        // Always close so the Avro file is flushed even if writing fails midway.
        record.close();
    }
    return record;
}
Also used : Schema(org.apache.avro.Schema) GenericArray(org.apache.avro.generic.GenericArray) File(java.io.File) GenericData(org.apache.avro.generic.GenericData)

Aggregations

GenericArray (org.apache.avro.generic.GenericArray)18 Schema (org.apache.avro.Schema)15 GenericRecord (org.apache.avro.generic.GenericRecord)11 HashMap (java.util.HashMap)5 File (java.io.File)4 Map (java.util.Map)3 Type (org.apache.avro.Schema.Type)3 EventCreationException (com.linkedin.databus2.producers.EventCreationException)2 Array (java.sql.Array)2 ResultSet (java.sql.ResultSet)2 SQLException (java.sql.SQLException)2 Struct (java.sql.Struct)2 ArrayList (java.util.ArrayList)2 List (java.util.List)2 Field (org.apache.avro.Schema.Field)2 GenericData (org.apache.avro.generic.GenericData)2 Utf8 (org.apache.avro.util.Utf8)2 IThrowableProxy (ch.qos.logback.classic.spi.IThrowableProxy)1 StackTraceElementProxy (ch.qos.logback.classic.spi.StackTraceElementProxy)1 IOException (java.io.IOException)1