Use of org.apache.avro.generic.GenericArray in the Apache Drill project:
the class AvroRecordReader, method process.
/**
 * Recursively writes one Avro value into Drill's vector writers, dispatching on the
 * Avro schema type: RECORD and MAP recurse per entry, ARRAY recurses per element,
 * UNION unwraps a nullable union, and everything else is handed to processPrimitive.
 *
 * @param value          the Avro datum to write; a null value is skipped entirely
 * @param schema         the Avro schema describing {@code value}
 * @param fieldName      name of the field being written (must be non-null for
 *                       ARRAY and primitive cases; asserted below)
 * @param writer         destination map-or-list writer; reassigned locally when
 *                       descending into nested lists/maps
 * @param fieldSelection projection info used to skip never-selected map fields
 */
private void process(final Object value, final Schema schema, final String fieldName, MapOrListWriterImpl writer, FieldSelection fieldSelection) {
if (value == null) {
return;
}
final Schema.Type type = schema.getType();
switch(type) {
case RECORD:
// list field of MapOrListWriter will be non null when we want to store array of maps/records.
MapOrListWriterImpl _writer = writer;
for (final Schema.Field field : schema.getFields()) {
// Descend into a sub-map writer when the field is a record, or a nullable
// union whose non-null branch is a record (["null", record]).
// NOTE(review): _writer is only reassigned for record-typed fields, so after
// the first record field every later non-record field in this loop is written
// through the nested map writer instead of the original `writer` — confirm
// this carry-over is intentional.
if (field.schema().getType() == Schema.Type.RECORD || (field.schema().getType() == Schema.Type.UNION && field.schema().getTypes().get(0).getType() == Schema.Type.NULL && field.schema().getTypes().get(1).getType() == Schema.Type.RECORD)) {
_writer = (MapOrListWriterImpl) writer.map(field.name());
}
process(((GenericRecord) value).get(field.name()), field.schema(), field.name(), _writer, fieldSelection.getChild(field.name()));
}
break;
case ARRAY:
assert fieldName != null;
final GenericArray<?> array = (GenericArray<?>) value;
Schema elementSchema = array.getSchema().getElementType();
Type elementType = elementSchema.getType();
// Arrays of records/maps need a list-of-map writer; scalar arrays use a plain list.
if (elementType == Schema.Type.RECORD || elementType == Schema.Type.MAP) {
writer = (MapOrListWriterImpl) writer.list(fieldName).listoftmap(fieldName);
} else {
writer = (MapOrListWriterImpl) writer.list(fieldName);
}
// Each element gets its own start/end pair so list positions stay aligned.
for (final Object o : array) {
writer.start();
process(o, elementSchema, fieldName, writer, fieldSelection.getChild(fieldName));
writer.end();
}
break;
case UNION:
// currently supporting only nullable union (optional fields) like ["null", "some-type"].
if (schema.getTypes().get(0).getType() != Schema.Type.NULL) {
throw new UnsupportedOperationException("Avro union type must be of the format : [\"null\", \"some-type\"]");
}
// value is known non-null here (checked at method entry), so write the second branch.
process(value, schema.getTypes().get(1), fieldName, writer, fieldSelection);
break;
case MAP:
@SuppressWarnings("unchecked") final HashMap<Object, Object> map = (HashMap<Object, Object>) value;
Schema valueSchema = schema.getValueType();
// Each map entry is written as a field of a nested Drill map, keyed by the
// stringified Avro map key.
writer = (MapOrListWriterImpl) writer.map(fieldName);
writer.start();
for (Entry<Object, Object> entry : map.entrySet()) {
process(entry.getValue(), valueSchema, entry.getKey().toString(), writer, fieldSelection.getChild(entry.getKey().toString()));
}
writer.end();
break;
case FIXED:
throw new UnsupportedOperationException("Unimplemented type: " + type.toString());
// Enum symbols are strings
case ENUM:
// Treat null type as a primitive
case NULL:
default:
assert fieldName != null;
// Respect projection: a field that can never be selected is skipped, but only
// when writing into a map (list elements are always written).
if (writer.isMapWriter()) {
if (fieldSelection.isNeverValid()) {
break;
}
}
processPrimitive(value, schema.getType(), fieldName, writer);
break;
}
}
Use of org.apache.avro.generic.GenericArray in the Apache Gora project:
the class CassandraStore, method addOrUpdateField.
/**
 * Adds or updates a single field in Cassandra according to its Avro type.
 * <p>
 * Scalars are stored directly as columns; records and (non-super-column) maps are
 * Avro-serialized to bytes; arrays are copied into a {@link GenericArray}; unions
 * additionally persist the index of the active branch in a companion column named
 * {@code field.name() + UNION_COL_SUFIX}, then recurse with the branch schema.
 * A {@code null} value deletes the existing column/map/array instead.
 *
 * @param key the key of the row where the field should be added
 * @param field the Avro field representing a datum
 * @param schema the schema belonging to the particular Avro field
 * @param value the field value; {@code null} triggers deletion of the stored data
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private void addOrUpdateField(K key, Field field, Schema schema, Object value) {
Type type = schema.getType();
// Skip synthetic companion columns that only record a union branch index;
// they are written below in the UNION case, not treated as real fields.
if (!field.name().contains(CassandraStore.UNION_COL_SUFIX)) {
switch(type) {
case STRING:
case BOOLEAN:
case INT:
case LONG:
case BYTES:
case FLOAT:
case DOUBLE:
case FIXED:
// Scalar types map directly onto a single Cassandra column.
this.cassandraClient.addColumn(key, field.name(), value);
break;
case RECORD:
if (value != null) {
if (value instanceof PersistentBase) {
PersistentBase persistentBase = (PersistentBase) value;
try {
// Nested records are stored as an Avro-serialized byte blob.
byte[] byteValue = AvroSerializerUtil.serializer(persistentBase, schema);
this.cassandraClient.addColumn(key, field.name(), byteValue);
} catch (IOException e) {
// NOTE(review): serialization failure is logged and the field silently
// skipped — the row ends up without this column; confirm that is acceptable.
LOG.warn(field.name() + " named record could not be serialized.");
}
} else {
LOG.warn("Record with value: " + value.toString() + " not supported for field: " + field.name());
}
} else {
// A null record means: remove the stored column for this field.
LOG.warn("Setting content of: " + field.name() + " to null.");
String familyName = this.cassandraClient.getCassandraMapping().getFamily(field.name());
this.cassandraClient.deleteColumn(key, familyName, this.cassandraClient.toByteBuffer(field.name()));
}
break;
case MAP:
if (value != null) {
if (value instanceof Map<?, ?>) {
Map<CharSequence, Object> map = (Map<CharSequence, Object>) value;
Schema valueSchema = schema.getValueType();
Type valueType = valueSchema.getType();
if (Type.UNION.equals(valueType)) {
// For union-valued maps, store each entry's union branch index under a
// suffixed key alongside the value itself, so it can be resolved on read.
Map<CharSequence, Object> valueMap = new HashMap<>();
for (CharSequence mapKey : map.keySet()) {
Object mapValue = map.get(mapKey);
int valueUnionIndex = getUnionSchema(mapValue, valueSchema);
valueMap.put((mapKey + UNION_COL_SUFIX), valueUnionIndex);
valueMap.put(mapKey, mapValue);
}
map = valueMap;
}
String familyName = this.cassandraClient.getCassandraMapping().getFamily(field.name());
// If the column family is not a super column, serialize the whole map with Avro.
if (!this.cassandraClient.isSuper(familyName)) {
try {
byte[] byteValue = AvroSerializerUtil.serializer(map, schema);
this.cassandraClient.addColumn(key, field.name(), byteValue);
} catch (IOException e) {
LOG.warn(field.name() + " named map could not be serialized.");
}
} else {
// Super column families store the map entries individually.
this.cassandraClient.addStatefulHashMap(key, field.name(), map);
}
} else {
LOG.warn("Map with value: " + value.toString() + " not supported for field: " + field.name());
}
} else {
// A null value deletes the stored map.
LOG.warn("Setting content of: " + field.name() + " to null.");
this.cassandraClient.deleteStatefulHashMap(key, field.name());
}
break;
case ARRAY:
if (value != null) {
if (value instanceof DirtyListWrapper<?>) {
// Copy the wrapper's elements into a plain GenericArray before storing.
DirtyListWrapper fieldValue = (DirtyListWrapper<?>) value;
GenericArray valueArray = new Array(fieldValue.size(), schema);
for (int i = 0; i < fieldValue.size(); i++) {
valueArray.add(i, fieldValue.get(i));
}
this.cassandraClient.addGenericArray(key, field.name(), (GenericArray<?>) valueArray);
} else {
LOG.warn("Array with value: " + value.toString() + " not supported for field: " + field.name());
}
} else {
// A null value deletes the stored array.
LOG.warn("Setting content of: " + field.name() + " to null.");
this.cassandraClient.deleteGenericArray(key, field.name());
}
break;
case UNION:
// Persist which union branch is active in a companion column, then recurse
// into this same method with the concrete branch schema to store the value.
String columnName = field.name() + UNION_COL_SUFIX;
String familyName = this.cassandraClient.getCassandraMapping().getFamily(field.name());
if (value != null) {
int schemaPos = getUnionSchema(value, schema);
LOG.debug("Union with value: " + value.toString() + " at index: " + schemaPos + " supported for field: " + field.name());
this.cassandraClient.getCassandraMapping().addColumn(familyName, columnName, columnName);
if (this.cassandraClient.isSuper(familyName)) {
this.cassandraClient.addSubColumn(key, columnName, columnName, schemaPos);
} else {
this.cassandraClient.addColumn(key, columnName, schemaPos);
}
// Recurse with the resolved branch schema to store the actual value.
Schema unionSchema = schema.getTypes().get(schemaPos);
addOrUpdateField(key, field, unionSchema, value);
} else {
LOG.warn("Setting content of: " + field.name() + " to null.");
if (this.cassandraClient.isSuper(familyName)) {
this.cassandraClient.deleteSubColumn(key, field.name());
} else {
this.cassandraClient.deleteColumn(key, familyName, this.cassandraClient.toByteBuffer(field.name()));
}
}
break;
default:
LOG.warn("Type: " + type.name() + " not considered for field: " + field.name() + ". Please report this to dev@gora.apache.org");
}
}
}
Use of org.apache.avro.generic.GenericArray in the Apache Drill project:
the class AvroTestUtil, method generateSimpleArraySchema_NoNullValues.
/**
 * Builds a temporary Avro file whose records contain a string, an int, and three
 * fully-populated arrays (string, int, float) — no null values anywhere.
 *
 * @return the writer that produced the file, for the caller to inspect
 * @throws Exception on any file or Avro error
 */
public static AvroTestRecordWriter generateSimpleArraySchema_NoNullValues() throws Exception {
final File tempFile = File.createTempFile("avro-array-test", ".avro");
tempFile.deleteOnExit();
final Schema schema = SchemaBuilder.record("AvroRecordReaderTest").namespace("org.apache.drill.exec.store.avro").fields().name("a_string").type().stringType().noDefault().name("b_int").type().intType().noDefault().name("c_string_array").type().array().items().stringType().noDefault().name("d_int_array").type().array().items().intType().noDefault().name("e_float_array").type().array().items().floatType().noDefault().endRecord();
final AvroTestRecordWriter recordWriter = new AvroTestRecordWriter(schema, tempFile);
try {
for (int rec = 0; rec < RECORD_COUNT; rec++) {
recordWriter.startRecord();
// Scalar fields derived from the record index.
recordWriter.put("a_string", "a_" + rec);
recordWriter.put("b_int", rec);
// String array: one entry per array slot, encoding both indices.
final GenericArray<String> stringValues = new GenericData.Array<>(ARRAY_SIZE, schema.getField("c_string_array").schema());
for (int slot = 0; slot < ARRAY_SIZE; slot++) {
stringValues.add(slot, "c_string_array_" + rec + "_" + slot);
}
recordWriter.put("c_string_array", stringValues);
// Int array: product of record index and slot index.
final GenericArray<Integer> intValues = new GenericData.Array<>(ARRAY_SIZE, schema.getField("d_int_array").schema());
for (int slot = 0; slot < ARRAY_SIZE; slot++) {
intValues.add(slot, rec * slot);
}
recordWriter.put("d_int_array", intValues);
// Float array: same product, widened to float.
final GenericArray<Float> floatValues = new GenericData.Array<>(ARRAY_SIZE, schema.getField("e_float_array").schema());
for (int slot = 0; slot < ARRAY_SIZE; slot++) {
floatValues.add(slot, (float) (rec * slot));
}
recordWriter.put("e_float_array", floatValues);
recordWriter.endRecord();
}
} finally {
// Close even on failure so the temp file is flushed and released.
recordWriter.close();
}
return recordWriter;
}
Aggregations