Search in sources :

Example 6 with Type

use of org.apache.avro.Schema.Type in project pinot by linkedin.

the class AvroRecordReader method getColumnType.

/**
 * Determines the {@code DataType} of a column from its Avro field definition.
 *
 * <p>The field schema is first passed through {@code extractSchemaFromUnionIfNeeded}.
 * For a non-array schema the type is converted directly with {@code DataType.valueOf}.
 * For an array schema the element schema is used instead; when the element is a
 * record, it must declare exactly one field and that field's schema supplies the type.
 *
 * @param field the Avro field to inspect
 * @return the {@code DataType} produced by {@code DataType.valueOf} for the resolved Avro type
 * @throws RuntimeException if the array element is a record whose field count is not one
 */
public static DataType getColumnType(Field field) {
    final org.apache.avro.Schema resolvedSchema = extractSchemaFromUnionIfNeeded(field.schema());
    final Type resolvedType = resolvedSchema.getType();

    // Anything that is not an array maps straight from its own type.
    if (resolvedType != Type.ARRAY) {
        return DataType.valueOf(resolvedType);
    }

    org.apache.avro.Schema itemSchema = extractSchemaFromUnionIfNeeded(resolvedSchema.getElementType());
    if (itemSchema.getType() == Type.RECORD) {
        // A record element is only accepted as a wrapper around a single field.
        if (itemSchema.getFields().size() != 1) {
            throw new RuntimeException("More than one schema in Multi-value column!");
        }
        itemSchema = extractSchemaFromUnionIfNeeded(itemSchema.getFields().get(0).schema());
    }
    return DataType.valueOf(itemSchema.getType());
}
Also used : Type(org.apache.avro.Schema.Type) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType)

Example 7 with Type

use of org.apache.avro.Schema.Type in project pinot by linkedin.

the class SegmentTestUtils method isSingleValueField.

/**
 * Tells whether the given Avro field is single-valued.
 *
 * <p>The field schema is passed through {@code extractSchemaFromUnionIfNeeded}; the field
 * counts as single-valued whenever the resulting schema type is not {@code Type.ARRAY}.
 *
 * @param field the Avro field to inspect
 * @return {@code false} if the resolved schema type is {@code ARRAY}, {@code true} otherwise
 */
private static boolean isSingleValueField(Field field) {
    final org.apache.avro.Schema resolvedSchema = extractSchemaFromUnionIfNeeded(field.schema());
    return resolvedSchema.getType() != Type.ARRAY;
}
Also used : Type(org.apache.avro.Schema.Type) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType)

Example 8 with Type

use of org.apache.avro.Schema.Type in project beam by apache.

the class BigQueryAvroUtils method convertRepeatedField.

/**
 * Converts the value of a BigQuery REPEATED field, which must be backed by an Avro ARRAY
 * schema, into a list of converted elements.
 *
 * @param schema the Avro schema of the field; verified to be of type {@code ARRAY}
 * @param fieldSchema the BigQuery field schema, used for error messages and element conversion
 * @param v the raw value; {@code null} is treated as an empty repeated field
 * @return an immutable list holding each element converted by {@code convertRequiredField},
 *     or an empty list when {@code v} is {@code null}
 */
private static List<Object> convertRepeatedField(Schema schema, TableFieldSchema fieldSchema, Object v) {
    Type containerType = schema.getType();
    verify(containerType == Type.ARRAY, "BigQuery REPEATED field %s should be Avro ARRAY, not %s", fieldSchema.getName(), containerType);

    // A missing value stands for a repeated field with no entries.
    if (v == null) {
        return ImmutableList.of();
    }

    @SuppressWarnings("unchecked") List<Object> rawItems = (List<Object>) v;
    ImmutableList.Builder<Object> converted = ImmutableList.builder();
    // Every entry shares the array's element type, so resolve it once up front.
    Type itemType = schema.getElementType().getType();
    rawItems.forEach(item -> converted.add(convertRequiredField(itemType, fieldSchema, item)));
    return converted.build();
}
Also used : Type(org.apache.avro.Schema.Type) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) List(java.util.List)

Example 9 with Type

use of org.apache.avro.Schema.Type in project beam by apache.

the class BigQueryAvroUtils method convertRequiredField.

/**
 * Converts a non-null Avro value into the representation used for the given BigQuery field.
 *
 * <p>REQUIRED fields are represented as the corresponding Avro types. For example, a BigQuery
 * INTEGER type maps to an Avro LONG type. The Avro type is checked against the type expected
 * for the BigQuery type (looked up in {@code BIG_QUERY_TO_AVRO_TYPES}) before conversion.
 *
 * @param avroType the Avro type of the value's schema
 * @param fieldSchema the BigQuery schema of the field being converted
 * @param v the value to convert; must not be {@code null}
 * @return the converted value: a {@code String} for STRING, DATE, DATETIME, TIME, INTEGER,
 *     TIMESTAMP and BYTES (base64); the value itself for FLOAT and BOOLEAN; and the result of
 *     {@code convertGenericRecordToTableRow} for RECORD
 * @throws UnsupportedOperationException if the BigQuery type has no conversion case here
 */
private static Object convertRequiredField(Type avroType, TableFieldSchema fieldSchema, Object v) {
    checkNotNull(v, "REQUIRED field %s should not be null", fieldSchema.getName());
    // Per https://cloud.google.com/bigquery/docs/reference/v2/tables#schema, the type field
    // is required, so it may not be null.
    String bqType = fieldSchema.getType();
    Type expectedAvroType = BIG_QUERY_TO_AVRO_TYPES.get(bqType);
    verifyNotNull(expectedAvroType, "Unsupported BigQuery type: %s", bqType);
    verify(avroType == expectedAvroType, "Expected Avro schema type %s, not %s, for BigQuery %s field %s", expectedAvroType, avroType, bqType, fieldSchema.getName());
    switch(bqType) {
        case "STRING":
        case "DATE":
        case "DATETIME":
        case "TIME":
            // Avro will use a CharSequence to represent String objects, but it may not always use
            // java.lang.String; for example, it may prefer org.apache.avro.util.Utf8.
            verify(v instanceof CharSequence, "Expected CharSequence (String), got %s", v.getClass());
            return v.toString();
        case "INTEGER":
            verify(v instanceof Long, "Expected Long, got %s", v.getClass());
            return ((Long) v).toString();
        case "FLOAT":
            verify(v instanceof Double, "Expected Double, got %s", v.getClass());
            return v;
        case "BOOLEAN":
            verify(v instanceof Boolean, "Expected Boolean, got %s", v.getClass());
            return v;
        case "TIMESTAMP":
            // TIMESTAMP data types are represented as Avro LONG types. They are converted back to
            // Strings with variable-precision (up to six digits) to match the JSON files export
            // by BigQuery.
            verify(v instanceof Long, "Expected Long, got %s", v.getClass());
            Double doubleValue = ((Long) v) / 1000000.0;
            return formatTimestamp(doubleValue.toString());
        case "RECORD":
            verify(v instanceof GenericRecord, "Expected GenericRecord, got %s", v.getClass());
            return convertGenericRecordToTableRow((GenericRecord) v, fieldSchema.getFields());
        case "BYTES":
            verify(v instanceof ByteBuffer, "Expected ByteBuffer, got %s", v.getClass());
            // Read through a duplicate so the caller's buffer position is left untouched, and
            // size the array by remaining() rather than limit(): a relative get() of limit()
            // bytes underflows whenever the buffer's position is not zero.
            ByteBuffer byteBuffer = ((ByteBuffer) v).duplicate();
            byte[] bytes = new byte[byteBuffer.remaining()];
            byteBuffer.get(bytes);
            return BaseEncoding.base64().encode(bytes);
        default:
            throw new UnsupportedOperationException(String.format("Unexpected BigQuery field schema type %s for field named %s", bqType, fieldSchema.getName()));
    }
}
Also used : Type(org.apache.avro.Schema.Type) GenericRecord(org.apache.avro.generic.GenericRecord) ByteBuffer(java.nio.ByteBuffer)

Example 10 with Type

use of org.apache.avro.Schema.Type in project gora by apache.

the class AccumuloStore method firstNullSchemaTypeIndex.

/**
 * Finds the position of the first {@code NULL}-typed schema among the types returned by
 * {@code toSchema.getTypes()}.
 *
 * @param toSchema the schema whose member types are scanned
 * @return the index of the first member whose type is {@code Type.NULL}, or {@code 0} when
 *     no member has that type
 */
private int firstNullSchemaTypeIndex(Schema toSchema) {
    final List<Schema> branches = toSchema.getTypes();
    int position = 0;
    for (Schema branch : branches) {
        if (branch.getType() == Type.NULL) {
            // FIXME HUGE kludge to pass tests
            return position;
        }
        position++;
    }
    // No NULL branch found: fall back to the first index.
    return 0;
}
Also used : Type(org.apache.avro.Schema.Type) Schema(org.apache.avro.Schema)

Aggregations

Type (org.apache.avro.Schema.Type) 41, Schema (org.apache.avro.Schema) 28, Field (org.apache.avro.Schema.Field) 13, DataType (com.linkedin.pinot.common.data.FieldSpec.DataType) 6, ByteBuffer (java.nio.ByteBuffer) 6, HashMap (java.util.HashMap) 6, ArrayList (java.util.ArrayList) 5, Map (java.util.Map) 5, ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap) 5, SQLException (java.sql.SQLException) 4, PersistentBase (org.apache.gora.persistency.impl.PersistentBase) 4, EventCreationException (com.linkedin.databus2.producers.EventCreationException) 3, SourceType (com.linkedin.databus2.relay.config.ReplicationBitSetterStaticConfig.SourceType) 3, IOException (java.io.IOException) 3, LinkedHashMap (java.util.LinkedHashMap) 3, List (java.util.List) 3, GenericArray (org.apache.avro.generic.GenericArray) 3, Utf8 (org.apache.avro.util.Utf8) 3, DocumentFieldType (org.apache.gora.mongodb.store.MongoMapping.DocumentFieldType) 3, TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema) 2