Search in sources :

Example 11 with FieldType

use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.

the class RowCoderTest method testConsistentWithEqualsArrayOfArrayOfBytes.

/**
 * Checks the coder's consistent-with-equals property on two rows whose single field is an
 * array-of-array-of-bytes holding equal byte contents in distinct {@code byte[]} instances.
 */
@Test
public void testConsistentWithEqualsArrayOfArrayOfBytes() throws Exception {
    Schema schema = Schema.of(Schema.Field.of("f1", FieldType.array(FieldType.array(FieldType.BYTES))));
    RowCoder coder = RowCoder.of(schema);

    // First row: a fresh byte[] wrapped in two levels of singleton lists.
    List<List<byte[]>> firstNested = Collections.singletonList(Collections.singletonList(new byte[] { 1, 2, 3, 4 }));
    Row firstRow = Row.withSchema(schema).addValue(firstNested).build();

    // Second row: same contents, but a separately allocated byte[].
    List<List<byte[]>> secondNested = Collections.singletonList(Collections.singletonList(new byte[] { 1, 2, 3, 4 }));
    Row secondRow = Row.withSchema(schema).addValue(secondNested).build();

    // The property check only runs when the coder reports consistentWithEquals().
    Assume.assumeTrue(coder.consistentWithEquals());
    CoderProperties.coderConsistentWithEquals(coder, firstRow, secondRow);
}
Also used : Schema(org.apache.beam.sdk.schemas.Schema) List(java.util.List) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) Row(org.apache.beam.sdk.values.Row) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) Test(org.junit.Test)

Example 12 with FieldType

use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.

the class RowCoderTest method testArrayOfArray.

/**
 * Round-trips a row whose single field is an array of INT32 arrays through the row coder and
 * checks that decode(encode(row)) equals the original row.
 */
@Test
public void testArrayOfArray() throws Exception {
    Schema schema = Schema.builder().addField("f_array", FieldType.array(FieldType.array(FieldType.INT32))).build();

    // Three inner lists make up the outer array value.
    List<Integer> firstInner = Lists.newArrayList(1, 2, 3, 4);
    List<Integer> secondInner = Lists.newArrayList(5, 6, 7, 8);
    List<Integer> thirdInner = Lists.newArrayList(9, 10, 11, 12);
    Row row = Row.withSchema(schema).addArray(firstInner, secondInner, thirdInner).build();

    RowCoder coder = RowCoder.of(schema);
    CoderProperties.coderDecodeEncodeEqual(coder, row);
}
Also used : Schema(org.apache.beam.sdk.schemas.Schema) Row(org.apache.beam.sdk.values.Row) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) Test(org.junit.Test)

Example 13 with FieldType

use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.

the class AvroUtils method convertArrayStrict.

/**
 * Converts every element of an Avro array value using {@code convertAvroFieldStrict}.
 *
 * @param values the Avro array elements to convert
 * @param elemAvroSchema the Avro schema passed along for each element
 * @param fieldType the Beam field type; its type name is checked against ARRAY first
 * @return a new list holding the converted elements in iteration order
 */
private static Object convertArrayStrict(List<Object> values, org.apache.avro.Schema elemAvroSchema, Schema.FieldType fieldType) {
    checkTypeName(fieldType.getTypeName(), Schema.TypeName.ARRAY, "array");

    // Pre-size the result to the number of incoming elements.
    List<Object> converted = new ArrayList<>(values.size());
    Schema.FieldType beamElementType = fieldType.getCollectionElementType();
    for (Object element : values) {
        Object convertedElement = convertAvroFieldStrict(element, elemAvroSchema, beamElementType);
        converted.add(convertedElement);
    }
    return converted;
}
Also used : AvroRecordSchema(org.apache.beam.sdk.schemas.AvroRecordSchema) Schema(org.apache.beam.sdk.schemas.Schema) ArrayList(java.util.ArrayList) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType)

Example 14 with FieldType

use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.

the class AvroUtils method toFieldType.

/**
 * Converts an AVRO schema (with its nullability) to a Beam field type.
 *
 * <p>Decimal, timestamp-millis and date logical types are mapped first; any other schema is
 * mapped by its Avro type. The result carries the nullability recorded in {@code type}.
 *
 * @param type the Avro schema together with its nullable flag
 * @return the Beam field type, with nullability applied
 * @throws IllegalArgumentException if the Avro type is NULL
 */
private static Schema.FieldType toFieldType(TypeWithNullability type) {
    org.apache.avro.Schema avroSchema = type.type;
    LogicalType logical = LogicalTypes.fromSchema(avroSchema);

    // instanceof is false for null, so a schema without a logical type falls through untouched.
    Schema.FieldType resolved = null;
    if (logical instanceof LogicalTypes.Decimal) {
        resolved = FieldType.DECIMAL;
    } else if (logical instanceof LogicalTypes.TimestampMillis || logical instanceof LogicalTypes.Date) {
        // TODO: There is a desire to move Beam schema DATETIME to a micros representation. When
        // this is done, the timestamp-millis logical type mapping needs to be changed.
        resolved = FieldType.DATETIME;
    }

    // No (recognized) logical type: map by the underlying Avro type.
    if (resolved == null) {
        switch(avroSchema.getType()) {
            case RECORD:
                resolved = Schema.FieldType.row(toBeamSchema(avroSchema));
                break;
            case ENUM:
                resolved = FieldType.logicalType(EnumerationType.create(avroSchema.getEnumSymbols()));
                break;
            case ARRAY:
                TypeWithNullability elementAvroType = new TypeWithNullability(avroSchema.getElementType());
                resolved = Schema.FieldType.array(toFieldType(elementAvroType));
                break;
            case MAP:
                // Map keys are always mapped to STRING; only the value type is converted.
                TypeWithNullability valueAvroType = new TypeWithNullability(avroSchema.getValueType());
                resolved = Schema.FieldType.map(Schema.FieldType.STRING, toFieldType(valueAvroType));
                break;
            case FIXED:
                resolved = FixedBytesField.fromAvroType(avroSchema).toBeamType();
                break;
            case STRING:
                resolved = Schema.FieldType.STRING;
                break;
            case BYTES:
                resolved = Schema.FieldType.BYTES;
                break;
            case INT:
                resolved = Schema.FieldType.INT32;
                break;
            case LONG:
                resolved = Schema.FieldType.INT64;
                break;
            case FLOAT:
                resolved = Schema.FieldType.FLOAT;
                break;
            case DOUBLE:
                resolved = Schema.FieldType.DOUBLE;
                break;
            case BOOLEAN:
                resolved = Schema.FieldType.BOOLEAN;
                break;
            case UNION:
                // Each union member becomes a one-of field named after the member schema.
                List<Field> unionFields = avroSchema.getTypes().stream().map(member -> Field.of(member.getName(), toFieldType(new TypeWithNullability(member)))).collect(Collectors.toList());
                resolved = FieldType.logicalType(OneOfType.create(unionFields));
                break;
            case NULL:
                throw new IllegalArgumentException("Can't convert 'null' to FieldType");
            default:
                throw new AssertionError("Unexpected AVRO Schema.Type: " + avroSchema.getType());
        }
    }
    return resolved.withNullable(type.nullable);
}
Also used : AvroRecordSchema(org.apache.beam.sdk.schemas.AvroRecordSchema) Schema(org.apache.beam.sdk.schemas.Schema) LogicalType(org.apache.avro.LogicalType) LogicalTypes(org.apache.avro.LogicalTypes) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) BigDecimal(java.math.BigDecimal)

Example 15 with FieldType

use of org.apache.beam.sdk.schemas.Schema.FieldType in project beam by apache.

the class AvroUtils method getFieldSchema.

/**
 * Builds the Avro schema for a Beam field type.
 *
 * @param fieldType the Beam field type to translate
 * @param fieldName the field name, passed to nested schema builders and used for enum/fixed names
 * @param namespace the namespace passed to nested schema builders
 * @return the Avro schema, made nullable via {@code ReflectData.makeNullable} when
 *     {@code fieldType.getNullable()} is true
 * @throws IllegalArgumentException for a map with non-string keys or an unexpected type name
 * @throws RuntimeException for a logical type whose identifier is not handled here
 */
private static org.apache.avro.Schema getFieldSchema(Schema.FieldType fieldType, String fieldName, String namespace) {
    org.apache.avro.Schema avroType;
    switch(fieldType.getTypeName()) {
        case BYTE:
        case INT16:
        case INT32:
            // All three integer widths share the Avro INT type.
            avroType = org.apache.avro.Schema.create(Type.INT);
            break;
        case INT64:
            avroType = org.apache.avro.Schema.create(Type.LONG);
            break;
        case DECIMAL:
            avroType = LogicalTypes.decimal(Integer.MAX_VALUE).addToSchema(org.apache.avro.Schema.create(Type.BYTES));
            break;
        case FLOAT:
            avroType = org.apache.avro.Schema.create(Type.FLOAT);
            break;
        case DOUBLE:
            avroType = org.apache.avro.Schema.create(Type.DOUBLE);
            break;
        case STRING:
            avroType = org.apache.avro.Schema.create(Type.STRING);
            break;
        case DATETIME:
            // TODO: There is a desire to move Beam schema DATETIME to a micros representation. When
            // this is done, this logical type needs to be changed.
            avroType = LogicalTypes.timestampMillis().addToSchema(org.apache.avro.Schema.create(Type.LONG));
            break;
        case BOOLEAN:
            avroType = org.apache.avro.Schema.create(Type.BOOLEAN);
            break;
        case BYTES:
            avroType = org.apache.avro.Schema.create(Type.BYTES);
            break;
        case LOGICAL_TYPE:
            // Logical types are dispatched on their string identifier.
            String logicalIdentifier = fieldType.getLogicalType().getIdentifier();
            switch(logicalIdentifier) {
                case FixedBytes.IDENTIFIER:
                    FixedBytesField fixedField = checkNotNull(FixedBytesField.fromBeamFieldType(fieldType));
                    avroType = fixedField.toAvroType("fixed", namespace + "." + fieldName);
                    break;
                case EnumerationType.IDENTIFIER:
                    EnumerationType enumType = fieldType.getLogicalType(EnumerationType.class);
                    avroType = org.apache.avro.Schema.createEnum(fieldName, "", "", enumType.getValues());
                    break;
                case OneOfType.IDENTIFIER:
                    // Each one-of alternative becomes one member of the Avro union.
                    OneOfType oneOf = fieldType.getLogicalType(OneOfType.class);
                    List<org.apache.avro.Schema> memberSchemas = oneOf.getOneOfSchema().getFields().stream().map(member -> getFieldSchema(member.getType(), member.getName(), namespace)).collect(Collectors.toList());
                    avroType = org.apache.avro.Schema.createUnion(memberSchemas);
                    break;
                case "CHAR":
                case "NCHAR":
                    avroType = buildHiveLogicalTypeSchema("char", (int) fieldType.getLogicalType().getArgument());
                    break;
                case "NVARCHAR":
                case "VARCHAR":
                case "LONGNVARCHAR":
                case "LONGVARCHAR":
                    avroType = buildHiveLogicalTypeSchema("varchar", (int) fieldType.getLogicalType().getArgument());
                    break;
                case "DATE":
                    avroType = LogicalTypes.date().addToSchema(org.apache.avro.Schema.create(Type.INT));
                    break;
                case "TIME":
                    avroType = LogicalTypes.timeMillis().addToSchema(org.apache.avro.Schema.create(Type.INT));
                    break;
                default:
                    throw new RuntimeException("Unhandled logical type " + fieldType.getLogicalType().getIdentifier());
            }
            break;
        case ARRAY:
        case ITERABLE:
            org.apache.avro.Schema elementSchema = getFieldSchema(fieldType.getCollectionElementType(), fieldName, namespace);
            avroType = org.apache.avro.Schema.createArray(elementSchema);
            break;
        case MAP:
            // Avro only supports string keys in maps.
            if (!fieldType.getMapKeyType().getTypeName().isStringType()) {
                throw new IllegalArgumentException("Avro only supports maps with string keys");
            }
            org.apache.avro.Schema valueSchema = getFieldSchema(fieldType.getMapValueType(), fieldName, namespace);
            avroType = org.apache.avro.Schema.createMap(valueSchema);
            break;
        case ROW:
            avroType = toAvroSchema(fieldType.getRowSchema(), fieldName, namespace);
            break;
        default:
            throw new IllegalArgumentException("Unexpected type " + fieldType);
    }
    if (fieldType.getNullable()) {
        return ReflectData.makeNullable(avroType);
    }
    return avroType;
}
Also used : OneOfType(org.apache.beam.sdk.schemas.logicaltypes.OneOfType) TypeCasting(org.apache.beam.vendor.bytebuddy.v1_11_0.net.bytebuddy.implementation.bytecode.assign.TypeCasting) ObjectInputStream(java.io.ObjectInputStream) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) SimpleFunction(org.apache.beam.sdk.transforms.SimpleFunction) ByteBuffer(java.nio.ByteBuffer) AvroName(org.apache.avro.reflect.AvroName) BigDecimal(java.math.BigDecimal) ByteArrayInputStream(java.io.ByteArrayInputStream) Strings(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Strings) FixedBytes(org.apache.beam.sdk.schemas.logicaltypes.FixedBytes) JodaTimestampConversion(org.apache.beam.sdk.coders.AvroCoder.JodaTimestampConversion) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) EnumerationType(org.apache.beam.sdk.schemas.logicaltypes.EnumerationType) Days(org.joda.time.Days) Method(java.lang.reflect.Method) SpecificData(org.apache.avro.specific.SpecificData) Conversions(org.apache.avro.Conversions) Utf8(org.apache.avro.util.Utf8) TypeCreation(org.apache.beam.vendor.bytebuddy.v1_11_0.net.bytebuddy.implementation.bytecode.TypeCreation) SchemaCoder(org.apache.beam.sdk.schemas.SchemaCoder) ReadableInstant(org.joda.time.ReadableInstant) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) Collectors(java.util.stream.Collectors) AvroRecordSchema(org.apache.beam.sdk.schemas.AvroRecordSchema) TypeConversion(org.apache.beam.sdk.schemas.utils.ByteBuddyUtils.TypeConversion) StandardCharsets(java.nio.charset.StandardCharsets) Objects(java.util.Objects) List(java.util.List) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) Preconditions.checkNotNull(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) ByteArrayOutputStream(java.io.ByteArrayOutputStream) 
ConvertType(org.apache.beam.sdk.schemas.utils.ByteBuddyUtils.ConvertType) Experimental(org.apache.beam.sdk.annotations.Experimental) CaseFormat(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.CaseFormat) Duration(org.joda.time.Duration) HashMap(java.util.HashMap) SpecificRecord(org.apache.avro.specific.SpecificRecord) ArrayList(java.util.ArrayList) GenericData(org.apache.avro.generic.GenericData) ReflectData(org.apache.avro.reflect.ReflectData) LogicalTypes(org.apache.avro.LogicalTypes) Kind(org.apache.beam.sdk.annotations.Experimental.Kind) SchemaUserTypeCreator(org.apache.beam.sdk.schemas.SchemaUserTypeCreator) Preconditions.checkArgument(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument) Maps(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps) AvroIgnore(org.apache.avro.reflect.AvroIgnore) ObjectOutputStream(java.io.ObjectOutputStream) FieldValueTypeInformation(org.apache.beam.sdk.schemas.FieldValueTypeInformation) ForLoadedType(org.apache.beam.vendor.bytebuddy.v1_11_0.net.bytebuddy.description.type.TypeDescription.ForLoadedType) Row(org.apache.beam.sdk.values.Row) Nonnull(javax.annotation.Nonnull) Type(org.apache.avro.Schema.Type) Nullable(org.checkerframework.checker.nullness.qual.Nullable) Field(org.apache.beam.sdk.schemas.Schema.Field) GenericRecord(org.apache.avro.generic.GenericRecord) Duplication(org.apache.beam.vendor.bytebuddy.v1_11_0.net.bytebuddy.implementation.bytecode.Duplication) GenericFixed(org.apache.avro.generic.GenericFixed) LogicalType(org.apache.avro.LogicalType) Compound(org.apache.beam.vendor.bytebuddy.v1_11_0.net.bytebuddy.implementation.bytecode.StackManipulation.Compound) ConvertValueForGetter(org.apache.beam.sdk.schemas.utils.ByteBuddyUtils.ConvertValueForGetter) ElementMatchers(org.apache.beam.vendor.bytebuddy.v1_11_0.net.bytebuddy.matcher.ElementMatchers) Lists(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists) 
IOException(java.io.IOException) AvroCoder(org.apache.beam.sdk.coders.AvroCoder) Schema(org.apache.beam.sdk.schemas.Schema) TypeName(org.apache.beam.sdk.schemas.Schema.TypeName) StackManipulation(org.apache.beam.vendor.bytebuddy.v1_11_0.net.bytebuddy.implementation.bytecode.StackManipulation) MethodInvocation(org.apache.beam.vendor.bytebuddy.v1_11_0.net.bytebuddy.implementation.bytecode.member.MethodInvocation) ConvertValueForSetter(org.apache.beam.sdk.schemas.utils.ByteBuddyUtils.ConvertValueForSetter) Instant(org.joda.time.Instant) TypeConversionsFactory(org.apache.beam.sdk.schemas.utils.ByteBuddyUtils.TypeConversionsFactory) AvroRuntimeException(org.apache.avro.AvroRuntimeException) FieldValueGetter(org.apache.beam.sdk.schemas.FieldValueGetter) AvroRuntimeException(org.apache.avro.AvroRuntimeException) EnumerationType(org.apache.beam.sdk.schemas.logicaltypes.EnumerationType) OneOfType(org.apache.beam.sdk.schemas.logicaltypes.OneOfType)

Aggregations

FieldType (org.apache.beam.sdk.schemas.Schema.FieldType)58 Schema (org.apache.beam.sdk.schemas.Schema)24 Field (org.apache.beam.sdk.schemas.Schema.Field)20 Row (org.apache.beam.sdk.values.Row)15 Test (org.junit.Test)15 Map (java.util.Map)10 List (java.util.List)9 ArrayList (java.util.ArrayList)7 Nullable (org.checkerframework.checker.nullness.qual.Nullable)7 FieldDescriptor (com.google.protobuf.Descriptors.FieldDescriptor)6 BigDecimal (java.math.BigDecimal)6 Schema.toSchema (org.apache.beam.sdk.schemas.Schema.toSchema)6 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)6 Collectors (java.util.stream.Collectors)5 EnumerationType (org.apache.beam.sdk.schemas.logicaltypes.EnumerationType)5 LocalDateTime (java.time.LocalDateTime)4 LocalTime (java.time.LocalTime)4 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)3 TableSchema (com.google.api.services.bigquery.model.TableSchema)3 AutoValue (com.google.auto.value.AutoValue)3