Example 1 with ArrayType

Use of org.apache.flink.table.types.logical.ArrayType in project flink by apache.

From class AvroSchemaConverter, method convertToSchema.

/**
 * Converts Flink SQL {@link LogicalType} (can be nested) into an Avro schema.
 *
 * <p>The "{rowName}_" prefix is used to name nested row types so that every record in the
 * generated schema gets a unique name. Nested record types that differ only in their type name
 * remain compatible.
 *
 * @param logicalType logical type
 * @param rowName the record name
 * @return Avro's {@link Schema} matching this logical type.
 */
public static Schema convertToSchema(LogicalType logicalType, String rowName) {
    int precision;
    boolean nullable = logicalType.isNullable();
    switch(logicalType.getTypeRoot()) {
        case NULL:
            return SchemaBuilder.builder().nullType();
        case BOOLEAN:
            Schema bool = SchemaBuilder.builder().booleanType();
            return nullable ? nullableSchema(bool) : bool;
        case TINYINT:
        case SMALLINT:
        case INTEGER:
            Schema integer = SchemaBuilder.builder().intType();
            return nullable ? nullableSchema(integer) : integer;
        case BIGINT:
            Schema bigint = SchemaBuilder.builder().longType();
            return nullable ? nullableSchema(bigint) : bigint;
        case FLOAT:
            Schema f = SchemaBuilder.builder().floatType();
            return nullable ? nullableSchema(f) : f;
        case DOUBLE:
            Schema d = SchemaBuilder.builder().doubleType();
            return nullable ? nullableSchema(d) : d;
        case CHAR:
        case VARCHAR:
            Schema str = SchemaBuilder.builder().stringType();
            return nullable ? nullableSchema(str) : str;
        case BINARY:
        case VARBINARY:
            Schema binary = SchemaBuilder.builder().bytesType();
            return nullable ? nullableSchema(binary) : binary;
        case TIMESTAMP_WITHOUT_TIME_ZONE:
            // use long to represent Timestamp
            final TimestampType timestampType = (TimestampType) logicalType;
            precision = timestampType.getPrecision();
            org.apache.avro.LogicalType avroLogicalType;
            if (precision <= 3) {
                avroLogicalType = LogicalTypes.timestampMillis();
            } else {
                throw new IllegalArgumentException("Avro does not support TIMESTAMP type " + "with precision: " + precision + ", it only supports precision less than 3.");
            }
            Schema timestamp = avroLogicalType.addToSchema(SchemaBuilder.builder().longType());
            return nullable ? nullableSchema(timestamp) : timestamp;
        case DATE:
            // use int to represent Date
            Schema date = LogicalTypes.date().addToSchema(SchemaBuilder.builder().intType());
            return nullable ? nullableSchema(date) : date;
        case TIME_WITHOUT_TIME_ZONE:
            precision = ((TimeType) logicalType).getPrecision();
            if (precision > 3) {
                throw new IllegalArgumentException("Avro does not support TIME type with precision: " + precision + ", it only supports precision less than 3.");
            }
            // use int to represent Time; only millisecond precision is supported during deserialization
            Schema time = LogicalTypes.timeMillis().addToSchema(SchemaBuilder.builder().intType());
            return nullable ? nullableSchema(time) : time;
        case DECIMAL:
            DecimalType decimalType = (DecimalType) logicalType;
            // store BigDecimal as byte[]
            Schema decimal = LogicalTypes.decimal(decimalType.getPrecision(), decimalType.getScale()).addToSchema(SchemaBuilder.builder().bytesType());
            return nullable ? nullableSchema(decimal) : decimal;
        case ROW:
            RowType rowType = (RowType) logicalType;
            List<String> fieldNames = rowType.getFieldNames();
            // we have to make sure record names are unique within the Schema
            SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.builder().record(rowName).fields();
            for (int i = 0; i < rowType.getFieldCount(); i++) {
                String fieldName = fieldNames.get(i);
                LogicalType fieldType = rowType.getTypeAt(i);
                SchemaBuilder.GenericDefault<Schema> fieldBuilder = builder.name(fieldName).type(convertToSchema(fieldType, rowName + "_" + fieldName));
                if (fieldType.isNullable()) {
                    builder = fieldBuilder.withDefault(null);
                } else {
                    builder = fieldBuilder.noDefault();
                }
            }
            Schema record = builder.endRecord();
            return nullable ? nullableSchema(record) : record;
        case MULTISET:
        case MAP:
            Schema map = SchemaBuilder.builder().map().values(convertToSchema(extractValueTypeToAvroMap(logicalType), rowName));
            return nullable ? nullableSchema(map) : map;
        case ARRAY:
            ArrayType arrayType = (ArrayType) logicalType;
            Schema array = SchemaBuilder.builder().array().items(convertToSchema(arrayType.getElementType(), rowName));
            return nullable ? nullableSchema(array) : array;
        case RAW:
        case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
        default:
            throw new UnsupportedOperationException("Unsupported to derive Schema for type: " + logicalType);
    }
}
Also used : Schema(org.apache.avro.Schema) AvroRowDeserializationSchema(org.apache.flink.formats.avro.AvroRowDeserializationSchema) AvroRowSerializationSchema(org.apache.flink.formats.avro.AvroRowSerializationSchema) RowType(org.apache.flink.table.types.logical.RowType) LogicalType(org.apache.flink.table.types.logical.LogicalType) ArrayType(org.apache.flink.table.types.logical.ArrayType) SchemaBuilder(org.apache.avro.SchemaBuilder) TimestampType(org.apache.flink.table.types.logical.TimestampType) DecimalType(org.apache.flink.table.types.logical.DecimalType)
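
As a quick usage sketch (not part of the excerpt above; the import path org.apache.flink.formats.avro.typeutils.AvroSchemaConverter, the demo class name, and the ROW&lt;amounts ARRAY&lt;DECIMAL(10, 2)&gt;&gt; column are assumptions for illustration), the ARRAY branch could be exercised like this:

import org.apache.avro.Schema;
import org.apache.flink.formats.avro.typeutils.AvroSchemaConverter;
import org.apache.flink.table.types.logical.ArrayType;
import org.apache.flink.table.types.logical.DecimalType;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.types.logical.RowType;

public class ConvertToSchemaDemo {
    public static void main(String[] args) {
        // ROW<amounts ARRAY<DECIMAL(10, 2)>> - hits the ROW, ARRAY and DECIMAL cases above
        LogicalType rowType =
                RowType.of(
                        new LogicalType[] {new ArrayType(new DecimalType(10, 2))},
                        new String[] {"amounts"});
        Schema avroSchema = AvroSchemaConverter.convertToSchema(rowType, "record");
        System.out.println(avroSchema.toString(true)); // pretty-printed Avro schema JSON
    }
}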

Example 2 with ArrayType

Use of org.apache.flink.table.types.logical.ArrayType in project flink by apache.

From class ArrowUtils, method toArrowField.

private static Field toArrowField(String fieldName, LogicalType logicalType) {
    FieldType fieldType = new FieldType(logicalType.isNullable(), logicalType.accept(LogicalTypeToArrowTypeConverter.INSTANCE), null);
    List<Field> children = null;
    if (logicalType instanceof ArrayType) {
        children = Collections.singletonList(toArrowField("element", ((ArrayType) logicalType).getElementType()));
    } else if (logicalType instanceof RowType) {
        RowType rowType = (RowType) logicalType;
        children = new ArrayList<>(rowType.getFieldCount());
        for (RowType.RowField field : rowType.getFields()) {
            children.add(toArrowField(field.getName(), field.getType()));
        }
    }
    return new Field(fieldName, fieldType, children);
}
Also used : ArrayType(org.apache.flink.table.types.logical.ArrayType) Field(org.apache.arrow.vector.types.pojo.Field) ArrayList(java.util.ArrayList) RowType(org.apache.flink.table.types.logical.RowType) FieldType(org.apache.arrow.vector.types.pojo.FieldType)
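
Since toArrowField is private, here is a standalone sketch (constructed for this page, names illustrative) of the Field it produces for a nullable ARRAY&lt;INT&gt; column named "ids": a List field whose single child is the "element" field.

import java.util.Collections;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;

public class ArrowListFieldDemo {
    public static void main(String[] args) {
        // The child carries the element type; the parent is the list container.
        Field element =
                new Field("element", FieldType.nullable(new ArrowType.Int(32, true)), null);
        Field ids =
                new Field(
                        "ids",
                        FieldType.nullable(ArrowType.List.INSTANCE),
                        Collections.singletonList(element));
        System.out.println(ids); // prints the list field together with its child
    }
}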

Example 3 with ArrayType

Use of org.apache.flink.table.types.logical.ArrayType in project flink by apache.

From class OrcSplitReaderUtil, method logicalTypeToOrcType.

/**
 * See {@code org.apache.flink.table.catalog.hive.util.HiveTypeUtil}.
 */
public static TypeDescription logicalTypeToOrcType(LogicalType type) {
    type = type.copy(true);
    switch(type.getTypeRoot()) {
        case CHAR:
            return TypeDescription.createChar().withMaxLength(((CharType) type).getLength());
        case VARCHAR:
            int len = ((VarCharType) type).getLength();
            if (len == VarCharType.MAX_LENGTH) {
                return TypeDescription.createString();
            } else {
                return TypeDescription.createVarchar().withMaxLength(len);
            }
        case BOOLEAN:
            return TypeDescription.createBoolean();
        case VARBINARY:
            if (type.equals(DataTypes.BYTES().getLogicalType())) {
                return TypeDescription.createBinary();
            } else {
                throw new UnsupportedOperationException("Not support other binary type: " + type);
            }
        case DECIMAL:
            DecimalType decimalType = (DecimalType) type;
            return TypeDescription.createDecimal().withScale(decimalType.getScale()).withPrecision(decimalType.getPrecision());
        case TINYINT:
            return TypeDescription.createByte();
        case SMALLINT:
            return TypeDescription.createShort();
        case INTEGER:
            return TypeDescription.createInt();
        case BIGINT:
            return TypeDescription.createLong();
        case FLOAT:
            return TypeDescription.createFloat();
        case DOUBLE:
            return TypeDescription.createDouble();
        case DATE:
            return TypeDescription.createDate();
        case TIMESTAMP_WITHOUT_TIME_ZONE:
            return TypeDescription.createTimestamp();
        case ARRAY:
            ArrayType arrayType = (ArrayType) type;
            return TypeDescription.createList(logicalTypeToOrcType(arrayType.getElementType()));
        case MAP:
            MapType mapType = (MapType) type;
            return TypeDescription.createMap(logicalTypeToOrcType(mapType.getKeyType()), logicalTypeToOrcType(mapType.getValueType()));
        case ROW:
            RowType rowType = (RowType) type;
            TypeDescription struct = TypeDescription.createStruct();
            for (int i = 0; i < rowType.getFieldCount(); i++) {
                struct.addField(rowType.getFieldNames().get(i), logicalTypeToOrcType(rowType.getChildren().get(i)));
            }
            return struct;
        default:
            throw new UnsupportedOperationException("Unsupported type: " + type);
    }
}
Also used : ArrayType(org.apache.flink.table.types.logical.ArrayType) DecimalType(org.apache.flink.table.types.logical.DecimalType) RowType(org.apache.flink.table.types.logical.RowType) TypeDescription(org.apache.orc.TypeDescription) VarCharType(org.apache.flink.table.types.logical.VarCharType) MapType(org.apache.flink.table.types.logical.MapType)
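
A minimal usage sketch for the ARRAY case (the import path org.apache.flink.orc.OrcSplitReaderUtil and the demo class name are assumptions):

import org.apache.flink.orc.OrcSplitReaderUtil;
import org.apache.flink.table.types.logical.ArrayType;
import org.apache.flink.table.types.logical.IntType;
import org.apache.orc.TypeDescription;

public class OrcTypeDemo {
    public static void main(String[] args) {
        // ARRAY<INT> maps to ORC's list type via the ARRAY case above
        TypeDescription orcType =
                OrcSplitReaderUtil.logicalTypeToOrcType(new ArrayType(new IntType()));
        System.out.println(orcType); // array<int>
    }
}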

Example 4 with ArrayType

Use of org.apache.flink.table.types.logical.ArrayType in project flink by apache.

From class ParquetRowDataWriter, method createWriter.

private FieldWriter createWriter(LogicalType t, Type type) {
    if (type.isPrimitive()) {
        switch(t.getTypeRoot()) {
            case CHAR:
            case VARCHAR:
                return new StringWriter();
            case BOOLEAN:
                return new BooleanWriter();
            case BINARY:
            case VARBINARY:
                return new BinaryWriter();
            case DECIMAL:
                DecimalType decimalType = (DecimalType) t;
                return createDecimalWriter(decimalType.getPrecision(), decimalType.getScale());
            case TINYINT:
                return new ByteWriter();
            case SMALLINT:
                return new ShortWriter();
            case DATE:
            case TIME_WITHOUT_TIME_ZONE:
            case INTEGER:
                return new IntWriter();
            case BIGINT:
                return new LongWriter();
            case FLOAT:
                return new FloatWriter();
            case DOUBLE:
                return new DoubleWriter();
            case TIMESTAMP_WITHOUT_TIME_ZONE:
                TimestampType timestampType = (TimestampType) t;
                return new TimestampWriter(timestampType.getPrecision());
            case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
                LocalZonedTimestampType localZonedTimestampType = (LocalZonedTimestampType) t;
                return new TimestampWriter(localZonedTimestampType.getPrecision());
            default:
                throw new UnsupportedOperationException("Unsupported type: " + type);
        }
    } else {
        GroupType groupType = type.asGroupType();
        LogicalTypeAnnotation logicalType = type.getLogicalTypeAnnotation();
        if (t instanceof ArrayType && logicalType instanceof LogicalTypeAnnotation.ListLogicalTypeAnnotation) {
            return new ArrayWriter(((ArrayType) t).getElementType(), groupType);
        } else if (t instanceof MapType && logicalType instanceof LogicalTypeAnnotation.MapLogicalTypeAnnotation) {
            return new MapWriter(((MapType) t).getKeyType(), ((MapType) t).getValueType(), groupType);
        } else if (t instanceof RowType && type instanceof GroupType) {
            return new RowWriter((RowType) t, groupType);
        } else {
            throw new UnsupportedOperationException("Unsupported type: " + type);
        }
    }
}
Also used : LocalZonedTimestampType(org.apache.flink.table.types.logical.LocalZonedTimestampType) RowType(org.apache.flink.table.types.logical.RowType) MapType(org.apache.flink.table.types.logical.MapType) ArrayType(org.apache.flink.table.types.logical.ArrayType) GroupType(org.apache.parquet.schema.GroupType) LogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation) DecimalType(org.apache.flink.table.types.logical.DecimalType) TimestampType(org.apache.flink.table.types.logical.TimestampType)
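
The ArrayType branch only fires when the Parquet GroupType carries the LIST annotation. As an illustrative sketch (constructed for this page, not part of ParquetRowDataWriter), Parquet's Types builder can produce exactly such an annotated group:

import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Types;

public class ParquetListTypeDemo {
    public static void main(String[] args) {
        // optional group ids (LIST) { repeated group list { optional int32 element } }
        GroupType ids =
                Types.optionalGroup()
                        .as(LogicalTypeAnnotation.listType())
                        .addField(
                                Types.repeatedGroup()
                                        .addField(Types.optional(PrimitiveTypeName.INT32).named("element"))
                                        .named("list"))
                        .named("ids");
        // This is the annotation createWriter checks for before choosing ArrayWriter
        System.out.println(ids.getLogicalTypeAnnotation()
                instanceof LogicalTypeAnnotation.ListLogicalTypeAnnotation); // true
    }
}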

Example 5 with ArrayType

Use of org.apache.flink.table.types.logical.ArrayType in project flink by apache.

From class PostgresRowConverter, method createPostgresArrayConverter.

private JdbcDeserializationConverter createPostgresArrayConverter(ArrayType arrayType) {
    // PG's bytea[] is wrapped in PGobject, rather than primitive byte arrays
    if (arrayType.getElementType().is(LogicalTypeFamily.BINARY_STRING)) {
        final Class<?> elementClass = LogicalTypeUtils.toInternalConversionClass(arrayType.getElementType());
        final JdbcDeserializationConverter elementConverter = createNullableInternalConverter(arrayType.getElementType());
        return val -> {
            PgArray pgArray = (PgArray) val;
            Object[] in = (Object[]) pgArray.getArray();
            final Object[] array = (Object[]) Array.newInstance(elementClass, in.length);
            for (int i = 0; i < in.length; i++) {
                array[i] = elementConverter.deserialize(((PGobject) in[i]).getValue().getBytes());
            }
            return new GenericArrayData(array);
        };
    } else {
        final Class<?> elementClass = LogicalTypeUtils.toInternalConversionClass(arrayType.getElementType());
        final JdbcDeserializationConverter elementConverter = createNullableInternalConverter(arrayType.getElementType());
        return val -> {
            PgArray pgArray = (PgArray) val;
            Object[] in = (Object[]) pgArray.getArray();
            final Object[] array = (Object[]) Array.newInstance(elementClass, in.length);
            for (int i = 0; i < in.length; i++) {
                array[i] = elementConverter.deserialize(in[i]);
            }
            return new GenericArrayData(array);
        };
    }
}
Also used : PGobject(org.postgresql.util.PGobject) Array(java.lang.reflect.Array) LogicalType(org.apache.flink.table.types.logical.LogicalType) LogicalTypeFamily(org.apache.flink.table.types.logical.LogicalTypeFamily) GenericArrayData(org.apache.flink.table.data.GenericArrayData) AbstractJdbcRowConverter(org.apache.flink.connector.jdbc.converter.AbstractJdbcRowConverter) ArrayType(org.apache.flink.table.types.logical.ArrayType) RowType(org.apache.flink.table.types.logical.RowType) LogicalTypeRoot(org.apache.flink.table.types.logical.LogicalTypeRoot) LogicalTypeUtils(org.apache.flink.table.types.logical.utils.LogicalTypeUtils) PgArray(org.postgresql.jdbc.PgArray)
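
Both branches rely on the same reflection pattern: java.lang.reflect.Array.newInstance builds an element array of the right runtime class before per-element conversion. A stripped-down standalone sketch of that pattern (illustrative names, no PostgreSQL dependency):

import java.lang.reflect.Array;

public class TypedArrayDemo {
    public static void main(String[] args) {
        Object[] in = {1, 2, 3}; // stands in for what PgArray.getArray() returns
        // Allocate an array whose component type is the element's conversion class
        Object[] out = (Object[]) Array.newInstance(Integer.class, in.length);
        for (int i = 0; i < in.length; i++) {
            out[i] = in[i]; // elementConverter.deserialize(in[i]) in the real converter
        }
        System.out.println(out.getClass().getComponentType()); // class java.lang.Integer
    }
}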

Aggregations

ArrayType (org.apache.flink.table.types.logical.ArrayType): 28 usages
LogicalType (org.apache.flink.table.types.logical.LogicalType): 18 usages
RowType (org.apache.flink.table.types.logical.RowType): 18 usages
DecimalType (org.apache.flink.table.types.logical.DecimalType): 11 usages
MapType (org.apache.flink.table.types.logical.MapType): 11 usages
TimestampType (org.apache.flink.table.types.logical.TimestampType): 10 usages
IntType (org.apache.flink.table.types.logical.IntType): 8 usages
ArrayList (java.util.ArrayList): 7 usages
GenericRowData (org.apache.flink.table.data.GenericRowData): 7 usages
RowData (org.apache.flink.table.data.RowData): 7 usages
VarCharType (org.apache.flink.table.types.logical.VarCharType): 7 usages
LocalTime (java.time.LocalTime): 6 usages
Internal (org.apache.flink.annotation.Internal): 6 usages
LocalZonedTimestampType (org.apache.flink.table.types.logical.LocalZonedTimestampType): 6 usages
Serializable (java.io.Serializable): 5 usages
Array (java.lang.reflect.Array): 5 usages
LocalDate (java.time.LocalDate): 5 usages
LocalDateTime (java.time.LocalDateTime): 5 usages
Map (java.util.Map): 5 usages
JsonNode (org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode): 5 usages