Search in sources :

Example 1 with DecimalType

use of org.apache.flink.table.types.logical.DecimalType in project flink by apache.

the class AvroToRowDataConverters method createDecimalConverter.

/**
 * Creates a converter that turns an Avro decimal value into {@link DecimalData}.
 *
 * <p>The returned converter accepts three runtime representations of the unscaled value:
 * a {@code GenericFixed}, a {@code ByteBuffer}, or a plain {@code byte[]} (anything that is
 * neither of the first two is cast to {@code byte[]}).
 *
 * @param decimalType the decimal type supplying the precision and scale passed to
 *     {@code DecimalData.fromUnscaledBytes}
 * @return a converter from the Avro object to {@link DecimalData}
 */
private static AvroToRowDataConverter createDecimalConverter(DecimalType decimalType) {
    final int targetPrecision = decimalType.getPrecision();
    final int targetScale = decimalType.getScale();
    return avroObject -> {
        if (avroObject instanceof GenericFixed) {
            final byte[] fixedBytes = ((GenericFixed) avroObject).bytes();
            return DecimalData.fromUnscaledBytes(fixedBytes, targetPrecision, targetScale);
        }
        if (avroObject instanceof ByteBuffer) {
            final ByteBuffer buffer = (ByteBuffer) avroObject;
            // Copy exactly the remaining bytes; note that this relative get advances the
            // buffer's position.
            final byte[] copied = new byte[buffer.remaining()];
            buffer.get(copied);
            return DecimalData.fromUnscaledBytes(copied, targetPrecision, targetScale);
        }
        // Fallback: the value is expected to already be a raw byte array.
        return DecimalData.fromUnscaledBytes((byte[]) avroObject, targetPrecision, targetScale);
    };
}
Also used : ChronoField(java.time.temporal.ChronoField) Array(java.lang.reflect.Array) GenericArrayData(org.apache.flink.table.data.GenericArrayData) HashMap(java.util.HashMap) RowType(org.apache.flink.table.types.logical.RowType) ByteBuffer(java.nio.ByteBuffer) GenericRowData(org.apache.flink.table.data.GenericRowData) DecimalType(org.apache.flink.table.types.logical.DecimalType) GenericMapData(org.apache.flink.table.data.GenericMapData) Map(java.util.Map) LocalTime(java.time.LocalTime) LogicalTypeUtils(org.apache.flink.table.types.logical.utils.LogicalTypeUtils) IndexedRecord(org.apache.avro.generic.IndexedRecord) GenericRecord(org.apache.avro.generic.GenericRecord) RowData(org.apache.flink.table.data.RowData) GenericFixed(org.apache.avro.generic.GenericFixed) TimestampData(org.apache.flink.table.data.TimestampData) DataTypes(org.apache.flink.table.api.DataTypes) DecimalData(org.apache.flink.table.data.DecimalData) ArrayType(org.apache.flink.table.types.logical.ArrayType) Instant(java.time.Instant) AvroSchemaConverter.extractValueTypeToAvroMap(org.apache.flink.formats.avro.typeutils.AvroSchemaConverter.extractValueTypeToAvroMap) Serializable(java.io.Serializable) StringData(org.apache.flink.table.data.StringData) List(java.util.List) LogicalType(org.apache.flink.table.types.logical.LogicalType) LocalDate(java.time.LocalDate) Internal(org.apache.flink.annotation.Internal) GenericFixed(org.apache.avro.generic.GenericFixed) ByteBuffer(java.nio.ByteBuffer)

Example 2 with DecimalType

use of org.apache.flink.table.types.logical.DecimalType in project flink by apache.

the class AvroSchemaConverter method convertToSchema.

/**
 * Derives the Avro {@link Schema} for a Flink SQL {@link LogicalType}, recursing into nested
 * row, map, multiset and array types.
 *
 * <p>Nested rows are named "{rowName}_{fieldName}" so that record names stay distinct within
 * one schema. Nested record types that differ only in their type name are still compatible.
 *
 * <p>Except for the NULL type, the resulting schema is passed through {@code nullableSchema}
 * whenever the logical type is nullable.
 *
 * @param logicalType logical type
 * @param rowName the record name
 * @return Avro's {@link Schema} matching this logical type.
 * @throws IllegalArgumentException if a TIMESTAMP or TIME type has a precision greater than 3
 * @throws UnsupportedOperationException if no schema can be derived for the type root
 */
public static Schema convertToSchema(LogicalType logicalType, String rowName) {
    final boolean nullable = logicalType.isNullable();
    Schema schema;
    switch (logicalType.getTypeRoot()) {
        case NULL:
            // The null type is returned as-is; it is never wrapped.
            return SchemaBuilder.builder().nullType();
        case BOOLEAN:
            schema = SchemaBuilder.builder().booleanType();
            break;
        case TINYINT:
        case SMALLINT:
        case INTEGER:
            // All three integral types share Avro's int.
            schema = SchemaBuilder.builder().intType();
            break;
        case BIGINT:
            schema = SchemaBuilder.builder().longType();
            break;
        case FLOAT:
            schema = SchemaBuilder.builder().floatType();
            break;
        case DOUBLE:
            schema = SchemaBuilder.builder().doubleType();
            break;
        case CHAR:
        case VARCHAR:
            schema = SchemaBuilder.builder().stringType();
            break;
        case BINARY:
        case VARBINARY:
            schema = SchemaBuilder.builder().bytesType();
            break;
        case TIMESTAMP_WITHOUT_TIME_ZONE: {
            // Timestamps are carried as a long annotated with timestamp-millis.
            final int timestampPrecision = ((TimestampType) logicalType).getPrecision();
            if (timestampPrecision > 3) {
                // NOTE(review): the message says "less than 3" although precision 3 is accepted.
                throw new IllegalArgumentException("Avro does not support TIMESTAMP type " + "with precision: " + timestampPrecision + ", it only supports precision less than 3.");
            }
            schema = LogicalTypes.timestampMillis().addToSchema(SchemaBuilder.builder().longType());
            break;
        }
        case DATE:
            // Dates are carried as an int annotated with the date logical type.
            schema = LogicalTypes.date().addToSchema(SchemaBuilder.builder().intType());
            break;
        case TIME_WITHOUT_TIME_ZONE: {
            final int timePrecision = ((TimeType) logicalType).getPrecision();
            if (timePrecision > 3) {
                // NOTE(review): the message says "less than 3" although precision 3 is accepted.
                throw new IllegalArgumentException("Avro does not support TIME type with precision: " + timePrecision + ", it only supports precision less than 3.");
            }
            // Times are carried as an int annotated with time-millis.
            schema = LogicalTypes.timeMillis().addToSchema(SchemaBuilder.builder().intType());
            break;
        }
        case DECIMAL: {
            // Decimals are carried as bytes annotated with precision and scale.
            final DecimalType decimal = (DecimalType) logicalType;
            schema = LogicalTypes.decimal(decimal.getPrecision(), decimal.getScale()).addToSchema(SchemaBuilder.builder().bytesType());
            break;
        }
        case ROW: {
            final RowType row = (RowType) logicalType;
            final List<String> names = row.getFieldNames();
            SchemaBuilder.FieldAssembler<Schema> assembler = SchemaBuilder.builder().record(rowName).fields();
            for (int pos = 0; pos < row.getFieldCount(); pos++) {
                final String name = names.get(pos);
                final LogicalType childType = row.getTypeAt(pos);
                // The child record name is prefixed with the parent's name to keep names unique.
                final SchemaBuilder.GenericDefault<Schema> pending = assembler.name(name).type(convertToSchema(childType, rowName + "_" + name));
                // Nullable fields get an explicit null default; others get none.
                assembler = childType.isNullable() ? pending.withDefault(null) : pending.noDefault();
            }
            schema = assembler.endRecord();
            break;
        }
        case MULTISET:
        case MAP:
            schema = SchemaBuilder.builder().map().values(convertToSchema(extractValueTypeToAvroMap(logicalType), rowName));
            break;
        case ARRAY:
            schema = SchemaBuilder.builder().array().items(convertToSchema(((ArrayType) logicalType).getElementType(), rowName));
            break;
        case RAW:
        case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
        default:
            throw new UnsupportedOperationException("Unsupported to derive Schema for type: " + logicalType);
    }
    if (nullable) {
        return nullableSchema(schema);
    }
    return schema;
}
Also used : Schema(org.apache.avro.Schema) AvroRowDeserializationSchema(org.apache.flink.formats.avro.AvroRowDeserializationSchema) AvroRowSerializationSchema(org.apache.flink.formats.avro.AvroRowSerializationSchema) RowType(org.apache.flink.table.types.logical.RowType) LogicalType(org.apache.flink.table.types.logical.LogicalType) ArrayType(org.apache.flink.table.types.logical.ArrayType) SchemaBuilder(org.apache.avro.SchemaBuilder) TimestampType(org.apache.flink.table.types.logical.TimestampType) DecimalType(org.apache.flink.table.types.logical.DecimalType)

Example 3 with DecimalType

use of org.apache.flink.table.types.logical.DecimalType in project flink by apache.

the class OrcSplitReaderUtil method logicalTypeToOrcType.

/**
 * Translates a Flink {@link LogicalType} into the corresponding ORC {@link TypeDescription},
 * recursing into array, map and row types.
 *
 * <p>The conversion operates on {@code type.copy(true)} rather than on the argument itself.
 *
 * <p>See {@code org.apache.flink.table.catalog.hive.util.HiveTypeUtil}.
 *
 * @param type the logical type to translate
 * @return the matching ORC type description
 * @throws UnsupportedOperationException for a VARBINARY type other than
 *     {@code DataTypes.BYTES()} and for any type root not handled here
 */
public static TypeDescription logicalTypeToOrcType(LogicalType type) {
    final LogicalType normalized = type.copy(true);
    switch (normalized.getTypeRoot()) {
        case CHAR: {
            final int charLength = ((CharType) normalized).getLength();
            return TypeDescription.createChar().withMaxLength(charLength);
        }
        case VARCHAR: {
            final int maxLength = ((VarCharType) normalized).getLength();
            // A VARCHAR of maximum length becomes an ORC string, otherwise a bounded varchar.
            return maxLength == VarCharType.MAX_LENGTH
                    ? TypeDescription.createString()
                    : TypeDescription.createVarchar().withMaxLength(maxLength);
        }
        case BOOLEAN:
            return TypeDescription.createBoolean();
        case VARBINARY:
            // Only the BYTES type is accepted among the variable-length binary types.
            if (!normalized.equals(DataTypes.BYTES().getLogicalType())) {
                throw new UnsupportedOperationException("Not support other binary type: " + normalized);
            }
            return TypeDescription.createBinary();
        case DECIMAL: {
            final DecimalType decimal = (DecimalType) normalized;
            // Scale is applied before precision, in the same order as before.
            return TypeDescription.createDecimal().withScale(decimal.getScale()).withPrecision(decimal.getPrecision());
        }
        case TINYINT:
            return TypeDescription.createByte();
        case SMALLINT:
            return TypeDescription.createShort();
        case INTEGER:
            return TypeDescription.createInt();
        case BIGINT:
            return TypeDescription.createLong();
        case FLOAT:
            return TypeDescription.createFloat();
        case DOUBLE:
            return TypeDescription.createDouble();
        case DATE:
            return TypeDescription.createDate();
        case TIMESTAMP_WITHOUT_TIME_ZONE:
            return TypeDescription.createTimestamp();
        case ARRAY: {
            final LogicalType elementType = ((ArrayType) normalized).getElementType();
            return TypeDescription.createList(logicalTypeToOrcType(elementType));
        }
        case MAP: {
            final MapType map = (MapType) normalized;
            final TypeDescription keyDescription = logicalTypeToOrcType(map.getKeyType());
            final TypeDescription valueDescription = logicalTypeToOrcType(map.getValueType());
            return TypeDescription.createMap(keyDescription, valueDescription);
        }
        case ROW: {
            final RowType row = (RowType) normalized;
            final TypeDescription result = TypeDescription.createStruct();
            // Fields are added one by one, pairing each name with its converted child type.
            for (int pos = 0; pos < row.getFieldCount(); pos++) {
                final String fieldName = row.getFieldNames().get(pos);
                final TypeDescription fieldDescription = logicalTypeToOrcType(row.getChildren().get(pos));
                result.addField(fieldName, fieldDescription);
            }
            return result;
        }
        default:
            throw new UnsupportedOperationException("Unsupported type: " + normalized);
    }
}
Also used : ArrayType(org.apache.flink.table.types.logical.ArrayType) DecimalType(org.apache.flink.table.types.logical.DecimalType) RowType(org.apache.flink.table.types.logical.RowType) TypeDescription(org.apache.orc.TypeDescription) VarCharType(org.apache.flink.table.types.logical.VarCharType) MapType(org.apache.flink.table.types.logical.MapType)

Example 4 with DecimalType

use of org.apache.flink.table.types.logical.DecimalType in project flink by apache.

the class ParquetColumnarRowSplitReaderTest method innerTestPartitionValues.

/**
 * Reads {@code testPath} through a partitioned columnar row reader and verifies every record.
 *
 * <p>28 fields named "f0".."f27" are declared and 16 of them are projected. In the projected
 * row, positions 0-2 hold the common values (checked against the running record index) and
 * positions 3-15 hold the partition values.
 *
 * @param testPath the file to read
 * @param partSpec the partition specification handed to the reader
 * @param nullPartValue when {@code true}, all partition positions are expected to be null;
 *     otherwise they are compared with fixed expected values
 * @throws IOException if reading or closing the reader fails
 */
private void innerTestPartitionValues(Path testPath, Map<String, Object> partSpec, boolean nullPartValue) throws IOException {
    LogicalType[] fieldTypes = new LogicalType[] { new VarCharType(VarCharType.MAX_LENGTH), new BooleanType(), new TinyIntType(), new SmallIntType(), new IntType(), new BigIntType(), new FloatType(), new DoubleType(), new TimestampType(9), new DecimalType(5, 0), new DecimalType(15, 0), new DecimalType(20, 0), new DecimalType(5, 0), new DecimalType(15, 0), new DecimalType(20, 0), new BooleanType(), new DateType(), new TimestampType(9), new DoubleType(), new TinyIntType(), new SmallIntType(), new IntType(), new BigIntType(), new FloatType(), new DecimalType(5, 0), new DecimalType(15, 0), new DecimalType(20, 0), new VarCharType(VarCharType.MAX_LENGTH) };
    ParquetColumnarRowSplitReader reader = ParquetSplitReaderUtil.genPartColumnarRowReader(false, true, new Configuration(), IntStream.range(0, 28).mapToObj(i -> "f" + i).toArray(String[]::new), Arrays.stream(fieldTypes).map(TypeConversions::fromLogicalToDataType).toArray(DataType[]::new), partSpec, new int[] { 7, 2, 4, 15, 19, 20, 21, 22, 23, 18, 16, 17, 24, 25, 26, 27 }, rowGroupSize, new Path(testPath.getPath()), 0, Long.MAX_VALUE);
    // Close the reader even when an assertion or a read fails, so the test does not leak it.
    try {
        int i = 0;
        while (!reader.reachedEnd()) {
            ColumnarRowData row = reader.nextRecord();
            // common values
            assertEquals(i, row.getDouble(0), 0);
            assertEquals((byte) i, row.getByte(1));
            assertEquals(i, row.getInt(2));
            // partition values
            if (nullPartValue) {
                for (int j = 3; j < 16; j++) {
                    assertTrue(row.isNullAt(j));
                }
            } else {
                assertTrue(row.getBoolean(3));
                assertEquals(9, row.getByte(4));
                assertEquals(10, row.getShort(5));
                assertEquals(11, row.getInt(6));
                assertEquals(12, row.getLong(7));
                assertEquals(13, row.getFloat(8), 0);
                assertEquals(6.6, row.getDouble(9), 0);
                assertEquals(DateTimeUtils.toInternal(Date.valueOf("2020-11-23")), row.getInt(10));
                assertEquals(LocalDateTime.of(1999, 1, 1, 1, 1), row.getTimestamp(11, 9).toLocalDateTime());
                assertEquals(DecimalData.fromBigDecimal(new BigDecimal(24), 5, 0), row.getDecimal(12, 5, 0));
                assertEquals(DecimalData.fromBigDecimal(new BigDecimal(25), 15, 0), row.getDecimal(13, 15, 0));
                assertEquals(DecimalData.fromBigDecimal(new BigDecimal(26), 20, 0), row.getDecimal(14, 20, 0));
                assertEquals("f27", row.getString(15).toString());
            }
            i++;
        }
    } finally {
        reader.close();
    }
}
Also used : Path(org.apache.flink.core.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) TypeConversions(org.apache.flink.table.types.utils.TypeConversions) BooleanType(org.apache.flink.table.types.logical.BooleanType) LogicalType(org.apache.flink.table.types.logical.LogicalType) BigIntType(org.apache.flink.table.types.logical.BigIntType) BigDecimal(java.math.BigDecimal) TinyIntType(org.apache.flink.table.types.logical.TinyIntType) TinyIntType(org.apache.flink.table.types.logical.TinyIntType) IntType(org.apache.flink.table.types.logical.IntType) BigIntType(org.apache.flink.table.types.logical.BigIntType) SmallIntType(org.apache.flink.table.types.logical.SmallIntType) FloatType(org.apache.flink.table.types.logical.FloatType) SmallIntType(org.apache.flink.table.types.logical.SmallIntType) DoubleType(org.apache.flink.table.types.logical.DoubleType) TimestampType(org.apache.flink.table.types.logical.TimestampType) DecimalType(org.apache.flink.table.types.logical.DecimalType) DataType(org.apache.flink.table.types.DataType) ColumnarRowData(org.apache.flink.table.data.columnar.ColumnarRowData) VarCharType(org.apache.flink.table.types.logical.VarCharType) DateType(org.apache.flink.table.types.logical.DateType)

Example 5 with DecimalType

use of org.apache.flink.table.types.logical.DecimalType in project flink by apache.

the class ParquetColumnarRowInputFormatTest method innerTestPartitionValues.

/**
 * Reads {@code testPath} through a partitioned {@code ParquetColumnarRowInputFormat} and
 * verifies every produced row.
 *
 * <p>28 fields named "f0".."f27" are declared and 16 of them are projected. In the projected
 * row, positions 0-2 hold the common values (checked against the running row count) and
 * positions 3-15 hold the partition values.
 *
 * @param testPath the file to read
 * @param partitionKeys the partition keys handed to the format
 * @param nullPartValue when {@code true}, all partition positions are expected to be null;
 *     otherwise they are compared with fixed expected values
 * @throws IOException if accessing the file or reading it fails
 */
private void innerTestPartitionValues(Path testPath, List<String> partitionKeys, boolean nullPartValue) throws IOException {
    final LogicalType[] fieldTypes = new LogicalType[] {
        new VarCharType(VarCharType.MAX_LENGTH),
        new BooleanType(),
        new TinyIntType(),
        new SmallIntType(),
        new IntType(),
        new BigIntType(),
        new FloatType(),
        new DoubleType(),
        new TimestampType(9),
        new DecimalType(5, 0),
        new DecimalType(15, 0),
        new DecimalType(20, 0),
        new DecimalType(5, 0),
        new DecimalType(15, 0),
        new DecimalType(20, 0),
        new BooleanType(),
        new DateType(),
        new TimestampType(9),
        new DoubleType(),
        new TinyIntType(),
        new SmallIntType(),
        new IntType(),
        new BigIntType(),
        new FloatType(),
        new DecimalType(5, 0),
        new DecimalType(15, 0),
        new DecimalType(20, 0),
        new VarCharType(VarCharType.MAX_LENGTH)
    };
    final String[] fieldNames = IntStream.range(0, 28).mapToObj(idx -> "f" + idx).toArray(String[]::new);
    final RowType rowType = RowType.of(fieldTypes, fieldNames);

    // Indices into the full row type, in the order they appear in the produced row.
    final int[] projected = { 7, 2, 4, 15, 19, 20, 21, 22, 23, 18, 16, 17, 24, 25, 26, 27 };
    final RowType producedType = new RowType(
            Arrays.stream(projected)
                    .mapToObj(idx -> rowType.getFields().get(idx))
                    .collect(Collectors.toList()));

    final ParquetColumnarRowInputFormat<FileSourceSplit> format = ParquetColumnarRowInputFormat.createPartitionedFormat(
            new Configuration(),
            producedType,
            InternalTypeInfo.of(producedType),
            partitionKeys,
            PartitionFieldExtractor.forFileSystem("my_default_value"),
            500,
            false,
            true);

    final FileStatus status = testPath.getFileSystem().getFileStatus(testPath);
    // Counts the rows seen so far; read at the start of each callback, bumped at its end.
    final AtomicInteger seen = new AtomicInteger(0);
    final FileSourceSplit split = new FileSourceSplit("id", testPath, 0, Long.MAX_VALUE, status.getModificationTime(), status.getLen());

    forEachRemaining(format.createReader(EMPTY_CONF, split), row -> {
        final int expected = seen.get();
        // common values
        assertEquals(expected, row.getDouble(0), 0);
        assertEquals((byte) expected, row.getByte(1));
        assertEquals(expected, row.getInt(2));
        // partition values
        if (!nullPartValue) {
            assertTrue(row.getBoolean(3));
            assertEquals(9, row.getByte(4));
            assertEquals(10, row.getShort(5));
            assertEquals(11, row.getInt(6));
            assertEquals(12, row.getLong(7));
            assertEquals(13, row.getFloat(8), 0);
            assertEquals(6.6, row.getDouble(9), 0);
            assertEquals(DateTimeUtils.toInternal(Date.valueOf("2020-11-23")), row.getInt(10));
            assertEquals(LocalDateTime.of(1999, 1, 1, 1, 1), row.getTimestamp(11, 9).toLocalDateTime());
            assertEquals(DecimalData.fromBigDecimal(new BigDecimal(24), 5, 0), row.getDecimal(12, 5, 0));
            assertEquals(DecimalData.fromBigDecimal(new BigDecimal(25), 15, 0), row.getDecimal(13, 15, 0));
            assertEquals(DecimalData.fromBigDecimal(new BigDecimal(26), 20, 0), row.getDecimal(14, 20, 0));
            assertEquals("f27", row.getString(15).toString());
        } else {
            for (int pos = 3; pos < 16; pos++) {
                assertTrue(row.isNullAt(pos));
            }
        }
        seen.incrementAndGet();
    });
}
Also used : FileStatus(org.apache.flink.core.fs.FileStatus) FileSourceSplit(org.apache.flink.connector.file.src.FileSourceSplit) Configuration(org.apache.hadoop.conf.Configuration) BooleanType(org.apache.flink.table.types.logical.BooleanType) LogicalType(org.apache.flink.table.types.logical.LogicalType) BigIntType(org.apache.flink.table.types.logical.BigIntType) RowType(org.apache.flink.table.types.logical.RowType) BigDecimal(java.math.BigDecimal) TinyIntType(org.apache.flink.table.types.logical.TinyIntType) TinyIntType(org.apache.flink.table.types.logical.TinyIntType) IntType(org.apache.flink.table.types.logical.IntType) BigIntType(org.apache.flink.table.types.logical.BigIntType) SmallIntType(org.apache.flink.table.types.logical.SmallIntType) FloatType(org.apache.flink.table.types.logical.FloatType) SmallIntType(org.apache.flink.table.types.logical.SmallIntType) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DoubleType(org.apache.flink.table.types.logical.DoubleType) TimestampType(org.apache.flink.table.types.logical.TimestampType) DecimalType(org.apache.flink.table.types.logical.DecimalType) VarCharType(org.apache.flink.table.types.logical.VarCharType) DateType(org.apache.flink.table.types.logical.DateType)

Aggregations

DecimalType (org.apache.flink.table.types.logical.DecimalType)27 LogicalType (org.apache.flink.table.types.logical.LogicalType)14 RowType (org.apache.flink.table.types.logical.RowType)12 TimestampType (org.apache.flink.table.types.logical.TimestampType)12 ArrayType (org.apache.flink.table.types.logical.ArrayType)8 BigIntType (org.apache.flink.table.types.logical.BigIntType)8 IntType (org.apache.flink.table.types.logical.IntType)8 BigDecimal (java.math.BigDecimal)7 LocalZonedTimestampType (org.apache.flink.table.types.logical.LocalZonedTimestampType)7 SmallIntType (org.apache.flink.table.types.logical.SmallIntType)7 TinyIntType (org.apache.flink.table.types.logical.TinyIntType)7 VarBinaryType (org.apache.flink.table.types.logical.VarBinaryType)7 VarCharType (org.apache.flink.table.types.logical.VarCharType)7 DataType (org.apache.flink.table.types.DataType)6 BooleanType (org.apache.flink.table.types.logical.BooleanType)6 DoubleType (org.apache.flink.table.types.logical.DoubleType)6 FloatType (org.apache.flink.table.types.logical.FloatType)6 LegacyTypeInformationType (org.apache.flink.table.types.logical.LegacyTypeInformationType)6 ArrayList (java.util.ArrayList)5 DateType (org.apache.flink.table.types.logical.DateType)5