use of org.apache.flink.table.types.logical.TimestampType in project flink by apache.
the class AvroSchemaConverter method convertToSchema.
/**
* Converts Flink SQL {@link LogicalType} (can be nested) into an Avro schema.
*
* <p>The "{rowName}_" is used as the nested row type name prefix in order to generate the right
* schema. Nested record type that only differs with type name is still compatible.
*
* @param logicalType logical type
* @param rowName the record name
* @return Avro's {@link Schema} matching this logical type.
*/
public static Schema convertToSchema(LogicalType logicalType, String rowName) {
int precision;
boolean nullable = logicalType.isNullable();
switch(logicalType.getTypeRoot()) {
case NULL:
return SchemaBuilder.builder().nullType();
case BOOLEAN:
Schema bool = SchemaBuilder.builder().booleanType();
return nullable ? nullableSchema(bool) : bool;
case TINYINT:
case SMALLINT:
case INTEGER:
Schema integer = SchemaBuilder.builder().intType();
return nullable ? nullableSchema(integer) : integer;
case BIGINT:
Schema bigint = SchemaBuilder.builder().longType();
return nullable ? nullableSchema(bigint) : bigint;
case FLOAT:
Schema f = SchemaBuilder.builder().floatType();
return nullable ? nullableSchema(f) : f;
case DOUBLE:
Schema d = SchemaBuilder.builder().doubleType();
return nullable ? nullableSchema(d) : d;
case CHAR:
case VARCHAR:
Schema str = SchemaBuilder.builder().stringType();
return nullable ? nullableSchema(str) : str;
case BINARY:
case VARBINARY:
Schema binary = SchemaBuilder.builder().bytesType();
return nullable ? nullableSchema(binary) : binary;
case TIMESTAMP_WITHOUT_TIME_ZONE:
// use long to represents Timestamp
final TimestampType timestampType = (TimestampType) logicalType;
precision = timestampType.getPrecision();
org.apache.avro.LogicalType avroLogicalType;
if (precision <= 3) {
avroLogicalType = LogicalTypes.timestampMillis();
} else {
throw new IllegalArgumentException("Avro does not support TIMESTAMP type " + "with precision: " + precision + ", it only supports precision less than 3.");
}
Schema timestamp = avroLogicalType.addToSchema(SchemaBuilder.builder().longType());
return nullable ? nullableSchema(timestamp) : timestamp;
case DATE:
// use int to represents Date
Schema date = LogicalTypes.date().addToSchema(SchemaBuilder.builder().intType());
return nullable ? nullableSchema(date) : date;
case TIME_WITHOUT_TIME_ZONE:
precision = ((TimeType) logicalType).getPrecision();
if (precision > 3) {
throw new IllegalArgumentException("Avro does not support TIME type with precision: " + precision + ", it only supports precision less than 3.");
}
// use int to represents Time, we only support millisecond when deserialization
Schema time = LogicalTypes.timeMillis().addToSchema(SchemaBuilder.builder().intType());
return nullable ? nullableSchema(time) : time;
case DECIMAL:
DecimalType decimalType = (DecimalType) logicalType;
// store BigDecimal as byte[]
Schema decimal = LogicalTypes.decimal(decimalType.getPrecision(), decimalType.getScale()).addToSchema(SchemaBuilder.builder().bytesType());
return nullable ? nullableSchema(decimal) : decimal;
case ROW:
RowType rowType = (RowType) logicalType;
List<String> fieldNames = rowType.getFieldNames();
// we have to make sure the record name is different in a Schema
SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.builder().record(rowName).fields();
for (int i = 0; i < rowType.getFieldCount(); i++) {
String fieldName = fieldNames.get(i);
LogicalType fieldType = rowType.getTypeAt(i);
SchemaBuilder.GenericDefault<Schema> fieldBuilder = builder.name(fieldName).type(convertToSchema(fieldType, rowName + "_" + fieldName));
if (fieldType.isNullable()) {
builder = fieldBuilder.withDefault(null);
} else {
builder = fieldBuilder.noDefault();
}
}
Schema record = builder.endRecord();
return nullable ? nullableSchema(record) : record;
case MULTISET:
case MAP:
Schema map = SchemaBuilder.builder().map().values(convertToSchema(extractValueTypeToAvroMap(logicalType), rowName));
return nullable ? nullableSchema(map) : map;
case ARRAY:
ArrayType arrayType = (ArrayType) logicalType;
Schema array = SchemaBuilder.builder().array().items(convertToSchema(arrayType.getElementType(), rowName));
return nullable ? nullableSchema(array) : array;
case RAW:
case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
default:
throw new UnsupportedOperationException("Unsupported to derive Schema for type: " + logicalType);
}
}
use of org.apache.flink.table.types.logical.TimestampType in project flink by apache.
the class ParquetColumnarRowSplitReaderTest method innerTestPartitionValues.
private void innerTestPartitionValues(Path testPath, Map<String, Object> partSpec, boolean nullPartValue) throws IOException {
LogicalType[] fieldTypes = new LogicalType[] { new VarCharType(VarCharType.MAX_LENGTH), new BooleanType(), new TinyIntType(), new SmallIntType(), new IntType(), new BigIntType(), new FloatType(), new DoubleType(), new TimestampType(9), new DecimalType(5, 0), new DecimalType(15, 0), new DecimalType(20, 0), new DecimalType(5, 0), new DecimalType(15, 0), new DecimalType(20, 0), new BooleanType(), new DateType(), new TimestampType(9), new DoubleType(), new TinyIntType(), new SmallIntType(), new IntType(), new BigIntType(), new FloatType(), new DecimalType(5, 0), new DecimalType(15, 0), new DecimalType(20, 0), new VarCharType(VarCharType.MAX_LENGTH) };
ParquetColumnarRowSplitReader reader = ParquetSplitReaderUtil.genPartColumnarRowReader(false, true, new Configuration(), IntStream.range(0, 28).mapToObj(i -> "f" + i).toArray(String[]::new), Arrays.stream(fieldTypes).map(TypeConversions::fromLogicalToDataType).toArray(DataType[]::new), partSpec, new int[] { 7, 2, 4, 15, 19, 20, 21, 22, 23, 18, 16, 17, 24, 25, 26, 27 }, rowGroupSize, new Path(testPath.getPath()), 0, Long.MAX_VALUE);
int i = 0;
while (!reader.reachedEnd()) {
ColumnarRowData row = reader.nextRecord();
// common values
assertEquals(i, row.getDouble(0), 0);
assertEquals((byte) i, row.getByte(1));
assertEquals(i, row.getInt(2));
// partition values
if (nullPartValue) {
for (int j = 3; j < 16; j++) {
assertTrue(row.isNullAt(j));
}
} else {
assertTrue(row.getBoolean(3));
assertEquals(9, row.getByte(4));
assertEquals(10, row.getShort(5));
assertEquals(11, row.getInt(6));
assertEquals(12, row.getLong(7));
assertEquals(13, row.getFloat(8), 0);
assertEquals(6.6, row.getDouble(9), 0);
assertEquals(DateTimeUtils.toInternal(Date.valueOf("2020-11-23")), row.getInt(10));
assertEquals(LocalDateTime.of(1999, 1, 1, 1, 1), row.getTimestamp(11, 9).toLocalDateTime());
assertEquals(DecimalData.fromBigDecimal(new BigDecimal(24), 5, 0), row.getDecimal(12, 5, 0));
assertEquals(DecimalData.fromBigDecimal(new BigDecimal(25), 15, 0), row.getDecimal(13, 15, 0));
assertEquals(DecimalData.fromBigDecimal(new BigDecimal(26), 20, 0), row.getDecimal(14, 20, 0));
assertEquals("f27", row.getString(15).toString());
}
i++;
}
reader.close();
}
use of org.apache.flink.table.types.logical.TimestampType in project flink by apache.
the class ParquetColumnarRowInputFormatTest method innerTestPartitionValues.
private void innerTestPartitionValues(Path testPath, List<String> partitionKeys, boolean nullPartValue) throws IOException {
LogicalType[] fieldTypes = new LogicalType[] { new VarCharType(VarCharType.MAX_LENGTH), new BooleanType(), new TinyIntType(), new SmallIntType(), new IntType(), new BigIntType(), new FloatType(), new DoubleType(), new TimestampType(9), new DecimalType(5, 0), new DecimalType(15, 0), new DecimalType(20, 0), new DecimalType(5, 0), new DecimalType(15, 0), new DecimalType(20, 0), new BooleanType(), new DateType(), new TimestampType(9), new DoubleType(), new TinyIntType(), new SmallIntType(), new IntType(), new BigIntType(), new FloatType(), new DecimalType(5, 0), new DecimalType(15, 0), new DecimalType(20, 0), new VarCharType(VarCharType.MAX_LENGTH) };
RowType rowType = RowType.of(fieldTypes, IntStream.range(0, 28).mapToObj(i -> "f" + i).toArray(String[]::new));
int[] projected = new int[] { 7, 2, 4, 15, 19, 20, 21, 22, 23, 18, 16, 17, 24, 25, 26, 27 };
RowType producedType = new RowType(Arrays.stream(projected).mapToObj(i -> rowType.getFields().get(i)).collect(Collectors.toList()));
ParquetColumnarRowInputFormat<FileSourceSplit> format = ParquetColumnarRowInputFormat.createPartitionedFormat(new Configuration(), producedType, InternalTypeInfo.of(producedType), partitionKeys, PartitionFieldExtractor.forFileSystem("my_default_value"), 500, false, true);
FileStatus fileStatus = testPath.getFileSystem().getFileStatus(testPath);
AtomicInteger cnt = new AtomicInteger(0);
forEachRemaining(format.createReader(EMPTY_CONF, new FileSourceSplit("id", testPath, 0, Long.MAX_VALUE, fileStatus.getModificationTime(), fileStatus.getLen())), row -> {
int i = cnt.get();
// common values
assertEquals(i, row.getDouble(0), 0);
assertEquals((byte) i, row.getByte(1));
assertEquals(i, row.getInt(2));
// partition values
if (nullPartValue) {
for (int j = 3; j < 16; j++) {
assertTrue(row.isNullAt(j));
}
} else {
assertTrue(row.getBoolean(3));
assertEquals(9, row.getByte(4));
assertEquals(10, row.getShort(5));
assertEquals(11, row.getInt(6));
assertEquals(12, row.getLong(7));
assertEquals(13, row.getFloat(8), 0);
assertEquals(6.6, row.getDouble(9), 0);
assertEquals(DateTimeUtils.toInternal(Date.valueOf("2020-11-23")), row.getInt(10));
assertEquals(LocalDateTime.of(1999, 1, 1, 1, 1), row.getTimestamp(11, 9).toLocalDateTime());
assertEquals(DecimalData.fromBigDecimal(new BigDecimal(24), 5, 0), row.getDecimal(12, 5, 0));
assertEquals(DecimalData.fromBigDecimal(new BigDecimal(25), 15, 0), row.getDecimal(13, 15, 0));
assertEquals(DecimalData.fromBigDecimal(new BigDecimal(26), 20, 0), row.getDecimal(14, 20, 0));
assertEquals("f27", row.getString(15).toString());
}
cnt.incrementAndGet();
});
}
use of org.apache.flink.table.types.logical.TimestampType in project flink by apache.
the class ParquetColumnarRowInputFormatTest method testReadingSplit.
private int testReadingSplit(List<Integer> expected, Path path, long splitStart, long splitLength, long seekToRow) throws IOException {
LogicalType[] fieldTypes = new LogicalType[] { new VarCharType(VarCharType.MAX_LENGTH), new BooleanType(), new TinyIntType(), new SmallIntType(), new IntType(), new BigIntType(), new FloatType(), new DoubleType(), new TimestampType(9), new DecimalType(5, 0), new DecimalType(15, 0), new DecimalType(20, 0), new DecimalType(5, 0), new DecimalType(15, 0), new DecimalType(20, 0) };
ParquetColumnarRowInputFormat format = new ParquetColumnarRowInputFormat(new Configuration(), RowType.of(fieldTypes, new String[] { "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13", "f14" }), null, 500, false, true);
// validate java serialization
try {
InstantiationUtil.clone(format);
} catch (ClassNotFoundException e) {
throw new IOException(e);
}
FileStatus fileStatus = path.getFileSystem().getFileStatus(path);
BulkFormat.Reader<RowData> reader = format.restoreReader(EMPTY_CONF, new FileSourceSplit("id", path, splitStart, splitLength, fileStatus.getModificationTime(), fileStatus.getLen(), new String[0], new CheckpointedPosition(CheckpointedPosition.NO_OFFSET, seekToRow)));
AtomicInteger cnt = new AtomicInteger(0);
final AtomicReference<RowData> previousRow = new AtomicReference<>();
forEachRemaining(reader, row -> {
if (previousRow.get() == null) {
previousRow.set(row);
} else {
// ParquetColumnarRowInputFormat should only have one row instance.
assertSame(previousRow.get(), row);
}
Integer v = expected.get(cnt.get());
if (v == null) {
assertTrue(row.isNullAt(0));
assertTrue(row.isNullAt(1));
assertTrue(row.isNullAt(2));
assertTrue(row.isNullAt(3));
assertTrue(row.isNullAt(4));
assertTrue(row.isNullAt(5));
assertTrue(row.isNullAt(6));
assertTrue(row.isNullAt(7));
assertTrue(row.isNullAt(8));
assertTrue(row.isNullAt(9));
assertTrue(row.isNullAt(10));
assertTrue(row.isNullAt(11));
assertTrue(row.isNullAt(12));
assertTrue(row.isNullAt(13));
assertTrue(row.isNullAt(14));
} else {
assertEquals("" + v, row.getString(0).toString());
assertEquals(v % 2 == 0, row.getBoolean(1));
assertEquals(v.byteValue(), row.getByte(2));
assertEquals(v.shortValue(), row.getShort(3));
assertEquals(v.intValue(), row.getInt(4));
assertEquals(v.longValue(), row.getLong(5));
assertEquals(v.floatValue(), row.getFloat(6), 0);
assertEquals(v.doubleValue(), row.getDouble(7), 0);
assertEquals(toDateTime(v), row.getTimestamp(8, 9).toLocalDateTime());
assertEquals(BigDecimal.valueOf(v), row.getDecimal(9, 5, 0).toBigDecimal());
assertEquals(BigDecimal.valueOf(v), row.getDecimal(10, 15, 0).toBigDecimal());
assertEquals(BigDecimal.valueOf(v), row.getDecimal(11, 20, 0).toBigDecimal());
assertEquals(BigDecimal.valueOf(v), row.getDecimal(12, 5, 0).toBigDecimal());
assertEquals(BigDecimal.valueOf(v), row.getDecimal(13, 15, 0).toBigDecimal());
assertEquals(BigDecimal.valueOf(v), row.getDecimal(14, 20, 0).toBigDecimal());
}
cnt.incrementAndGet();
});
return cnt.get();
}
use of org.apache.flink.table.types.logical.TimestampType in project flink by apache.
the class ParquetRowDataWriter method createWriter.
private FieldWriter createWriter(LogicalType t, Type type) {
if (type.isPrimitive()) {
switch(t.getTypeRoot()) {
case CHAR:
case VARCHAR:
return new StringWriter();
case BOOLEAN:
return new BooleanWriter();
case BINARY:
case VARBINARY:
return new BinaryWriter();
case DECIMAL:
DecimalType decimalType = (DecimalType) t;
return createDecimalWriter(decimalType.getPrecision(), decimalType.getScale());
case TINYINT:
return new ByteWriter();
case SMALLINT:
return new ShortWriter();
case DATE:
case TIME_WITHOUT_TIME_ZONE:
case INTEGER:
return new IntWriter();
case BIGINT:
return new LongWriter();
case FLOAT:
return new FloatWriter();
case DOUBLE:
return new DoubleWriter();
case TIMESTAMP_WITHOUT_TIME_ZONE:
TimestampType timestampType = (TimestampType) t;
return new TimestampWriter(timestampType.getPrecision());
case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
LocalZonedTimestampType localZonedTimestampType = (LocalZonedTimestampType) t;
return new TimestampWriter(localZonedTimestampType.getPrecision());
default:
throw new UnsupportedOperationException("Unsupported type: " + type);
}
} else {
GroupType groupType = type.asGroupType();
LogicalTypeAnnotation logicalType = type.getLogicalTypeAnnotation();
if (t instanceof ArrayType && logicalType instanceof LogicalTypeAnnotation.ListLogicalTypeAnnotation) {
return new ArrayWriter(((ArrayType) t).getElementType(), groupType);
} else if (t instanceof MapType && logicalType instanceof LogicalTypeAnnotation.MapLogicalTypeAnnotation) {
return new MapWriter(((MapType) t).getKeyType(), ((MapType) t).getValueType(), groupType);
} else if (t instanceof RowType && type instanceof GroupType) {
return new RowWriter((RowType) t, groupType);
} else {
throw new UnsupportedOperationException("Unsupported type: " + type);
}
}
}
Aggregations