Use of org.apache.flink.table.types.logical.DecimalType in project flink by apache.
From the class ParquetSplitReaderUtil, method createVectorFromConstant:
public static ColumnVector createVectorFromConstant(LogicalType type, Object value, int batchSize) {
    switch (type.getTypeRoot()) {
        case CHAR:
        case VARCHAR:
        case BINARY:
        case VARBINARY:
            HeapBytesVector bsv = new HeapBytesVector(batchSize);
            if (value == null) {
                bsv.fillWithNulls();
            } else {
                bsv.fill(value instanceof byte[]
                        ? (byte[]) value
                        : value.toString().getBytes(StandardCharsets.UTF_8));
            }
            return bsv;
        case BOOLEAN:
            HeapBooleanVector bv = new HeapBooleanVector(batchSize);
            if (value == null) {
                bv.fillWithNulls();
            } else {
                bv.fill((boolean) value);
            }
            return bv;
        case TINYINT:
            HeapByteVector byteVector = new HeapByteVector(batchSize);
            if (value == null) {
                byteVector.fillWithNulls();
            } else {
                byteVector.fill(((Number) value).byteValue());
            }
            return byteVector;
        case SMALLINT:
            HeapShortVector sv = new HeapShortVector(batchSize);
            if (value == null) {
                sv.fillWithNulls();
            } else {
                sv.fill(((Number) value).shortValue());
            }
            return sv;
        case INTEGER:
            HeapIntVector iv = new HeapIntVector(batchSize);
            if (value == null) {
                iv.fillWithNulls();
            } else {
                iv.fill(((Number) value).intValue());
            }
            return iv;
        case BIGINT:
            HeapLongVector lv = new HeapLongVector(batchSize);
            if (value == null) {
                lv.fillWithNulls();
            } else {
                lv.fill(((Number) value).longValue());
            }
            return lv;
        case DECIMAL:
            DecimalType decimalType = (DecimalType) type;
            int precision = decimalType.getPrecision();
            int scale = decimalType.getScale();
            DecimalData decimal =
                    value == null
                            ? null
                            : Preconditions.checkNotNull(
                                    DecimalData.fromBigDecimal((BigDecimal) value, precision, scale));
            // Pick the narrowest physical representation the precision allows:
            // int32, int64, or variable-length unscaled bytes.
            ColumnVector internalVector;
            if (ParquetSchemaConverter.is32BitDecimal(precision)) {
                internalVector =
                        createVectorFromConstant(
                                new IntType(),
                                decimal == null ? null : (int) decimal.toUnscaledLong(),
                                batchSize);
            } else if (ParquetSchemaConverter.is64BitDecimal(precision)) {
                internalVector =
                        createVectorFromConstant(
                                new BigIntType(),
                                decimal == null ? null : decimal.toUnscaledLong(),
                                batchSize);
            } else {
                internalVector =
                        createVectorFromConstant(
                                new VarBinaryType(),
                                decimal == null ? null : decimal.toUnscaledBytes(),
                                batchSize);
            }
            // Wrap the physical vector so callers still read DecimalData from it.
            return new ParquetDecimalVector(internalVector);
        case FLOAT:
            HeapFloatVector fv = new HeapFloatVector(batchSize);
            if (value == null) {
                fv.fillWithNulls();
            } else {
                fv.fill(((Number) value).floatValue());
            }
            return fv;
        case DOUBLE:
            HeapDoubleVector dv = new HeapDoubleVector(batchSize);
            if (value == null) {
                dv.fillWithNulls();
            } else {
                dv.fill(((Number) value).doubleValue());
            }
            return dv;
        case DATE:
            // Normalize java.time.LocalDate to java.sql.Date, then store epoch days as int.
            if (value instanceof LocalDate) {
                value = Date.valueOf((LocalDate) value);
            }
            return createVectorFromConstant(
                    new IntType(), value == null ? null : toInternal((Date) value), batchSize);
        case TIMESTAMP_WITHOUT_TIME_ZONE:
            HeapTimestampVector tv = new HeapTimestampVector(batchSize);
            if (value == null) {
                tv.fillWithNulls();
            } else {
                tv.fill(TimestampData.fromLocalDateTime((LocalDateTime) value));
            }
            return tv;
        default:
            throw new UnsupportedOperationException("Unsupported type: " + type);
    }
}
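A minimal usage sketch for the decimal branch; the constant value and batch size here are made up, and it assumes Flink's usual cutoffs of 9 digits for 32-bit and 18 digits for 64-bit decimal storage:

// Hypothetical call site: a constant column for a DECIMAL(10, 3) partition value.
// Precision 10 exceeds the 32-bit range but fits 64 bits, so the returned
// ParquetDecimalVector wraps a HeapLongVector of unscaled longs (1234567123L here).
ColumnVector vector =
        ParquetSplitReaderUtil.createVectorFromConstant(
                new DecimalType(10, 3), new BigDecimal("1234567.123"), 2048);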
Use of org.apache.flink.table.types.logical.DecimalType in project flink by apache.
From the class ParquetSchemaConverter, method convertToParquetType:
private static Type convertToParquetType(String name, LogicalType type, Type.Repetition repetition) {
    switch (type.getTypeRoot()) {
        case CHAR:
        case VARCHAR:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.BINARY, repetition)
                    .as(OriginalType.UTF8)
                    .named(name);
        case BOOLEAN:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.BOOLEAN, repetition).named(name);
        case BINARY:
        case VARBINARY:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.BINARY, repetition).named(name);
        case DECIMAL:
            int precision = ((DecimalType) type).getPrecision();
            int scale = ((DecimalType) type).getScale();
            // Use the smallest fixed-length byte array that can hold the precision.
            int numBytes = computeMinBytesForDecimalPrecision(precision);
            return Types.primitive(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, repetition)
                    .precision(precision)
                    .scale(scale)
                    .length(numBytes)
                    .as(OriginalType.DECIMAL)
                    .named(name);
        case TINYINT:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
                    .as(OriginalType.INT_8)
                    .named(name);
        case SMALLINT:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
                    .as(OriginalType.INT_16)
                    .named(name);
        case INTEGER:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition).named(name);
        case BIGINT:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT64, repetition).named(name);
        case FLOAT:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.FLOAT, repetition).named(name);
        case DOUBLE:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.DOUBLE, repetition).named(name);
        case DATE:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
                    .as(OriginalType.DATE)
                    .named(name);
        case TIME_WITHOUT_TIME_ZONE:
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, repetition)
                    .as(OriginalType.TIME_MILLIS)
                    .named(name);
        case TIMESTAMP_WITHOUT_TIME_ZONE:
        case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
            // Both timestamp variants share the legacy INT96 encoding.
            return Types.primitive(PrimitiveType.PrimitiveTypeName.INT96, repetition).named(name);
        case ARRAY:
            ArrayType arrayType = (ArrayType) type;
            return ConversionPatterns.listOfElements(
                    repetition, name, convertToParquetType(LIST_ELEMENT_NAME, arrayType.getElementType()));
        case MAP:
            MapType mapType = (MapType) type;
            return ConversionPatterns.mapType(
                    repetition,
                    name,
                    MAP_REPEATED_NAME,
                    convertToParquetType("key", mapType.getKeyType()),
                    convertToParquetType("value", mapType.getValueType()));
        case ROW:
            RowType rowType = (RowType) type;
            return new GroupType(repetition, name, convertToParquetTypes(rowType));
        default:
            throw new UnsupportedOperationException("Unsupported type: " + type);
    }
}
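An illustrative trace of the DECIMAL branch (the field name and repetition are hypothetical, and the method is private, so this stands for a call within the class): 4 bytes of two's-complement storage only cover 9 digits of signed precision, while 5 bytes cover 11, so precision 10 needs 5 bytes.

// computeMinBytesForDecimalPrecision(10) yields 5 under that rule.
Type parquetType =
        convertToParquetType("price", new DecimalType(10, 3), Type.Repetition.OPTIONAL);
// Rendered Parquet schema, roughly: optional fixed_len_byte_array(5) price (DECIMAL(10,3));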
Use of org.apache.flink.table.types.logical.DecimalType in project flink by apache.
From the class ArrowReaderWriterTest, method init:
@BeforeClass
public static void init() {
    fieldTypes.add(new TinyIntType());
    fieldTypes.add(new SmallIntType());
    fieldTypes.add(new IntType());
    fieldTypes.add(new BigIntType());
    fieldTypes.add(new BooleanType());
    fieldTypes.add(new FloatType());
    fieldTypes.add(new DoubleType());
    fieldTypes.add(new VarCharType());
    fieldTypes.add(new VarBinaryType());
    fieldTypes.add(new DecimalType(10, 3));
    fieldTypes.add(new DateType());
    fieldTypes.add(new TimeType(0));
    fieldTypes.add(new TimeType(2));
    fieldTypes.add(new TimeType(4));
    fieldTypes.add(new TimeType(8));
    fieldTypes.add(new LocalZonedTimestampType(0));
    fieldTypes.add(new LocalZonedTimestampType(2));
    fieldTypes.add(new LocalZonedTimestampType(4));
    fieldTypes.add(new LocalZonedTimestampType(8));
    fieldTypes.add(new TimestampType(0));
    fieldTypes.add(new TimestampType(2));
    fieldTypes.add(new TimestampType(4));
    fieldTypes.add(new TimestampType(8));
    fieldTypes.add(new ArrayType(new VarCharType()));
    rowFieldType =
            new RowType(
                    Arrays.asList(
                            new RowType.RowField("a", new IntType()),
                            new RowType.RowField("b", new VarCharType()),
                            new RowType.RowField("c", new ArrayType(new VarCharType())),
                            new RowType.RowField("d", new TimestampType(2)),
                            new RowType.RowField(
                                    "e",
                                    new RowType(
                                            Arrays.asList(
                                                    new RowType.RowField("e1", new IntType()),
                                                    new RowType.RowField("e2", new VarCharType()))))));
    fieldTypes.add(rowFieldType);
    List<RowType.RowField> rowFields = new ArrayList<>();
    for (int i = 0; i < fieldTypes.size(); i++) {
        rowFields.add(new RowType.RowField("f" + i, fieldTypes.get(i)));
    }
    rowType = new RowType(rowFields);
    allocator = ArrowUtils.getRootAllocator().newChildAllocator("stdout", 0, Long.MAX_VALUE);
}
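The DECIMAL(10, 3) field above constrains the test data; a hedged sketch of producing a matching value (the literal is illustrative):

// DecimalData.fromBigDecimal returns null when the value does not fit
// precision 10 / scale 3, so oversized literals would show up as nulls.
DecimalData d = DecimalData.fromBigDecimal(new BigDecimal("1234567.123"), 10, 3);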
Use of org.apache.flink.table.types.logical.DecimalType in project flink by apache.
From the class ArrowUtilsTest, method init:
@BeforeClass
public static void init() {
    testFields = new ArrayList<>();
    testFields.add(Tuple5.of("f1", new TinyIntType(), new ArrowType.Int(8, true), TinyIntWriter.TinyIntWriterForRow.class, ArrowTinyIntColumnVector.class));
    testFields.add(Tuple5.of("f2", new SmallIntType(), new ArrowType.Int(8 * 2, true), SmallIntWriter.SmallIntWriterForRow.class, ArrowSmallIntColumnVector.class));
    testFields.add(Tuple5.of("f3", new IntType(), new ArrowType.Int(8 * 4, true), IntWriter.IntWriterForRow.class, ArrowIntColumnVector.class));
    testFields.add(Tuple5.of("f4", new BigIntType(), new ArrowType.Int(8 * 8, true), BigIntWriter.BigIntWriterForRow.class, ArrowBigIntColumnVector.class));
    testFields.add(Tuple5.of("f5", new BooleanType(), new ArrowType.Bool(), BooleanWriter.BooleanWriterForRow.class, ArrowBooleanColumnVector.class));
    testFields.add(Tuple5.of("f6", new FloatType(), new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), FloatWriter.FloatWriterForRow.class, ArrowFloatColumnVector.class));
    testFields.add(Tuple5.of("f7", new DoubleType(), new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), DoubleWriter.DoubleWriterForRow.class, ArrowDoubleColumnVector.class));
    testFields.add(Tuple5.of("f8", new VarCharType(), ArrowType.Utf8.INSTANCE, VarCharWriter.VarCharWriterForRow.class, ArrowVarCharColumnVector.class));
    testFields.add(Tuple5.of("f9", new VarBinaryType(), ArrowType.Binary.INSTANCE, VarBinaryWriter.VarBinaryWriterForRow.class, ArrowVarBinaryColumnVector.class));
    testFields.add(Tuple5.of("f10", new DecimalType(10, 3), new ArrowType.Decimal(10, 3), DecimalWriter.DecimalWriterForRow.class, ArrowDecimalColumnVector.class));
    testFields.add(Tuple5.of("f11", new DateType(), new ArrowType.Date(DateUnit.DAY), DateWriter.DateWriterForRow.class, ArrowDateColumnVector.class));
    testFields.add(Tuple5.of("f13", new TimeType(0), new ArrowType.Time(TimeUnit.SECOND, 32), TimeWriter.TimeWriterForRow.class, ArrowTimeColumnVector.class));
    testFields.add(Tuple5.of("f14", new TimeType(2), new ArrowType.Time(TimeUnit.MILLISECOND, 32), TimeWriter.TimeWriterForRow.class, ArrowTimeColumnVector.class));
    testFields.add(Tuple5.of("f15", new TimeType(4), new ArrowType.Time(TimeUnit.MICROSECOND, 64), TimeWriter.TimeWriterForRow.class, ArrowTimeColumnVector.class));
    testFields.add(Tuple5.of("f16", new TimeType(8), new ArrowType.Time(TimeUnit.NANOSECOND, 64), TimeWriter.TimeWriterForRow.class, ArrowTimeColumnVector.class));
    testFields.add(Tuple5.of("f17", new LocalZonedTimestampType(0), new ArrowType.Timestamp(TimeUnit.SECOND, null), TimestampWriter.TimestampWriterForRow.class, ArrowTimestampColumnVector.class));
    testFields.add(Tuple5.of("f18", new LocalZonedTimestampType(2), new ArrowType.Timestamp(TimeUnit.MILLISECOND, null), TimestampWriter.TimestampWriterForRow.class, ArrowTimestampColumnVector.class));
    testFields.add(Tuple5.of("f19", new LocalZonedTimestampType(4), new ArrowType.Timestamp(TimeUnit.MICROSECOND, null), TimestampWriter.TimestampWriterForRow.class, ArrowTimestampColumnVector.class));
    testFields.add(Tuple5.of("f20", new LocalZonedTimestampType(8), new ArrowType.Timestamp(TimeUnit.NANOSECOND, null), TimestampWriter.TimestampWriterForRow.class, ArrowTimestampColumnVector.class));
    testFields.add(Tuple5.of("f21", new TimestampType(0), new ArrowType.Timestamp(TimeUnit.SECOND, null), TimestampWriter.TimestampWriterForRow.class, ArrowTimestampColumnVector.class));
    testFields.add(Tuple5.of("f22", new TimestampType(2), new ArrowType.Timestamp(TimeUnit.MILLISECOND, null), TimestampWriter.TimestampWriterForRow.class, ArrowTimestampColumnVector.class));
    testFields.add(Tuple5.of("f23", new TimestampType(4), new ArrowType.Timestamp(TimeUnit.MICROSECOND, null), TimestampWriter.TimestampWriterForRow.class, ArrowTimestampColumnVector.class));
    testFields.add(Tuple5.of("f24", new TimestampType(8), new ArrowType.Timestamp(TimeUnit.NANOSECOND, null), TimestampWriter.TimestampWriterForRow.class, ArrowTimestampColumnVector.class));
    testFields.add(Tuple5.of("f25", new ArrayType(new VarCharType()), ArrowType.List.INSTANCE, ArrayWriter.ArrayWriterForRow.class, ArrowArrayColumnVector.class));
    RowType rowFieldType =
            new RowType(
                    Arrays.asList(
                            new RowType.RowField("a", new IntType()),
                            new RowType.RowField("b", new VarCharType()),
                            new RowType.RowField("c", new ArrayType(new VarCharType())),
                            new RowType.RowField("d", new TimestampType(2)),
                            new RowType.RowField(
                                    "e",
                                    new RowType(
                                            Arrays.asList(
                                                    new RowType.RowField("e1", new IntType()),
                                                    new RowType.RowField("e2", new VarCharType()))))));
    testFields.add(Tuple5.of("f26", rowFieldType, ArrowType.Struct.INSTANCE, RowWriter.RowWriterForRow.class, ArrowRowColumnVector.class));
    List<RowType.RowField> rowFields = new ArrayList<>();
    for (Tuple5<String, LogicalType, ArrowType, Class<?>, Class<?>> field : testFields) {
        rowFields.add(new RowType.RowField(field.f0, field.f1));
    }
    rowType = new RowType(rowFields);
    allocator = ArrowUtils.getRootAllocator().newChildAllocator("stdout", 0, Long.MAX_VALUE);
}
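To see the f10 mapping end to end, a sketch assuming ArrowUtils.toArrowSchema(RowType) as the logical-to-Arrow schema bridge (the lookup by field name is illustrative):

// DECIMAL(10, 3) should surface as ArrowType.Decimal with the same precision/scale.
Schema schema = ArrowUtils.toArrowSchema(rowType);
ArrowType.Decimal f10Type = (ArrowType.Decimal) schema.findField("f10").getType();
// f10Type.getPrecision() == 10, f10Type.getScale() == 3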
Use of org.apache.flink.table.types.logical.DecimalType in project flink by apache.
From the class TableauStyle, method columnWidthsByType:
// Package-private and private static methods to deal with the complexity of string
// writing and formatting.

/**
 * Try to derive column widths based on the column types. If the result set is not small enough
 * to be stored in Java heap memory, we can't determine column widths based on column values.
 */
static int[] columnWidthsByType(
        List<Column> columns, int maxColumnWidth, boolean printNullAsEmpty, boolean printRowKind) {
    // fill width with field names first
    final int[] colWidths = columns.stream().mapToInt(col -> col.getName().length()).toArray();
    // determine proper column width based on types
    for (int i = 0; i < columns.size(); ++i) {
        LogicalType type = columns.get(i).getDataType().getLogicalType();
        int len;
        switch (type.getTypeRoot()) {
            case TINYINT:
                // extra for negative value
                len = TinyIntType.PRECISION + 1;
                break;
            case SMALLINT:
                // extra for negative value
                len = SmallIntType.PRECISION + 1;
                break;
            case INTEGER:
                // extra for negative value
                len = IntType.PRECISION + 1;
                break;
            case BIGINT:
                // extra for negative value
                len = BigIntType.PRECISION + 1;
                break;
            case DECIMAL:
                // extra for negative value and decimal point
                len = ((DecimalType) type).getPrecision() + 2;
                break;
            case BOOLEAN:
                // "true" or "false"
                len = 5;
                break;
            case DATE:
                // e.g. 9999-12-31
                len = 10;
                break;
            case TIME_WITHOUT_TIME_ZONE:
                int precision = ((TimeType) type).getPrecision();
                // 23:59:59[.999999999]
                len = precision == 0 ? 8 : precision + 9;
                break;
            case TIMESTAMP_WITHOUT_TIME_ZONE:
                precision = ((TimestampType) type).getPrecision();
                len = timestampTypeColumnWidth(precision);
                break;
            case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
                precision = ((LocalZonedTimestampType) type).getPrecision();
                len = timestampTypeColumnWidth(precision);
                break;
            default:
                len = maxColumnWidth;
        }
        // adjust column width with potential null values
        len = printNullAsEmpty ? len : Math.max(len, PrintStyle.NULL_VALUE.length());
        colWidths[i] = Math.max(colWidths[i], len);
    }
    // add an extra column for row kind if necessary
    if (printRowKind) {
        final int[] ret = new int[columns.size() + 1];
        ret[0] = ROW_KIND_COLUMN.length();
        System.arraycopy(colWidths, 0, ret, 1, columns.size());
        return ret;
    } else {
        return colWidths;
    }
}
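A worked example of the DECIMAL branch (the type here is illustrative): the width budget is the precision plus one character for a leading minus sign and one for the decimal point.

// DECIMAL(10, 3): 10 (precision) + 2 = 12 characters, exactly the width of the
// widest rendered value, e.g. "-1234567.890".
int decimalWidth = new DecimalType(10, 3).getPrecision() + 2; // 12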