Example 46 with DecimalType

Use of io.trino.spi.type.DecimalType in project trino by trinodb.

Class HiveWriteUtils, method getRowColumnInspector:

public static ObjectInspector getRowColumnInspector(Type type) {
    if (type.equals(BOOLEAN)) {
        return writableBooleanObjectInspector;
    }
    if (type.equals(BIGINT)) {
        return writableLongObjectInspector;
    }
    if (type.equals(INTEGER)) {
        return writableIntObjectInspector;
    }
    if (type.equals(SMALLINT)) {
        return writableShortObjectInspector;
    }
    if (type.equals(TINYINT)) {
        return writableByteObjectInspector;
    }
    if (type.equals(REAL)) {
        return writableFloatObjectInspector;
    }
    if (type.equals(DOUBLE)) {
        return writableDoubleObjectInspector;
    }
    if (type instanceof VarcharType) {
        VarcharType varcharType = (VarcharType) type;
        if (varcharType.isUnbounded()) {
            // Values for such columns must be stored as STRING in Hive
            return writableStringObjectInspector;
        }
        if (varcharType.getBoundedLength() <= HiveVarchar.MAX_VARCHAR_LENGTH) {
            // VARCHAR columns with the length less than or equal to 65535 are supported natively by Hive
            return getPrimitiveWritableObjectInspector(getVarcharTypeInfo(varcharType.getBoundedLength()));
        }
    }
    if (type instanceof CharType) {
        CharType charType = (CharType) type;
        int charLength = charType.getLength();
        return getPrimitiveWritableObjectInspector(getCharTypeInfo(charLength));
    }
    if (type.equals(VARBINARY)) {
        return writableBinaryObjectInspector;
    }
    if (type.equals(DATE)) {
        return writableDateObjectInspector;
    }
    if (type instanceof TimestampType) {
        return writableTimestampObjectInspector;
    }
    if (type instanceof DecimalType) {
        DecimalType decimalType = (DecimalType) type;
        return getPrimitiveWritableObjectInspector(new DecimalTypeInfo(decimalType.getPrecision(), decimalType.getScale()));
    }
    if (isArrayType(type) || isMapType(type) || isRowType(type)) {
        return getJavaObjectInspector(type);
    }
    throw new IllegalArgumentException("unsupported type: " + type);
}
Also used : DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) VarcharType(io.trino.spi.type.VarcharType) TimestampType(io.trino.spi.type.TimestampType) DecimalType(io.trino.spi.type.DecimalType) CharType(io.trino.spi.type.CharType)
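For context, a minimal sketch of how the DecimalType branch above resolves a writable inspector, assuming the Trino SPI and Hive serde jars are on the classpath; the class name and the precision/scale values are made up for illustration.

import io.trino.spi.type.DecimalType;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;

public class DecimalInspectorSketch {
    public static void main(String[] args) {
        // Trino decimal(38, 10) mapped to a Hive DecimalTypeInfo with the same precision and scale
        DecimalType decimalType = DecimalType.createDecimalType(38, 10);
        ObjectInspector inspector = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
                new DecimalTypeInfo(decimalType.getPrecision(), decimalType.getScale()));
        // Expected to print the derived Hive type name, e.g. "decimal(38,10)"
        System.out.println(inspector.getTypeName());
    }
}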

Example 47 with DecimalType

Use of io.trino.spi.type.DecimalType in project trino by trinodb.

Class HiveCoercionPolicy, method canCoerce:

private boolean canCoerce(HiveType fromHiveType, HiveType toHiveType) {
    Type fromType = typeManager.getType(fromHiveType.getTypeSignature());
    Type toType = typeManager.getType(toHiveType.getTypeSignature());
    if (fromType instanceof VarcharType) {
        return toType instanceof VarcharType || toHiveType.equals(HIVE_BYTE) || toHiveType.equals(HIVE_SHORT) || toHiveType.equals(HIVE_INT) || toHiveType.equals(HIVE_LONG);
    }
    if (toType instanceof VarcharType) {
        return fromHiveType.equals(HIVE_BYTE) || fromHiveType.equals(HIVE_SHORT) || fromHiveType.equals(HIVE_INT) || fromHiveType.equals(HIVE_LONG);
    }
    if (fromHiveType.equals(HIVE_BYTE)) {
        return toHiveType.equals(HIVE_SHORT) || toHiveType.equals(HIVE_INT) || toHiveType.equals(HIVE_LONG);
    }
    if (fromHiveType.equals(HIVE_SHORT)) {
        return toHiveType.equals(HIVE_INT) || toHiveType.equals(HIVE_LONG);
    }
    if (fromHiveType.equals(HIVE_INT)) {
        return toHiveType.equals(HIVE_LONG);
    }
    if (fromHiveType.equals(HIVE_FLOAT)) {
        return toHiveType.equals(HIVE_DOUBLE) || toType instanceof DecimalType;
    }
    if (fromHiveType.equals(HIVE_DOUBLE)) {
        return toHiveType.equals(HIVE_FLOAT) || toType instanceof DecimalType;
    }
    if (fromType instanceof DecimalType) {
        return toType instanceof DecimalType || toHiveType.equals(HIVE_FLOAT) || toHiveType.equals(HIVE_DOUBLE);
    }
    return canCoerceForList(fromHiveType, toHiveType) || canCoerceForMap(fromHiveType, toHiveType) || canCoerceForStruct(fromHiveType, toHiveType);
}
Also used : Type(io.trino.spi.type.Type) HiveType(io.trino.plugin.hive.HiveType) VarcharType(io.trino.spi.type.VarcharType) DecimalType(io.trino.spi.type.DecimalType)
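As a rough restatement of just the decimal-related rules above, here is a small sketch expressed with Trino SPI types instead of HiveType, so it only approximates the method; the class and method names are hypothetical.

import io.trino.spi.type.BooleanType;
import io.trino.spi.type.DecimalType;
import io.trino.spi.type.DoubleType;
import io.trino.spi.type.RealType;
import io.trino.spi.type.Type;

public class DecimalCoercionSketch {
    // A decimal may be coerced to another decimal or to float/double; float/double may be coerced to a decimal
    static boolean decimalCoercionAllowed(Type fromType, Type toType) {
        if (fromType instanceof DecimalType) {
            return toType instanceof DecimalType || toType instanceof RealType || toType instanceof DoubleType;
        }
        if (fromType instanceof RealType || fromType instanceof DoubleType) {
            return toType instanceof DecimalType;
        }
        return false;
    }

    public static void main(String[] args) {
        System.out.println(decimalCoercionAllowed(DecimalType.createDecimalType(10, 2), DecimalType.createDecimalType(12, 2))); // true
        System.out.println(decimalCoercionAllowed(RealType.REAL, DecimalType.createDecimalType(10, 2)));                        // true
        System.out.println(decimalCoercionAllowed(DecimalType.createDecimalType(10, 2), BooleanType.BOOLEAN));                  // false
    }
}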

Example 48 with DecimalType

Use of io.trino.spi.type.DecimalType in project trino by trinodb.

Class HiveUtil, method getPrefilledColumnValue:

public static NullableValue getPrefilledColumnValue(HiveColumnHandle columnHandle, HivePartitionKey partitionKey, Path path, OptionalInt bucketNumber, long fileSize, long fileModifiedTime, String partitionName) {
    String columnValue;
    if (partitionKey != null) {
        columnValue = partitionKey.getValue();
    } else if (isPathColumnHandle(columnHandle)) {
        columnValue = path.toString();
    } else if (isBucketColumnHandle(columnHandle)) {
        columnValue = String.valueOf(bucketNumber.getAsInt());
    } else if (isFileSizeColumnHandle(columnHandle)) {
        columnValue = String.valueOf(fileSize);
    } else if (isFileModifiedTimeColumnHandle(columnHandle)) {
        columnValue = HIVE_TIMESTAMP_PARSER.print(fileModifiedTime);
    } else if (isPartitionColumnHandle(columnHandle)) {
        columnValue = partitionName;
    } else {
        throw new TrinoException(NOT_SUPPORTED, "unsupported hidden column: " + columnHandle);
    }
    byte[] bytes = columnValue.getBytes(UTF_8);
    String name = columnHandle.getName();
    Type type = columnHandle.getType();
    if (isHiveNull(bytes)) {
        return NullableValue.asNull(type);
    } else if (type.equals(BOOLEAN)) {
        return NullableValue.of(type, booleanPartitionKey(columnValue, name));
    } else if (type.equals(BIGINT)) {
        return NullableValue.of(type, bigintPartitionKey(columnValue, name));
    } else if (type.equals(INTEGER)) {
        return NullableValue.of(type, integerPartitionKey(columnValue, name));
    } else if (type.equals(SMALLINT)) {
        return NullableValue.of(type, smallintPartitionKey(columnValue, name));
    } else if (type.equals(TINYINT)) {
        return NullableValue.of(type, tinyintPartitionKey(columnValue, name));
    } else if (type.equals(REAL)) {
        return NullableValue.of(type, floatPartitionKey(columnValue, name));
    } else if (type.equals(DOUBLE)) {
        return NullableValue.of(type, doublePartitionKey(columnValue, name));
    } else if (type instanceof VarcharType) {
        return NullableValue.of(type, varcharPartitionKey(columnValue, name, type));
    } else if (type instanceof CharType) {
        return NullableValue.of(type, charPartitionKey(columnValue, name, type));
    } else if (type.equals(DATE)) {
        return NullableValue.of(type, datePartitionKey(columnValue, name));
    } else if (type.equals(TIMESTAMP_MILLIS)) {
        return NullableValue.of(type, timestampPartitionKey(columnValue, name));
    } else if (type.equals(TIMESTAMP_TZ_MILLIS)) {
        // used for $file_modified_time
        return NullableValue.of(type, packDateTimeWithZone(floorDiv(timestampPartitionKey(columnValue, name), MICROSECONDS_PER_MILLISECOND), DateTimeZone.getDefault().getID()));
    } else if (isShortDecimal(type)) {
        return NullableValue.of(type, shortDecimalPartitionKey(columnValue, (DecimalType) type, name));
    } else if (isLongDecimal(type)) {
        return NullableValue.of(type, longDecimalPartitionKey(columnValue, (DecimalType) type, name));
    } else if (type.equals(VarbinaryType.VARBINARY)) {
        return NullableValue.of(type, utf8Slice(columnValue));
    }
    throw new TrinoException(NOT_SUPPORTED, format("Unsupported column type %s for prefilled column: %s", type.getDisplayName(), name));
}
Also used : HiveType(io.trino.plugin.hive.HiveType) DecimalType.createDecimalType(io.trino.spi.type.DecimalType.createDecimalType) MapType(io.trino.spi.type.MapType) VarbinaryType(io.trino.spi.type.VarbinaryType) CharType(io.trino.spi.type.CharType) RowType(io.trino.spi.type.RowType) ArrayType(io.trino.spi.type.ArrayType) DecimalType(io.trino.spi.type.DecimalType) Type(io.trino.spi.type.Type) VarcharType(io.trino.spi.type.VarcharType) TrinoException(io.trino.spi.TrinoException)
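A minimal sketch of the short-decimal branch above, assuming the Trino SPI is on the classpath; the partition value, precision, and scale are made up, and the real shortDecimalPartitionKey helper additionally validates the string and reports the column name on failure.

import io.trino.spi.predicate.NullableValue;
import io.trino.spi.type.DecimalType;
import io.trino.spi.type.Decimals;

import java.math.BigDecimal;

public class DecimalPartitionKeySketch {
    public static void main(String[] args) {
        // Hypothetical partition key value as stored in the metastore
        String columnValue = "123.45";
        DecimalType type = DecimalType.createDecimalType(10, 2); // short decimal: precision <= 18

        // Short decimals are carried as unscaled long values
        long unscaled = Decimals.encodeShortScaledValue(new BigDecimal(columnValue), type.getScale());
        NullableValue value = NullableValue.of(type, unscaled);
        System.out.println(value.getValue()); // 12345
    }
}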

Example 49 with DecimalType

Use of io.trino.spi.type.DecimalType in project trino by trinodb.

Class OrcTester, method preprocessWriteValueHive:

private static Object preprocessWriteValueHive(Type type, Object value) {
    if (value == null) {
        return null;
    }
    if (type.equals(BOOLEAN)) {
        return value;
    }
    if (type.equals(TINYINT)) {
        return ((Number) value).byteValue();
    }
    if (type.equals(SMALLINT)) {
        return ((Number) value).shortValue();
    }
    if (type.equals(INTEGER)) {
        return ((Number) value).intValue();
    }
    if (type.equals(BIGINT)) {
        return ((Number) value).longValue();
    }
    if (type.equals(REAL)) {
        return ((Number) value).floatValue();
    }
    if (type.equals(DOUBLE)) {
        return ((Number) value).doubleValue();
    }
    if (type instanceof VarcharType) {
        return value;
    }
    if (type instanceof CharType) {
        return new HiveChar((String) value, ((CharType) type).getLength());
    }
    if (type.equals(VARBINARY)) {
        return ((SqlVarbinary) value).getBytes();
    }
    if (type.equals(DATE)) {
        return Date.ofEpochDay(((SqlDate) value).getDays());
    }
    if (type.equals(TIMESTAMP_MILLIS) || type.equals(TIMESTAMP_MICROS) || type.equals(TIMESTAMP_NANOS)) {
        LocalDateTime dateTime = ((SqlTimestamp) value).toLocalDateTime();
        return Timestamp.ofEpochSecond(dateTime.toEpochSecond(ZoneOffset.UTC), dateTime.getNano());
    }
    if (type.equals(TIMESTAMP_TZ_MILLIS) || type.equals(TIMESTAMP_TZ_MICROS) || type.equals(TIMESTAMP_TZ_NANOS)) {
        SqlTimestampWithTimeZone timestamp = (SqlTimestampWithTimeZone) value;
        int nanosOfMilli = roundDiv(timestamp.getPicosOfMilli(), PICOSECONDS_PER_NANOSECOND);
        return Timestamp.ofEpochMilli(timestamp.getEpochMillis(), nanosOfMilli);
    }
    if (type instanceof DecimalType) {
        return HiveDecimal.create(((SqlDecimal) value).toBigDecimal());
    }
    if (type instanceof ArrayType) {
        Type elementType = type.getTypeParameters().get(0);
        return ((List<?>) value).stream().map(element -> preprocessWriteValueHive(elementType, element)).collect(toList());
    }
    if (type instanceof MapType) {
        Type keyType = type.getTypeParameters().get(0);
        Type valueType = type.getTypeParameters().get(1);
        Map<Object, Object> newMap = new HashMap<>();
        for (Entry<?, ?> entry : ((Map<?, ?>) value).entrySet()) {
            newMap.put(preprocessWriteValueHive(keyType, entry.getKey()), preprocessWriteValueHive(valueType, entry.getValue()));
        }
        return newMap;
    }
    if (type instanceof RowType) {
        List<?> fieldValues = (List<?>) value;
        List<Type> fieldTypes = type.getTypeParameters();
        List<Object> newStruct = new ArrayList<>();
        for (int fieldId = 0; fieldId < fieldValues.size(); fieldId++) {
            newStruct.add(preprocessWriteValueHive(fieldTypes.get(fieldId), fieldValues.get(fieldId)));
        }
        return newStruct;
    }
    throw new IllegalArgumentException("unsupported type: " + type);
}
Also used : LocalDateTime(java.time.LocalDateTime) ZoneOffset(java.time.ZoneOffset) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) Date(org.apache.hadoop.hive.common.type.Date) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) SqlVarbinary(io.trino.spi.type.SqlVarbinary) SqlDate(io.trino.spi.type.SqlDate) SqlDecimal(io.trino.spi.type.SqlDecimal) SqlTimestamp(io.trino.spi.type.SqlTimestamp) SqlTimestampWithTimeZone(io.trino.spi.type.SqlTimestampWithTimeZone) Timestamps.roundDiv(io.trino.spi.type.Timestamps.roundDiv) PICOSECONDS_PER_NANOSECOND(io.trino.spi.type.Timestamps.PICOSECONDS_PER_NANOSECOND) Type(io.trino.spi.type.Type) VarcharType(io.trino.spi.type.VarcharType) CharType(io.trino.spi.type.CharType) DecimalType(io.trino.spi.type.DecimalType) ArrayType(io.trino.spi.type.ArrayType) MapType(io.trino.spi.type.MapType) RowType(io.trino.spi.type.RowType) List(java.util.List) Map(java.util.Map) Entry(java.util.Map.Entry) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Collectors.toList(java.util.stream.Collectors.toList)
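A minimal sketch of the DecimalType branch above, assuming the Trino SPI and the Hive common jars are on the classpath; the class name and the decimal value are illustrative only.

import io.trino.spi.type.SqlDecimal;
import org.apache.hadoop.hive.common.type.HiveDecimal;

import java.math.BigInteger;

public class HiveDecimalConversionSketch {
    public static void main(String[] args) {
        // A Trino SqlDecimal with unscaled value 1234567 at precision 10, scale 3, i.e. 1234.567
        SqlDecimal trinoValue = new SqlDecimal(BigInteger.valueOf(1234567), 10, 3);
        // The same conversion the snippet performs before handing the value to Hive's ORC writer
        HiveDecimal hiveValue = HiveDecimal.create(trinoValue.toBigDecimal());
        System.out.println(hiveValue); // 1234.567
    }
}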

Example 50 with DecimalType

Use of io.trino.spi.type.DecimalType in project trino by trinodb.

Class DeltaLakeParquetStatisticsUtils, method getMax:

private static Optional<Object> getMax(Type type, Statistics<?> statistics) {
    if (statistics.genericGetMax() == null) {
        return Optional.empty();
    }
    if (type.equals(DateType.DATE)) {
        checkArgument(statistics instanceof IntStatistics, "Column with DATE type contained invalid statistics: %s", statistics);
        IntStatistics intStatistics = (IntStatistics) statistics;
        LocalDate date = LocalDate.ofEpochDay(intStatistics.genericGetMax());
        return Optional.of(date.format(ISO_LOCAL_DATE));
    }
    if (type instanceof TimestampWithTimeZoneType) {
        if (statistics instanceof LongStatistics) {
            Instant ts = Instant.ofEpochMilli(((LongStatistics) statistics).genericGetMax());
            return Optional.of(ISO_INSTANT.format(ZonedDateTime.ofInstant(ts, UTC)));
        } else if (statistics instanceof BinaryStatistics) {
            DecodedTimestamp decodedTimestamp = decodeInt96Timestamp(((BinaryStatistics) statistics).genericGetMax());
            Instant ts = Instant.ofEpochSecond(decodedTimestamp.getEpochSeconds(), decodedTimestamp.getNanosOfSecond());
            ZonedDateTime zonedDateTime = ZonedDateTime.ofInstant(ts, UTC);
            ZonedDateTime truncatedToMillis = zonedDateTime.truncatedTo(MILLIS);
            if (truncatedToMillis.isBefore(zonedDateTime)) {
                truncatedToMillis = truncatedToMillis.plus(1, MILLIS);
            }
            return Optional.of(ISO_INSTANT.format(truncatedToMillis));
        }
    }
    if (type.equals(BIGINT) || type.equals(TINYINT) || type.equals(SMALLINT) || type.equals(INTEGER)) {
        checkArgument(statistics instanceof IntStatistics || statistics instanceof LongStatistics, "Column with %s type contained invalid statistics: %s", type, statistics);
        return Optional.of(statistics.genericGetMax());
    }
    if (type.equals(REAL)) {
        checkArgument(statistics instanceof FloatStatistics, "Column with REAL type contained invalid statistics: %s", statistics);
        return Optional.of(((FloatStatistics) statistics).genericGetMax());
    }
    if (type.equals(DOUBLE)) {
        checkArgument(statistics instanceof DoubleStatistics, "Column with DOUBLE type contained invalid statistics: %s", statistics);
        return Optional.of(((DoubleStatistics) statistics).genericGetMax());
    }
    if (type instanceof DecimalType) {
        LogicalTypeAnnotation logicalType = statistics.type().getLogicalTypeAnnotation();
        checkArgument(logicalType instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation, "DECIMAL column had invalid Parquet Logical Type: %s", logicalType);
        int scale = ((LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) logicalType).getScale();
        BigDecimal max;
        if (statistics instanceof IntStatistics) {
            max = BigDecimal.valueOf(((IntStatistics) statistics).getMax()).movePointLeft(scale);
            return Optional.of(max.toPlainString());
        } else if (statistics instanceof LongStatistics) {
            max = BigDecimal.valueOf(((LongStatistics) statistics).getMax()).movePointLeft(scale);
            return Optional.of(max.toPlainString());
        } else if (statistics instanceof BinaryStatistics) {
            BigInteger base = new BigInteger(((BinaryStatistics) statistics).genericGetMax().getBytes());
            max = new BigDecimal(base, scale);
            return Optional.of(max.toPlainString());
        }
    }
    if (type instanceof VarcharType) {
        return Optional.of(new String(((BinaryStatistics) statistics).genericGetMax().getBytes(), UTF_8));
    }
    if (type.equals(BOOLEAN)) {
        // Boolean columns do not collect min/max stats
        return Optional.empty();
    }
    LOG.warn("Accumulating Parquet statistics with Trino type: %s and Parquet statistics of type: %s is not supported", type, statistics);
    return Optional.empty();
}
Also used : FloatStatistics(org.apache.parquet.column.statistics.FloatStatistics) VarcharType(io.trino.spi.type.VarcharType) Instant(java.time.Instant) BinaryStatistics(org.apache.parquet.column.statistics.BinaryStatistics) LocalDate(java.time.LocalDate) BigDecimal(java.math.BigDecimal) DecodedTimestamp(io.trino.plugin.base.type.DecodedTimestamp) LongStatistics(org.apache.parquet.column.statistics.LongStatistics) IntStatistics(org.apache.parquet.column.statistics.IntStatistics) ZonedDateTime(java.time.ZonedDateTime) LogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation) DoubleStatistics(org.apache.parquet.column.statistics.DoubleStatistics) TimestampWithTimeZoneType(io.trino.spi.type.TimestampWithTimeZoneType) DecimalType(io.trino.spi.type.DecimalType) BigInteger(java.math.BigInteger)
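To isolate just the arithmetic of the BinaryStatistics branch above, here is a dependency-free sketch; the byte array and scale are fabricated, whereas in the real method the unscaled bytes come from the Parquet statistics and the scale from the column's DecimalLogicalTypeAnnotation.

import java.math.BigDecimal;
import java.math.BigInteger;

public class DecimalStatsSketch {
    public static void main(String[] args) {
        // Parquet stores binary/fixed-length decimals as big-endian two's-complement unscaled values
        byte[] maxBytes = BigInteger.valueOf(1234567).toByteArray();
        int scale = 2;

        // Reattach the scale to the unscaled integer, as the snippet does for the decimal max
        BigDecimal max = new BigDecimal(new BigInteger(maxBytes), scale);
        System.out.println(max.toPlainString()); // 12345.67
    }
}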

Aggregations

DecimalType (io.trino.spi.type.DecimalType): 79 usages
VarcharType (io.trino.spi.type.VarcharType): 50 usages
CharType (io.trino.spi.type.CharType): 39 usages
TrinoException (io.trino.spi.TrinoException): 31 usages
Type (io.trino.spi.type.Type): 29 usages
TimestampType (io.trino.spi.type.TimestampType): 23 usages
DecimalType.createDecimalType (io.trino.spi.type.DecimalType.createDecimalType): 22 usages
ArrayType (io.trino.spi.type.ArrayType): 21 usages
BigDecimal (java.math.BigDecimal): 19 usages
Int128 (io.trino.spi.type.Int128): 16 usages
BigInteger (java.math.BigInteger): 15 usages
Block (io.trino.spi.block.Block): 14 usages
Slice (io.airlift.slice.Slice): 13 usages
TimeType (io.trino.spi.type.TimeType): 13 usages
TimestampWithTimeZoneType (io.trino.spi.type.TimestampWithTimeZoneType): 13 usages
VarcharType.createUnboundedVarcharType (io.trino.spi.type.VarcharType.createUnboundedVarcharType): 13 usages
MapType (io.trino.spi.type.MapType): 12 usages
ArrayList (java.util.ArrayList): 12 usages
ImmutableList (com.google.common.collect.ImmutableList): 11 usages
RowType (io.trino.spi.type.RowType): 11 usages