Search in sources:

Example 1 with TIMESTAMP_MILLIS

Use of io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS in project trino by trinodb.

The class ClickHouseClient, method toColumnMapping:

@Override
public Optional<ColumnMapping> toColumnMapping(ConnectorSession session, Connection connection, JdbcTypeHandle typeHandle) {
    String jdbcTypeName = typeHandle.getJdbcTypeName().orElseThrow(() -> new TrinoException(JDBC_ERROR, "Type name is missing: " + typeHandle));
    Optional<ColumnMapping> mapping = getForcedMappingToVarchar(typeHandle);
    if (mapping.isPresent()) {
        return mapping;
    }
    ClickHouseColumn column = ClickHouseColumn.of("", jdbcTypeName);
    ClickHouseDataType columnDataType = column.getDataType();
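    // Trino has no unsigned integer types, so each ClickHouse unsigned type maps to the
    // next wider signed Trino type: UInt8 -> SMALLINT, UInt16 -> INTEGER, UInt32 -> BIGINT,
    // and UInt64 -> DECIMAL(20, 0) (UINT64_TYPE).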
    switch (columnDataType) {
        case UInt8:
            return Optional.of(ColumnMapping.longMapping(SMALLINT, ResultSet::getShort, uInt8WriteFunction()));
        case UInt16:
            return Optional.of(ColumnMapping.longMapping(INTEGER, ResultSet::getInt, uInt16WriteFunction()));
        case UInt32:
            return Optional.of(ColumnMapping.longMapping(BIGINT, ResultSet::getLong, uInt32WriteFunction()));
        case UInt64:
            return Optional.of(ColumnMapping.objectMapping(UINT64_TYPE, longDecimalReadFunction(UINT64_TYPE, UNNECESSARY), uInt64WriteFunction()));
        case IPv4:
            return Optional.of(ipAddressColumnMapping("IPv4StringToNum(?)"));
        case IPv6:
            return Optional.of(ipAddressColumnMapping("IPv6StringToNum(?)"));
        case Enum8:
        case Enum16:
            return Optional.of(ColumnMapping.sliceMapping(
                    createUnboundedVarcharType(),
                    varcharReadFunction(createUnboundedVarcharType()),
                    varcharWriteFunction(),
                    // TODO (https://github.com/trinodb/trino/issues/7100) Currently pushdown would not work and may require a custom bind expression
                    DISABLE_PUSHDOWN));
        // FixedString(n)
        case FixedString:
        case String:
            if (isMapStringAsVarchar(session)) {
                return Optional.of(ColumnMapping.sliceMapping(createUnboundedVarcharType(), varcharReadFunction(createUnboundedVarcharType()), varcharWriteFunction(), DISABLE_PUSHDOWN));
            }
            // TODO (https://github.com/trinodb/trino/issues/7100) test & enable predicate pushdown
            return Optional.of(varbinaryColumnMapping());
        case UUID:
            return Optional.of(uuidColumnMapping());
        default:
    }
    switch (typeHandle.getJdbcType()) {
        case Types.TINYINT:
            return Optional.of(tinyintColumnMapping());
        case Types.SMALLINT:
            return Optional.of(smallintColumnMapping());
        case Types.INTEGER:
            return Optional.of(integerColumnMapping());
        case Types.BIGINT:
            return Optional.of(bigintColumnMapping());
        case Types.FLOAT:
        case Types.REAL:
            return Optional.of(ColumnMapping.longMapping(REAL, (resultSet, columnIndex) -> floatToRawIntBits(resultSet.getFloat(columnIndex)), realWriteFunction(), DISABLE_PUSHDOWN));
        case Types.DOUBLE:
            return Optional.of(doubleColumnMapping());
        case Types.DECIMAL:
            int decimalDigits = typeHandle.getRequiredDecimalDigits();
            int precision = typeHandle.getRequiredColumnSize();
            ColumnMapping decimalColumnMapping;
            if (getDecimalRounding(session) == ALLOW_OVERFLOW && precision > Decimals.MAX_PRECISION) {
                int scale = Math.min(decimalDigits, getDecimalDefaultScale(session));
                decimalColumnMapping = decimalColumnMapping(createDecimalType(Decimals.MAX_PRECISION, scale), getDecimalRoundingMode(session));
            } else {
                decimalColumnMapping = decimalColumnMapping(createDecimalType(precision, max(decimalDigits, 0)));
            }
            return Optional.of(new ColumnMapping(
                    decimalColumnMapping.getType(),
                    decimalColumnMapping.getReadFunction(),
                    decimalColumnMapping.getWriteFunction(),
                    // TODO (https://github.com/trinodb/trino/issues/7100) fix, enable and test decimal pushdown
                    DISABLE_PUSHDOWN));
        case Types.DATE:
            return Optional.of(dateColumnMappingUsingLocalDate());
        case Types.TIMESTAMP:
            if (columnDataType == ClickHouseDataType.DateTime) {
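                // ClickHouse DateTime stores values with second precision, so it maps to
                // Trino's TIMESTAMP(0) (TIMESTAMP_SECONDS) rather than the TIMESTAMP_MILLIS default below.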
                verify(typeHandle.getRequiredDecimalDigits() == 0, "Expected 0 as timestamp precision, but got %s", typeHandle.getRequiredDecimalDigits());
                return Optional.of(ColumnMapping.longMapping(TIMESTAMP_SECONDS, timestampReadFunction(TIMESTAMP_SECONDS), timestampSecondsWriteFunction()));
            }
            // TODO (https://github.com/trinodb/trino/issues/10537) Add support for Datetime64 type
            return Optional.of(timestampColumnMappingUsingSqlTimestampWithRounding(TIMESTAMP_MILLIS));
    }
    return Optional.empty();
}
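
Trino has no unsigned types, so the UInt write paths above must range-check values that arrive through wider signed types. A minimal sketch of what such a write function could look like, assuming the LongWriteFunction interface from the imports; the bounds check and setShort call are illustrative, not the exact Trino implementation:

private static LongWriteFunction uInt8WriteFunctionSketch() {
    return (statement, index, value) -> {
        // ClickHouse UInt8 holds 0..255; values outside that range cannot be stored.
        if (value < 0 || value > 255) {
            throw new IllegalArgumentException("value out of range for UInt8: " + value);
        }
        statement.setShort(index, (short) value);
    };
}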

Example 2 with TIMESTAMP_MILLIS

Use of io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS in project trino by trinodb.

The class RcFileTester, method preprocessWriteValueOld:

private static Object preprocessWriteValueOld(Format format, Type type, Object value) {
    if (value == null) {
        return null;
    }
    if (type.equals(BOOLEAN)) {
        return value;
    }
    if (type.equals(TINYINT)) {
        return ((Number) value).byteValue();
    }
    if (type.equals(SMALLINT)) {
        return ((Number) value).shortValue();
    }
    if (type.equals(INTEGER)) {
        return ((Number) value).intValue();
    }
    if (type.equals(BIGINT)) {
        return ((Number) value).longValue();
    }
    if (type.equals(REAL)) {
        return ((Number) value).floatValue();
    }
    if (type.equals(DOUBLE)) {
        return ((Number) value).doubleValue();
    }
    if (type instanceof VarcharType) {
        return value;
    }
    if (type.equals(VARBINARY)) {
        return ((SqlVarbinary) value).getBytes();
    }
    if (type.equals(DATE)) {
        return Date.ofEpochDay(((SqlDate) value).getDays());
    }
    if (type.equals(TIMESTAMP_MILLIS)) {
        long millis = ((SqlTimestamp) value).getMillis();
        if (format == Format.BINARY) {
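            // Joda's convertLocalToUTC treats millis as wall-clock time in HIVE_STORAGE_TIME_ZONE
            // and returns the matching UTC instant, compensating for the binary serde's time zone handling.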
            millis = HIVE_STORAGE_TIME_ZONE.convertLocalToUTC(millis, false);
        }
        return Timestamp.ofEpochMilli(millis);
    }
    if (type instanceof DecimalType) {
        return HiveDecimal.create(((SqlDecimal) value).toBigDecimal());
    }
    if (type instanceof ArrayType) {
        Type elementType = type.getTypeParameters().get(0);
        return ((List<?>) value).stream().map(element -> preprocessWriteValueOld(format, elementType, element)).collect(toList());
    }
    if (type instanceof MapType) {
        Type keyType = type.getTypeParameters().get(0);
        Type valueType = type.getTypeParameters().get(1);
        Map<Object, Object> newMap = new HashMap<>();
        for (Entry<?, ?> entry : ((Map<?, ?>) value).entrySet()) {
            newMap.put(preprocessWriteValueOld(format, keyType, entry.getKey()), preprocessWriteValueOld(format, valueType, entry.getValue()));
        }
        return newMap;
    }
    if (type instanceof RowType) {
        List<?> fieldValues = (List<?>) value;
        List<Type> fieldTypes = type.getTypeParameters();
        List<Object> newStruct = new ArrayList<>();
        for (int fieldId = 0; fieldId < fieldValues.size(); fieldId++) {
            newStruct.add(preprocessWriteValueOld(format, fieldTypes.get(fieldId), fieldValues.get(fieldId)));
        }
        return newStruct;
    }
    throw new IllegalArgumentException("unsupported type: " + type);
}
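
The only format-sensitive branch above is TIMESTAMP_MILLIS. A minimal standalone sketch of the local-to-UTC adjustment it performs, assuming an arbitrary storage zone (the zone id and millis value here are illustrative, not necessarily what RcFileTester configures):

import org.joda.time.DateTimeZone;

public class LocalToUtcSketch {
    public static void main(String[] args) {
        // Illustrative storage zone standing in for HIVE_STORAGE_TIME_ZONE.
        DateTimeZone storageZone = DateTimeZone.forID("America/Bahia_Banderas");
        long localMillis = 1_500_000_000_000L; // a wall-clock reading expressed as epoch millis
        // convertLocalToUTC interprets localMillis as wall-clock time in storageZone and
        // returns the UTC instant that has that wall-clock reading there.
        long utcMillis = storageZone.convertLocalToUTC(localMillis, false);
        System.out.printf("shifted by %d ms%n", utcMillis - localMillis);
    }
}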

Example 3 with TIMESTAMP_MILLIS

Use of io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS in project trino by trinodb.

The class TupleDomainOrcPredicate, method getDomain:

@VisibleForTesting
public static Domain getDomain(Type type, long rowCount, ColumnStatistics columnStatistics) {
    if (rowCount == 0) {
        return Domain.none(type);
    }
    if (columnStatistics == null) {
        return Domain.all(type);
    }
    if (columnStatistics.hasNumberOfValues() && columnStatistics.getNumberOfValues() == 0) {
        return Domain.onlyNull(type);
    }
    boolean hasNullValue = columnStatistics.getNumberOfValues() != rowCount;
    if (type instanceof TimeType && columnStatistics.getIntegerStatistics() != null) {
        // This is the representation of TIME used by Iceberg
        return createDomain(type, hasNullValue, columnStatistics.getIntegerStatistics(), value -> ((long) value) * Timestamps.PICOSECONDS_PER_MICROSECOND);
    }
    if (type.getJavaType() == boolean.class && columnStatistics.getBooleanStatistics() != null) {
        BooleanStatistics booleanStatistics = columnStatistics.getBooleanStatistics();
        boolean hasTrueValues = (booleanStatistics.getTrueValueCount() != 0);
        boolean hasFalseValues = (columnStatistics.getNumberOfValues() != booleanStatistics.getTrueValueCount());
        if (hasTrueValues && hasFalseValues) {
            return Domain.all(BOOLEAN);
        }
        if (hasTrueValues) {
            return Domain.create(ValueSet.of(BOOLEAN, true), hasNullValue);
        }
        if (hasFalseValues) {
            return Domain.create(ValueSet.of(BOOLEAN, false), hasNullValue);
        }
    } else if (isShortDecimal(type) && columnStatistics.getDecimalStatistics() != null) {
        return createDomain(type, hasNullValue, columnStatistics.getDecimalStatistics(), value -> rescale(value, (DecimalType) type).unscaledValue().longValue());
    } else if (isLongDecimal(type) && columnStatistics.getDecimalStatistics() != null) {
        return createDomain(type, hasNullValue, columnStatistics.getDecimalStatistics(), value -> Int128.valueOf(rescale(value, (DecimalType) type).unscaledValue()));
    } else if (type instanceof CharType && columnStatistics.getStringStatistics() != null) {
        return createDomain(type, hasNullValue, columnStatistics.getStringStatistics(), value -> truncateToLengthAndTrimSpaces(value, type));
    } else if (type instanceof VarcharType && columnStatistics.getStringStatistics() != null) {
        return createDomain(type, hasNullValue, columnStatistics.getStringStatistics());
    } else if (type instanceof DateType && columnStatistics.getDateStatistics() != null) {
        return createDomain(type, hasNullValue, columnStatistics.getDateStatistics(), value -> (long) value);
    } else if ((type.equals(TIMESTAMP_MILLIS) || type.equals(TIMESTAMP_MICROS)) && columnStatistics.getTimestampStatistics() != null) {
        // ORC timestamp statistics are truncated to millisecond precision, so the upper bound of
        // the domain we create must be adjusted accordingly, to include the rounded timestamp.
        return createDomain(type, hasNullValue, columnStatistics.getTimestampStatistics(), min -> min * MICROSECONDS_PER_MILLISECOND, max -> (max + 1) * MICROSECONDS_PER_MILLISECOND);
    } else if (type.equals(TIMESTAMP_NANOS) && columnStatistics.getTimestampStatistics() != null) {
        return createDomain(type, hasNullValue, columnStatistics.getTimestampStatistics(), min -> new LongTimestamp(min * MICROSECONDS_PER_MILLISECOND, 0), max -> new LongTimestamp((max + 1) * MICROSECONDS_PER_MILLISECOND, 0));
    } else if (type.equals(TIMESTAMP_TZ_MILLIS) && columnStatistics.getTimestampStatistics() != null) {
        return createDomain(type, hasNullValue, columnStatistics.getTimestampStatistics(), value -> packDateTimeWithZone(value, UTC_KEY));
    } else if (type.equals(TIMESTAMP_TZ_MICROS) && (columnStatistics.getTimestampStatistics() != null)) {
        return createDomain(type, hasNullValue, columnStatistics.getTimestampStatistics(), min -> LongTimestampWithTimeZone.fromEpochMillisAndFraction(min, 0, UTC_KEY), max -> LongTimestampWithTimeZone.fromEpochMillisAndFraction(max, 999_000_000, UTC_KEY));
    } else if (type.equals(TIMESTAMP_TZ_NANOS) && columnStatistics.getTimestampStatistics() != null) {
        return createDomain(type, hasNullValue, columnStatistics.getTimestampStatistics(), min -> LongTimestampWithTimeZone.fromEpochMillisAndFraction(min, 0, UTC_KEY), max -> LongTimestampWithTimeZone.fromEpochMillisAndFraction(max, 999_999_000, UTC_KEY));
    } else if (type.getJavaType() == long.class && columnStatistics.getIntegerStatistics() != null) {
        return createDomain(type, hasNullValue, columnStatistics.getIntegerStatistics());
    } else if (type.getJavaType() == double.class && columnStatistics.getDoubleStatistics() != null) {
        return createDomain(type, hasNullValue, columnStatistics.getDoubleStatistics());
    } else if (REAL.equals(type) && columnStatistics.getDoubleStatistics() != null) {
        return createDomain(type, hasNullValue, columnStatistics.getDoubleStatistics(), value -> (long) floatToRawIntBits(value.floatValue()));
    }
    return Domain.create(ValueSet.all(type), hasNullValue);
}
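
The millisecond-to-microsecond bound widening above can be checked with a small worked example (values illustrative; MICROSECONDS_PER_MILLISECOND is 1000):

public class BoundWideningSketch {
    public static void main(String[] args) {
        // Statistics are kept at millisecond precision, so a stored value of
        // 123_456 microseconds is recorded with max = 123 ms.
        long statisticsMaxMillis = 123;
        long valueMicros = 123_456;
        // Widening to (max + 1) * 1000 keeps the truncated value inside the domain;
        // max * 1000 = 123_000 would wrongly exclude it.
        long domainUpperMicros = (statisticsMaxMillis + 1) * 1_000;
        System.out.println(valueMicros <= domainUpperMicros); // true
    }
}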

Example 4 with TIMESTAMP_MILLIS

Use of io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS in project trino by trinodb.

The class OrcTester, method preprocessWriteValueHive:

private static Object preprocessWriteValueHive(Type type, Object value) {
    if (value == null) {
        return null;
    }
    if (type.equals(BOOLEAN)) {
        return value;
    }
    if (type.equals(TINYINT)) {
        return ((Number) value).byteValue();
    }
    if (type.equals(SMALLINT)) {
        return ((Number) value).shortValue();
    }
    if (type.equals(INTEGER)) {
        return ((Number) value).intValue();
    }
    if (type.equals(BIGINT)) {
        return ((Number) value).longValue();
    }
    if (type.equals(REAL)) {
        return ((Number) value).floatValue();
    }
    if (type.equals(DOUBLE)) {
        return ((Number) value).doubleValue();
    }
    if (type instanceof VarcharType) {
        return value;
    }
    if (type instanceof CharType) {
        return new HiveChar((String) value, ((CharType) type).getLength());
    }
    if (type.equals(VARBINARY)) {
        return ((SqlVarbinary) value).getBytes();
    }
    if (type.equals(DATE)) {
        return Date.ofEpochDay(((SqlDate) value).getDays());
    }
    if (type.equals(TIMESTAMP_MILLIS) || type.equals(TIMESTAMP_MICROS) || type.equals(TIMESTAMP_NANOS)) {
        LocalDateTime dateTime = ((SqlTimestamp) value).toLocalDateTime();
        return Timestamp.ofEpochSecond(dateTime.toEpochSecond(ZoneOffset.UTC), dateTime.getNano());
    }
    if (type.equals(TIMESTAMP_TZ_MILLIS) || type.equals(TIMESTAMP_TZ_MICROS) || type.equals(TIMESTAMP_TZ_NANOS)) {
        SqlTimestampWithTimeZone timestamp = (SqlTimestampWithTimeZone) value;
        int nanosOfMilli = roundDiv(timestamp.getPicosOfMilli(), PICOSECONDS_PER_NANOSECOND);
        return Timestamp.ofEpochMilli(timestamp.getEpochMillis(), nanosOfMilli);
    }
    if (type instanceof DecimalType) {
        return HiveDecimal.create(((SqlDecimal) value).toBigDecimal());
    }
    if (type instanceof ArrayType) {
        Type elementType = type.getTypeParameters().get(0);
        return ((List<?>) value).stream().map(element -> preprocessWriteValueHive(elementType, element)).collect(toList());
    }
    if (type instanceof MapType) {
        Type keyType = type.getTypeParameters().get(0);
        Type valueType = type.getTypeParameters().get(1);
        Map<Object, Object> newMap = new HashMap<>();
        for (Entry<?, ?> entry : ((Map<?, ?>) value).entrySet()) {
            newMap.put(preprocessWriteValueHive(keyType, entry.getKey()), preprocessWriteValueHive(valueType, entry.getValue()));
        }
        return newMap;
    }
    if (type instanceof RowType) {
        List<?> fieldValues = (List<?>) value;
        List<Type> fieldTypes = type.getTypeParameters();
        List<Object> newStruct = new ArrayList<>();
        for (int fieldId = 0; fieldId < fieldValues.size(); fieldId++) {
            newStruct.add(preprocessWriteValueHive(fieldTypes.get(fieldId), fieldValues.get(fieldId)));
        }
        return newStruct;
    }
    throw new IllegalArgumentException("unsupported type: " + type);
}
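
The picosecond-to-nanosecond rounding in the timestamp-with-time-zone branch deserves a worked example. roundDiv is Trino's rounding division from io.trino.spi.type.Timestamps; the arithmetic below mirrors it for non-negative input:

public class PicosRoundingSketch {
    public static void main(String[] args) {
        // getPicosOfMilli() yields the sub-millisecond fraction in picoseconds;
        // PICOSECONDS_PER_NANOSECOND is 1000.
        long picosOfMilli = 1_234_567;
        // Half-up division: 1_234_567 picos rounds to 1_235 nanos, not the truncated 1_234.
        long nanosOfMilli = (picosOfMilli + 500) / 1_000;
        System.out.println(nanosOfMilli); // 1235
    }
}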

Example 5 with TIMESTAMP_MILLIS

Use of io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS in project trino by trinodb.

The class DeltaLakePageSink, method createParquetFileWriter:

private FileWriter createParquetFileWriter(Path path) {
    ParquetWriterOptions parquetWriterOptions = ParquetWriterOptions.builder().setMaxBlockSize(getParquetWriterBlockSize(session)).setMaxPageSize(getParquetWriterPageSize(session)).build();
    CompressionCodecName compressionCodecName = getCompressionCodec(session).getParquetCompressionCodec();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), path, conf);
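        // If a later step fails, the rollback action removes the partially written file.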
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };
        List<Type> parquetTypes = dataColumnTypes.stream().map(type -> {
            if (type instanceof TimestampWithTimeZoneType) {
                verify(((TimestampWithTimeZoneType) type).getPrecision() == 3, "Unsupported type: %s", type);
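                // The Parquet writer has no timestamp-with-time-zone type here, so a
                // millisecond-precision timestamp with time zone is remapped to TIMESTAMP_MILLIS for encoding.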
                return TIMESTAMP_MILLIS;
            }
            return type;
        }).collect(toImmutableList());
        // We use identity column mapping; the input page already contains only data columns,
        // per DeltaLakePageSink.getDataPage().
        int[] identityMapping = new int[dataColumnTypes.size()];
        for (int i = 0; i < identityMapping.length; ++i) {
            identityMapping[i] = i;
        }
        ParquetSchemaConverter schemaConverter = new ParquetSchemaConverter(parquetTypes, dataColumnNames);
        return new ParquetFileWriter(fileSystem.create(path), rollbackAction, parquetTypes, schemaConverter.getMessageType(), schemaConverter.getPrimitiveTypes(), parquetWriterOptions, identityMapping, compressionCodecName, trinoVersion);
    } catch (IOException e) {
        throw new TrinoException(DELTA_LAKE_BAD_WRITE, "Error creating Parquet file", e);
    }
}
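
As an aside, the identity-mapping loop above has a compact equivalent using java.util.stream.IntStream; a minimal sketch over the same dataColumnTypes list:

int[] identityMapping = IntStream.range(0, dataColumnTypes.size()).toArray();

Either form yields [0, 1, ..., n-1], telling ParquetFileWriter that input page channel i feeds output column i.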

Aggregations

ImmutableList (com.google.common.collect.ImmutableList): 7
TIMESTAMP_MILLIS (io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS): 7
Type (io.trino.spi.type.Type): 7
List (java.util.List): 7
Optional (java.util.Optional): 7
Slice (io.airlift.slice.Slice): 6
BIGINT (io.trino.spi.type.BigintType.BIGINT): 6
BOOLEAN (io.trino.spi.type.BooleanType.BOOLEAN): 6
DecimalType (io.trino.spi.type.DecimalType): 6
DOUBLE (io.trino.spi.type.DoubleType.DOUBLE): 6
Map (java.util.Map): 6
ImmutableSet (com.google.common.collect.ImmutableSet): 5
DATE (io.trino.spi.type.DateType.DATE): 5
INTEGER (io.trino.spi.type.IntegerType.INTEGER): 5
ArrayList (java.util.ArrayList): 5
ImmutableMap (com.google.common.collect.ImmutableMap): 4
CharType (io.trino.spi.type.CharType): 4
Decimals (io.trino.spi.type.Decimals): 4
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 3
Page (io.trino.spi.Page): 3