Search in sources :

Example 1 with TINYINT

use of io.trino.spi.type.TinyintType.TINYINT in project trino by trinodb.

the class AbstractTestHiveFileFormats method checkCursor.

protected void checkCursor(RecordCursor cursor, List<TestColumn> testColumns, int rowCount) {
    List<Type> types = testColumns.stream().map(column -> column.getObjectInspector().getTypeName()).map(type -> HiveType.valueOf(type).getType(TESTING_TYPE_MANAGER)).collect(toImmutableList());
    Map<Type, MethodHandle> distinctFromOperators = types.stream().distinct().collect(toImmutableMap(identity(), HiveTestUtils::distinctFromOperator));
    for (int row = 0; row < rowCount; row++) {
        assertTrue(cursor.advanceNextPosition());
        for (int i = 0, testColumnsSize = testColumns.size(); i < testColumnsSize; i++) {
            TestColumn testColumn = testColumns.get(i);
            Type type = types.get(i);
            Object fieldFromCursor = getFieldFromCursor(cursor, type, i);
            if (fieldFromCursor == null) {
                assertEquals(null, testColumn.getExpectedValue(), "Expected null for column " + testColumn.getName());
            } else if (type instanceof DecimalType) {
                DecimalType decimalType = (DecimalType) type;
                fieldFromCursor = new BigDecimal((BigInteger) fieldFromCursor, decimalType.getScale());
                assertEquals(fieldFromCursor, testColumn.getExpectedValue(), "Wrong value for column " + testColumn.getName());
            } else if (testColumn.getObjectInspector().getTypeName().equals("float")) {
                assertEquals((float) fieldFromCursor, (float) testColumn.getExpectedValue(), (float) EPSILON);
            } else if (testColumn.getObjectInspector().getTypeName().equals("double")) {
                assertEquals((double) fieldFromCursor, (double) testColumn.getExpectedValue(), EPSILON);
            } else if (testColumn.getObjectInspector().getTypeName().equals("tinyint")) {
                assertEquals(((Number) fieldFromCursor).byteValue(), testColumn.getExpectedValue());
            } else if (testColumn.getObjectInspector().getTypeName().equals("smallint")) {
                assertEquals(((Number) fieldFromCursor).shortValue(), testColumn.getExpectedValue());
            } else if (testColumn.getObjectInspector().getTypeName().equals("int")) {
                assertEquals(((Number) fieldFromCursor).intValue(), testColumn.getExpectedValue());
            } else if (testColumn.getObjectInspector().getCategory() == Category.PRIMITIVE) {
                assertEquals(fieldFromCursor, testColumn.getExpectedValue(), "Wrong value for column " + testColumn.getName());
            } else {
                Block expected = (Block) testColumn.getExpectedValue();
                Block actual = (Block) fieldFromCursor;
                boolean distinct = isDistinctFrom(distinctFromOperators.get(type), expected, actual);
                assertFalse(distinct, "Wrong value for column: " + testColumn.getName());
            }
        }
    }
    assertFalse(cursor.advanceNextPosition());
}
Also used : StructuralTestUtil.decimalMapBlockOf(io.trino.testing.StructuralTestUtil.decimalMapBlockOf) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) JavaHiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveDecimalObjectInspector) SerDeUtils.serializeObject(io.trino.plugin.hive.util.SerDeUtils.serializeObject) DateType(io.trino.spi.type.DateType) PrimitiveObjectInspectorFactory.javaByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteObjectInspector) Text(org.apache.hadoop.io.Text) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) Writable(org.apache.hadoop.io.Writable) Test(org.testng.annotations.Test) NO_ACID_TRANSACTION(io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) Date(org.apache.hadoop.hive.common.type.Date) PrimitiveObjectInspectorFactory.javaDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDateObjectInspector) CharType.createCharType(io.trino.spi.type.CharType.createCharType) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) BigDecimal(java.math.BigDecimal) FileSplit(org.apache.hadoop.mapred.FileSplit) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) StructuralTestUtil.rowBlockOf(io.trino.testing.StructuralTestUtil.rowBlockOf) BigInteger(java.math.BigInteger) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MaterializedRow(io.trino.testing.MaterializedRow) Assert.assertFalse(org.testng.Assert.assertFalse) SMALLINT(io.trino.spi.type.SmallintType.SMALLINT) PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) MICROSECONDS_PER_MILLISECOND(io.trino.type.DateTimes.MICROSECONDS_PER_MILLISECOND) PrimitiveObjectInspectorFactory.javaHiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaHiveVarcharObjectInspector) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) StructuralTestUtil.arrayBlockOf(io.trino.testing.StructuralTestUtil.arrayBlockOf) REAL(io.trino.spi.type.RealType.REAL) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) JavaHiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveCharObjectInspector) MethodHandle(java.lang.invoke.MethodHandle) Slice(io.airlift.slice.Slice) PageBuilder(io.trino.spi.PageBuilder) Page(io.trino.spi.Page) SqlDecimal(io.trino.spi.type.SqlDecimal) BOOLEAN(io.trino.spi.type.BooleanType.BOOLEAN) TimestampType(io.trino.spi.type.TimestampType) ArrayList(java.util.ArrayList) PrimitiveObjectInspectorFactory.javaShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaShortObjectInspector) Chars.padSpaces(io.trino.spi.type.Chars.padSpaces) VARBINARY(io.trino.spi.type.VarbinaryType.VARBINARY) Math.floorDiv(java.lang.Math.floorDiv) Int128(io.trino.spi.type.Int128) Arrays.fill(java.util.Arrays.fill) RecordCursor(io.trino.spi.connector.RecordCursor) Properties(java.util.Properties) TESTING_TYPE_MANAGER(io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) StructuralTestUtil.mapBlockOf(io.trino.testing.StructuralTestUtil.mapBlockOf) UTC(org.joda.time.DateTimeZone.UTC) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) File(java.io.File) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) SESSION(io.trino.plugin.hive.HiveTestUtils.SESSION) SqlVarbinary(io.trino.spi.type.SqlVarbinary) HiveTestUtils.mapType(io.trino.plugin.hive.HiveTestUtils.mapType) CharType(io.trino.spi.type.CharType) TINYINT(io.trino.spi.type.TinyintType.TINYINT) BlockBuilder(io.trino.spi.block.BlockBuilder) VarcharType.createVarcharType(io.trino.spi.type.VarcharType.createVarcharType) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) PARTITION_KEY(io.trino.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY) DateTimeTestingUtils.sqlTimestampOf(io.trino.testing.DateTimeTestingUtils.sqlTimestampOf) MaterializedResult(io.trino.testing.MaterializedResult) CompressionConfigUtil.configureCompression(io.trino.plugin.hive.util.CompressionConfigUtil.configureCompression) SqlTimestamp(io.trino.spi.type.SqlTimestamp) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) StructuralTestUtil.decimalArrayBlockOf(io.trino.testing.StructuralTestUtil.decimalArrayBlockOf) Block(io.trino.spi.block.Block) Path(org.apache.hadoop.fs.Path) INTEGER(io.trino.spi.type.IntegerType.INTEGER) StorageFormat(io.trino.plugin.hive.metastore.StorageFormat) DateTimeFormat(org.joda.time.format.DateTimeFormat) RowType(io.trino.spi.type.RowType) ImmutableMap(com.google.common.collect.ImmutableMap) ArrayType(io.trino.spi.type.ArrayType) MaterializedResult.materializeSourceDataStream(io.trino.testing.MaterializedResult.materializeSourceDataStream) HiveOutputFormat(org.apache.hadoop.hive.ql.io.HiveOutputFormat) Collectors(java.util.stream.Collectors) SqlDate(io.trino.spi.type.SqlDate) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) DecimalType(io.trino.spi.type.DecimalType) TypeInfoFactory.getCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getCharTypeInfo) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) Type(io.trino.spi.type.Type) HiveColumnProjectionInfo.generatePartialName(io.trino.plugin.hive.HiveColumnProjectionInfo.generatePartialName) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) Assert.assertEquals(org.testng.Assert.assertEquals) HashMap(java.util.HashMap) Float.intBitsToFloat(java.lang.Float.intBitsToFloat) OptionalInt(java.util.OptionalInt) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) VarcharType(io.trino.spi.type.VarcharType) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) ImmutableList(com.google.common.collect.ImmutableList) Objects.requireNonNull(java.util.Objects.requireNonNull) HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) HIVE_DEFAULT_DYNAMIC_PARTITION(io.trino.plugin.hive.HivePartitionKey.HIVE_DEFAULT_DYNAMIC_PARTITION) ObjectInspectorFactory.getStandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardMapObjectInspector) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) UTF_8(java.nio.charset.StandardCharsets.UTF_8) DateTime(org.joda.time.DateTime) HiveTestUtils.isDistinctFrom(io.trino.plugin.hive.HiveTestUtils.isDistinctFrom) ObjectInspectorFactory.getStandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector) JobConf(org.apache.hadoop.mapred.JobConf) TimeUnit(java.util.concurrent.TimeUnit) Collectors.toList(java.util.stream.Collectors.toList) Serializer(org.apache.hadoop.hive.serde2.Serializer) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) HiveUtil.isStructuralType(io.trino.plugin.hive.util.HiveUtil.isStructuralType) REGULAR(io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR) DateType(io.trino.spi.type.DateType) CharType.createCharType(io.trino.spi.type.CharType.createCharType) TimestampType(io.trino.spi.type.TimestampType) HiveTestUtils.mapType(io.trino.plugin.hive.HiveTestUtils.mapType) CharType(io.trino.spi.type.CharType) VarcharType.createVarcharType(io.trino.spi.type.VarcharType.createVarcharType) RowType(io.trino.spi.type.RowType) ArrayType(io.trino.spi.type.ArrayType) DecimalType(io.trino.spi.type.DecimalType) Type(io.trino.spi.type.Type) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) VarcharType(io.trino.spi.type.VarcharType) HiveUtil.isStructuralType(io.trino.plugin.hive.util.HiveUtil.isStructuralType) DecimalType(io.trino.spi.type.DecimalType) Block(io.trino.spi.block.Block) SerDeUtils.serializeObject(io.trino.plugin.hive.util.SerDeUtils.serializeObject) BigDecimal(java.math.BigDecimal) MethodHandle(java.lang.invoke.MethodHandle)

Example 2 with TINYINT

use of io.trino.spi.type.TinyintType.TINYINT in project trino by trinodb.

the class FormatFunction method valueConverter.

private static BiFunction<ConnectorSession, Block, Object> valueConverter(FunctionDependencies functionDependencies, Type type, int position) {
    if (type.equals(UNKNOWN)) {
        return (session, block) -> null;
    }
    if (type.equals(BOOLEAN)) {
        return (session, block) -> type.getBoolean(block, position);
    }
    if (type.equals(TINYINT) || type.equals(SMALLINT) || type.equals(INTEGER) || type.equals(BIGINT)) {
        return (session, block) -> type.getLong(block, position);
    }
    if (type.equals(REAL)) {
        return (session, block) -> intBitsToFloat(toIntExact(type.getLong(block, position)));
    }
    if (type.equals(DOUBLE)) {
        return (session, block) -> type.getDouble(block, position);
    }
    if (type.equals(DATE)) {
        return (session, block) -> LocalDate.ofEpochDay(type.getLong(block, position));
    }
    if (type instanceof TimestampWithTimeZoneType) {
        return (session, block) -> toZonedDateTime(((TimestampWithTimeZoneType) type), block, position);
    }
    if (type instanceof TimestampType) {
        return (session, block) -> toLocalDateTime(((TimestampType) type), block, position);
    }
    if (type instanceof TimeType) {
        return (session, block) -> toLocalTime(type.getLong(block, position));
    }
    // TODO: support TIME WITH TIME ZONE by https://github.com/trinodb/trino/issues/191 + mapping to java.time.OffsetTime
    if (type.equals(JSON)) {
        MethodHandle handle = functionDependencies.getFunctionInvoker(QualifiedName.of("json_format"), ImmutableList.of(JSON), simpleConvention(FAIL_ON_NULL, NEVER_NULL)).getMethodHandle();
        return (session, block) -> convertToString(handle, type.getSlice(block, position));
    }
    if (isShortDecimal(type)) {
        int scale = ((DecimalType) type).getScale();
        return (session, block) -> BigDecimal.valueOf(type.getLong(block, position), scale);
    }
    if (isLongDecimal(type)) {
        int scale = ((DecimalType) type).getScale();
        return (session, block) -> new BigDecimal(((Int128) type.getObject(block, position)).toBigInteger(), scale);
    }
    if (type instanceof VarcharType) {
        return (session, block) -> type.getSlice(block, position).toStringUtf8();
    }
    if (type instanceof CharType) {
        CharType charType = (CharType) type;
        return (session, block) -> padSpaces(type.getSlice(block, position), charType).toStringUtf8();
    }
    BiFunction<ConnectorSession, Block, Object> function;
    if (type.getJavaType() == long.class) {
        function = (session, block) -> type.getLong(block, position);
    } else if (type.getJavaType() == double.class) {
        function = (session, block) -> type.getDouble(block, position);
    } else if (type.getJavaType() == boolean.class) {
        function = (session, block) -> type.getBoolean(block, position);
    } else if (type.getJavaType() == Slice.class) {
        function = (session, block) -> type.getSlice(block, position);
    } else {
        function = (session, block) -> type.getObject(block, position);
    }
    MethodHandle handle = functionDependencies.getCastInvoker(type, VARCHAR, simpleConvention(FAIL_ON_NULL, NEVER_NULL)).getMethodHandle();
    return (session, block) -> convertToString(handle, function.apply(session, block));
}
Also used : FunctionDependencies(io.trino.metadata.FunctionDependencies) SCALAR(io.trino.metadata.FunctionKind.SCALAR) FunctionDependencyDeclarationBuilder(io.trino.metadata.FunctionDependencyDeclaration.FunctionDependencyDeclarationBuilder) FAIL_ON_NULL(io.trino.spi.function.InvocationConvention.InvocationReturnConvention.FAIL_ON_NULL) BiFunction(java.util.function.BiFunction) FunctionNullability(io.trino.metadata.FunctionNullability) UNKNOWN(io.trino.type.UnknownType.UNKNOWN) InvocationConvention.simpleConvention(io.trino.spi.function.InvocationConvention.simpleConvention) Timestamps.roundDiv(io.trino.spi.type.Timestamps.roundDiv) BigDecimal(java.math.BigDecimal) TimestampWithTimeZoneType(io.trino.spi.type.TimestampWithTimeZoneType) Block(io.trino.spi.block.Block) LocalTime(java.time.LocalTime) IllegalFormatException(java.util.IllegalFormatException) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) INTEGER(io.trino.spi.type.IntegerType.INTEGER) FunctionMetadata(io.trino.metadata.FunctionMetadata) SMALLINT(io.trino.spi.type.SmallintType.SMALLINT) TypeSignature(io.trino.spi.type.TypeSignature) DateTimes.toLocalDateTime(io.trino.type.DateTimes.toLocalDateTime) FunctionDependencyDeclaration(io.trino.metadata.FunctionDependencyDeclaration) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) PICOSECONDS_PER_NANOSECOND(io.trino.type.DateTimes.PICOSECONDS_PER_NANOSECOND) TrinoException(io.trino.spi.TrinoException) String.format(java.lang.String.format) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) INVALID_FUNCTION_ARGUMENT(io.trino.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT) LocalDate(java.time.LocalDate) DecimalType(io.trino.spi.type.DecimalType) NEVER_NULL(io.trino.spi.function.InvocationConvention.InvocationArgumentConvention.NEVER_NULL) DATE(io.trino.spi.type.DateType.DATE) REAL(io.trino.spi.type.RealType.REAL) MethodHandle(java.lang.invoke.MethodHandle) Slice(io.airlift.slice.Slice) TimeType(io.trino.spi.type.TimeType) Decimals.isLongDecimal(io.trino.spi.type.Decimals.isLongDecimal) Type(io.trino.spi.type.Type) BOOLEAN(io.trino.spi.type.BooleanType.BOOLEAN) Float.intBitsToFloat(java.lang.Float.intBitsToFloat) DateTimes.toZonedDateTime(io.trino.type.DateTimes.toZonedDateTime) TimestampType(io.trino.spi.type.TimestampType) VarcharType(io.trino.spi.type.VarcharType) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) ImmutableList(com.google.common.collect.ImmutableList) Chars.padSpaces(io.trino.spi.type.Chars.padSpaces) Math.toIntExact(java.lang.Math.toIntExact) Signature(io.trino.metadata.Signature) Decimals.isShortDecimal(io.trino.spi.type.Decimals.isShortDecimal) Int128(io.trino.spi.type.Int128) Streams.mapWithIndex(com.google.common.collect.Streams.mapWithIndex) ConnectorSession(io.trino.spi.connector.ConnectorSession) Signature.withVariadicBound(io.trino.metadata.Signature.withVariadicBound) UsedByGeneratedCode(io.trino.annotation.UsedByGeneratedCode) SqlScalarFunction(io.trino.metadata.SqlScalarFunction) Failures.internalError(io.trino.util.Failures.internalError) QualifiedName(io.trino.sql.tree.QualifiedName) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) CharType(io.trino.spi.type.CharType) BoundSignature(io.trino.metadata.BoundSignature) TINYINT(io.trino.spi.type.TinyintType.TINYINT) JSON(io.trino.type.JsonType.JSON) Reflection.methodHandle(io.trino.util.Reflection.methodHandle) VarcharType(io.trino.spi.type.VarcharType) BigDecimal(java.math.BigDecimal) TimeType(io.trino.spi.type.TimeType) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice) TimestampWithTimeZoneType(io.trino.spi.type.TimestampWithTimeZoneType) TimestampType(io.trino.spi.type.TimestampType) DecimalType(io.trino.spi.type.DecimalType) Block(io.trino.spi.block.Block) ConnectorSession(io.trino.spi.connector.ConnectorSession) CharType(io.trino.spi.type.CharType) MethodHandle(java.lang.invoke.MethodHandle)

Example 3 with TINYINT

use of io.trino.spi.type.TinyintType.TINYINT in project trino by trinodb.

the class RcFileTester method preprocessWriteValueOld.

private static Object preprocessWriteValueOld(Format format, Type type, Object value) {
    if (value == null) {
        return null;
    }
    if (type.equals(BOOLEAN)) {
        return value;
    }
    if (type.equals(TINYINT)) {
        return ((Number) value).byteValue();
    }
    if (type.equals(SMALLINT)) {
        return ((Number) value).shortValue();
    }
    if (type.equals(INTEGER)) {
        return ((Number) value).intValue();
    }
    if (type.equals(BIGINT)) {
        return ((Number) value).longValue();
    }
    if (type.equals(REAL)) {
        return ((Number) value).floatValue();
    }
    if (type.equals(DOUBLE)) {
        return ((Number) value).doubleValue();
    }
    if (type instanceof VarcharType) {
        return value;
    }
    if (type.equals(VARBINARY)) {
        return ((SqlVarbinary) value).getBytes();
    }
    if (type.equals(DATE)) {
        return Date.ofEpochDay(((SqlDate) value).getDays());
    }
    if (type.equals(TIMESTAMP_MILLIS)) {
        long millis = ((SqlTimestamp) value).getMillis();
        if (format == Format.BINARY) {
            millis = HIVE_STORAGE_TIME_ZONE.convertLocalToUTC(millis, false);
        }
        return Timestamp.ofEpochMilli(millis);
    }
    if (type instanceof DecimalType) {
        return HiveDecimal.create(((SqlDecimal) value).toBigDecimal());
    }
    if (type instanceof ArrayType) {
        Type elementType = type.getTypeParameters().get(0);
        return ((List<?>) value).stream().map(element -> preprocessWriteValueOld(format, elementType, element)).collect(toList());
    }
    if (type instanceof MapType) {
        Type keyType = type.getTypeParameters().get(0);
        Type valueType = type.getTypeParameters().get(1);
        Map<Object, Object> newMap = new HashMap<>();
        for (Entry<?, ?> entry : ((Map<?, ?>) value).entrySet()) {
            newMap.put(preprocessWriteValueOld(format, keyType, entry.getKey()), preprocessWriteValueOld(format, valueType, entry.getValue()));
        }
        return newMap;
    }
    if (type instanceof RowType) {
        List<?> fieldValues = (List<?>) value;
        List<Type> fieldTypes = type.getTypeParameters();
        List<Object> newStruct = new ArrayList<>();
        for (int fieldId = 0; fieldId < fieldValues.size(); fieldId++) {
            newStruct.add(preprocessWriteValueOld(format, fieldTypes.get(fieldId), fieldValues.get(fieldId)));
        }
        return newStruct;
    }
    throw new IllegalArgumentException("unsupported type: " + type);
}
Also used : SnappyCodec(org.apache.hadoop.io.compress.SnappyCodec) PRESTO_RCFILE_WRITER_VERSION_METADATA_KEY(io.trino.rcfile.RcFileWriter.PRESTO_RCFILE_WRITER_VERSION_METADATA_KEY) DateTimeZone(org.joda.time.DateTimeZone) PrimitiveObjectInspectorFactory.javaByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteObjectInspector) Text(org.apache.hadoop.io.Text) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) Writable(org.apache.hadoop.io.Writable) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) Date(org.apache.hadoop.hive.common.type.Date) PrimitiveObjectInspectorFactory.javaDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDateObjectInspector) Decimals.rescale(io.trino.spi.type.Decimals.rescale) FileSplit(org.apache.hadoop.mapred.FileSplit) RcFileDecoderUtils.findFirstSyncPosition(io.trino.rcfile.RcFileDecoderUtils.findFirstSyncPosition) RCFileInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat) Files.createTempDirectory(java.nio.file.Files.createTempDirectory) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) BigInteger(java.math.BigInteger) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Assert.assertFalse(org.testng.Assert.assertFalse) LazyBinaryArray(org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryArray) IntWritable(org.apache.hadoop.io.IntWritable) SMALLINT(io.trino.spi.type.SmallintType.SMALLINT) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector) BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) META_TABLE_COLUMN_TYPES(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) LazyMap(org.apache.hadoop.hive.serde2.lazy.LazyMap) PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) LazyArray(org.apache.hadoop.hive.serde2.lazy.LazyArray) Set(java.util.Set) READ_ALL_COLUMNS(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_ALL_COLUMNS) MICROSECONDS_PER_MILLISECOND(io.trino.type.DateTimes.MICROSECONDS_PER_MILLISECOND) UncheckedIOException(java.io.UncheckedIOException) BooleanWritable(org.apache.hadoop.io.BooleanWritable) RecordReader(org.apache.hadoop.mapred.RecordReader) TypeSignatureParameter(io.trino.spi.type.TypeSignatureParameter) DATE(io.trino.spi.type.DateType.DATE) REAL(io.trino.spi.type.RealType.REAL) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) Lz4Codec(org.apache.hadoop.io.compress.Lz4Codec) Iterables(com.google.common.collect.Iterables) Slice(io.airlift.slice.Slice) TIMESTAMP_MILLIS(io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) StructObject(org.apache.hadoop.hive.serde2.StructObject) Page(io.trino.spi.Page) SqlDecimal(io.trino.spi.type.SqlDecimal) Functions.constant(com.google.common.base.Functions.constant) BOOLEAN(io.trino.spi.type.BooleanType.BOOLEAN) META_TABLE_COLUMNS(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS) ArrayList(java.util.ArrayList) NONE(io.trino.rcfile.RcFileTester.Compression.NONE) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) Lists(com.google.common.collect.Lists) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) BZIP2(io.trino.rcfile.RcFileTester.Compression.BZIP2) PrimitiveObjectInspectorFactory.javaShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaShortObjectInspector) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) VARBINARY(io.trino.spi.type.VarbinaryType.VARBINARY) HadoopNative(io.trino.hadoop.HadoopNative) LinkedHashSet(java.util.LinkedHashSet) Int128(io.trino.spi.type.Int128) Properties(java.util.Properties) MapType(io.trino.spi.type.MapType) AbstractIterator(com.google.common.collect.AbstractIterator) TESTING_TYPE_MANAGER(io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) File(java.io.File) NULL(org.apache.hadoop.mapred.Reporter.NULL) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) SqlVarbinary(io.trino.spi.type.SqlVarbinary) SIZE_OF_LONG(io.airlift.slice.SizeOf.SIZE_OF_LONG) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) TINYINT(io.trino.spi.type.TinyintType.TINYINT) BlockBuilder(io.trino.spi.block.BlockBuilder) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) FloatWritable(org.apache.hadoop.io.FloatWritable) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) BinaryRcFileEncoding(io.trino.rcfile.binary.BinaryRcFileEncoding) DateTimeTestingUtils.sqlTimestampOf(io.trino.testing.DateTimeTestingUtils.sqlTimestampOf) Iterables.transform(com.google.common.collect.Iterables.transform) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) MoreFiles.deleteRecursively(com.google.common.io.MoreFiles.deleteRecursively) GzipCodec(org.apache.hadoop.io.compress.GzipCodec) LongWritable(org.apache.hadoop.io.LongWritable) SNAPPY(io.trino.rcfile.RcFileTester.Compression.SNAPPY) TextRcFileEncoding(io.trino.rcfile.text.TextRcFileEncoding) SqlTimestamp(io.trino.spi.type.SqlTimestamp) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) Block(io.trino.spi.block.Block) PRESTO_RCFILE_WRITER_VERSION(io.trino.rcfile.RcFileWriter.PRESTO_RCFILE_WRITER_VERSION) InputFormat(org.apache.hadoop.mapred.InputFormat) Path(org.apache.hadoop.fs.Path) KILOBYTE(io.airlift.units.DataSize.Unit.KILOBYTE) INTEGER(io.trino.spi.type.IntegerType.INTEGER) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) RowType(io.trino.spi.type.RowType) SIZE_OF_INT(io.airlift.slice.SizeOf.SIZE_OF_INT) ImmutableSet(com.google.common.collect.ImmutableSet) DateWritableV2(org.apache.hadoop.hive.serde2.io.DateWritableV2) ImmutableMap(com.google.common.collect.ImmutableMap) Collections.nCopies(java.util.Collections.nCopies) RCFileOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat) SESSION(io.trino.testing.TestingConnectorSession.SESSION) ArrayType(io.trino.spi.type.ArrayType) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) SqlDate(io.trino.spi.type.SqlDate) ColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe) Objects(java.util.Objects) DataSize(io.airlift.units.DataSize) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) Decimals(io.trino.spi.type.Decimals) Entry(java.util.Map.Entry) LZ4(io.trino.rcfile.RcFileTester.Compression.LZ4) Optional(java.util.Optional) READ_COLUMN_IDS_CONF_STR(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR) DecimalType(io.trino.spi.type.DecimalType) MAP(io.trino.spi.type.StandardTypes.MAP) LazyPrimitive(org.apache.hadoop.hive.serde2.lazy.LazyPrimitive) Assert.assertNull(org.testng.Assert.assertNull) LazyBinaryMap(org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryMap) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) Type(io.trino.spi.type.Type) Assert.assertEquals(org.testng.Assert.assertEquals) HashMap(java.util.HashMap) DoubleWritable(org.apache.hadoop.io.DoubleWritable) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) VarcharType(io.trino.spi.type.VarcharType) OutputStreamSliceOutput(io.airlift.slice.OutputStreamSliceOutput) COMPRESS_CODEC(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.COMPRESS_CODEC) ImmutableList(com.google.common.collect.ImmutableList) ByteWritable(org.apache.hadoop.io.ByteWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) Math.toIntExact(java.lang.Math.toIntExact) Iterator(java.util.Iterator) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) Iterators.advance(com.google.common.collect.Iterators.advance) FileInputStream(java.io.FileInputStream) JobConf(org.apache.hadoop.mapred.JobConf) BZip2Codec(org.apache.hadoop.io.compress.BZip2Codec) Collectors.toList(java.util.stream.Collectors.toList) ObjectInspectorFactory(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory) Serializer(org.apache.hadoop.hive.serde2.Serializer) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) Closeable(java.io.Closeable) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) Collections(java.util.Collections) InputStream(java.io.InputStream) ZLIB(io.trino.rcfile.RcFileTester.Compression.ZLIB) VarcharType(io.trino.spi.type.VarcharType) HashMap(java.util.HashMap) SqlVarbinary(io.trino.spi.type.SqlVarbinary) ArrayList(java.util.ArrayList) RowType(io.trino.spi.type.RowType) SqlTimestamp(io.trino.spi.type.SqlTimestamp) MapType(io.trino.spi.type.MapType) ArrayType(io.trino.spi.type.ArrayType) MapType(io.trino.spi.type.MapType) RowType(io.trino.spi.type.RowType) ArrayType(io.trino.spi.type.ArrayType) DecimalType(io.trino.spi.type.DecimalType) Type(io.trino.spi.type.Type) VarcharType(io.trino.spi.type.VarcharType) DecimalType(io.trino.spi.type.DecimalType) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) StructObject(org.apache.hadoop.hive.serde2.StructObject) Map(java.util.Map) LazyMap(org.apache.hadoop.hive.serde2.lazy.LazyMap) ImmutableMap(com.google.common.collect.ImmutableMap) LazyBinaryMap(org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryMap) HashMap(java.util.HashMap)

Example 4 with TINYINT

use of io.trino.spi.type.TinyintType.TINYINT in project trino by trinodb.

the class OrcTester method preprocessWriteValueHive.

private static Object preprocessWriteValueHive(Type type, Object value) {
    if (value == null) {
        return null;
    }
    if (type.equals(BOOLEAN)) {
        return value;
    }
    if (type.equals(TINYINT)) {
        return ((Number) value).byteValue();
    }
    if (type.equals(SMALLINT)) {
        return ((Number) value).shortValue();
    }
    if (type.equals(INTEGER)) {
        return ((Number) value).intValue();
    }
    if (type.equals(BIGINT)) {
        return ((Number) value).longValue();
    }
    if (type.equals(REAL)) {
        return ((Number) value).floatValue();
    }
    if (type.equals(DOUBLE)) {
        return ((Number) value).doubleValue();
    }
    if (type instanceof VarcharType) {
        return value;
    }
    if (type instanceof CharType) {
        return new HiveChar((String) value, ((CharType) type).getLength());
    }
    if (type.equals(VARBINARY)) {
        return ((SqlVarbinary) value).getBytes();
    }
    if (type.equals(DATE)) {
        return Date.ofEpochDay(((SqlDate) value).getDays());
    }
    if (type.equals(TIMESTAMP_MILLIS) || type.equals(TIMESTAMP_MICROS) || type.equals(TIMESTAMP_NANOS)) {
        LocalDateTime dateTime = ((SqlTimestamp) value).toLocalDateTime();
        return Timestamp.ofEpochSecond(dateTime.toEpochSecond(ZoneOffset.UTC), dateTime.getNano());
    }
    if (type.equals(TIMESTAMP_TZ_MILLIS) || type.equals(TIMESTAMP_TZ_MICROS) || type.equals(TIMESTAMP_TZ_NANOS)) {
        SqlTimestampWithTimeZone timestamp = (SqlTimestampWithTimeZone) value;
        int nanosOfMilli = roundDiv(timestamp.getPicosOfMilli(), PICOSECONDS_PER_NANOSECOND);
        return Timestamp.ofEpochMilli(timestamp.getEpochMillis(), nanosOfMilli);
    }
    if (type instanceof DecimalType) {
        return HiveDecimal.create(((SqlDecimal) value).toBigDecimal());
    }
    if (type instanceof ArrayType) {
        Type elementType = type.getTypeParameters().get(0);
        return ((List<?>) value).stream().map(element -> preprocessWriteValueHive(elementType, element)).collect(toList());
    }
    if (type instanceof MapType) {
        Type keyType = type.getTypeParameters().get(0);
        Type valueType = type.getTypeParameters().get(1);
        Map<Object, Object> newMap = new HashMap<>();
        for (Entry<?, ?> entry : ((Map<?, ?>) value).entrySet()) {
            newMap.put(preprocessWriteValueHive(keyType, entry.getKey()), preprocessWriteValueHive(valueType, entry.getValue()));
        }
        return newMap;
    }
    if (type instanceof RowType) {
        List<?> fieldValues = (List<?>) value;
        List<Type> fieldTypes = type.getTypeParameters();
        List<Object> newStruct = new ArrayList<>();
        for (int fieldId = 0; fieldId < fieldValues.size(); fieldId++) {
            newStruct.add(preprocessWriteValueHive(fieldTypes.get(fieldId), fieldValues.get(fieldId)));
        }
        return newStruct;
    }
    throw new IllegalArgumentException("unsupported type: " + type);
}
Also used : LocalDateTime(java.time.LocalDateTime) OrcUtil(org.apache.hadoop.hive.ql.io.orc.OrcUtil) DateTimeZone(org.joda.time.DateTimeZone) NamedTypeSignature(io.trino.spi.type.NamedTypeSignature) PrimitiveObjectInspectorFactory.javaByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteObjectInspector) Text(org.apache.hadoop.io.Text) TIMESTAMP_TZ_NANOS(io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_TZ_NANOS) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) Writable(org.apache.hadoop.io.Writable) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) Date(org.apache.hadoop.hive.common.type.Date) NANOSECONDS_PER_MICROSECOND(io.trino.spi.type.Timestamps.NANOSECONDS_PER_MICROSECOND) PrimitiveObjectInspectorFactory.javaDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDateObjectInspector) LongTimestampWithTimeZone(io.trino.spi.type.LongTimestampWithTimeZone) NONE(io.trino.orc.metadata.CompressionKind.NONE) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) OrcStruct(org.apache.hadoop.hive.ql.io.orc.OrcStruct) Decimals.rescale(io.trino.spi.type.Decimals.rescale) TimestampWithTimeZoneType(io.trino.spi.type.TimestampWithTimeZoneType) Arrays.asList(java.util.Arrays.asList) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) BigInteger(java.math.BigInteger) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ZoneOffset(java.time.ZoneOffset) Assert.assertFalse(org.testng.Assert.assertFalse) IntWritable(org.apache.hadoop.io.IntWritable) SMALLINT(io.trino.spi.type.SmallintType.SMALLINT) PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) PICOSECONDS_PER_MICROSECOND(io.trino.spi.type.Timestamps.PICOSECONDS_PER_MICROSECOND) PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) UTC_KEY(io.trino.spi.type.TimeZoneKey.UTC_KEY) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) DateTimeEncoding.packDateTimeWithZone(io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone) Set(java.util.Set) READ_ALL_COLUMNS(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_ALL_COLUMNS) ReaderOptions(org.apache.hadoop.hive.ql.io.orc.OrcFile.ReaderOptions) BooleanWritable(org.apache.hadoop.io.BooleanWritable) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) TypeSignatureParameter(io.trino.spi.type.TypeSignatureParameter) DATE(io.trino.spi.type.DateType.DATE) REAL(io.trino.spi.type.RealType.REAL) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) JavaHiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveCharObjectInspector) Iterables(com.google.common.collect.Iterables) Slice(io.airlift.slice.Slice) TIMESTAMP_MILLIS(io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) LocalDateTime(java.time.LocalDateTime) Page(io.trino.spi.Page) SqlDecimal(io.trino.spi.type.SqlDecimal) DataSize.succinctBytes(io.airlift.units.DataSize.succinctBytes) BOOLEAN(io.trino.spi.type.BooleanType.BOOLEAN) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) PrimitiveObjectInspectorFactory.javaTimestampTZObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampTZObjectInspector) ArrayList(java.util.ArrayList) TIMESTAMP_TZ_MILLIS(io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_TZ_MILLIS) Lists(com.google.common.collect.Lists) PrimitiveObjectInspectorFactory.javaShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaShortObjectInspector) VARBINARY(io.trino.spi.type.VarbinaryType.VARBINARY) BOTH(io.trino.orc.OrcWriteValidation.OrcWriteValidationMode.BOTH) OrcType(io.trino.orc.metadata.OrcType) Int128(io.trino.spi.type.Int128) TIMESTAMP_TZ_MICROS(io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_TZ_MICROS) Properties(java.util.Properties) MapType(io.trino.spi.type.MapType) AbstractIterator(com.google.common.collect.AbstractIterator) TESTING_TYPE_MANAGER(io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER) StandardTypes(io.trino.spi.type.StandardTypes) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) CompressionKind(io.trino.orc.metadata.CompressionKind) File(java.io.File) ZLIB(io.trino.orc.metadata.CompressionKind.ZLIB) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) MAX_BATCH_SIZE(io.trino.orc.OrcReader.MAX_BATCH_SIZE) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) TIMESTAMP_MICROS(io.trino.spi.type.TimestampType.TIMESTAMP_MICROS) SqlVarbinary(io.trino.spi.type.SqlVarbinary) VarbinaryType(io.trino.spi.type.VarbinaryType) Varchars.truncateToLength(io.trino.spi.type.Varchars.truncateToLength) CharType(io.trino.spi.type.CharType) TINYINT(io.trino.spi.type.TinyintType.TINYINT) BlockBuilder(io.trino.spi.block.BlockBuilder) FloatWritable(org.apache.hadoop.io.FloatWritable) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) OrcFile(org.apache.hadoop.hive.ql.io.orc.OrcFile) DateTimeTestingUtils.sqlTimestampOf(io.trino.testing.DateTimeTestingUtils.sqlTimestampOf) TestingOrcPredicate.createOrcPredicate(io.trino.orc.TestingOrcPredicate.createOrcPredicate) LongWritable(org.apache.hadoop.io.LongWritable) Timestamps.roundDiv(io.trino.spi.type.Timestamps.roundDiv) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) SqlTimestamp(io.trino.spi.type.SqlTimestamp) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) Block(io.trino.spi.block.Block) OrcConf(org.apache.orc.OrcConf) RecordReader(org.apache.hadoop.hive.ql.io.orc.RecordReader) Path(org.apache.hadoop.fs.Path) Reader(org.apache.hadoop.hive.ql.io.orc.Reader) INTEGER(io.trino.spi.type.IntegerType.INTEGER) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) RowType(io.trino.spi.type.RowType) ImmutableSet(com.google.common.collect.ImmutableSet) DateWritableV2(org.apache.hadoop.hive.serde2.io.DateWritableV2) ImmutableMap(com.google.common.collect.ImmutableMap) SESSION(io.trino.testing.TestingConnectorSession.SESSION) MICROSECONDS_PER_SECOND(io.trino.spi.type.Timestamps.MICROSECONDS_PER_SECOND) ArrayType(io.trino.spi.type.ArrayType) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) SqlDate(io.trino.spi.type.SqlDate) Objects(java.util.Objects) TIMESTAMP_NANOS(io.trino.spi.type.TimestampType.TIMESTAMP_NANOS) SqlTimestampWithTimeZone(io.trino.spi.type.SqlTimestampWithTimeZone) DataSize(io.airlift.units.DataSize) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) Decimals(io.trino.spi.type.Decimals) Entry(java.util.Map.Entry) Optional(java.util.Optional) READ_COLUMN_IDS_CONF_STR(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR) OrcOutputFormat(org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat) DecimalType(io.trino.spi.type.DecimalType) IntStream(java.util.stream.IntStream) ORC_11(io.trino.orc.OrcTester.Format.ORC_11) TypeInfoFactory.getCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getCharTypeInfo) Assert.assertNull(org.testng.Assert.assertNull) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) PICOSECONDS_PER_NANOSECOND(io.trino.spi.type.Timestamps.PICOSECONDS_PER_NANOSECOND) Type(io.trino.spi.type.Type) Assert.assertEquals(org.testng.Assert.assertEquals) HashMap(java.util.HashMap) DoubleWritable(org.apache.hadoop.io.DoubleWritable) ZSTD(io.trino.orc.metadata.CompressionKind.ZSTD) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) VarcharType(io.trino.spi.type.VarcharType) ImmutableList(com.google.common.collect.ImmutableList) Chars.truncateToLengthAndTrimSpaces(io.trino.spi.type.Chars.truncateToLengthAndTrimSpaces) ByteWritable(org.apache.hadoop.io.ByteWritable) RowFieldName(io.trino.spi.type.RowFieldName) BytesWritable(org.apache.hadoop.io.BytesWritable) SNAPPY(io.trino.orc.metadata.CompressionKind.SNAPPY) ObjectInspectorFactory.getStandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardMapObjectInspector) ORC_12(io.trino.orc.OrcTester.Format.ORC_12) Iterator(java.util.Iterator) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) Iterators.advance(com.google.common.collect.Iterators.advance) LongTimestamp(io.trino.spi.type.LongTimestamp) NANOSECONDS_PER_MILLISECOND(io.trino.spi.type.Timestamps.NANOSECONDS_PER_MILLISECOND) ObjectInspectorFactory.getStandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector) JobConf(org.apache.hadoop.mapred.JobConf) Collectors.toList(java.util.stream.Collectors.toList) Serializer(org.apache.hadoop.hive.serde2.Serializer) LZ4(io.trino.orc.metadata.CompressionKind.LZ4) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) VarcharType(io.trino.spi.type.VarcharType) HashMap(java.util.HashMap) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) SqlVarbinary(io.trino.spi.type.SqlVarbinary) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) ArrayList(java.util.ArrayList) RowType(io.trino.spi.type.RowType) SqlTimestamp(io.trino.spi.type.SqlTimestamp) MapType(io.trino.spi.type.MapType) ArrayType(io.trino.spi.type.ArrayType) Arrays.asList(java.util.Arrays.asList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) TimestampWithTimeZoneType(io.trino.spi.type.TimestampWithTimeZoneType) OrcType(io.trino.orc.metadata.OrcType) MapType(io.trino.spi.type.MapType) VarbinaryType(io.trino.spi.type.VarbinaryType) CharType(io.trino.spi.type.CharType) RowType(io.trino.spi.type.RowType) ArrayType(io.trino.spi.type.ArrayType) DecimalType(io.trino.spi.type.DecimalType) Type(io.trino.spi.type.Type) VarcharType(io.trino.spi.type.VarcharType) SqlTimestampWithTimeZone(io.trino.spi.type.SqlTimestampWithTimeZone) DecimalType(io.trino.spi.type.DecimalType) CharType(io.trino.spi.type.CharType) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)

Example 5 with TINYINT

use of io.trino.spi.type.TinyintType.TINYINT in project trino by trinodb.

the class TupleDomainParquetPredicate method getDomain.

/**
 * Get a domain for the ranges defined by each pair of elements from {@code minimums} and {@code maximums}.
 * Both arrays must have the same length.
 */
private static Domain getDomain(ColumnDescriptor column, Type type, List<Object> minimums, List<Object> maximums, boolean hasNullValue, DateTimeZone timeZone) {
    checkArgument(minimums.size() == maximums.size(), "Expected minimums and maximums to have the same size");
    if (type.equals(BOOLEAN)) {
        boolean hasTrueValues = minimums.stream().anyMatch(value -> (boolean) value) || maximums.stream().anyMatch(value -> (boolean) value);
        boolean hasFalseValues = minimums.stream().anyMatch(value -> !(boolean) value) || maximums.stream().anyMatch(value -> !(boolean) value);
        if (hasTrueValues && hasFalseValues) {
            return Domain.all(type);
        }
        if (hasTrueValues) {
            return Domain.create(ValueSet.of(type, true), hasNullValue);
        }
        if (hasFalseValues) {
            return Domain.create(ValueSet.of(type, false), hasNullValue);
        }
        // All nulls case is handled earlier
        throw new VerifyException("Impossible boolean statistics");
    }
    if (type.equals(BIGINT) || type.equals(INTEGER) || type.equals(DATE) || type.equals(SMALLINT) || type.equals(TINYINT)) {
        List<Range> ranges = new ArrayList<>();
        for (int i = 0; i < minimums.size(); i++) {
            long min = asLong(minimums.get(i));
            long max = asLong(maximums.get(i));
            if (isStatisticsOverflow(type, min, max)) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            ranges.add(Range.range(type, min, true, max, true));
        }
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    if (type instanceof DecimalType) {
        DecimalType decimalType = (DecimalType) type;
        List<Range> ranges = new ArrayList<>();
        if (decimalType.isShort()) {
            for (int i = 0; i < minimums.size(); i++) {
                Object min = minimums.get(i);
                Object max = maximums.get(i);
                long minValue = min instanceof Binary ? getShortDecimalValue(((Binary) min).getBytes()) : asLong(min);
                long maxValue = min instanceof Binary ? getShortDecimalValue(((Binary) max).getBytes()) : asLong(max);
                if (isStatisticsOverflow(type, minValue, maxValue)) {
                    return Domain.create(ValueSet.all(type), hasNullValue);
                }
                ranges.add(Range.range(type, minValue, true, maxValue, true));
            }
        } else {
            for (int i = 0; i < minimums.size(); i++) {
                Int128 min = Int128.fromBigEndian(((Binary) minimums.get(i)).getBytes());
                Int128 max = Int128.fromBigEndian(((Binary) maximums.get(i)).getBytes());
                ranges.add(Range.range(type, min, true, max, true));
            }
        }
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    if (type.equals(REAL)) {
        List<Range> ranges = new ArrayList<>();
        for (int i = 0; i < minimums.size(); i++) {
            Float min = (Float) minimums.get(i);
            Float max = (Float) maximums.get(i);
            if (min.isNaN() || max.isNaN()) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            ranges.add(Range.range(type, (long) floatToRawIntBits(min), true, (long) floatToRawIntBits(max), true));
        }
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    if (type.equals(DOUBLE)) {
        List<Range> ranges = new ArrayList<>();
        for (int i = 0; i < minimums.size(); i++) {
            Double min = (Double) minimums.get(i);
            Double max = (Double) maximums.get(i);
            if (min.isNaN() || max.isNaN()) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            ranges.add(Range.range(type, min, true, max, true));
        }
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    if (type instanceof VarcharType) {
        List<Range> ranges = new ArrayList<>();
        for (int i = 0; i < minimums.size(); i++) {
            Slice min = Slices.wrappedBuffer(((Binary) minimums.get(i)).toByteBuffer());
            Slice max = Slices.wrappedBuffer(((Binary) maximums.get(i)).toByteBuffer());
            ranges.add(Range.range(type, min, true, max, true));
        }
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    if (type instanceof TimestampType) {
        if (column.getPrimitiveType().getPrimitiveTypeName().equals(INT96)) {
            TrinoTimestampEncoder<?> timestampEncoder = createTimestampEncoder((TimestampType) type, timeZone);
            List<Object> values = new ArrayList<>();
            for (int i = 0; i < minimums.size(); i++) {
                Object min = minimums.get(i);
                Object max = maximums.get(i);
                // available and valid in that special case
                if (!(min instanceof Binary) || !(max instanceof Binary) || !min.equals(max)) {
                    return Domain.create(ValueSet.all(type), hasNullValue);
                }
                values.add(timestampEncoder.getTimestamp(decodeInt96Timestamp((Binary) min)));
            }
            return Domain.multipleValues(type, values, hasNullValue);
        }
        if (column.getPrimitiveType().getPrimitiveTypeName().equals(INT64)) {
            LogicalTypeAnnotation logicalTypeAnnotation = column.getPrimitiveType().getLogicalTypeAnnotation();
            if (!(logicalTypeAnnotation instanceof TimestampLogicalTypeAnnotation)) {
                // Invalid statistics. Unit and UTC adjustment are not known
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            TimestampLogicalTypeAnnotation timestampTypeAnnotation = (TimestampLogicalTypeAnnotation) logicalTypeAnnotation;
            // Bail out if the precision is not known
            if (timestampTypeAnnotation.getUnit() == null) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            TrinoTimestampEncoder<?> timestampEncoder = createTimestampEncoder((TimestampType) type, DateTimeZone.UTC);
            List<Range> ranges = new ArrayList<>();
            for (int i = 0; i < minimums.size(); i++) {
                long min = (long) minimums.get(i);
                long max = (long) maximums.get(i);
                ranges.add(Range.range(type, timestampEncoder.getTimestamp(decodeInt64Timestamp(min, timestampTypeAnnotation.getUnit())), true, timestampEncoder.getTimestamp(decodeInt64Timestamp(max, timestampTypeAnnotation.getUnit())), true));
            }
            return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
        }
    }
    return Domain.create(ValueSet.all(type), hasNullValue);
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) DateTimeZone(org.joda.time.DateTimeZone) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) ColumnIndex(org.apache.parquet.internal.column.columnindex.ColumnIndex) FilterApi(org.apache.parquet.filter2.predicate.FilterApi) ByteBuffer(java.nio.ByteBuffer) INT96(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96) TrinoTimestampEncoderFactory.createTimestampEncoder(io.trino.plugin.base.type.TrinoTimestampEncoderFactory.createTimestampEncoder) ParquetDataSourceId(io.trino.parquet.ParquetDataSourceId) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Slices(io.airlift.slice.Slices) Map(java.util.Map) INTEGER(io.trino.spi.type.IntegerType.INTEGER) UserDefinedPredicate(org.apache.parquet.filter2.predicate.UserDefinedPredicate) SMALLINT(io.trino.spi.type.SmallintType.SMALLINT) PredicateUtils.isStatisticsOverflow(io.trino.parquet.predicate.PredicateUtils.isStatisticsOverflow) Range(io.trino.spi.predicate.Range) TimestampLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) Domain(io.trino.spi.predicate.Domain) ParquetTimestampUtils.decodeInt96Timestamp(io.trino.parquet.ParquetTimestampUtils.decodeInt96Timestamp) Dictionary(io.trino.parquet.dictionary.Dictionary) ColumnIndexStore(org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore) String.format(java.lang.String.format) ValueSet(io.trino.spi.predicate.ValueSet) Binary(org.apache.parquet.io.api.Binary) Serializable(java.io.Serializable) INT64(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64) List(java.util.List) TrinoTimestampEncoder(io.trino.plugin.base.type.TrinoTimestampEncoder) BIGINT(io.trino.spi.type.BigintType.BIGINT) LITTLE_ENDIAN(java.nio.ByteOrder.LITTLE_ENDIAN) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) Optional(java.util.Optional) Operators(org.apache.parquet.filter2.predicate.Operators) DecimalType(io.trino.spi.type.DecimalType) DATE(io.trino.spi.type.DateType.DATE) REAL(io.trino.spi.type.RealType.REAL) ParquetCorruptionException(io.trino.parquet.ParquetCorruptionException) ColumnPath(org.apache.parquet.hadoop.metadata.ColumnPath) Slice(io.airlift.slice.Slice) Type(io.trino.spi.type.Type) BOOLEAN(io.trino.spi.type.BooleanType.BOOLEAN) LogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation) Function(java.util.function.Function) TimestampType(io.trino.spi.type.TimestampType) ArrayList(java.util.ArrayList) VarcharType(io.trino.spi.type.VarcharType) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) ImmutableList(com.google.common.collect.ImmutableList) Objects.requireNonNull(java.util.Objects.requireNonNull) RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) VerifyException(com.google.common.base.VerifyException) Int128(io.trino.spi.type.Int128) Statistics(org.apache.parquet.column.statistics.Statistics) DictionaryPage(io.trino.parquet.DictionaryPage) ParquetTimestampUtils.decodeInt64Timestamp(io.trino.parquet.ParquetTimestampUtils.decodeInt64Timestamp) ParquetTypeUtils.getShortDecimalValue(io.trino.parquet.ParquetTypeUtils.getShortDecimalValue) TupleDomain(io.trino.spi.predicate.TupleDomain) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) VisibleForTesting(com.google.common.annotations.VisibleForTesting) TINYINT(io.trino.spi.type.TinyintType.TINYINT) VarcharType(io.trino.spi.type.VarcharType) ArrayList(java.util.ArrayList) Range(io.trino.spi.predicate.Range) VerifyException(com.google.common.base.VerifyException) TimestampLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) LogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation) Slice(io.airlift.slice.Slice) TimestampLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) DecimalType(io.trino.spi.type.DecimalType) TimestampType(io.trino.spi.type.TimestampType) Binary(org.apache.parquet.io.api.Binary) Int128(io.trino.spi.type.Int128)

Aggregations

BIGINT (io.trino.spi.type.BigintType.BIGINT)6 BOOLEAN (io.trino.spi.type.BooleanType.BOOLEAN)6 DecimalType (io.trino.spi.type.DecimalType)6 DOUBLE (io.trino.spi.type.DoubleType.DOUBLE)6 INTEGER (io.trino.spi.type.IntegerType.INTEGER)6 REAL (io.trino.spi.type.RealType.REAL)6 SMALLINT (io.trino.spi.type.SmallintType.SMALLINT)6 TINYINT (io.trino.spi.type.TinyintType.TINYINT)6 Type (io.trino.spi.type.Type)6 VarcharType (io.trino.spi.type.VarcharType)6 List (java.util.List)6 ImmutableList (com.google.common.collect.ImmutableList)5 Slice (io.airlift.slice.Slice)5 DATE (io.trino.spi.type.DateType.DATE)5 Int128 (io.trino.spi.type.Int128)5 Slices (io.airlift.slice.Slices)4 CharType (io.trino.spi.type.CharType)4 TimestampType (io.trino.spi.type.TimestampType)4 VARBINARY (io.trino.spi.type.VarbinaryType.VARBINARY)4 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)3