Search in sources :

Example 21 with INTEGER

use of io.trino.spi.type.IntegerType.INTEGER in project trino by trinodb.

the class TestStatistics method testFromComputedStatistics.

/**
 * Checks that {@code Statistics.fromComputedStatistics} turns a set of computed column
 * statistics into one {@code HiveColumnStatistics} entry per column, using an INTEGER
 * column ({@code a_column}) and a VARCHAR column ({@code b_column}) as inputs.
 */
@Test
public void testFromComputedStatistics() {
    // Each statistic value is handed over as a single-position BIGINT block.
    Function<Integer, Block> bigintBlockOf = value -> BigintType.BIGINT.createBlockBuilder(null, 1).writeLong(value).build();

    ComputedStatistics computed = ComputedStatistics.builder(ImmutableList.of(), ImmutableList.of())
            .addTableStatistic(TableStatisticType.ROW_COUNT, bigintBlockOf.apply(5))
            .addColumnStatistic(new ColumnStatisticMetadata("a_column", MIN_VALUE), bigintBlockOf.apply(1))
            .addColumnStatistic(new ColumnStatisticMetadata("a_column", MAX_VALUE), bigintBlockOf.apply(5))
            .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_DISTINCT_VALUES), bigintBlockOf.apply(5))
            .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_NON_NULL_VALUES), bigintBlockOf.apply(5))
            .addColumnStatistic(new ColumnStatisticMetadata("b_column", NUMBER_OF_NON_NULL_VALUES), bigintBlockOf.apply(4))
            .build();

    Map<String, Type> typesByColumn = ImmutableMap.of("a_column", INTEGER, "b_column", VARCHAR);
    Map<String, HiveColumnStatistics> actual = Statistics.fromComputedStatistics(computed.getColumnStatistics(), typesByColumn, 5);

    // a_column: range [1, 5], 5 distinct values, no nulls expected.
    HiveColumnStatistics expectedForA = HiveColumnStatistics.builder()
            .setIntegerStatistics(new IntegerStatistics(OptionalLong.of(1), OptionalLong.of(5)))
            .setNullsCount(0)
            .setDistinctValuesCount(5)
            .build();
    // b_column: only a nulls count of 1 is expected (row count 5, 4 non-null values supplied).
    HiveColumnStatistics expectedForB = HiveColumnStatistics.builder()
            .setNullsCount(1)
            .build();

    assertThat(actual).hasSize(2);
    assertThat(actual.keySet()).contains("a_column", "b_column");
    assertThat(actual.get("a_column")).isEqualTo(expectedForA);
    assertThat(actual.get("b_column")).isEqualTo(expectedForB);
}
Also used : DateStatistics(io.trino.plugin.hive.metastore.DateStatistics) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) Type(io.trino.spi.type.Type) OptionalDouble(java.util.OptionalDouble) HiveColumnStatistics.createBinaryColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createBinaryColumnStatistics) Test(org.testng.annotations.Test) HiveBasicStatistics.createEmptyStatistics(io.trino.plugin.hive.HiveBasicStatistics.createEmptyStatistics) HiveBasicStatistics.createZeroStatistics(io.trino.plugin.hive.HiveBasicStatistics.createZeroStatistics) NUMBER_OF_DISTINCT_VALUES(io.trino.spi.statistics.ColumnStatisticType.NUMBER_OF_DISTINCT_VALUES) ColumnStatisticMetadata(io.trino.spi.statistics.ColumnStatisticMetadata) Function(java.util.function.Function) Float.floatToIntBits(java.lang.Float.floatToIntBits) ADD(io.trino.plugin.hive.util.Statistics.ReduceOperator.ADD) Utils.nativeValueToBlock(io.trino.spi.predicate.Utils.nativeValueToBlock) ColumnStatisticType(io.trino.spi.statistics.ColumnStatisticType) BigintType(io.trino.spi.type.BigintType) BigDecimal(java.math.BigDecimal) OptionalLong(java.util.OptionalLong) Statistics.merge(io.trino.plugin.hive.util.Statistics.merge) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) ImmutableList(com.google.common.collect.ImmutableList) Block(io.trino.spi.block.Block) Map(java.util.Map) Statistics.createHiveColumnStatistics(io.trino.plugin.hive.util.Statistics.createHiveColumnStatistics) INTEGER(io.trino.spi.type.IntegerType.INTEGER) BooleanStatistics(io.trino.plugin.hive.metastore.BooleanStatistics) Statistics.reduce(io.trino.plugin.hive.util.Statistics.reduce) NUMBER_OF_NON_NULL_VALUES(io.trino.spi.statistics.ColumnStatisticType.NUMBER_OF_NON_NULL_VALUES) IntegerStatistics(io.trino.plugin.hive.metastore.IntegerStatistics) HiveBasicStatistics(io.trino.plugin.hive.HiveBasicStatistics) 
ImmutableMap(com.google.common.collect.ImmutableMap) HiveColumnStatistics.createIntegerColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createIntegerColumnStatistics) ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) DecimalStatistics(io.trino.plugin.hive.metastore.DecimalStatistics) DoubleStatistics(io.trino.plugin.hive.metastore.DoubleStatistics) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) HiveColumnStatistics.createBooleanColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createBooleanColumnStatistics) MAX_VALUE(io.trino.spi.statistics.ColumnStatisticType.MAX_VALUE) LocalDate(java.time.LocalDate) SUBTRACT(io.trino.plugin.hive.util.Statistics.ReduceOperator.SUBTRACT) Optional(java.util.Optional) TableStatisticType(io.trino.spi.statistics.TableStatisticType) MIN_VALUE(io.trino.spi.statistics.ColumnStatisticType.MIN_VALUE) REAL(io.trino.spi.type.RealType.REAL) ColumnStatisticMetadata(io.trino.spi.statistics.ColumnStatisticMetadata) Type(io.trino.spi.type.Type) ColumnStatisticType(io.trino.spi.statistics.ColumnStatisticType) BigintType(io.trino.spi.type.BigintType) TableStatisticType(io.trino.spi.statistics.TableStatisticType) ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) Utils.nativeValueToBlock(io.trino.spi.predicate.Utils.nativeValueToBlock) Block(io.trino.spi.block.Block) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) Statistics.createHiveColumnStatistics(io.trino.plugin.hive.util.Statistics.createHiveColumnStatistics) IntegerStatistics(io.trino.plugin.hive.metastore.IntegerStatistics) Test(org.testng.annotations.Test)

Example 22 with INTEGER

use of io.trino.spi.type.IntegerType.INTEGER in project trino by trinodb.

the class PagesSpatialIndexSupplier method buildRTree.

/**
 * Builds an {@link STRtree} over the geometries referenced by {@code addresses}.
 *
 * <p>Each address is decoded into a block index and a position within that block. Rows whose
 * geometry is null or empty, or whose radius is negative, are left out of the tree. Every
 * remaining geometry is inserted together with its partition number ({@code -1} when no
 * partition channel is given) and its ordinal in {@code addresses}.
 *
 * @param addresses encoded page addresses, one per candidate row
 * @param channels blocks per channel, indexed as {@code channels.get(channel).get(blockIndex)}
 * @param geometryChannel channel holding the serialized geometries
 * @param radiusChannel optional channel holding a per-row radius; absent means radius {@code 0.0}
 * @param partitionChannel optional channel holding a per-row partition number
 * @return the built tree
 */
private static STRtree buildRTree(LongArrayList addresses, List<List<Block>> channels, int geometryChannel, Optional<Integer> radiusChannel, Optional<Integer> partitionChannel) {
    STRtree tree = new STRtree();
    Operator relate = OperatorFactoryLocal.getInstance().getOperator(Operator.Type.Relate);

    for (int position = 0; position < addresses.size(); position++) {
        long address = addresses.getLong(position);
        int blockIndex = decodeSliceIndex(address);
        int blockPosition = decodePosition(address);

        Block geometryBlock = channels.get(geometryChannel).get(blockIndex);
        // TODO Consider pushing is-null and is-empty checks into a filter below the join
        if (geometryBlock.isNull(blockPosition)) {
            continue;
        }

        int sliceLength = geometryBlock.getSliceLength(blockPosition);
        OGCGeometry geometry = deserialize(geometryBlock.getSlice(blockPosition, 0, sliceLength));
        verifyNotNull(geometry);
        if (geometry.isEmpty()) {
            continue;
        }

        double radius = 0.0;
        if (radiusChannel.isPresent()) {
            Block radiusBlock = channels.get(radiusChannel.get()).get(blockIndex);
            radius = DOUBLE.getDouble(radiusBlock, blockPosition);
        }
        if (radius < 0) {
            continue;
        }

        // A supplied radiusChannel means a distance query, for which acceleration won't help,
        // so geometries are only accelerated when no radius channel is present.
        if (radiusChannel.isEmpty()) {
            accelerateGeometry(geometry, relate);
        }

        int partition = partitionChannel.isPresent()
                ? toIntExact(INTEGER.getLong(channels.get(partitionChannel.get()).get(blockIndex), blockPosition))
                : -1;

        rtreeInsert:
        tree.insert(getEnvelope(geometry, radius), new GeometryWithPosition(geometry, partition, position));
    }

    tree.build();
    return tree;
}
Also used : Operator(com.esri.core.geometry.Operator) OGCGeometry(com.esri.core.geometry.ogc.OGCGeometry) Verify.verifyNotNull(com.google.common.base.Verify.verifyNotNull) GeometrySerde.deserialize(io.trino.geospatial.serde.GeometrySerde.deserialize) Slice(io.airlift.slice.Slice) Rectangle(io.trino.geospatial.Rectangle) JoinFilterFunctionCompiler(io.trino.sql.gen.JoinFilterFunctionCompiler) OGCGeometry(com.esri.core.geometry.ogc.OGCGeometry) LongArrayList(it.unimi.dsi.fastutil.longs.LongArrayList) Type(io.trino.spi.type.Type) GeometryWithPosition(io.trino.operator.PagesRTreeIndex.GeometryWithPosition) GeometryCursor(com.esri.core.geometry.GeometryCursor) Supplier(java.util.function.Supplier) Block(io.trino.spi.block.Block) Map(java.util.Map) Math.toIntExact(java.lang.Math.toIntExact) INTEGER(io.trino.spi.type.IntegerType.INTEGER) Operator(com.esri.core.geometry.Operator) OperatorFactoryLocal(com.esri.core.geometry.OperatorFactoryLocal) SyntheticAddress.decodePosition(io.trino.operator.SyntheticAddress.decodePosition) ItemBoundable(org.locationtech.jts.index.strtree.ItemBoundable) SpatialPredicate(io.trino.operator.SpatialIndexBuilderOperator.SpatialPredicate) EMPTY_INDEX(io.trino.operator.PagesSpatialIndex.EMPTY_INDEX) Geometry(com.esri.core.geometry.Geometry) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) DataSize(io.airlift.units.DataSize) AbstractNode(org.locationtech.jts.index.strtree.AbstractNode) List(java.util.List) ClassLayout(org.openjdk.jol.info.ClassLayout) Optional(java.util.Optional) SyntheticAddress.decodeSliceIndex(io.trino.operator.SyntheticAddress.decodeSliceIndex) Envelope(org.locationtech.jts.geom.Envelope) Session(io.trino.Session) STRtree(org.locationtech.jts.index.strtree.STRtree) Slice(io.airlift.slice.Slice) GeometryWithPosition(io.trino.operator.PagesRTreeIndex.GeometryWithPosition) STRtree(org.locationtech.jts.index.strtree.STRtree) Block(io.trino.spi.block.Block)

Example 23 with INTEGER

use of io.trino.spi.type.IntegerType.INTEGER in project trino by trinodb.

the class TestSignatureBinder method testFunction.

/**
 * Exercises signature binding for signatures that take function-typed (lambda) arguments.
 *
 * <p>Each section builds a {@code Signature} and then asserts whether a given list of actual
 * argument types binds to it, and, where it does, which type variables are produced.
 * Arguments are supplied either as concrete types (including {@code FunctionType}) or as
 * {@code TypeSignatureProvider}s that compute a function type from the parameter types.
 */
@Test
public void testFunction() {
    // Signature with a boolean return type and a single function-typed argument built from INTEGER signatures.
    Signature simple = functionSignature().returnType(BOOLEAN.getTypeSignature()).argumentTypes(functionType(INTEGER.getTypeSignature(), INTEGER.getTypeSignature())).build();
    // A plain INTEGER is not accepted where a function is expected; a matching FunctionType is.
    assertThat(simple).boundTo(INTEGER).fails();
    assertThat(simple).boundTo(new FunctionType(ImmutableList.of(INTEGER), INTEGER)).succeeds();
    // TODO: Support coercion of return type of lambda
    // Until then, lambdas returning SMALLINT or BIGINT fail to bind even with coercion enabled.
    assertThat(simple).boundTo(new FunctionType(ImmutableList.of(INTEGER), SMALLINT)).withCoercion().fails();
    assertThat(simple).boundTo(new FunctionType(ImmutableList.of(INTEGER), BIGINT)).withCoercion().fails();
    // Signature with type variables T, U, V: arguments are T, a function over (T, U) and a function over (U, V); returns V.
    Signature applyTwice = functionSignature().returnType(new TypeSignature("V")).argumentTypes(new TypeSignature("T"), functionType(new TypeSignature("T"), new TypeSignature("U")), functionType(new TypeSignature("U"), new TypeSignature("V"))).typeVariableConstraints(typeVariable("T"), typeVariable("U"), typeVariable("V")).build();
    // Three non-function arguments do not bind.
    assertThat(applyTwice).boundTo(INTEGER, INTEGER, INTEGER).fails();
    // Concrete FunctionType arguments bind T=INTEGER, U=VARCHAR, V=DOUBLE.
    assertThat(applyTwice).boundTo(INTEGER, new FunctionType(ImmutableList.of(INTEGER), VARCHAR), new FunctionType(ImmutableList.of(VARCHAR), DOUBLE)).produces(new BoundVariables().setTypeVariable("T", INTEGER).setTypeVariable("U", VARCHAR).setTypeVariable("V", DOUBLE));
    // The same binding is expected when the function types come from TypeSignatureProviders.
    assertThat(applyTwice).boundTo(INTEGER, new TypeSignatureProvider(functionArgumentTypes -> new FunctionType(ImmutableList.of(INTEGER), VARCHAR).getTypeSignature()), new TypeSignatureProvider(functionArgumentTypes -> new FunctionType(ImmutableList.of(VARCHAR), DOUBLE).getTypeSignature())).produces(new BoundVariables().setTypeVariable("T", INTEGER).setTypeVariable("U", VARCHAR).setTypeVariable("V", DOUBLE));
    assertThat(applyTwice).boundTo(// pass function argument to non-function position of a function
    new TypeSignatureProvider(functionArgumentTypes -> new FunctionType(ImmutableList.of(INTEGER), VARCHAR).getTypeSignature()), new TypeSignatureProvider(functionArgumentTypes -> new FunctionType(ImmutableList.of(INTEGER), VARCHAR).getTypeSignature()), new TypeSignatureProvider(functionArgumentTypes -> new FunctionType(ImmutableList.of(VARCHAR), DOUBLE).getTypeSignature())).fails();
    assertThat(applyTwice).boundTo(new TypeSignatureProvider(functionArgumentTypes -> new FunctionType(ImmutableList.of(INTEGER), VARCHAR).getTypeSignature()), // pass non-function argument to function position of a function
    INTEGER, new TypeSignatureProvider(functionArgumentTypes -> new FunctionType(ImmutableList.of(VARCHAR), DOUBLE).getTypeSignature())).fails();
    // Signature over array(T) and a function from T to array(T), returning array(T).
    Signature flatMap = functionSignature().returnType(arrayType(new TypeSignature("T"))).argumentTypes(arrayType(new TypeSignature("T")), functionType(new TypeSignature("T"), arrayType(new TypeSignature("T")))).typeVariableConstraints(typeVariable("T")).build();
    assertThat(flatMap).boundTo(new ArrayType(INTEGER), new FunctionType(ImmutableList.of(INTEGER), new ArrayType(INTEGER))).produces(new BoundVariables().setTypeVariable("T", INTEGER));
    // Variable-arity signature: T followed by function arguments over (T, T).
    Signature varargApply = functionSignature().returnType(new TypeSignature("T")).argumentTypes(new TypeSignature("T"), functionType(new TypeSignature("T"), new TypeSignature("T"))).typeVariableConstraints(typeVariable("T")).setVariableArity(true).build();
    // Several identical INTEGER -> INTEGER functions bind with T=INTEGER.
    assertThat(varargApply).boundTo(INTEGER, new FunctionType(ImmutableList.of(INTEGER), INTEGER), new FunctionType(ImmutableList.of(INTEGER), INTEGER), new FunctionType(ImmutableList.of(INTEGER), INTEGER)).produces(new BoundVariables().setTypeVariable("T", INTEGER));
    // Function arguments that disagree on their types (INTEGER vs DOUBLE) do not bind.
    assertThat(varargApply).boundTo(INTEGER, new FunctionType(ImmutableList.of(INTEGER), INTEGER), new FunctionType(ImmutableList.of(INTEGER), DOUBLE), new FunctionType(ImmutableList.of(DOUBLE), DOUBLE)).fails();
    // Signature taking T and a function over (T, T), returning T.
    Signature loop = functionSignature().returnType(new TypeSignature("T")).argumentTypes(new TypeSignature("T"), functionType(new TypeSignature("T"), new TypeSignature("T"))).typeVariableConstraints(typeVariable("T")).build();
    // An INTEGER argument with a lambda returning BIGINT fails without coercion and binds T=BIGINT with it.
    assertThat(loop).boundTo(INTEGER, new TypeSignatureProvider(paramTypes -> new FunctionType(paramTypes, BIGINT).getTypeSignature())).fails();
    assertThat(loop).boundTo(INTEGER, new TypeSignatureProvider(paramTypes -> new FunctionType(paramTypes, BIGINT).getTypeSignature())).withCoercion().produces(new BoundVariables().setTypeVariable("T", BIGINT));
    // TODO: Support coercion of return type of lambda
    assertThat(loop).withCoercion().boundTo(INTEGER, new TypeSignatureProvider(paramTypes -> new FunctionType(paramTypes, SMALLINT).getTypeSignature())).fails();
    // TODO: Support coercion of return type of lambda
    // Without coercion support for return type of lambda, the return type of lambda must be `varchar(x)` to avoid need for coercions.
    Signature varcharApply = functionSignature().returnType(VARCHAR.getTypeSignature()).argumentTypes(VARCHAR.getTypeSignature(), functionType(VARCHAR.getTypeSignature(), new TypeSignature("varchar", TypeSignatureParameter.typeVariable("x")))).build();
    assertThat(varcharApply).withCoercion().boundTo(createVarcharType(10), new TypeSignatureProvider(paramTypes -> new FunctionType(paramTypes, createVarcharType(1)).getTypeSignature())).succeeds();
    // Signature over array(T) and a function from T to an orderable type E, returning array(T).
    Signature sortByKey = functionSignature().returnType(arrayType(new TypeSignature("T"))).argumentTypes(arrayType(new TypeSignature("T")), functionType(new TypeSignature("T"), new TypeSignature("E"))).typeVariableConstraints(typeVariable("T"), orderableTypeParameter("E")).build();
    assertThat(sortByKey).boundTo(new ArrayType(INTEGER), new TypeSignatureProvider(paramTypes -> new FunctionType(paramTypes, VARCHAR).getTypeSignature())).produces(new BoundVariables().setTypeVariable("T", INTEGER).setTypeVariable("E", VARCHAR));
}
Also used : TypeSignatureProvider(io.trino.sql.analyzer.TypeSignatureProvider) ArrayType(io.trino.spi.type.ArrayType) TypeSignatureProvider.fromTypes(io.trino.sql.analyzer.TypeSignatureProvider.fromTypes) TypeSignatureTranslator.parseTypeSignature(io.trino.sql.analyzer.TypeSignatureTranslator.parseTypeSignature) UNKNOWN(io.trino.type.UnknownType.UNKNOWN) Test(org.testng.annotations.Test) TypeSignatureParameter.anonymousField(io.trino.spi.type.TypeSignatureParameter.anonymousField) FunctionType(io.trino.type.FunctionType) TEST_SESSION(io.trino.SessionTestUtils.TEST_SESSION) Signature.castableFromTypeParameter(io.trino.metadata.Signature.castableFromTypeParameter) INTEGER(io.trino.spi.type.IntegerType.INTEGER) Assert.assertFalse(org.testng.Assert.assertFalse) SMALLINT(io.trino.spi.type.SmallintType.SMALLINT) TypeSignature(io.trino.spi.type.TypeSignature) RowType(io.trino.spi.type.RowType) ImmutableSet(com.google.common.collect.ImmutableSet) Signature.castableToTypeParameter(io.trino.metadata.Signature.castableToTypeParameter) ArrayType(io.trino.spi.type.ArrayType) Assert.assertNotNull(org.testng.Assert.assertNotNull) String.format(java.lang.String.format) Signature.orderableTypeParameter(io.trino.metadata.Signature.orderableTypeParameter) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) Signature.typeVariable(io.trino.metadata.Signature.typeVariable) Optional(java.util.Optional) TypeSignature.mapType(io.trino.spi.type.TypeSignature.mapType) TypeSignatureParameter(io.trino.spi.type.TypeSignatureParameter) Signature.comparableTypeParameter(io.trino.metadata.Signature.comparableTypeParameter) TIMESTAMP_MILLIS(io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS) Type(io.trino.spi.type.Type) BOOLEAN(io.trino.spi.type.BooleanType.BOOLEAN) Assert.assertEquals(org.testng.Assert.assertEquals) HYPER_LOG_LOG(io.trino.spi.type.HyperLogLogType.HYPER_LOG_LOG) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) ImmutableList(com.google.common.collect.ImmutableList) 
Assertions.assertThatThrownBy(org.assertj.core.api.Assertions.assertThatThrownBy) Objects.requireNonNull(java.util.Objects.requireNonNull) VARBINARY(io.trino.spi.type.VarbinaryType.VARBINARY) TypeSignature.functionType(io.trino.spi.type.TypeSignature.functionType) TypeSignatureParameter.numericParameter(io.trino.spi.type.TypeSignatureParameter.numericParameter) TypeSignature.arrayType(io.trino.spi.type.TypeSignature.arrayType) DecimalType.createDecimalType(io.trino.spi.type.DecimalType.createDecimalType) Assert.fail(org.testng.Assert.fail) Signature.withVariadicBound(io.trino.metadata.Signature.withVariadicBound) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) TypeSignature.rowType(io.trino.spi.type.TypeSignature.rowType) PLANNER_CONTEXT(io.trino.sql.planner.TestingPlannerContext.PLANNER_CONTEXT) TypeSignatureProvider(io.trino.sql.analyzer.TypeSignatureProvider) Assert.assertTrue(org.testng.Assert.assertTrue) TINYINT(io.trino.spi.type.TinyintType.TINYINT) VarcharType.createVarcharType(io.trino.spi.type.VarcharType.createVarcharType) JSON(io.trino.type.JsonType.JSON) TypeSignatureTranslator.parseTypeSignature(io.trino.sql.analyzer.TypeSignatureTranslator.parseTypeSignature) TypeSignature(io.trino.spi.type.TypeSignature) TypeSignatureTranslator.parseTypeSignature(io.trino.sql.analyzer.TypeSignatureTranslator.parseTypeSignature) TypeSignature(io.trino.spi.type.TypeSignature) FunctionType(io.trino.type.FunctionType) Test(org.testng.annotations.Test)

Example 24 with INTEGER

use of io.trino.spi.type.IntegerType.INTEGER in project trino by trinodb.

the class TestHivePageSink method writeTestFile.

/**
 * Writes generated line-item rows through a Hive page sink into {@code outputPath}, reads the
 * single resulting data file back through a page source, and asserts that the rows read equal
 * the rows written and that the recorded input page size matches the written page.
 *
 * @param config Hive configuration used for the sink, the source and the session
 * @param metastore metastore handed to the page sink
 * @param outputPath local directory the sink writes into
 * @return the length in bytes of the single non-{@code .crc} file found in {@code outputPath}
 */
private static long writeTestFile(HiveConfig config, HiveMetastore metastore, String outputPath) {
    HiveTransactionHandle transaction = new HiveTransactionHandle(false);
    HiveWriterStats stats = new HiveWriterStats();
    ConnectorPageSink sink = createPageSink(transaction, config, metastore, new Path("file:///" + outputPath), stats);

    List<LineItemColumn> columns = getTestColumns();
    List<Type> columnTypes = columns.stream()
            .map(column -> TestHivePageSink.getHiveType(column.getType()).getType(TESTING_TYPE_MANAGER))
            .collect(toList());

    // Fill one page with generated rows; the loop stops as soon as the counter reaches NUM_ROWS.
    PageBuilder builder = new PageBuilder(columnTypes);
    int rowCounter = 0;
    for (LineItem lineItem : new LineItemGenerator(0.01, 1, 1)) {
        if (++rowCounter >= NUM_ROWS) {
            break;
        }
        builder.declarePosition();
        for (int channel = 0; channel < columns.size(); channel++) {
            LineItemColumn column = columns.get(channel);
            BlockBuilder output = builder.getBlockBuilder(channel);
            switch(column.getType().getBase()) {
                case IDENTIFIER:
                    BIGINT.writeLong(output, column.getIdentifier(lineItem));
                    break;
                case INTEGER:
                    INTEGER.writeLong(output, column.getInteger(lineItem));
                    break;
                case DATE:
                    DATE.writeLong(output, column.getDate(lineItem));
                    break;
                case DOUBLE:
                    DOUBLE.writeDouble(output, column.getDouble(lineItem));
                    break;
                case VARCHAR:
                    createUnboundedVarcharType().writeSlice(output, Slices.utf8Slice(column.getString(lineItem)));
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported type " + column.getType());
            }
        }
    }

    // Push the page through the sink and wait for the write to complete.
    Page writtenPage = builder.build();
    sink.appendPage(writtenPage);
    getFutureValue(sink.finish());

    // Exactly one data file is expected once checksum (.crc) files are filtered out.
    File[] dataFiles = new File(outputPath).listFiles((dir, name) -> !name.endsWith(".crc"));
    File dataFile = getOnlyElement(ImmutableList.copyOf(dataFiles));
    long fileLength = dataFile.length();

    // Read everything back, skipping null pages returned while the source is not yet finished.
    ConnectorPageSource source = createPageSource(transaction, config, dataFile);
    List<Page> readPages = new ArrayList<>();
    while (!source.isFinished()) {
        Page candidate = source.getNextPage();
        if (candidate == null) {
            continue;
        }
        readPages.add(candidate.getLoadedPage());
    }

    MaterializedResult expected = toMaterializedResult(getHiveSession(config), columnTypes, ImmutableList.of(writtenPage));
    MaterializedResult actual = toMaterializedResult(getHiveSession(config), columnTypes, readPages);
    assertEquals(actual, expected);
    assertEquals(round(stats.getInputPageSizeInBytes().getAllTime().getMax()), writtenPage.getRetainedSizeInBytes());
    return fileLength;
}
Also used : Path(org.apache.hadoop.fs.Path) MoreFiles.deleteRecursively(com.google.common.io.MoreFiles.deleteRecursively) MaterializedResult(io.trino.testing.MaterializedResult) Assertions.assertGreaterThan(io.airlift.testing.Assertions.assertGreaterThan) Test(org.testng.annotations.Test) NO_ACID_TRANSACTION(io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION) TypeOperators(io.trino.spi.type.TypeOperators) SplitWeight(io.trino.spi.SplitWeight) HiveMetastoreFactory(io.trino.plugin.hive.metastore.HiveMetastoreFactory) TpchColumnType(io.trino.tpch.TpchColumnType) Math.round(java.lang.Math.round) Slices(io.airlift.slice.Slices) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Path(org.apache.hadoop.fs.Path) LineItemColumn(io.trino.tpch.LineItemColumn) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) INTEGER(io.trino.spi.type.IntegerType.INTEGER) Assert.assertEquals(io.trino.testing.assertions.Assert.assertEquals) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) HiveTestUtils.getDefaultHiveRecordCursorProviders(io.trino.plugin.hive.HiveTestUtils.getDefaultHiveRecordCursorProviders) TestingNodeManager(io.trino.testing.TestingNodeManager) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) HIVE_DATE(io.trino.plugin.hive.HiveType.HIVE_DATE) HDFS_ENVIRONMENT(io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT) SchemaTableName(io.trino.spi.connector.SchemaTableName) String.format(java.lang.String.format) LineItemGenerator(io.trino.tpch.LineItemGenerator) LineItem(io.trino.tpch.LineItem) List(java.util.List) Stream(java.util.stream.Stream) BIGINT(io.trino.spi.type.BigintType.BIGINT) DynamicFilter(io.trino.spi.connector.DynamicFilter) Optional(java.util.Optional) HivePageSinkMetadata(io.trino.plugin.hive.metastore.HivePageSinkMetadata) DATE(io.trino.spi.type.DateType.DATE) Joiner(com.google.common.base.Joiner) 
JsonCodec(io.airlift.json.JsonCodec) DIRECT_TO_TARGET_NEW_DIRECTORY(io.trino.plugin.hive.LocationHandle.WriteMode.DIRECT_TO_TARGET_NEW_DIRECTORY) HiveTestUtils.getDefaultHivePageSourceFactories(io.trino.plugin.hive.HiveTestUtils.getDefaultHivePageSourceFactories) HIVE_DOUBLE(io.trino.plugin.hive.HiveType.HIVE_DOUBLE) PageBuilder(io.trino.spi.PageBuilder) Type(io.trino.spi.type.Type) Page(io.trino.spi.Page) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) JoinCompiler(io.trino.sql.gen.JoinCompiler) OptionalInt(java.util.OptionalInt) GroupByHashPageIndexerFactory(io.trino.operator.GroupByHashPageIndexerFactory) ArrayList(java.util.ArrayList) HiveMetastore(io.trino.plugin.hive.metastore.HiveMetastore) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) ImmutableList(com.google.common.collect.ImmutableList) Files(com.google.common.io.Files) NONE(io.trino.plugin.hive.HiveCompressionCodec.NONE) HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) FileHiveMetastore.createTestingFileHiveMetastore(io.trino.plugin.hive.metastore.file.FileHiveMetastore.createTestingFileHiveMetastore) ConnectorPageSink(io.trino.spi.connector.ConnectorPageSink) BlockTypeOperators(io.trino.type.BlockTypeOperators) Properties(java.util.Properties) HIVE_LONG(io.trino.plugin.hive.HiveType.HIVE_LONG) HiveTestUtils.getDefaultHiveFileWriterFactories(io.trino.plugin.hive.HiveTestUtils.getDefaultHiveFileWriterFactories) TESTING_TYPE_MANAGER(io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) HiveTestUtils.getHiveSessionProperties(io.trino.plugin.hive.HiveTestUtils.getHiveSessionProperties) ConnectorSession(io.trino.spi.connector.ConnectorSession) MoreFutures.getFutureValue(io.airlift.concurrent.MoreFutures.getFutureValue) PAGE_SORTER(io.trino.plugin.hive.HiveTestUtils.PAGE_SORTER) File(java.io.File) 
HIVE_STRING(io.trino.plugin.hive.HiveType.HIVE_STRING) TpchColumnTypes(io.trino.tpch.TpchColumnTypes) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) HiveTestUtils.getHiveSession(io.trino.plugin.hive.HiveTestUtils.getHiveSession) HIVE_INT(io.trino.plugin.hive.HiveType.HIVE_INT) Collectors.toList(java.util.stream.Collectors.toList) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) Assert.assertTrue(org.testng.Assert.assertTrue) BlockBuilder(io.trino.spi.block.BlockBuilder) REGULAR(io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR) LineItemColumn(io.trino.tpch.LineItemColumn) ArrayList(java.util.ArrayList) LineItem(io.trino.tpch.LineItem) Page(io.trino.spi.Page) PageBuilder(io.trino.spi.PageBuilder) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) TpchColumnType(io.trino.tpch.TpchColumnType) Type(io.trino.spi.type.Type) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) ConnectorPageSink(io.trino.spi.connector.ConnectorPageSink) MaterializedResult(io.trino.testing.MaterializedResult) File(java.io.File) LineItemGenerator(io.trino.tpch.LineItemGenerator) BlockBuilder(io.trino.spi.block.BlockBuilder)

Example 25 with INTEGER

use of io.trino.spi.type.IntegerType.INTEGER in project trino by trinodb.

the class TupleDomainParquetPredicate method getDomain.

/**
 * Get a domain for the ranges defined by each pair of elements from {@code minimums} and {@code maximums}.
 * Both lists must have the same size.
 */
private static Domain getDomain(ColumnDescriptor column, Type type, List<Object> minimums, List<Object> maximums, boolean hasNullValue, DateTimeZone timeZone) {
    checkArgument(minimums.size() == maximums.size(), "Expected minimums and maximums to have the same size");
    if (type.equals(BOOLEAN)) {
        boolean hasTrueValues = minimums.stream().anyMatch(value -> (boolean) value) || maximums.stream().anyMatch(value -> (boolean) value);
        boolean hasFalseValues = minimums.stream().anyMatch(value -> !(boolean) value) || maximums.stream().anyMatch(value -> !(boolean) value);
        if (hasTrueValues && hasFalseValues) {
            return Domain.all(type);
        }
        if (hasTrueValues) {
            return Domain.create(ValueSet.of(type, true), hasNullValue);
        }
        if (hasFalseValues) {
            return Domain.create(ValueSet.of(type, false), hasNullValue);
        }
        // All nulls case is handled earlier
        throw new VerifyException("Impossible boolean statistics");
    }
    if (type.equals(BIGINT) || type.equals(INTEGER) || type.equals(DATE) || type.equals(SMALLINT) || type.equals(TINYINT)) {
        List<Range> ranges = new ArrayList<>();
        for (int i = 0; i < minimums.size(); i++) {
            long min = asLong(minimums.get(i));
            long max = asLong(maximums.get(i));
            if (isStatisticsOverflow(type, min, max)) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            ranges.add(Range.range(type, min, true, max, true));
        }
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    if (type instanceof DecimalType) {
        DecimalType decimalType = (DecimalType) type;
        List<Range> ranges = new ArrayList<>();
        if (decimalType.isShort()) {
            for (int i = 0; i < minimums.size(); i++) {
                Object min = minimums.get(i);
                Object max = maximums.get(i);
                long minValue = min instanceof Binary ? getShortDecimalValue(((Binary) min).getBytes()) : asLong(min);
                long maxValue = min instanceof Binary ? getShortDecimalValue(((Binary) max).getBytes()) : asLong(max);
                if (isStatisticsOverflow(type, minValue, maxValue)) {
                    return Domain.create(ValueSet.all(type), hasNullValue);
                }
                ranges.add(Range.range(type, minValue, true, maxValue, true));
            }
        } else {
            for (int i = 0; i < minimums.size(); i++) {
                Int128 min = Int128.fromBigEndian(((Binary) minimums.get(i)).getBytes());
                Int128 max = Int128.fromBigEndian(((Binary) maximums.get(i)).getBytes());
                ranges.add(Range.range(type, min, true, max, true));
            }
        }
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    if (type.equals(REAL)) {
        List<Range> ranges = new ArrayList<>();
        for (int i = 0; i < minimums.size(); i++) {
            Float min = (Float) minimums.get(i);
            Float max = (Float) maximums.get(i);
            if (min.isNaN() || max.isNaN()) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            ranges.add(Range.range(type, (long) floatToRawIntBits(min), true, (long) floatToRawIntBits(max), true));
        }
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    if (type.equals(DOUBLE)) {
        List<Range> ranges = new ArrayList<>();
        for (int i = 0; i < minimums.size(); i++) {
            Double min = (Double) minimums.get(i);
            Double max = (Double) maximums.get(i);
            if (min.isNaN() || max.isNaN()) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            ranges.add(Range.range(type, min, true, max, true));
        }
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    if (type instanceof VarcharType) {
        List<Range> ranges = new ArrayList<>();
        for (int i = 0; i < minimums.size(); i++) {
            Slice min = Slices.wrappedBuffer(((Binary) minimums.get(i)).toByteBuffer());
            Slice max = Slices.wrappedBuffer(((Binary) maximums.get(i)).toByteBuffer());
            ranges.add(Range.range(type, min, true, max, true));
        }
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    if (type instanceof TimestampType) {
        if (column.getPrimitiveType().getPrimitiveTypeName().equals(INT96)) {
            TrinoTimestampEncoder<?> timestampEncoder = createTimestampEncoder((TimestampType) type, timeZone);
            List<Object> values = new ArrayList<>();
            for (int i = 0; i < minimums.size(); i++) {
                Object min = minimums.get(i);
                Object max = maximums.get(i);
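                // INT96 min/max statistics are only used in the special case where both values are
                // Binary and min equals max; anything else falls back to an unconstrained domain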
                if (!(min instanceof Binary) || !(max instanceof Binary) || !min.equals(max)) {
                    return Domain.create(ValueSet.all(type), hasNullValue);
                }
                values.add(timestampEncoder.getTimestamp(decodeInt96Timestamp((Binary) min)));
            }
            return Domain.multipleValues(type, values, hasNullValue);
        }
        if (column.getPrimitiveType().getPrimitiveTypeName().equals(INT64)) {
            LogicalTypeAnnotation logicalTypeAnnotation = column.getPrimitiveType().getLogicalTypeAnnotation();
            if (!(logicalTypeAnnotation instanceof TimestampLogicalTypeAnnotation)) {
                // Invalid statistics. Unit and UTC adjustment are not known
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            TimestampLogicalTypeAnnotation timestampTypeAnnotation = (TimestampLogicalTypeAnnotation) logicalTypeAnnotation;
            // Bail out if the precision is not known
            if (timestampTypeAnnotation.getUnit() == null) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            TrinoTimestampEncoder<?> timestampEncoder = createTimestampEncoder((TimestampType) type, DateTimeZone.UTC);
            List<Range> ranges = new ArrayList<>();
            for (int i = 0; i < minimums.size(); i++) {
                long min = (long) minimums.get(i);
                long max = (long) maximums.get(i);
                ranges.add(Range.range(type, timestampEncoder.getTimestamp(decodeInt64Timestamp(min, timestampTypeAnnotation.getUnit())), true, timestampEncoder.getTimestamp(decodeInt64Timestamp(max, timestampTypeAnnotation.getUnit())), true));
            }
            return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
        }
    }
    return Domain.create(ValueSet.all(type), hasNullValue);
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) DateTimeZone(org.joda.time.DateTimeZone) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) ColumnIndex(org.apache.parquet.internal.column.columnindex.ColumnIndex) FilterApi(org.apache.parquet.filter2.predicate.FilterApi) ByteBuffer(java.nio.ByteBuffer) INT96(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96) TrinoTimestampEncoderFactory.createTimestampEncoder(io.trino.plugin.base.type.TrinoTimestampEncoderFactory.createTimestampEncoder) ParquetDataSourceId(io.trino.parquet.ParquetDataSourceId) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Slices(io.airlift.slice.Slices) Map(java.util.Map) INTEGER(io.trino.spi.type.IntegerType.INTEGER) UserDefinedPredicate(org.apache.parquet.filter2.predicate.UserDefinedPredicate) SMALLINT(io.trino.spi.type.SmallintType.SMALLINT) PredicateUtils.isStatisticsOverflow(io.trino.parquet.predicate.PredicateUtils.isStatisticsOverflow) Range(io.trino.spi.predicate.Range) TimestampLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) Domain(io.trino.spi.predicate.Domain) ParquetTimestampUtils.decodeInt96Timestamp(io.trino.parquet.ParquetTimestampUtils.decodeInt96Timestamp) Dictionary(io.trino.parquet.dictionary.Dictionary) ColumnIndexStore(org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore) String.format(java.lang.String.format) ValueSet(io.trino.spi.predicate.ValueSet) Binary(org.apache.parquet.io.api.Binary) Serializable(java.io.Serializable) INT64(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64) List(java.util.List) TrinoTimestampEncoder(io.trino.plugin.base.type.TrinoTimestampEncoder) BIGINT(io.trino.spi.type.BigintType.BIGINT) LITTLE_ENDIAN(java.nio.ByteOrder.LITTLE_ENDIAN) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) Optional(java.util.Optional) Operators(org.apache.parquet.filter2.predicate.Operators) 
DecimalType(io.trino.spi.type.DecimalType) DATE(io.trino.spi.type.DateType.DATE) REAL(io.trino.spi.type.RealType.REAL) ParquetCorruptionException(io.trino.parquet.ParquetCorruptionException) ColumnPath(org.apache.parquet.hadoop.metadata.ColumnPath) Slice(io.airlift.slice.Slice) Type(io.trino.spi.type.Type) BOOLEAN(io.trino.spi.type.BooleanType.BOOLEAN) LogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation) Function(java.util.function.Function) TimestampType(io.trino.spi.type.TimestampType) ArrayList(java.util.ArrayList) VarcharType(io.trino.spi.type.VarcharType) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) ImmutableList(com.google.common.collect.ImmutableList) Objects.requireNonNull(java.util.Objects.requireNonNull) RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) VerifyException(com.google.common.base.VerifyException) Int128(io.trino.spi.type.Int128) Statistics(org.apache.parquet.column.statistics.Statistics) DictionaryPage(io.trino.parquet.DictionaryPage) ParquetTimestampUtils.decodeInt64Timestamp(io.trino.parquet.ParquetTimestampUtils.decodeInt64Timestamp) ParquetTypeUtils.getShortDecimalValue(io.trino.parquet.ParquetTypeUtils.getShortDecimalValue) TupleDomain(io.trino.spi.predicate.TupleDomain) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) VisibleForTesting(com.google.common.annotations.VisibleForTesting) TINYINT(io.trino.spi.type.TinyintType.TINYINT) VarcharType(io.trino.spi.type.VarcharType) ArrayList(java.util.ArrayList) Range(io.trino.spi.predicate.Range) VerifyException(com.google.common.base.VerifyException) TimestampLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) LogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation) Slice(io.airlift.slice.Slice) TimestampLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) DecimalType(io.trino.spi.type.DecimalType) TimestampType(io.trino.spi.type.TimestampType) 
Binary(org.apache.parquet.io.api.Binary) Int128(io.trino.spi.type.Int128)

Aggregations

INTEGER (io.trino.spi.type.IntegerType.INTEGER)29 ImmutableList (com.google.common.collect.ImmutableList)27 Optional (java.util.Optional)24 Type (io.trino.spi.type.Type)22 List (java.util.List)20 ImmutableMap (com.google.common.collect.ImmutableMap)19 BIGINT (io.trino.spi.type.BigintType.BIGINT)19 DOUBLE (io.trino.spi.type.DoubleType.DOUBLE)16 Map (java.util.Map)16 Test (org.testng.annotations.Test)16 String.format (java.lang.String.format)14 ImmutableSet (com.google.common.collect.ImmutableSet)12 VARCHAR (io.trino.spi.type.VarcharType.VARCHAR)11 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)10 Page (io.trino.spi.Page)10 Slice (io.airlift.slice.Slice)9 Assert.assertEquals (org.testng.Assert.assertEquals)9 Assert.assertFalse (org.testng.Assert.assertFalse)9 RowType (io.trino.spi.type.RowType)8 Session (io.trino.Session)7