
Example 1 with ColumnStatisticMetadata

Use of io.trino.spi.statistics.ColumnStatisticMetadata in project trino by trinodb.

From the class TestHiveGlueMetastore, method testUpdatePartitionedStatisticsOnCreate:

@Test
public void testUpdatePartitionedStatisticsOnCreate() {
    SchemaTableName tableName = temporaryTable("update_partitioned_statistics_create");
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        List<ColumnMetadata> columns = ImmutableList.of(new ColumnMetadata("a_column", BigintType.BIGINT), new ColumnMetadata("part_column", BigintType.BIGINT));
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(TEXTFILE, ImmutableList.of("part_column")));
        ConnectorOutputTableHandle createTableHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES);
        // write data
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, createTableHandle);
        MaterializedResult data = MaterializedResult.resultBuilder(session, BigintType.BIGINT, BigintType.BIGINT).row(1L, 1L).row(2L, 1L).row(3L, 1L).row(4L, 2L).row(5L, 2L).build();
        sink.appendPage(data.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // prepare statistics
        ComputedStatistics statistics1 = ComputedStatistics.builder(ImmutableList.of("part_column"), ImmutableList.of(singleValueBlock(1)))
                .addTableStatistic(TableStatisticType.ROW_COUNT, singleValueBlock(3))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", MIN_VALUE), singleValueBlock(1))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", MAX_VALUE), singleValueBlock(3))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_DISTINCT_VALUES), singleValueBlock(3))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_NON_NULL_VALUES), singleValueBlock(3))
                .build();
        ComputedStatistics statistics2 = ComputedStatistics.builder(ImmutableList.of("part_column"), ImmutableList.of(singleValueBlock(2)))
                .addTableStatistic(TableStatisticType.ROW_COUNT, singleValueBlock(2))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", MIN_VALUE), singleValueBlock(4))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", MAX_VALUE), singleValueBlock(5))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_DISTINCT_VALUES), singleValueBlock(2))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_NON_NULL_VALUES), singleValueBlock(2))
                .build();
        // finish CTAS
        metadata.finishCreateTable(session, createTableHandle, fragments, ImmutableList.of(statistics1, statistics2));
        transaction.commit();
    } finally {
        dropTable(tableName);
    }
}
Also used : ColumnStatisticMetadata(io.trino.spi.statistics.ColumnStatisticMetadata) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) SchemaTableName(io.trino.spi.connector.SchemaTableName) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice) ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) ConnectorSession(io.trino.spi.connector.ConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) ConnectorPageSink(io.trino.spi.connector.ConnectorPageSink) MaterializedResult(io.trino.testing.MaterializedResult) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) Test(org.testng.annotations.Test)
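
The test above relies on a singleValueBlock helper that is not shown in the snippet. Below is a minimal sketch of such a helper, assuming it only needs to produce a one-position BIGINT block; the helper name comes from the snippet, but the body is an illustration rather than the project's actual implementation.

import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import static io.trino.spi.type.BigintType.BIGINT;

// Hypothetical helper, assumed for illustration only; not the project's implementation.
private static Block singleValueBlock(long value) {
    // Build a single-position BIGINT block holding the given value.
    BlockBuilder blockBuilder = BIGINT.createBlockBuilder(null, 1);
    BIGINT.writeLong(blockBuilder, value);
    return blockBuilder.build();
}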

Example 2 with ColumnStatisticMetadata

Use of io.trino.spi.statistics.ColumnStatisticMetadata in project trino by trinodb.

From the class TestHiveGlueMetastore, method testUpdateStatisticsOnCreate:

@Test
public void testUpdateStatisticsOnCreate() {
    SchemaTableName tableName = temporaryTable("update_statistics_create");
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        List<ColumnMetadata> columns = ImmutableList.of(new ColumnMetadata("a_column", BigintType.BIGINT));
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(TEXTFILE));
        ConnectorOutputTableHandle createTableHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES);
        // write data
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, createTableHandle);
        MaterializedResult data = MaterializedResult.resultBuilder(session, BigintType.BIGINT).row(1L).row(2L).row(3L).row(4L).row(5L).build();
        sink.appendPage(data.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // prepare statistics
        ComputedStatistics statistics = ComputedStatistics.builder(ImmutableList.of(), ImmutableList.of())
                .addTableStatistic(TableStatisticType.ROW_COUNT, singleValueBlock(5))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", MIN_VALUE), singleValueBlock(1))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", MAX_VALUE), singleValueBlock(5))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_DISTINCT_VALUES), singleValueBlock(5))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_NON_NULL_VALUES), singleValueBlock(5))
                .build();
        // finish CTAS
        metadata.finishCreateTable(session, createTableHandle, fragments, ImmutableList.of(statistics));
        transaction.commit();
    } finally {
        dropTable(tableName);
    }
}
Also used : ColumnStatisticMetadata(io.trino.spi.statistics.ColumnStatisticMetadata) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) SchemaTableName(io.trino.spi.connector.SchemaTableName) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice) ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) ConnectorSession(io.trino.spi.connector.ConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) ConnectorPageSink(io.trino.spi.connector.ConnectorPageSink) MaterializedResult(io.trino.testing.MaterializedResult) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) Test(org.testng.annotations.Test)
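
createTableProperties is another helper assumed by these tests rather than shown here. The sketch below is a guess at its shape, using the Hive connector's "format" and "partitioned_by" table property names; the real helper in the test base class may set additional properties, and the one-argument call in this example presumably delegates with an empty partition list.

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.trino.plugin.hive.HiveStorageFormat;
import java.util.List;
import java.util.Map;

// Assumed sketch only; the actual helper lives in the Hive test base class.
private static Map<String, Object> createTableProperties(HiveStorageFormat storageFormat, List<String> partitionedBy) {
    return ImmutableMap.of(
            "format", storageFormat,                         // storage format, e.g. TEXTFILE
            "partitioned_by", ImmutableList.copyOf(partitionedBy)); // empty list for unpartitioned tables
}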

Example 3 with ColumnStatisticMetadata

Use of io.trino.spi.statistics.ColumnStatisticMetadata in project trino by trinodb.

From the class DeltaLakeMetadata, method getStatisticsCollectionMetadata:

@Override
public TableStatisticsMetadata getStatisticsCollectionMetadata(ConnectorSession session, ConnectorTableMetadata tableMetadata) {
    ImmutableSet.Builder<ColumnStatisticMetadata> columnStatistics = ImmutableSet.builder();
    Optional<Set<String>> analyzeColumnNames = DeltaLakeTableProperties.getAnalyzeColumns(tableMetadata.getProperties());
    tableMetadata.getColumns().stream()
            .filter(DeltaLakeMetadata::shouldCollectExtendedStatistics)
            .filter(columnMetadata -> analyzeColumnNames.map(columnNames -> columnNames.contains(columnMetadata.getName())).orElse(true))
            .map(columnMetadata -> new ColumnStatisticMetadata(columnMetadata.getName(), NUMBER_OF_DISTINCT_VALUES_SUMMARY))
            .forEach(columnStatistics::add);
    // collect max(file modification time) for the sake of incremental ANALYZE
    columnStatistics.add(new ColumnStatisticMetadata(FILE_MODIFIED_TIME_COLUMN_NAME, MAX_VALUE));
    return new TableStatisticsMetadata(columnStatistics.build(), ImmutableSet.of(), ImmutableList.of());
}
Also used : TransactionLogUtil.getTransactionLogDir(io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.getTransactionLogDir) FileSystem(org.apache.hadoop.fs.FileSystem) TableSnapshot(io.trino.plugin.deltalake.transactionlog.TableSnapshot) ColumnStatisticMetadata(io.trino.spi.statistics.ColumnStatisticMetadata) FileStatus(org.apache.hadoop.fs.FileStatus) DeltaLakeSchemaSupport.validateType(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.validateType) TypeUtils.isFloatingPointNaN(io.trino.spi.type.TypeUtils.isFloatingPointNaN) RemoveFileEntry(io.trino.plugin.deltalake.transactionlog.RemoveFileEntry) ConnectorTableExecuteHandle(io.trino.spi.connector.ConnectorTableExecuteHandle) Collections.singletonList(java.util.Collections.singletonList) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) TransactionLogWriterFactory(io.trino.plugin.deltalake.transactionlog.writer.TransactionLogWriterFactory) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) TimestampWithTimeZoneType(io.trino.spi.type.TimestampWithTimeZoneType) ValueSet.ofRanges(io.trino.spi.predicate.ValueSet.ofRanges) Column(io.trino.plugin.hive.metastore.Column) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Map(java.util.Map) PARTITIONED_BY_PROPERTY(io.trino.plugin.deltalake.DeltaLakeTableProperties.PARTITIONED_BY_PROPERTY) ProjectionApplicationResult(io.trino.spi.connector.ProjectionApplicationResult) PRESTO_QUERY_ID_NAME(io.trino.plugin.hive.HiveMetadata.PRESTO_QUERY_ID_NAME) ENGLISH(java.util.Locale.ENGLISH) SMALLINT(io.trino.spi.type.SmallintType.SMALLINT) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) Table(io.trino.plugin.hive.metastore.Table) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) TABLE_PROVIDER_PROPERTY(io.trino.plugin.deltalake.metastore.HiveMetastoreBackedDeltaLakeMetastore.TABLE_PROVIDER_PROPERTY) HiveWriteUtils.pathExists(io.trino.plugin.hive.util.HiveWriteUtils.pathExists) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) SchemaTableName(io.trino.spi.connector.SchemaTableName) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Stream(java.util.stream.Stream) TrinoPrincipal(io.trino.spi.security.TrinoPrincipal) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) SchemaTablePrefix(io.trino.spi.connector.SchemaTablePrefix) HyperLogLog(io.airlift.stats.cardinality.HyperLogLog) DateTimeEncoding.unpackMillisUtc(io.trino.spi.type.DateTimeEncoding.unpackMillisUtc) FILE_MODIFIED_TIME_COLUMN_NAME(io.trino.plugin.deltalake.DeltaLakeColumnHandle.FILE_MODIFIED_TIME_COLUMN_NAME) Predicate.not(java.util.function.Predicate.not) TableColumnsMetadata(io.trino.spi.connector.TableColumnsMetadata) RemoteIterator(org.apache.hadoop.fs.RemoteIterator) ANALYZE_COLUMNS_PROPERTY(io.trino.plugin.deltalake.DeltaLakeTableProperties.ANALYZE_COLUMNS_PROPERTY) REGULAR(io.trino.plugin.deltalake.DeltaLakeColumnType.REGULAR) TransactionLogParser.getMandatoryCurrentVersion(io.trino.plugin.deltalake.transactionlog.TransactionLogParser.getMandatoryCurrentVersion) DATE(io.trino.spi.type.DateType.DATE) REAL(io.trino.spi.type.RealType.REAL) Iterables(com.google.common.collect.Iterables) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) ConnectorInsertTableHandle(io.trino.spi.connector.ConnectorInsertTableHandle) 
DeltaLakeColumnHandle.fileSizeColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle.fileSizeColumnHandle) Slice(io.airlift.slice.Slice) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) DeltaLakeTableProcedureId(io.trino.plugin.deltalake.procedure.DeltaLakeTableProcedureId) INVALID_ANALYZE_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY) BOOLEAN(io.trino.spi.type.BooleanType.BOOLEAN) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) Variable(io.trino.spi.expression.Variable) DeltaLakeTableProperties.getLocation(io.trino.plugin.deltalake.DeltaLakeTableProperties.getLocation) Range.greaterThanOrEqual(io.trino.spi.predicate.Range.greaterThanOrEqual) TransactionConflictException(io.trino.plugin.deltalake.transactionlog.writer.TransactionConflictException) HiveType(io.trino.plugin.hive.HiveType) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) DeltaLakeStatisticsAccess(io.trino.plugin.deltalake.statistics.DeltaLakeStatisticsAccess) DeltaLakeSchemaSupport.extractPartitionColumns(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.extractPartitionColumns) ColumnHandle(io.trino.spi.connector.ColumnHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) INVALID_TABLE_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_TABLE_PROPERTY) DeltaLakeSchemaSupport.serializeStatsAsJson(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.serializeStatsAsJson) Nullable(javax.annotation.Nullable) ConstraintApplicationResult(io.trino.spi.connector.ConstraintApplicationResult) MapType(io.trino.spi.type.MapType) PARTITION_KEY(io.trino.plugin.deltalake.DeltaLakeColumnType.PARTITION_KEY) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) DELTA_LAKE_INVALID_SCHEMA(io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_INVALID_SCHEMA) CheckpointWriterManager(io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointWriterManager) ROW_ID_COLUMN_TYPE(io.trino.plugin.deltalake.DeltaLakeColumnHandle.ROW_ID_COLUMN_TYPE) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) HiveUtil.isHiveSystemSchema(io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema) ConnectorTableProperties(io.trino.spi.connector.ConnectorTableProperties) ConnectorExpression(io.trino.spi.expression.ConnectorExpression) MAX_VALUE(io.trino.spi.statistics.ColumnStatisticType.MAX_VALUE) DeltaLakeSessionProperties.isTableStatisticsEnabled(io.trino.plugin.deltalake.DeltaLakeSessionProperties.isTableStatisticsEnabled) LOCATION_PROPERTY(io.trino.plugin.deltalake.DeltaLakeTableProperties.LOCATION_PROPERTY) TableStatisticsMetadata(io.trino.spi.statistics.TableStatisticsMetadata) TINYINT(io.trino.spi.type.TinyintType.TINYINT) NotADeltaLakeTableException(io.trino.plugin.deltalake.metastore.NotADeltaLakeTableException) DeltaLakeStatistics(io.trino.plugin.deltalake.statistics.DeltaLakeStatistics) HiveUtil.isDeltaLakeTable(io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable) NodeManager(io.trino.spi.NodeManager) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) Database(io.trino.plugin.hive.metastore.Database) DeltaLakeSchemaSupport.extractSchema(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.extractSchema) SYNTHESIZED(io.trino.plugin.deltalake.DeltaLakeColumnType.SYNTHESIZED) TABLE_PROVIDER_VALUE(io.trino.plugin.deltalake.metastore.HiveMetastoreBackedDeltaLakeMetastore.TABLE_PROVIDER_VALUE) SchemaNotFoundException(io.trino.spi.connector.SchemaNotFoundException) 
AddFileEntry(io.trino.plugin.deltalake.transactionlog.AddFileEntry) DeltaLakeMetastore(io.trino.plugin.deltalake.metastore.DeltaLakeMetastore) Format(io.trino.plugin.deltalake.transactionlog.MetadataEntry.Format) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Locale(java.util.Locale) CatalogSchemaName(io.trino.spi.connector.CatalogSchemaName) Path(org.apache.hadoop.fs.Path) HyperLogLogType(io.trino.spi.type.HyperLogLogType) INTEGER(io.trino.spi.type.IntegerType.INTEGER) StorageFormat(io.trino.plugin.hive.metastore.StorageFormat) RowType(io.trino.spi.type.RowType) Range.range(io.trino.spi.predicate.Range.range) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) HiveWriteUtils.isS3FileSystem(io.trino.plugin.hive.util.HiveWriteUtils.isS3FileSystem) TransactionLogWriter(io.trino.plugin.deltalake.transactionlog.writer.TransactionLogWriter) Collection(java.util.Collection) DeltaLakeTableExecuteHandle(io.trino.plugin.deltalake.procedure.DeltaLakeTableExecuteHandle) MetadataEntry(io.trino.plugin.deltalake.transactionlog.MetadataEntry) ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) TrinoException(io.trino.spi.TrinoException) ArrayType(io.trino.spi.type.ArrayType) Instant(java.time.Instant) ConnectorOutputMetadata(io.trino.spi.connector.ConnectorOutputMetadata) Sets(com.google.common.collect.Sets) FileNotFoundException(java.io.FileNotFoundException) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) ROW_ID_COLUMN_NAME(io.trino.plugin.deltalake.DeltaLakeColumnHandle.ROW_ID_COLUMN_NAME) INVALID_SCHEMA_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY) DataSize(io.airlift.units.DataSize) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) MetastoreUtil.buildInitialPrivilegeSet(io.trino.plugin.hive.metastore.MetastoreUtil.buildInitialPrivilegeSet) Assignment(io.trino.spi.connector.Assignment) BeginTableExecuteResult(io.trino.spi.connector.BeginTableExecuteResult) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) DecimalType(io.trino.spi.type.DecimalType) OPTIMIZE(io.trino.plugin.deltalake.procedure.DeltaLakeTableProcedureId.OPTIMIZE) JsonCodec(io.airlift.json.JsonCodec) Comparators(com.google.common.collect.Comparators) Constraint(io.trino.spi.connector.Constraint) Range.lessThanOrEqual(io.trino.spi.predicate.Range.lessThanOrEqual) DeltaLakeFileStatistics(io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeFileStatistics) Logger(io.airlift.log.Logger) DeltaLakeSchemaSupport.serializeSchemaAsJson(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.serializeSchemaAsJson) DeltaLakeColumnStatistics(io.trino.plugin.deltalake.statistics.DeltaLakeColumnStatistics) Type(io.trino.spi.type.Type) HashMap(java.util.HashMap) DeltaLakeColumnHandle.pathColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle.pathColumnHandle) DeltaLakeColumnHandle.fileModifiedTimeColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle.fileModifiedTimeColumnHandle) AtomicReference(java.util.concurrent.atomic.AtomicReference) VarcharType(io.trino.spi.type.VarcharType) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) 
TableStatistics(io.trino.spi.statistics.TableStatistics) DeltaLakeSessionProperties.isExtendedStatisticsEnabled(io.trino.plugin.deltalake.DeltaLakeSessionProperties.isExtendedStatisticsEnabled) VIRTUAL_VIEW(org.apache.hadoop.hive.metastore.TableType.VIRTUAL_VIEW) CHECKPOINT_INTERVAL_PROPERTY(io.trino.plugin.deltalake.DeltaLakeTableProperties.CHECKPOINT_INTERVAL_PROPERTY) StorageFormat.create(io.trino.plugin.hive.metastore.StorageFormat.create) MetadataEntry.buildDeltaMetadataConfiguration(io.trino.plugin.deltalake.transactionlog.MetadataEntry.buildDeltaMetadataConfiguration) TupleDomain.withColumnDomains(io.trino.spi.predicate.TupleDomain.withColumnDomains) DELTA_LAKE_BAD_WRITE(io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_BAD_WRITE) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) TupleDomain(io.trino.spi.predicate.TupleDomain) DeltaLakeTableProperties.getPartitionedBy(io.trino.plugin.deltalake.DeltaLakeTableProperties.getPartitionedBy) HiveWriteUtils.createDirectory(io.trino.plugin.hive.util.HiveWriteUtils.createDirectory) GENERIC_INTERNAL_ERROR(io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) SchemaTableName.schemaTableName(io.trino.spi.connector.SchemaTableName.schemaTableName) UUID.randomUUID(java.util.UUID.randomUUID) ProtocolEntry(io.trino.plugin.deltalake.transactionlog.ProtocolEntry) DeltaTableOptimizeHandle(io.trino.plugin.deltalake.procedure.DeltaTableOptimizeHandle) Collections.unmodifiableMap(java.util.Collections.unmodifiableMap) CommitInfoEntry(io.trino.plugin.deltalake.transactionlog.CommitInfoEntry) PrincipalPrivileges(io.trino.plugin.hive.metastore.PrincipalPrivileges) TypeManager(io.trino.spi.type.TypeManager) Collections(java.util.Collections) NUMBER_OF_DISTINCT_VALUES_SUMMARY(io.trino.spi.statistics.ColumnStatisticType.NUMBER_OF_DISTINCT_VALUES_SUMMARY)
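
shouldCollectExtendedStatistics is referenced through a method reference but not included in the snippet. As a rough, assumed sketch (the actual predicate in DeltaLakeMetadata may apply further checks, for example on the column type), it could simply exclude hidden columns:

import io.trino.spi.connector.ColumnMetadata;

// Assumed sketch only; not the actual DeltaLakeMetadata implementation.
private static boolean shouldCollectExtendedStatistics(ColumnMetadata columnMetadata) {
    // Skip hidden/synthetic columns; only user-visible data columns get an NDV summary.
    return !columnMetadata.isHidden();
}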

Example 4 with ColumnStatisticMetadata

Use of io.trino.spi.statistics.ColumnStatisticMetadata in project trino by trinodb.

From the class StatisticsAggregationPlanner, method createStatisticsAggregation:

public TableStatisticAggregation createStatisticsAggregation(TableStatisticsMetadata statisticsMetadata, Map<String, Symbol> columnToSymbolMap) {
    StatisticAggregationsDescriptor.Builder<Symbol> descriptor = StatisticAggregationsDescriptor.builder();
    List<String> groupingColumns = statisticsMetadata.getGroupingColumns();
    List<Symbol> groupingSymbols = groupingColumns.stream().map(columnToSymbolMap::get).collect(toImmutableList());
    for (int i = 0; i < groupingSymbols.size(); i++) {
        descriptor.addGrouping(groupingColumns.get(i), groupingSymbols.get(i));
    }
    ImmutableMap.Builder<Symbol, AggregationNode.Aggregation> aggregations = ImmutableMap.builder();
    for (TableStatisticType type : statisticsMetadata.getTableStatistics()) {
        if (type != ROW_COUNT) {
            throw new TrinoException(NOT_SUPPORTED, "Table-wide statistic type not supported: " + type);
        }
        AggregationNode.Aggregation aggregation = new AggregationNode.Aggregation(metadata.resolveFunction(session, QualifiedName.of("count"), ImmutableList.of()), ImmutableList.of(), false, Optional.empty(), Optional.empty(), Optional.empty());
        Symbol symbol = symbolAllocator.newSymbol("rowCount", BIGINT);
        aggregations.put(symbol, aggregation);
        descriptor.addTableStatistic(ROW_COUNT, symbol);
    }
    for (ColumnStatisticMetadata columnStatisticMetadata : statisticsMetadata.getColumnStatistics()) {
        String columnName = columnStatisticMetadata.getColumnName();
        ColumnStatisticType statisticType = columnStatisticMetadata.getStatisticType();
        Symbol inputSymbol = columnToSymbolMap.get(columnName);
        verifyNotNull(inputSymbol, "inputSymbol is null");
        Type inputType = symbolAllocator.getTypes().get(inputSymbol);
        verifyNotNull(inputType, "inputType is null for symbol: %s", inputSymbol);
        ColumnStatisticsAggregation aggregation = createColumnAggregation(statisticType, inputSymbol, inputType);
        Symbol symbol = symbolAllocator.newSymbol(statisticType + ":" + columnName, aggregation.getOutputType());
        aggregations.put(symbol, aggregation.getAggregation());
        descriptor.addColumnStatistic(columnStatisticMetadata, symbol);
    }
    StatisticAggregations aggregation = new StatisticAggregations(aggregations.buildOrThrow(), groupingSymbols);
    return new TableStatisticAggregation(aggregation, descriptor.build());
}
Also used : ColumnStatisticMetadata(io.trino.spi.statistics.ColumnStatisticMetadata) AggregationNode(io.trino.sql.planner.plan.AggregationNode) ImmutableMap(com.google.common.collect.ImmutableMap) StatisticAggregations(io.trino.sql.planner.plan.StatisticAggregations) Type(io.trino.spi.type.Type) ColumnStatisticType(io.trino.spi.statistics.ColumnStatisticType) TableStatisticType(io.trino.spi.statistics.TableStatisticType) TrinoException(io.trino.spi.TrinoException) StatisticAggregationsDescriptor(io.trino.sql.planner.plan.StatisticAggregationsDescriptor)
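
createColumnAggregation is not part of the snippet. The sketch below illustrates the kind of statistic-type-to-aggregate-function mapping it performs; the helper name aggregationNameFor and the chosen function names are assumptions for illustration, not the planner's actual code, which resolves real Trino aggregate functions and covers more statistic types.

import io.trino.spi.statistics.ColumnStatisticType;

// Hypothetical mapping for illustration only.
private static String aggregationNameFor(ColumnStatisticType statisticType) {
    switch (statisticType) {
        case MIN_VALUE:
            return "min";
        case MAX_VALUE:
            return "max";
        case NUMBER_OF_DISTINCT_VALUES:
            return "approx_distinct";
        case NUMBER_OF_NON_NULL_VALUES:
            return "count";
        default:
            throw new IllegalArgumentException("Unsupported column statistic type: " + statisticType);
    }
}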

Example 5 with ColumnStatisticMetadata

Use of io.trino.spi.statistics.ColumnStatisticMetadata in project trino by trinodb.

From the class TestStatistics, method testFromComputedStatistics:

@Test
public void testFromComputedStatistics() {
    Function<Integer, Block> singleIntegerValueBlock = value -> BigintType.BIGINT.createBlockBuilder(null, 1).writeLong(value).build();
    ComputedStatistics statistics = ComputedStatistics.builder(ImmutableList.of(), ImmutableList.of())
            .addTableStatistic(TableStatisticType.ROW_COUNT, singleIntegerValueBlock.apply(5))
            .addColumnStatistic(new ColumnStatisticMetadata("a_column", MIN_VALUE), singleIntegerValueBlock.apply(1))
            .addColumnStatistic(new ColumnStatisticMetadata("a_column", MAX_VALUE), singleIntegerValueBlock.apply(5))
            .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_DISTINCT_VALUES), singleIntegerValueBlock.apply(5))
            .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_NON_NULL_VALUES), singleIntegerValueBlock.apply(5))
            .addColumnStatistic(new ColumnStatisticMetadata("b_column", NUMBER_OF_NON_NULL_VALUES), singleIntegerValueBlock.apply(4))
            .build();
    Map<String, Type> columnTypes = ImmutableMap.of("a_column", INTEGER, "b_column", VARCHAR);
    Map<String, HiveColumnStatistics> columnStatistics = Statistics.fromComputedStatistics(statistics.getColumnStatistics(), columnTypes, 5);
    assertThat(columnStatistics).hasSize(2);
    assertThat(columnStatistics.keySet()).contains("a_column", "b_column");
    assertThat(columnStatistics.get("a_column")).isEqualTo(HiveColumnStatistics.builder().setIntegerStatistics(new IntegerStatistics(OptionalLong.of(1), OptionalLong.of(5))).setNullsCount(0).setDistinctValuesCount(5).build());
    assertThat(columnStatistics.get("b_column")).isEqualTo(HiveColumnStatistics.builder().setNullsCount(1).build());
}
Also used : DateStatistics(io.trino.plugin.hive.metastore.DateStatistics) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) Type(io.trino.spi.type.Type) OptionalDouble(java.util.OptionalDouble) HiveColumnStatistics.createBinaryColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createBinaryColumnStatistics) Test(org.testng.annotations.Test) HiveBasicStatistics.createEmptyStatistics(io.trino.plugin.hive.HiveBasicStatistics.createEmptyStatistics) HiveBasicStatistics.createZeroStatistics(io.trino.plugin.hive.HiveBasicStatistics.createZeroStatistics) NUMBER_OF_DISTINCT_VALUES(io.trino.spi.statistics.ColumnStatisticType.NUMBER_OF_DISTINCT_VALUES) ColumnStatisticMetadata(io.trino.spi.statistics.ColumnStatisticMetadata) Function(java.util.function.Function) Float.floatToIntBits(java.lang.Float.floatToIntBits) ADD(io.trino.plugin.hive.util.Statistics.ReduceOperator.ADD) Utils.nativeValueToBlock(io.trino.spi.predicate.Utils.nativeValueToBlock) ColumnStatisticType(io.trino.spi.statistics.ColumnStatisticType) BigintType(io.trino.spi.type.BigintType) BigDecimal(java.math.BigDecimal) OptionalLong(java.util.OptionalLong) Statistics.merge(io.trino.plugin.hive.util.Statistics.merge) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) ImmutableList(com.google.common.collect.ImmutableList) Block(io.trino.spi.block.Block) Map(java.util.Map) Statistics.createHiveColumnStatistics(io.trino.plugin.hive.util.Statistics.createHiveColumnStatistics) INTEGER(io.trino.spi.type.IntegerType.INTEGER) BooleanStatistics(io.trino.plugin.hive.metastore.BooleanStatistics) Statistics.reduce(io.trino.plugin.hive.util.Statistics.reduce) NUMBER_OF_NON_NULL_VALUES(io.trino.spi.statistics.ColumnStatisticType.NUMBER_OF_NON_NULL_VALUES) IntegerStatistics(io.trino.plugin.hive.metastore.IntegerStatistics) HiveBasicStatistics(io.trino.plugin.hive.HiveBasicStatistics) ImmutableMap(com.google.common.collect.ImmutableMap) HiveColumnStatistics.createIntegerColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createIntegerColumnStatistics) ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) DecimalStatistics(io.trino.plugin.hive.metastore.DecimalStatistics) DoubleStatistics(io.trino.plugin.hive.metastore.DoubleStatistics) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) HiveColumnStatistics.createBooleanColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createBooleanColumnStatistics) MAX_VALUE(io.trino.spi.statistics.ColumnStatisticType.MAX_VALUE) LocalDate(java.time.LocalDate) SUBTRACT(io.trino.plugin.hive.util.Statistics.ReduceOperator.SUBTRACT) Optional(java.util.Optional) TableStatisticType(io.trino.spi.statistics.TableStatisticType) MIN_VALUE(io.trino.spi.statistics.ColumnStatisticType.MIN_VALUE) REAL(io.trino.spi.type.RealType.REAL) ColumnStatisticMetadata(io.trino.spi.statistics.ColumnStatisticMetadata) Type(io.trino.spi.type.Type) ColumnStatisticType(io.trino.spi.statistics.ColumnStatisticType) BigintType(io.trino.spi.type.BigintType) TableStatisticType(io.trino.spi.statistics.TableStatisticType) ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) Utils.nativeValueToBlock(io.trino.spi.predicate.Utils.nativeValueToBlock) Block(io.trino.spi.block.Block) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) Statistics.createHiveColumnStatistics(io.trino.plugin.hive.util.Statistics.createHiveColumnStatistics) 
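
The expected null count of 1 for b_column in the last assertion follows from the table row count and the reported non-null count. A small sketch of the assumed arithmetic (the actual computation happens inside Statistics.fromComputedStatistics):

// Illustration of the assumed derivation; not the fromComputedStatistics implementation.
private static long nullsCount(long rowCount, long nonNullValuesCount) {
    // b_column: 5 rows in total, 4 non-null values reported => 1 null.
    return rowCount - nonNullValuesCount;
}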

Aggregations

ColumnStatisticMetadata (io.trino.spi.statistics.ColumnStatisticMetadata): 9 usages
Test (org.testng.annotations.Test): 5 usages
ColumnStatisticType (io.trino.spi.statistics.ColumnStatisticType): 4 usages
ComputedStatistics (io.trino.spi.statistics.ComputedStatistics): 4 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 3 usages
Slice (io.airlift.slice.Slice): 3 usages
Block (io.trino.spi.block.Block): 3 usages
ColumnMetadata (io.trino.spi.connector.ColumnMetadata): 3 usages
ConnectorMetadata (io.trino.spi.connector.ConnectorMetadata): 3 usages
ConnectorOutputTableHandle (io.trino.spi.connector.ConnectorOutputTableHandle): 3 usages
ConnectorSession (io.trino.spi.connector.ConnectorSession): 3 usages
ConnectorTableMetadata (io.trino.spi.connector.ConnectorTableMetadata): 3 usages
SchemaTableName (io.trino.spi.connector.SchemaTableName): 3 usages
Type (io.trino.spi.type.Type): 3 usages
ImmutableList (com.google.common.collect.ImmutableList): 2 usages
Slices.utf8Slice (io.airlift.slice.Slices.utf8Slice): 2 usages
ConnectorPageSink (io.trino.spi.connector.ConnectorPageSink): 2 usages
TableStatisticType (io.trino.spi.statistics.TableStatisticType): 2 usages
StatisticAggregationsDescriptor (io.trino.sql.planner.plan.StatisticAggregationsDescriptor): 2 usages
MaterializedResult (io.trino.testing.MaterializedResult): 2 usages