Example 36 with ColumnMetadata

Use of io.trino.spi.connector.ColumnMetadata in project trino by trinodb.

From class TestHiveGlueMetastore, method doGetPartitionsFilterTest.

/**
 * @param filterList partition filters to apply; must be the same size as expectedValuesList
 * @param expectedValuesList expected partition values for each corresponding filter
 * @throws Exception if table creation or the metastore call fails
 */
private void doGetPartitionsFilterTest(List<ColumnMetadata> columnMetadata, List<String> partitionColumnNames, List<PartitionValues> partitionValues, List<TupleDomain<String>> filterList, List<List<PartitionValues>> expectedValuesList) throws Exception {
    try (CloseableSchamaTableName closeableTableName = new CloseableSchamaTableName(temporaryTable("get_partitions"))) {
        SchemaTableName tableName = closeableTableName.getSchemaTableName();
        createDummyPartitionedTable(tableName, columnMetadata, partitionColumnNames, partitionValues);
        HiveMetastore metastoreClient = getMetastoreClient();
        for (int i = 0; i < filterList.size(); i++) {
            TupleDomain<String> filter = filterList.get(i);
            List<PartitionValues> expectedValues = expectedValuesList.get(i);
            List<String> expectedResults = expectedValues.stream().map(expectedPartitionValues -> makePartName(partitionColumnNames, expectedPartitionValues.getValues())).collect(toImmutableList());
            Optional<List<String>> partitionNames = metastoreClient.getPartitionNamesByFilter(tableName.getSchemaName(), tableName.getTableName(), partitionColumnNames, filter);
            assertTrue(partitionNames.isPresent());
            assertEquals(partitionNames.get(), expectedResults, format("lists \nactual: %s\nexpected: %s\nmismatch for filter %s (input index %d)\n", partitionNames.get(), expectedResults, filter, i));
        }
    }
}
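The filters in filterList are ordinary Trino SPI TupleDomain instances keyed by partition column name (the same test package also provides a PartitionFilterBuilder helper that presumably serves this purpose). A minimal sketch of building one by hand; the column name and value below are made up for illustration:

import com.google.common.collect.ImmutableMap;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.TupleDomain;
import static io.airlift.slice.Slices.utf8Slice;
import static io.trino.spi.type.VarcharType.VARCHAR;

// Sketch only: restrict a hypothetical partition column "ds" to a single varchar value.
TupleDomain<String> filter = TupleDomain.withColumnDomains(ImmutableMap.of(
        "ds", Domain.singleValue(VARCHAR, utf8Slice("2023-01-01"))));
// TupleDomain.all() would match every partition.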
Also used : HiveMetastore(io.trino.plugin.hive.metastore.HiveMetastore) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) SchemaTableName(io.trino.spi.connector.SchemaTableName)

Example 37 with ColumnMetadata

Use of io.trino.spi.connector.ColumnMetadata in project trino by trinodb.

From class TestHiveGlueMetastore, method testUpdatePartitionedStatisticsOnCreate.

@Test
public void testUpdatePartitionedStatisticsOnCreate() {
    SchemaTableName tableName = temporaryTable("update_partitioned_statistics_create");
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        List<ColumnMetadata> columns = ImmutableList.of(new ColumnMetadata("a_column", BigintType.BIGINT), new ColumnMetadata("part_column", BigintType.BIGINT));
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(TEXTFILE, ImmutableList.of("part_column")));
        ConnectorOutputTableHandle createTableHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES);
        // write data
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, createTableHandle);
        MaterializedResult data = MaterializedResult.resultBuilder(session, BigintType.BIGINT, BigintType.BIGINT).row(1L, 1L).row(2L, 1L).row(3L, 1L).row(4L, 2L).row(5L, 2L).build();
        sink.appendPage(data.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // prepare statistics
        ComputedStatistics statistics1 = ComputedStatistics.builder(ImmutableList.of("part_column"), ImmutableList.of(singleValueBlock(1)))
                .addTableStatistic(TableStatisticType.ROW_COUNT, singleValueBlock(3))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", MIN_VALUE), singleValueBlock(1))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", MAX_VALUE), singleValueBlock(3))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_DISTINCT_VALUES), singleValueBlock(3))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_NON_NULL_VALUES), singleValueBlock(3))
                .build();
        ComputedStatistics statistics2 = ComputedStatistics.builder(ImmutableList.of("part_column"), ImmutableList.of(singleValueBlock(2)))
                .addTableStatistic(TableStatisticType.ROW_COUNT, singleValueBlock(2))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", MIN_VALUE), singleValueBlock(4))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", MAX_VALUE), singleValueBlock(5))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_DISTINCT_VALUES), singleValueBlock(2))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_NON_NULL_VALUES), singleValueBlock(2))
                .build();
        // finish CTAS
        metadata.finishCreateTable(session, createTableHandle, fragments, ImmutableList.of(statistics1, statistics2));
        transaction.commit();
    } finally {
        dropTable(tableName);
    }
}
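The singleValueBlock helper used to wrap the statistic values above is defined elsewhere in the test hierarchy and is not shown here. A minimal sketch of what such a helper could look like, assuming a one-position BIGINT block is all that is needed (the name and long-based signature are assumptions):

import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import static io.trino.spi.type.BigintType.BIGINT;

// Sketch: build a one-position BIGINT block holding the given value.
private static Block singleValueBlock(long value) {
    BlockBuilder blockBuilder = BIGINT.createFixedSizeBlockBuilder(1);
    BIGINT.writeLong(blockBuilder, value);
    return blockBuilder.build();
}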
Also used : ColumnStatisticMetadata(io.trino.spi.statistics.ColumnStatisticMetadata) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) SchemaTableName(io.trino.spi.connector.SchemaTableName) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice) ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) ConnectorSession(io.trino.spi.connector.ConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) ConnectorPageSink(io.trino.spi.connector.ConnectorPageSink) MaterializedResult(io.trino.testing.MaterializedResult) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) Test(org.testng.annotations.Test)

Example 38 with ColumnMetadata

Use of io.trino.spi.connector.ColumnMetadata in project trino by trinodb.

From class TestHiveGlueMetastore, method createDummyPartitionedTable.

private void createDummyPartitionedTable(SchemaTableName tableName, List<ColumnMetadata> columns, List<String> partitionColumnNames, List<PartitionValues> partitionValues) throws Exception {
    doCreateEmptyTable(tableName, ORC, columns, partitionColumnNames);
    HiveMetastoreClosure metastoreClient = new HiveMetastoreClosure(getMetastoreClient());
    Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
    List<PartitionWithStatistics> partitions = new ArrayList<>();
    List<String> partitionNames = new ArrayList<>();
    partitionValues.stream().map(partitionValue -> makePartName(partitionColumnNames, partitionValue.values)).forEach(partitionName -> {
        partitions.add(new PartitionWithStatistics(createDummyPartition(table, partitionName), partitionName, PartitionStatistics.empty()));
        partitionNames.add(partitionName);
    });
    metastoreClient.addPartitions(tableName.getSchemaName(), tableName.getTableName(), partitions);
    partitionNames.forEach(partitionName -> metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), partitionName, currentStatistics -> EMPTY_TABLE_STATISTICS));
}
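makePartName is the static helper from org.apache.hadoop.hive.common.FileUtils that joins partition column names and values into the usual Hive partition-name form. A small illustrative call; the column names and values are made up:

import com.google.common.collect.ImmutableList;
import static org.apache.hadoop.hive.common.FileUtils.makePartName;

// Produces "ds=2023-01-01/hr=00"; special characters in values are escaped.
String partitionName = makePartName(ImmutableList.of("ds", "hr"), ImmutableList.of("2023-01-01", "00"));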
Also used : TableNotFoundException(io.trino.spi.connector.TableNotFoundException) Table(io.trino.plugin.hive.metastore.Table) HiveUtil.isIcebergTable(io.trino.plugin.hive.util.HiveUtil.isIcebergTable) HiveUtil.isDeltaLakeTable(io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable) PartitionWithStatistics(io.trino.plugin.hive.metastore.PartitionWithStatistics) ArrayList(java.util.ArrayList) HiveMetastoreClosure(io.trino.plugin.hive.HiveMetastoreClosure)

Example 39 with ColumnMetadata

Use of io.trino.spi.connector.ColumnMetadata in project trino by trinodb.

From class TestHiveGlueMetastore, method testInvalidColumnStatisticsMetadata.

@Test
public void testInvalidColumnStatisticsMetadata() throws Exception {
    SchemaTableName tableName = temporaryTable("test_statistics_invalid_column_metadata");
    try {
        List<ColumnMetadata> columns = List.of(new ColumnMetadata("column1", BIGINT));
        Map<String, HiveColumnStatistics> columnStatistics = Map.of("column1", INTEGER_COLUMN_STATISTICS);
        PartitionStatistics partitionStatistics = PartitionStatistics.builder().setBasicStatistics(HIVE_BASIC_STATISTICS).setColumnStatistics(columnStatistics).build();
        doCreateEmptyTable(tableName, ORC, columns);
        // set table statistics for column1
        metastore.updateTableStatistics(tableName.getSchemaName(), tableName.getTableName(), NO_ACID_TRANSACTION, actualStatistics -> {
            assertThat(actualStatistics).isEqualTo(EMPTY_TABLE_STATISTICS);
            return partitionStatistics;
        });
        Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName()).get();
        TableInput tableInput = GlueInputConverter.convertTable(table);
        tableInput.setParameters(ImmutableMap.<String, String>builder().putAll(tableInput.getParameters()).put("column_stats_bad_data", "bad data").buildOrThrow());
        getGlueClient().updateTable(new UpdateTableRequest().withDatabaseName(tableName.getSchemaName()).withTableInput(tableInput));
        assertThat(metastore.getTableStatistics(tableName.getSchemaName(), tableName.getTableName())).isEqualTo(partitionStatistics);
    } finally {
        dropTable(tableName);
    }
}
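If you want to confirm that the corrupt parameter really reached Glue before asserting on the statistics, the same client can read the table back. A hedged sketch, not part of the snippet above, using standard AWS Glue SDK calls:

import com.amazonaws.services.glue.model.GetTableRequest;
import java.util.Map;
import static org.assertj.core.api.Assertions.assertThat;

// Sketch: fetch the table definition from Glue and inspect its raw parameters map.
Map<String, String> glueParameters = getGlueClient()
        .getTable(new GetTableRequest()
                .withDatabaseName(tableName.getSchemaName())
                .withName(tableName.getTableName()))
        .getTable()
        .getParameters();
assertThat(glueParameters).containsEntry("column_stats_bad_data", "bad data");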
Also used : TableInput(com.amazonaws.services.glue.model.TableInput) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) Table(io.trino.plugin.hive.metastore.Table) HiveUtil.isIcebergTable(io.trino.plugin.hive.util.HiveUtil.isIcebergTable) HiveUtil.isDeltaLakeTable(io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) UpdateTableRequest(com.amazonaws.services.glue.model.UpdateTableRequest) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) SchemaTableName(io.trino.spi.connector.SchemaTableName) Test(org.testng.annotations.Test)

Example 40 with ColumnMetadata

Use of io.trino.spi.connector.ColumnMetadata in project trino by trinodb.

From class TestHiveGlueMetastore, method testUpdateStatisticsOnCreate.

@Test
public void testUpdateStatisticsOnCreate() {
    SchemaTableName tableName = temporaryTable("update_statistics_create");
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        List<ColumnMetadata> columns = ImmutableList.of(new ColumnMetadata("a_column", BigintType.BIGINT));
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(TEXTFILE));
        ConnectorOutputTableHandle createTableHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES);
        // write data
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, createTableHandle);
        MaterializedResult data = MaterializedResult.resultBuilder(session, BigintType.BIGINT).row(1L).row(2L).row(3L).row(4L).row(5L).build();
        sink.appendPage(data.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // prepare statistics
        ComputedStatistics statistics = ComputedStatistics.builder(ImmutableList.of(), ImmutableList.of())
                .addTableStatistic(TableStatisticType.ROW_COUNT, singleValueBlock(5))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", MIN_VALUE), singleValueBlock(1))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", MAX_VALUE), singleValueBlock(5))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_DISTINCT_VALUES), singleValueBlock(5))
                .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_NON_NULL_VALUES), singleValueBlock(5))
                .build();
        // finish CTAS
        metadata.finishCreateTable(session, createTableHandle, fragments, ImmutableList.of(statistics));
        transaction.commit();
    } finally {
        dropTable(tableName);
    }
}
Also used : ColumnStatisticMetadata(io.trino.spi.statistics.ColumnStatisticMetadata) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) SchemaTableName(io.trino.spi.connector.SchemaTableName) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice) ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) ConnectorSession(io.trino.spi.connector.ConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) ConnectorPageSink(io.trino.spi.connector.ConnectorPageSink) MaterializedResult(io.trino.testing.MaterializedResult) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) Test(org.testng.annotations.Test)

Aggregations

ColumnMetadata (io.trino.spi.connector.ColumnMetadata): 154 usages
SchemaTableName (io.trino.spi.connector.SchemaTableName): 75 usages
ConnectorTableMetadata (io.trino.spi.connector.ConnectorTableMetadata): 73 usages
Test (org.testng.annotations.Test): 64 usages
ImmutableList (com.google.common.collect.ImmutableList): 63 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 55 usages
List (java.util.List): 45 usages
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 43 usages
Optional (java.util.Optional): 43 usages
ConnectorSession (io.trino.spi.connector.ConnectorSession): 41 usages
TrinoException (io.trino.spi.TrinoException): 38 usages
ColumnHandle (io.trino.spi.connector.ColumnHandle): 38 usages
Map (java.util.Map): 38 usages
Type (io.trino.spi.type.Type): 35 usages
Constraint (io.trino.spi.connector.Constraint): 32 usages
ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle): 31 usages
ConnectorMetadata (io.trino.spi.connector.ConnectorMetadata): 30 usages
BIGINT (io.trino.spi.type.BigintType.BIGINT): 30 usages
HashMap (java.util.HashMap): 27 usages
Set (java.util.Set): 27 usages