Search in sources:

Example 1 with HiveMetastoreClosure

use of io.trino.plugin.hive.HiveMetastoreClosure in project trino by trinodb.

From the class CreateEmptyPartitionProcedure, method doCreateEmptyPartition:

/**
 * Creates an empty partition on an existing partitioned Hive table.
 *
 * <p>Validates that the table exists, that the caller may insert into it, and that the
 * provided partition column names match the table's actual partition columns; then goes
 * through the regular insert machinery (beginInsert/finishInsert) with a zero-row
 * PartitionUpdate so the new partition directory is registered in the metastore.
 *
 * @param partitionColumnNames must equal the table's partition columns, in order
 * @param partitionValues one value per partition column, identifying the new partition
 * @throws TrinoException INVALID_PROCEDURE_ARGUMENT if the table is missing or the
 *         column names do not match; ALREADY_EXISTS if the partition already exists
 */
private void doCreateEmptyPartition(ConnectorSession session, ConnectorAccessControl accessControl, String schemaName, String tableName, List<String> partitionColumnNames, List<String> partitionValues) {
    TransactionalMetadata hiveMetadata = hiveMetadataFactory.create(session.getIdentity(), true);
    // Hoisted: previously constructed three separate times below.
    SchemaTableName schemaTableName = new SchemaTableName(schemaName, tableName);
    HiveTableHandle tableHandle = (HiveTableHandle) hiveMetadata.getTableHandle(session, schemaTableName);
    if (tableHandle == null) {
        throw new TrinoException(INVALID_PROCEDURE_ARGUMENT, format("Table '%s' does not exist", schemaTableName));
    }
    // null security context: procedure-level access check — TODO confirm this matches
    // how sibling procedures invoke ConnectorAccessControl.
    accessControl.checkCanInsertIntoTable(null, schemaTableName);
    List<String> actualPartitionColumnNames = tableHandle.getPartitionColumns().stream().map(HiveColumnHandle::getName).collect(toImmutableList());
    if (!Objects.equals(partitionColumnNames, actualPartitionColumnNames)) {
        throw new TrinoException(INVALID_PROCEDURE_ARGUMENT, "Provided partition column names do not match actual partition column names: " + actualPartitionColumnNames);
    }
    HiveMetastoreClosure metastore = hiveMetadata.getMetastore().unsafeGetRawHiveMetastoreClosure();
    if (metastore.getPartition(schemaName, tableName, partitionValues).isPresent()) {
        throw new TrinoException(ALREADY_EXISTS, "Partition already exists");
    }
    HiveInsertTableHandle hiveInsertTableHandle = (HiveInsertTableHandle) hiveMetadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES);
    String partitionName = FileUtils.makePartName(actualPartitionColumnNames, partitionValues);
    WriteInfo writeInfo = locationService.getPartitionWriteInfo(hiveInsertTableHandle.getLocationHandle(), Optional.empty(), partitionName);
    // UpdateMode.NEW with zero rows/files/bytes: registers the partition without writing data.
    Slice serializedPartitionUpdate = Slices.wrappedBuffer(partitionUpdateJsonCodec.toJsonBytes(new PartitionUpdate(partitionName, UpdateMode.NEW, writeInfo.getWritePath(), writeInfo.getTargetPath(), ImmutableList.of(), 0, 0, 0)));
    hiveMetadata.finishInsert(session, hiveInsertTableHandle, ImmutableList.of(serializedPartitionUpdate), ImmutableList.of());
    hiveMetadata.commit();
}
Also used : HiveTableHandle(io.trino.plugin.hive.HiveTableHandle) Slice(io.airlift.slice.Slice) WriteInfo(io.trino.plugin.hive.LocationService.WriteInfo) TransactionalMetadata(io.trino.plugin.hive.TransactionalMetadata) TrinoException(io.trino.spi.TrinoException) HiveMetastoreClosure(io.trino.plugin.hive.HiveMetastoreClosure) SchemaTableName(io.trino.spi.connector.SchemaTableName) HiveInsertTableHandle(io.trino.plugin.hive.HiveInsertTableHandle) PartitionUpdate(io.trino.plugin.hive.PartitionUpdate)

Example 2 with HiveMetastoreClosure

use of io.trino.plugin.hive.HiveMetastoreClosure in project trino by trinodb.

From the class TestCachingHiveMetastore, method testUpdatePartitionStatistics:

@Test
public void testUpdatePartitionStatistics() {
    // Precondition: the mock client has not been touched yet.
    assertEquals(mockClient.getAccessCount(), 0);
    HiveMetastoreClosure closure = new HiveMetastoreClosure(metastore);
    Table cachedTable = closure.getTable(TEST_DATABASE, TEST_TABLE).get();
    // Fetching the table costs exactly one round trip.
    assertEquals(mockClient.getAccessCount(), 1);
    closure.updatePartitionStatistics(cachedTable.getDatabaseName(), cachedTable.getTableName(), TEST_PARTITION1, identity());
    // Updating partition statistics adds four more accesses (5 total).
    assertEquals(mockClient.getAccessCount(), 5);
}
Also used : Table(io.trino.plugin.hive.metastore.Table) HiveMetastoreClosure(io.trino.plugin.hive.HiveMetastoreClosure) Test(org.testng.annotations.Test)

Example 3 with HiveMetastoreClosure

use of io.trino.plugin.hive.HiveMetastoreClosure in project trino by trinodb.

From the class TestHiveGlueMetastore, method createDummyPartitionedTable:

/**
 * Creates an empty partitioned table and registers one dummy (empty-statistics)
 * partition per entry in {@code partitionValues}.
 *
 * @throws TableNotFoundException if the freshly created table cannot be read back
 */
private void createDummyPartitionedTable(SchemaTableName tableName, List<ColumnMetadata> columns, List<String> partitionColumnNames, List<PartitionValues> partitionValues) throws Exception {
    doCreateEmptyTable(tableName, ORC, columns, partitionColumnNames);
    HiveMetastoreClosure metastoreClient = new HiveMetastoreClosure(getMetastoreClient());
    Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
    // Derive the partition names first, then map them to PartitionWithStatistics —
    // instead of mutating two accumulator lists via side effects inside one forEach.
    List<String> partitionNames = partitionValues.stream()
            .map(partitionValue -> makePartName(partitionColumnNames, partitionValue.values))
            .collect(toImmutableList());
    List<PartitionWithStatistics> partitions = partitionNames.stream()
            .map(partitionName -> new PartitionWithStatistics(createDummyPartition(table, partitionName), partitionName, PartitionStatistics.empty()))
            .collect(toImmutableList());
    metastoreClient.addPartitions(tableName.getSchemaName(), tableName.getTableName(), partitions);
    // Reset statistics for each new partition to the empty baseline.
    partitionNames.forEach(partitionName -> metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), partitionName, currentStatistics -> EMPTY_TABLE_STATISTICS));
}
Also used : Arrays(java.util.Arrays) DateType(io.trino.spi.type.DateType) Test(org.testng.annotations.Test) NO_ACID_TRANSACTION(io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION) NUMBER_OF_DISTINCT_VALUES(io.trino.spi.statistics.ColumnStatisticType.NUMBER_OF_DISTINCT_VALUES) ColumnStatisticMetadata(io.trino.spi.statistics.ColumnStatisticMetadata) DeleteTableRequest(com.amazonaws.services.glue.model.DeleteTableRequest) GetDatabasesResult(com.amazonaws.services.glue.model.GetDatabasesResult) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) BoundedExecutor(io.airlift.concurrent.BoundedExecutor) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) Map(java.util.Map) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) PartitionWithStatistics(io.trino.plugin.hive.metastore.PartitionWithStatistics) ENGLISH(java.util.Locale.ENGLISH) HiveIdentity(io.trino.plugin.hive.authentication.HiveIdentity) Table(io.trino.plugin.hive.metastore.Table) Range(io.trino.spi.predicate.Range) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) TableInput(com.amazonaws.services.glue.model.TableInput) UpdateTableRequest(com.amazonaws.services.glue.model.UpdateTableRequest) HDFS_ENVIRONMENT(io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT) SchemaTableName(io.trino.spi.connector.SchemaTableName) EntityNotFoundException(com.amazonaws.services.glue.model.EntityNotFoundException) AWSGlueAsync(com.amazonaws.services.glue.AWSGlueAsync) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) Slice(io.airlift.slice.Slice) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) HiveBasicStatistics.createEmptyStatistics(io.trino.plugin.hive.HiveBasicStatistics.createEmptyStatistics) ArrayList(java.util.ArrayList) HiveType(io.trino.plugin.hive.HiveType) 
OptionalLong(java.util.OptionalLong) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) HiveMetastore(io.trino.plugin.hive.metastore.HiveMetastore) CreateTableRequest(com.amazonaws.services.glue.model.CreateTableRequest) TEXTFILE(io.trino.plugin.hive.HiveStorageFormat.TEXTFILE) NUMBER_OF_NON_NULL_VALUES(io.trino.spi.statistics.ColumnStatisticType.NUMBER_OF_NON_NULL_VALUES) HiveUtil.isIcebergTable(io.trino.plugin.hive.util.HiveUtil.isIcebergTable) Executor(java.util.concurrent.Executor) FileUtils.makePartName(org.apache.hadoop.hive.common.FileUtils.makePartName) SPARK_TABLE_PROVIDER_KEY(io.trino.plugin.hive.util.HiveUtil.SPARK_TABLE_PROVIDER_KEY) ConnectorSession(io.trino.spi.connector.ConnectorSession) MoreFutures.getFutureValue(io.airlift.concurrent.MoreFutures.getFutureValue) File(java.io.File) HiveMetastoreClosure(io.trino.plugin.hive.HiveMetastoreClosure) AwsSdkUtil.getPaginatedResults(io.trino.plugin.hive.metastore.glue.AwsSdkUtil.getPaginatedResults) MAX_VALUE(io.trino.spi.statistics.ColumnStatisticType.MAX_VALUE) Database(com.amazonaws.services.glue.model.Database) HiveUtil.isDeltaLakeTable(io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable) MaterializedResult(io.trino.testing.MaterializedResult) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) NO_RETRIES(io.trino.spi.connector.RetryMode.NO_RETRIES) ICEBERG_TABLE_TYPE_VALUE(io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_VALUE) ICEBERG_TABLE_TYPE_NAME(io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_NAME) GetDatabasesRequest(com.amazonaws.services.glue.model.GetDatabasesRequest) Block(io.trino.spi.block.Block) SmallintType(io.trino.spi.type.SmallintType) HiveBasicStatistics(io.trino.plugin.hive.HiveBasicStatistics) AWSGlueAsyncClientBuilder(com.amazonaws.services.glue.AWSGlueAsyncClientBuilder) ImmutableMap(com.google.common.collect.ImmutableMap) BeforeClass(org.testng.annotations.BeforeClass) 
Collection(java.util.Collection) ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) TrinoException(io.trino.spi.TrinoException) String.format(java.lang.String.format) List(java.util.List) DECIMAL_TYPE(io.trino.plugin.hive.metastore.glue.PartitionFilterBuilder.DECIMAL_TYPE) BIGINT(io.trino.spi.type.BigintType.BIGINT) PartitionFilterBuilder.decimalOf(io.trino.plugin.hive.metastore.glue.PartitionFilterBuilder.decimalOf) Optional(java.util.Optional) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) TableStatisticType(io.trino.spi.statistics.TableStatisticType) System.currentTimeMillis(java.lang.System.currentTimeMillis) Logger(io.airlift.log.Logger) MetastoreConfig(io.trino.plugin.hive.metastore.MetastoreConfig) Assert.assertEquals(org.testng.Assert.assertEquals) BigintType(io.trino.spi.type.BigintType) VarcharType(io.trino.spi.type.VarcharType) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) ImmutableList(com.google.common.collect.ImmutableList) Assertions.assertThatThrownBy(org.assertj.core.api.Assertions.assertThatThrownBy) DAYS(java.util.concurrent.TimeUnit.DAYS) GlueInputConverter(io.trino.plugin.hive.metastore.glue.converter.GlueInputConverter) DeleteDatabaseRequest(com.amazonaws.services.glue.model.DeleteDatabaseRequest) TinyintType(io.trino.spi.type.TinyintType) ConnectorPageSink(io.trino.spi.connector.ConnectorPageSink) IntegerType(io.trino.spi.type.IntegerType) DELTA_LAKE_PROVIDER(io.trino.plugin.hive.util.HiveUtil.DELTA_LAKE_PROVIDER) ORC(io.trino.plugin.hive.HiveStorageFormat.ORC) HiveColumnStatistics.createIntegerColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createIntegerColumnStatistics) TupleDomain(io.trino.spi.predicate.TupleDomain) UUID.randomUUID(java.util.UUID.randomUUID) AbstractTestHiveLocal(io.trino.plugin.hive.AbstractTestHiveLocal) Assert.assertTrue(org.testng.Assert.assertTrue) Collections(java.util.Collections) 
MIN_VALUE(io.trino.spi.statistics.ColumnStatisticType.MIN_VALUE) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) Table(io.trino.plugin.hive.metastore.Table) HiveUtil.isIcebergTable(io.trino.plugin.hive.util.HiveUtil.isIcebergTable) HiveUtil.isDeltaLakeTable(io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable) PartitionWithStatistics(io.trino.plugin.hive.metastore.PartitionWithStatistics) ArrayList(java.util.ArrayList) HiveMetastoreClosure(io.trino.plugin.hive.HiveMetastoreClosure)

Example 4 with HiveMetastoreClosure

use of io.trino.plugin.hive.HiveMetastoreClosure in project trino by trinodb.

From the class DropStatsProcedure, method doDropStats:

/**
 * Drops table or partition statistics for a Hive table.
 *
 * <p>If {@code partitionValues} is non-null, statistics are cleared only for the
 * specified partitions. Otherwise: for a non-partitioned table the table-level
 * statistics are cleared; for a partitioned table the statistics of every partition
 * are cleared.
 *
 * @param partitionValues list of partition value tuples, or {@code null} for the whole table
 * @throws TrinoException INVALID_PROCEDURE_ARGUMENT if the table does not exist
 */
private void doDropStats(ConnectorSession session, ConnectorAccessControl accessControl, String schema, String table, List<?> partitionValues) {
    TransactionalMetadata hiveMetadata = hiveMetadataFactory.create(session.getIdentity(), true);
    // Hoisted: previously constructed three separate times below.
    SchemaTableName schemaTableName = new SchemaTableName(schema, table);
    HiveTableHandle handle = (HiveTableHandle) hiveMetadata.getTableHandle(session, schemaTableName);
    if (handle == null) {
        throw new TrinoException(INVALID_PROCEDURE_ARGUMENT, format("Table '%s' does not exist", schemaTableName));
    }
    // null security context: procedure-level access check — TODO confirm this matches
    // how sibling procedures invoke ConnectorAccessControl.
    accessControl.checkCanInsertIntoTable(null, schemaTableName);
    Map<String, ColumnHandle> columns = hiveMetadata.getColumnHandles(session, handle);
    List<String> partitionColumns = columns.values().stream().map(HiveColumnHandle.class::cast).filter(HiveColumnHandle::isPartitionKey).map(HiveColumnHandle::getName).collect(toImmutableList());
    HiveMetastoreClosure metastore = hiveMetadata.getMetastore().unsafeGetRawHiveMetastoreClosure();
    if (partitionValues != null) {
        // drop stats for specified partitions
        List<List<String>> partitionStringValues = partitionValues.stream().map(DropStatsProcedure::validateParameterType).collect(toImmutableList());
        validatePartitions(partitionStringValues, partitionColumns);
        partitionStringValues.forEach(values -> metastore.updatePartitionStatistics(schema, table, makePartName(partitionColumns, values), stats -> PartitionStatistics.empty()));
    } else {
        // no partition specified, so drop stats for the entire table
        if (partitionColumns.isEmpty()) {
            // for non-partitioned tables, just wipe table stats
            metastore.updateTableStatistics(schema, table, NO_ACID_TRANSACTION, stats -> PartitionStatistics.empty());
        } else {
            // the table is partitioned; remove stats for every partition
            metastore.getPartitionNamesByFilter(handle.getSchemaName(), handle.getTableName(), partitionColumns, TupleDomain.all()).ifPresent(partitions -> partitions.forEach(partitionName -> metastore.updatePartitionStatistics(schema, table, partitionName, stats -> PartitionStatistics.empty())));
        }
    }
    hiveMetadata.commit();
}
Also used : PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) MethodHandle(java.lang.invoke.MethodHandle) Provider(javax.inject.Provider) TransactionalMetadataFactory(io.trino.plugin.hive.TransactionalMetadataFactory) MethodHandleUtil.methodHandle(io.trino.spi.block.MethodHandleUtil.methodHandle) NO_ACID_TRANSACTION(io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION) Inject(javax.inject.Inject) INVALID_PROCEDURE_ARGUMENT(io.trino.spi.StandardErrorCode.INVALID_PROCEDURE_ARGUMENT) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) ImmutableList(com.google.common.collect.ImmutableList) Procedure(io.trino.spi.procedure.Procedure) HiveTableHandle(io.trino.plugin.hive.HiveTableHandle) TransactionalMetadata(io.trino.plugin.hive.TransactionalMetadata) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) ColumnHandle(io.trino.spi.connector.ColumnHandle) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) Argument(io.trino.spi.procedure.Procedure.Argument) FileUtils.makePartName(org.apache.hadoop.hive.metastore.utils.FileUtils.makePartName) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ConnectorAccessControl(io.trino.spi.connector.ConnectorAccessControl) TrinoException(io.trino.spi.TrinoException) ArrayType(io.trino.spi.type.ArrayType) ConnectorSession(io.trino.spi.connector.ConnectorSession) TupleDomain(io.trino.spi.predicate.TupleDomain) ThreadContextClassLoader(io.trino.spi.classloader.ThreadContextClassLoader) SchemaTableName(io.trino.spi.connector.SchemaTableName) String.format(java.lang.String.format) List(java.util.List) HiveMetastoreClosure(io.trino.plugin.hive.HiveMetastoreClosure) ColumnHandle(io.trino.spi.connector.ColumnHandle) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) TransactionalMetadata(io.trino.plugin.hive.TransactionalMetadata) SchemaTableName(io.trino.spi.connector.SchemaTableName) HiveTableHandle(io.trino.plugin.hive.HiveTableHandle) 
TrinoException(io.trino.spi.TrinoException) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) HiveMetastoreClosure(io.trino.plugin.hive.HiveMetastoreClosure) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle)

Example 5 with HiveMetastoreClosure

use of io.trino.plugin.hive.HiveMetastoreClosure in project trino by trinodb.

From the class TestHiveGlueMetastore, method setup:

@BeforeClass
public void setup() {
    // Wrap the raw metastore client in a HiveMetastoreClosure so the tests go
    // through the same convenience API that production code paths use.
    metastore = new HiveMetastoreClosure(metastoreClient);
    // Real Glue client from the default builder — assumes the test environment
    // supplies valid AWS credentials/region (TODO confirm; this hits AWS).
    glueClient = AWSGlueAsyncClientBuilder.defaultClient();
}
Also used : HiveMetastoreClosure(io.trino.plugin.hive.HiveMetastoreClosure) BeforeClass(org.testng.annotations.BeforeClass)

Aggregations

HiveMetastoreClosure (io.trino.plugin.hive.HiveMetastoreClosure)5 TrinoException (io.trino.spi.TrinoException)3 SchemaTableName (io.trino.spi.connector.SchemaTableName)3 ImmutableList (com.google.common.collect.ImmutableList)2 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)2 Slice (io.airlift.slice.Slice)2 HiveTableHandle (io.trino.plugin.hive.HiveTableHandle)2 PartitionStatistics (io.trino.plugin.hive.PartitionStatistics)2 TransactionalMetadata (io.trino.plugin.hive.TransactionalMetadata)2 NO_ACID_TRANSACTION (io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION)2 Table (io.trino.plugin.hive.metastore.Table)2 Test (org.testng.annotations.Test)2 AWSGlueAsync (com.amazonaws.services.glue.AWSGlueAsync)1 AWSGlueAsyncClientBuilder (com.amazonaws.services.glue.AWSGlueAsyncClientBuilder)1 CreateTableRequest (com.amazonaws.services.glue.model.CreateTableRequest)1 Database (com.amazonaws.services.glue.model.Database)1 DeleteDatabaseRequest (com.amazonaws.services.glue.model.DeleteDatabaseRequest)1 DeleteTableRequest (com.amazonaws.services.glue.model.DeleteTableRequest)1 EntityNotFoundException (com.amazonaws.services.glue.model.EntityNotFoundException)1 GetDatabasesRequest (com.amazonaws.services.glue.model.GetDatabasesRequest)1