Search in sources :

Example 1 with GlueInputConverter.convertPartition

use of io.trino.plugin.hive.metastore.glue.converter.GlueInputConverter.convertPartition in project trino by trinodb.

In the class GlueHiveMetastore, method addPartitions.

/**
 * Creates the given partitions in Glue and then records their column statistics.
 *
 * <p>The partitions are split into pages of at most {@code BATCH_CREATE_PARTITION_MAX_PAGE_SIZE}
 * entries. Every page is submitted asynchronously before any result is awaited; the results are
 * then checked one by one through {@code propagatePartitionErrorToTrinoException}. Column
 * statistics are handed to {@code columnStatisticsProvider} only after all pages have been checked.
 *
 * @param databaseName name of the database that owns the table
 * @param tableName name of the table the partitions belong to
 * @param partitions partitions to create, together with their statistics
 * @throws TrinoException with {@code HIVE_METASTORE_ERROR} when the call fails with an
 *         {@code AmazonServiceException} or {@code ExecutionException}, or when the waiting
 *         thread is interrupted (the interrupt flag is restored first)
 */
@Override
public void addPartitions(String databaseName, String tableName, List<PartitionWithStatistics> partitions) {
    try {
        stats.getCreatePartitions().call(() -> {
            // Phase 1: fire off one asynchronous request per page.
            List<Future<BatchCreatePartitionResult>> pendingBatches = new ArrayList<>();
            for (List<PartitionWithStatistics> batch : Lists.partition(partitions, BATCH_CREATE_PARTITION_MAX_PAGE_SIZE)) {
                List<PartitionInput> inputs = mappedCopy(batch, partition -> GlueInputConverter.convertPartition(partition));
                // Timestamp is taken before the request is issued and passed to the stats handler.
                long submittedAt = System.currentTimeMillis();
                BatchCreatePartitionRequest request = new BatchCreatePartitionRequest()
                        .withCatalogId(catalogId)
                        .withDatabaseName(databaseName)
                        .withTableName(tableName)
                        .withPartitionInputList(inputs);
                pendingBatches.add(glueClient.batchCreatePartitionAsync(
                        request,
                        new StatsRecordingAsyncHandler(stats.getBatchCreatePartition(), submittedAt)));
            }

            // Phase 2: wait for each page in submission order and surface per-partition errors.
            for (Future<BatchCreatePartitionResult> pending : pendingBatches) {
                try {
                    BatchCreatePartitionResult outcome = pending.get();
                    propagatePartitionErrorToTrinoException(databaseName, tableName, outcome.getErrors());
                } catch (InterruptedException e) {
                    // Restore the interrupt flag so callers further up can observe it.
                    Thread.currentThread().interrupt();
                    throw new TrinoException(HIVE_METASTORE_ERROR, e);
                }
            }

            // Phase 3: persist column statistics for every partition that was passed in.
            Set<GlueColumnStatisticsProvider.PartitionStatisticsUpdate> statisticsUpdates = partitions.stream()
                    .map(entry -> new GlueColumnStatisticsProvider.PartitionStatisticsUpdate(
                            entry.getPartition(),
                            entry.getStatistics().getColumnStatistics()))
                    .collect(toImmutableSet());
            columnStatisticsProvider.updatePartitionStatistics(statisticsUpdates);
            return null;
        });
    } catch (AmazonServiceException | ExecutionException e) {
        throw new TrinoException(HIVE_METASTORE_ERROR, e);
    }
}
Also used : ThriftMetastoreUtil.updateStatisticsParameters(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.updateStatisticsParameters) AWSStaticCredentialsProvider(com.amazonaws.auth.AWSStaticCredentialsProvider) UnaryOperator.identity(java.util.function.UnaryOperator.identity) DefaultAWSCredentialsProviderChain(com.amazonaws.auth.DefaultAWSCredentialsProviderChain) USER(io.trino.spi.security.PrincipalType.USER) RequestMetricCollector(com.amazonaws.metrics.RequestMetricCollector) DeleteTableRequest(com.amazonaws.services.glue.model.DeleteTableRequest) ColumnStatisticType(io.trino.spi.statistics.ColumnStatisticType) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) Future(java.util.concurrent.Future) GetDatabasesResult(com.amazonaws.services.glue.model.GetDatabasesResult) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) Column(io.trino.plugin.hive.metastore.Column) Map(java.util.Map) PartitionWithStatistics(io.trino.plugin.hive.metastore.PartitionWithStatistics) BatchCreatePartitionRequest(com.amazonaws.services.glue.model.BatchCreatePartitionRequest) GetTablesResult(com.amazonaws.services.glue.model.GetTablesResult) RequestHandler2(com.amazonaws.handlers.RequestHandler2) AcidTransaction(io.trino.plugin.hive.acid.AcidTransaction) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) Table(io.trino.plugin.hive.metastore.Table) ConnectorIdentity(io.trino.spi.security.ConnectorIdentity) AmazonServiceException(com.amazonaws.AmazonServiceException) GlueToTrinoConverter.mappedCopy(io.trino.plugin.hive.metastore.glue.converter.GlueToTrinoConverter.mappedCopy) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) DeletePartitionRequest(com.amazonaws.services.glue.model.DeletePartitionRequest) Set(java.util.Set) DatabaseInput(com.amazonaws.services.glue.model.DatabaseInput) TableInput(com.amazonaws.services.glue.model.TableInput) UpdateTableRequest(com.amazonaws.services.glue.model.UpdateTableRequest) 
MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) SchemaTableName(io.trino.spi.connector.SchemaTableName) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) BatchUpdatePartitionRequestEntry(com.amazonaws.services.glue.model.BatchUpdatePartitionRequestEntry) PartitionInput(com.amazonaws.services.glue.model.PartitionInput) GlueInputConverter.convertPartition(io.trino.plugin.hive.metastore.glue.converter.GlueInputConverter.convertPartition) EntityNotFoundException(com.amazonaws.services.glue.model.EntityNotFoundException) AWSGlueAsync(com.amazonaws.services.glue.AWSGlueAsync) Partition(io.trino.plugin.hive.metastore.Partition) GetPartitionsRequest(com.amazonaws.services.glue.model.GetPartitionsRequest) Segment(com.amazonaws.services.glue.model.Segment) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) HivePrincipal(io.trino.plugin.hive.metastore.HivePrincipal) HiveUtil(io.trino.plugin.hive.util.HiveUtil) Iterables(com.google.common.collect.Iterables) GetPartitionResult(com.amazonaws.services.glue.model.GetPartitionResult) Strings.isNullOrEmpty(com.google.common.base.Strings.isNullOrEmpty) PartitionNotFoundException(io.trino.plugin.hive.PartitionNotFoundException) ColumnNotFoundException(io.trino.spi.connector.ColumnNotFoundException) ArrayList(java.util.ArrayList) HiveType(io.trino.plugin.hive.HiveType) OptionalLong(java.util.OptionalLong) Comparators.lexicographical(com.google.common.collect.Comparators.lexicographical) Lists(com.google.common.collect.Lists) HiveMetastore(io.trino.plugin.hive.metastore.HiveMetastore) AlreadyExistsException(com.amazonaws.services.glue.model.AlreadyExistsException) AWSCredentialsProvider(com.amazonaws.auth.AWSCredentialsProvider) CreateTableRequest(com.amazonaws.services.glue.model.CreateTableRequest) SchemaAlreadyExistsException(io.trino.plugin.hive.SchemaAlreadyExistsException) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) 
CreateDatabaseRequest(com.amazonaws.services.glue.model.CreateDatabaseRequest) HiveWriteUtils(io.trino.plugin.hive.util.HiveWriteUtils) Nullable(javax.annotation.Nullable) Executor(java.util.concurrent.Executor) MoreFutures(io.airlift.concurrent.MoreFutures) PartitionValueList(com.amazonaws.services.glue.model.PartitionValueList) GetTableResult(com.amazonaws.services.glue.model.GetTableResult) RoleGrant(io.trino.spi.security.RoleGrant) ExecutionException(java.util.concurrent.ExecutionException) ClientConfiguration(com.amazonaws.ClientConfiguration) AwsSdkUtil.getPaginatedResults(io.trino.plugin.hive.metastore.glue.AwsSdkUtil.getPaginatedResults) AsyncHandler(com.amazonaws.handlers.AsyncHandler) GetPartitionsResult(com.amazonaws.services.glue.model.GetPartitionsResult) HivePrivilege(io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege) ThriftMetastoreUtil.getHiveBasicStatistics(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics) MetastoreUtil.makePartitionName(io.trino.plugin.hive.metastore.MetastoreUtil.makePartitionName) HiveUtil.toPartitionValues(io.trino.plugin.hive.util.HiveUtil.toPartitionValues) Database(io.trino.plugin.hive.metastore.Database) GetDatabaseRequest(com.amazonaws.services.glue.model.GetDatabaseRequest) SchemaNotFoundException(io.trino.spi.connector.SchemaNotFoundException) CompletionService(java.util.concurrent.CompletionService) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) GetDatabasesRequest(com.amazonaws.services.glue.model.GetDatabasesRequest) Collectors.toMap(java.util.stream.Collectors.toMap) ALREADY_EXISTS(io.trino.spi.StandardErrorCode.ALREADY_EXISTS) Path(org.apache.hadoop.fs.Path) BatchUpdatePartitionResult(com.amazonaws.services.glue.model.BatchUpdatePartitionResult) ImmutableSet(com.google.common.collect.ImmutableSet) AWSGlueAsyncClientBuilder(com.amazonaws.services.glue.AWSGlueAsyncClientBuilder) ImmutableMap(com.google.common.collect.ImmutableMap) 
Predicate(java.util.function.Predicate) GluePartitionConverter(io.trino.plugin.hive.metastore.glue.converter.GlueToTrinoConverter.GluePartitionConverter) TableAlreadyExistsException(io.trino.plugin.hive.TableAlreadyExistsException) TrinoException(io.trino.spi.TrinoException) STSAssumeRoleSessionCredentialsProvider(com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider) String.format(java.lang.String.format) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) GetTableRequest(com.amazonaws.services.glue.model.GetTableRequest) PartitionError(com.amazonaws.services.glue.model.PartitionError) Entry(java.util.Map.Entry) Optional(java.util.Optional) HivePrivilegeInfo(io.trino.plugin.hive.metastore.HivePrivilegeInfo) UpdateDatabaseRequest(com.amazonaws.services.glue.model.UpdateDatabaseRequest) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) Logger(io.airlift.log.Logger) Type(io.trino.spi.type.Type) Function(java.util.function.Function) Inject(javax.inject.Inject) EndpointConfiguration(com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration) GetPartitionRequest(com.amazonaws.services.glue.model.GetPartitionRequest) Collectors.toCollection(java.util.stream.Collectors.toCollection) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) HIVE_METASTORE_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_METASTORE_ERROR) Objects.requireNonNull(java.util.Objects.requireNonNull) GlueInputConverter(io.trino.plugin.hive.metastore.glue.converter.GlueInputConverter) DeleteDatabaseRequest(com.amazonaws.services.glue.model.DeleteDatabaseRequest) Comparator.comparing(java.util.Comparator.comparing) VIRTUAL_VIEW(org.apache.hadoop.hive.metastore.TableType.VIRTUAL_VIEW) 
AwsCurrentRegionHolder.getCurrentRegionFromEC2Metadata(io.trino.plugin.hive.aws.AwsCurrentRegionHolder.getCurrentRegionFromEC2Metadata) BatchGetPartitionRequest(com.amazonaws.services.glue.model.BatchGetPartitionRequest) AmazonWebServiceRequest(com.amazonaws.AmazonWebServiceRequest) BatchCreatePartitionResult(com.amazonaws.services.glue.model.BatchCreatePartitionResult) BasicAWSCredentials(com.amazonaws.auth.BasicAWSCredentials) BatchGetPartitionResult(com.amazonaws.services.glue.model.BatchGetPartitionResult) ErrorDetail(com.amazonaws.services.glue.model.ErrorDetail) TupleDomain(io.trino.spi.predicate.TupleDomain) BatchUpdatePartitionRequest(com.amazonaws.services.glue.model.BatchUpdatePartitionRequest) GlueToTrinoConverter(io.trino.plugin.hive.metastore.glue.converter.GlueToTrinoConverter) GetDatabaseResult(com.amazonaws.services.glue.model.GetDatabaseResult) GetTablesRequest(com.amazonaws.services.glue.model.GetTablesRequest) MetastoreUtil.verifyCanDropColumn(io.trino.plugin.hive.metastore.MetastoreUtil.verifyCanDropColumn) UpdatePartitionRequest(com.amazonaws.services.glue.model.UpdatePartitionRequest) PrincipalPrivileges(io.trino.plugin.hive.metastore.PrincipalPrivileges) Comparator(java.util.Comparator) ArrayList(java.util.ArrayList) PartitionInput(com.amazonaws.services.glue.model.PartitionInput) BatchCreatePartitionRequest(com.amazonaws.services.glue.model.BatchCreatePartitionRequest) PartitionWithStatistics(io.trino.plugin.hive.metastore.PartitionWithStatistics) BatchCreatePartitionResult(com.amazonaws.services.glue.model.BatchCreatePartitionResult) AmazonServiceException(com.amazonaws.AmazonServiceException) Future(java.util.concurrent.Future) TrinoException(io.trino.spi.TrinoException) ExecutionException(java.util.concurrent.ExecutionException)

Example 2 with GlueInputConverter.convertPartition

use of io.trino.plugin.hive.metastore.glue.converter.GlueInputConverter.convertPartition in project trino by trinodb.

In the class GlueHiveMetastore, method updatePartitionStatisticsBatch.

/**
 * Applies the supplied statistics updates to a set of partitions of {@code table}.
 *
 * <p>The partitions named by the keys of {@code updates} are fetched with
 * {@code batchGetPartition}, and their current column statistics are read from
 * {@code columnStatisticsProvider}. For each partition the matching update function is applied to
 * its current statistics; the resulting basic statistics are written into the partition
 * parameters and the resulting column statistics are queued for the provider. Partition updates
 * are sent asynchronously in pages of at most {@code BATCH_UPDATE_PARTITION_MAX_PAGE_SIZE}
 * entries, and they are awaited only after the column statistics update has been issued.
 *
 * <p>NOTE(review): the {@code BatchUpdatePartitionResult} values returned by the futures are
 * discarded here, so any per-partition errors they may carry are not inspected — confirm this is
 * intended.
 *
 * @param table table whose partitions are updated
 * @param updates update functions keyed by partition name
 * @throws TrinoException with {@code HIVE_METASTORE_ERROR} when an
 *         {@code AmazonServiceException} is raised while updating column statistics or waiting
 *         for the batch updates
 */
private void updatePartitionStatisticsBatch(Table table, Map<String, Function<PartitionStatistics, PartitionStatistics>> updates) {
    // Reverse lookup: partition values -> partition name, used to find the update for a fetched partition.
    Map<List<String>, String> nameByValues = updates.keySet().stream()
            .collect(toImmutableMap(HiveUtil::toPartitionValues, identity()));
    List<Partition> existingPartitions = batchGetPartition(table, ImmutableList.copyOf(updates.keySet()));
    Map<Partition, Map<String, HiveColumnStatistics>> currentColumnStatistics =
            columnStatisticsProvider.getPartitionColumnStatistics(existingPartitions);

    ImmutableList.Builder<BatchUpdatePartitionRequestEntry> requestEntries = ImmutableList.builder();
    ImmutableSet.Builder<GlueColumnStatisticsProvider.PartitionStatisticsUpdate> columnStatisticsUpdates = ImmutableSet.builder();
    for (Map.Entry<Partition, Map<String, HiveColumnStatistics>> entry : currentColumnStatistics.entrySet()) {
        Partition original = entry.getKey();
        Function<PartitionStatistics, PartitionStatistics> update = updates.get(nameByValues.get(original.getValues()));
        PartitionStatistics before = new PartitionStatistics(getHiveBasicStatistics(original.getParameters()), entry.getValue());
        PartitionStatistics after = update.apply(before);

        // Fold the updated basic statistics back into the partition parameters.
        Map<String, String> refreshedParameters = updateStatisticsParameters(original.getParameters(), after.getBasicStatistics());
        Partition refreshed = Partition.builder(original)
                .setParameters(refreshedParameters)
                .build();
        Map<String, HiveColumnStatistics> refreshedColumnStatistics = after.getColumnStatistics();

        PartitionInput partitionInput = GlueInputConverter.convertPartition(refreshed);
        partitionInput.setParameters(refreshed.getParameters());
        requestEntries.add(new BatchUpdatePartitionRequestEntry()
                .withPartitionValueList(refreshed.getValues())
                .withPartitionInput(partitionInput));
        columnStatisticsUpdates.add(new GlueColumnStatisticsProvider.PartitionStatisticsUpdate(refreshed, refreshedColumnStatistics));
    }

    List<Future<BatchUpdatePartitionResult>> pendingUpdates = new ArrayList<>();
    for (List<BatchUpdatePartitionRequestEntry> page : Lists.partition(requestEntries.build(), BATCH_UPDATE_PARTITION_MAX_PAGE_SIZE)) {
        // Update basic statistics
        long submittedAt = System.currentTimeMillis();
        BatchUpdatePartitionRequest request = new BatchUpdatePartitionRequest()
                .withCatalogId(catalogId)
                .withDatabaseName(table.getDatabaseName())
                .withTableName(table.getTableName())
                .withEntries(page);
        pendingUpdates.add(glueClient.batchUpdatePartitionAsync(
                request,
                new StatsRecordingAsyncHandler(stats.getBatchUpdatePartition(), submittedAt)));
    }

    try {
        // Update column statistics
        columnStatisticsProvider.updatePartitionStatistics(columnStatisticsUpdates.build());
        // Don't block on the batch update call until the column statistics have finished updating
        for (Future<BatchUpdatePartitionResult> pending : pendingUpdates) {
            MoreFutures.getFutureValue(pending);
        }
    } catch (AmazonServiceException e) {
        throw new TrinoException(HIVE_METASTORE_ERROR, e);
    }
}
Also used : ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) PartitionInput(com.amazonaws.services.glue.model.PartitionInput) BatchUpdatePartitionRequestEntry(com.amazonaws.services.glue.model.BatchUpdatePartitionRequestEntry) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) PartitionValueList(com.amazonaws.services.glue.model.PartitionValueList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) GlueInputConverter.convertPartition(io.trino.plugin.hive.metastore.glue.converter.GlueInputConverter.convertPartition) Partition(io.trino.plugin.hive.metastore.Partition) BatchUpdatePartitionRequest(com.amazonaws.services.glue.model.BatchUpdatePartitionRequest) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) AmazonServiceException(com.amazonaws.AmazonServiceException) Future(java.util.concurrent.Future) TrinoException(io.trino.spi.TrinoException) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Collectors.toMap(java.util.stream.Collectors.toMap) ImmutableMap(com.google.common.collect.ImmutableMap) MoreFutures(io.airlift.concurrent.MoreFutures)

Aggregations

AmazonServiceException (com.amazonaws.AmazonServiceException)2 BatchUpdatePartitionRequest (com.amazonaws.services.glue.model.BatchUpdatePartitionRequest)2 BatchUpdatePartitionRequestEntry (com.amazonaws.services.glue.model.BatchUpdatePartitionRequestEntry)2 AmazonWebServiceRequest (com.amazonaws.AmazonWebServiceRequest)1 ClientConfiguration (com.amazonaws.ClientConfiguration)1 AWSCredentialsProvider (com.amazonaws.auth.AWSCredentialsProvider)1 AWSStaticCredentialsProvider (com.amazonaws.auth.AWSStaticCredentialsProvider)1 BasicAWSCredentials (com.amazonaws.auth.BasicAWSCredentials)1 DefaultAWSCredentialsProviderChain (com.amazonaws.auth.DefaultAWSCredentialsProviderChain)1 STSAssumeRoleSessionCredentialsProvider (com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider)1 EndpointConfiguration (com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration)1 AsyncHandler (com.amazonaws.handlers.AsyncHandler)1 RequestHandler2 (com.amazonaws.handlers.RequestHandler2)1 RequestMetricCollector (com.amazonaws.metrics.RequestMetricCollector)1 AWSGlueAsync (com.amazonaws.services.glue.AWSGlueAsync)1 AWSGlueAsyncClientBuilder (com.amazonaws.services.glue.AWSGlueAsyncClientBuilder)1 AlreadyExistsException (com.amazonaws.services.glue.model.AlreadyExistsException)1 BatchCreatePartitionRequest (com.amazonaws.services.glue.model.BatchCreatePartitionRequest)1 BatchCreatePartitionResult (com.amazonaws.services.glue.model.BatchCreatePartitionResult)1 BatchGetPartitionRequest (com.amazonaws.services.glue.model.BatchGetPartitionRequest)1